From 7baf1e9d7972aa71e24c05783bf3a1225c9ec07f Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Tue, 4 Feb 2025 18:02:38 +0000 Subject: [PATCH] Replaced std::string with std::string_view and removed excessive copies in cudf::io (#17734) As part of the improvement effort discussed in #15907, this merge request removes some of the excessive `std::string` copies and uses `std::string_view` in place of `std::string` when the lifetime semantics are clear. `std::string` is only replaced in this MR in linear functions and constructors, but not in structs as there's no established ownership or lifetime semantics to guarantee the `string_view`s will not outlive their source. There were also some cases of excessive copies, e.g. consider: ```cpp struct source_info{ source_info(std::string const& s) : str{s}{} private: std::string str; }; ``` In the above example, the string is likely to be allocated twice if a temporary/string-literal is used to construct "s": one for the temporary and one for the copy constructor for `str`. Taking the parameter by value and moving it into the member avoids the second allocation: ```cpp struct source_info{ source_info(std::string s) : str{std::move(s)}{} private: std::string str; }; ``` With this form, the string is allocated only once in all scenarios. This also applies to `std::vector` and is arguably worse as there's no small-vector optimization (the equivalent of `std::string`'s small-string optimization, SSO). 
Authors: - Basit Ayantunde (https://github.com/lamarrr) - Muhammad Haseeb (https://github.com/mhaseeb123) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Tianyu Liu (https://github.com/kingcrimsontianyu) - Muhammad Haseeb (https://github.com/mhaseeb123) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17734 --- cpp/benchmarks/json/json.cu | 10 +-- cpp/benchmarks/string/join_strings.cpp | 6 +- cpp/benchmarks/string/like.cpp | 4 +- cpp/benchmarks/string/replace_re.cpp | 4 +- cpp/examples/strings/libcudf_apis.cpp | 4 +- cpp/include/cudf/io/csv.hpp | 20 ++--- cpp/include/cudf/io/text/detail/trie.hpp | 9 +- cpp/include/cudf/io/text/multibyte_split.hpp | 5 +- cpp/include/cudf/io/types.hpp | 26 +++--- cpp/include/cudf/scalar/scalar.hpp | 4 +- cpp/src/io/avro/avro.cpp | 15 ++-- cpp/src/io/avro/avro.hpp | 4 +- cpp/src/io/csv/reader_impl.cu | 34 ++++++-- cpp/src/io/text/multibyte_split.cu | 6 +- cpp/src/scalar/scalar.cpp | 2 +- cpp/tests/json/json_tests.cpp | 86 ++++++++++---------- cpp/tests/strings/case_tests.cpp | 8 +- cpp/tests/strings/like_tests.cpp | 52 ++++++------ cpp/tests/text/bpe_tests.cpp | 4 +- cpp/tests/text/ngrams_tokenize_tests.cpp | 11 ++- 20 files changed, 170 insertions(+), 144 deletions(-) diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu index 6d01f132189..23c86995876 100644 --- a/cpp/benchmarks/json/json.cu +++ b/cpp/benchmarks/json/json.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -190,10 +190,10 @@ static void bench_query(nvbench::state& state) { srand(5236); - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const desired_bytes = static_cast(state.get_int64("bytes")); - auto const query = state.get_int64("query"); - auto const json_path = queries[query]; + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const desired_bytes = static_cast(state.get_int64("bytes")); + auto const query = state.get_int64("query"); + std::string_view const json_path = queries[query]; auto const stream = cudf::get_default_stream(); auto input = build_json_string_column(desired_bytes, num_rows); diff --git a/cpp/benchmarks/string/join_strings.cpp b/cpp/benchmarks/string/join_strings.cpp index 27652193b7b..5efb4a517e6 100644 --- a/cpp/benchmarks/string/join_strings.cpp +++ b/cpp/benchmarks/string/join_strings.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,8 +41,8 @@ static void bench_join(nvbench::state& state) state.add_global_memory_reads(chars_size); // all bytes are read; state.add_global_memory_writes(chars_size); // all bytes are written - std::string separator(":"); - std::string narep("null"); + std::string_view separator(":"); + std::string_view narep("null"); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::strings::join_strings(input, separator, narep); }); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index f6410aaef30..fa7a70f1fe8 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,7 +34,7 @@ static void bench_like(nvbench::state& state) auto input = cudf::strings_column_view(col->view()); // This pattern forces reading the entire target string (when matched expected) - auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4.$" + auto pattern = std::string_view("% 5W4_"); // regex equivalent: ".* 5W4.$" state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index 69426a2d484..11984e5defd 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -49,7 +49,7 @@ static void bench_replace(nvbench::state& state) cudf::strings::replace_with_backrefs(input, *program, replacement); }); } else { - auto replacement = std::string("77"); + auto replacement = std::string_view("77"); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { cudf::strings::replace_re(input, *program, replacement); }); diff --git a/cpp/examples/strings/libcudf_apis.cpp b/cpp/examples/strings/libcudf_apis.cpp index f5f1eb048f1..68b90ddb0a6 100644 --- a/cpp/examples/strings/libcudf_apis.cpp +++ b/cpp/examples/strings/libcudf_apis.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -53,7 +53,7 @@ std::unique_ptr redact_strings(cudf::column_view const& names, auto const last_initial_first = cudf::table_view({last_initial->view(), first}); - auto result = cudf::strings::concatenate(last_initial_first, std::string(" ")); + auto result = cudf::strings::concatenate(last_initial_first, std::string_view(" ")); cudaStreamSynchronize(0); diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index 9b2de7c72ec..bb2684cff7b 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -916,7 +916,7 @@ class csv_reader_options_builder { */ csv_reader_options_builder& prefix(std::string pfx) { - options._prefix = pfx; + options._prefix = std::move(pfx); return *this; } @@ -1450,7 +1450,7 @@ class csv_writer_options { * * @return string to used for null entries */ - [[nodiscard]] std::string get_na_rep() const { return _na_rep; } + [[nodiscard]] std::string const& get_na_rep() const { return _na_rep; } /** * @brief Whether to write headers to csv. @@ -1471,7 +1471,7 @@ class csv_writer_options { * * @return Character used for separating lines */ - [[nodiscard]] std::string get_line_terminator() const { return _line_terminator; } + [[nodiscard]] std::string const& get_line_terminator() const { return _line_terminator; } /** * @brief Returns character used for separating column values. @@ -1485,14 +1485,14 @@ class csv_writer_options { * * @return string used for values != 0 in INT8 types */ - [[nodiscard]] std::string get_true_value() const { return _true_value; } + [[nodiscard]] std::string const& get_true_value() const { return _true_value; } /** * @brief Returns string used for values == 0 in INT8 types. 
* * @return string used for values == 0 in INT8 types */ - [[nodiscard]] std::string get_false_value() const { return _false_value; } + [[nodiscard]] std::string const& get_false_value() const { return _false_value; } /** * @brief Returns the quote style for the writer. @@ -1519,7 +1519,7 @@ class csv_writer_options { * * @param val String to represent null value */ - void set_na_rep(std::string val) { _na_rep = val; } + void set_na_rep(std::string val) { _na_rep = std::move(val); } /** * @brief Enables/Disables headers being written to csv. @@ -1540,7 +1540,7 @@ class csv_writer_options { * * @param term Character to represent line termination */ - void set_line_terminator(std::string term) { _line_terminator = term; } + void set_line_terminator(std::string term) { _line_terminator = std::move(term); } /** * @brief Sets character used for separating column values. @@ -1554,14 +1554,14 @@ class csv_writer_options { * * @param val String to represent values != 0 in INT8 types */ - void set_true_value(std::string val) { _true_value = val; } + void set_true_value(std::string val) { _true_value = std::move(val); } /** * @brief Sets string used for values == 0 in INT8 types. * * @param val String to represent values == 0 in INT8 types */ - void set_false_value(std::string val) { _false_value = val; } + void set_false_value(std::string val) { _false_value = std::move(val); } /** * @brief (Re)sets the table being written. diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index 70e06eeac93..d55195c7871 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,7 @@ struct trie { /** * @brief Insert the string in to the trie tree, growing the trie as necessary */ - void insert(std::string s) { insert(s.c_str(), s.size(), 0); } + void insert(std::string_view s) { insert(s.data(), s.size(), 0); } private: trie_builder_node& insert(char const* s, uint16_t size, uint8_t depth) @@ -164,12 +165,12 @@ struct trie { * @param mr Memory resource to use for the device memory allocation * @return The trie. */ - static trie create(std::string const& pattern, + static trie create(std::string pattern, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return create(std::vector{pattern}, stream, mr); + return create(std::vector{std::move(pattern)}, stream, mr); } /** diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp index 99f9e7534ac..afbce74c096 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,6 +25,7 @@ #include #include +#include namespace CUDF_EXPORT cudf { namespace io { @@ -90,7 +91,7 @@ struct parse_options { */ std::unique_ptr multibyte_split( data_chunk_source const& source, - std::string const& delimiter, + std::string_view delimiter, parse_options options = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 9e171a62f78..ba9ac33a984 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -353,8 +353,8 @@ struct source_info { * * @param file_paths Input files paths */ - explicit source_info(std::vector const& file_paths) - : _type(io_type::FILEPATH), _filepaths(file_paths) + explicit source_info(std::vector file_paths) + : _type(io_type::FILEPATH), _filepaths(std::move(file_paths)) { } @@ -363,8 +363,8 @@ struct source_info { * * @param file_path Single input file */ - explicit source_info(std::string const& file_path) - : _type(io_type::FILEPATH), _filepaths({file_path}) + explicit source_info(std::string file_path) + : _type(io_type::FILEPATH), _filepaths({std::move(file_path)}) { } @@ -534,8 +534,8 @@ struct sink_info { * * @param file_paths Output files paths */ - explicit sink_info(std::vector const& file_paths) - : _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(file_paths) + explicit sink_info(std::vector file_paths) + : _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(std::move(file_paths)) { } @@ -544,8 +544,8 @@ struct sink_info { * * @param file_path Single output file path */ - explicit sink_info(std::string const& file_path) - : _type(io_type::FILEPATH), _filepaths({file_path}) + explicit sink_info(std::string file_path) + : _type(io_type::FILEPATH), _filepaths({std::move(file_path)}) { } @@ -554,8 +554,8 @@ struct sink_info { * * @param buffers Output host buffers */ - explicit 
sink_info(std::vector*> const& buffers) - : _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(buffers) + explicit sink_info(std::vector*> buffers) + : _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(std::move(buffers)) { } /** @@ -571,7 +571,9 @@ struct sink_info { * @param user_sinks Output user-implemented sinks */ explicit sink_info(std::vector const& user_sinks) - : _type(io_type::USER_IMPLEMENTED), _num_sinks(user_sinks.size()), _user_sinks(user_sinks) + : _type(io_type::USER_IMPLEMENTED), + _num_sinks(user_sinks.size()), + _user_sinks(std::move(user_sinks)) { } @@ -821,7 +823,7 @@ class column_in_metadata { * * @return The name of this column */ - [[nodiscard]] std::string get_name() const noexcept { return _name; } + [[nodiscard]] std::string const& get_name() const noexcept { return _name; } /** * @brief Get whether nullability has been explicitly set for this column. diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 4bee369a123..532448c10d2 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -27,6 +27,8 @@ #include #include +#include + /** * @file * @brief Class definitions for cudf::scalar @@ -454,7 +456,7 @@ class string_scalar : public scalar { * @param stream CUDA stream used for device memory operations. * @param mr Device memory resource to use for device memory allocation. */ - string_scalar(std::string const& string, + string_scalar(std::string_view string, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index c3a7f0f3053..3541732e1c8 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -297,7 +297,7 @@ enum attrtype_e { * * @returns true if successful, false if error */ -bool schema_parser::parse(std::vector& schema, std::string const& json_str) +bool schema_parser::parse(std::vector& schema, std::string_view json_str) { // Empty schema if (json_str == "[]") return true; @@ -306,7 +306,7 @@ bool schema_parser::parse(std::vector& schema, std::string const& int depth = 0, parent_idx = -1, entry_idx = -1; json_state_e state = state_attrname; std::string str; - std::unordered_map const typenames = { + std::unordered_map const typenames = { {"null", type_null}, {"boolean", type_boolean}, {"int", type_int}, @@ -329,7 +329,7 @@ bool schema_parser::parse(std::vector& schema, std::string const& {"local-timestamp-millis", type_local_timestamp_millis}, {"local-timestamp-micros", type_local_timestamp_micros}, {"duration", type_duration}}; - std::unordered_map const attrnames = { + std::unordered_map const attrnames = { {"type", attrtype_type}, {"name", attrtype_name}, {"fields", attrtype_fields}, @@ -337,9 +337,9 @@ bool schema_parser::parse(std::vector& schema, std::string const& {"items", attrtype_items}, {"logicalType", attrtype_logicaltype}}; attrtype_e cur_attr = attrtype_none; - m_base = json_str.c_str(); + m_base = json_str.begin(); m_cur = m_base; - m_end = m_base + json_str.length(); + m_end = json_str.end(); while (more_data()) { int const c = *m_cur++; switch (c) { @@ -487,7 +487,8 @@ std::string schema_parser::get_str() ; auto len = static_cast(cur - start - 1); m_cur = cur; - return s.assign(start, std::max(len, 0)); + s.assign(start, std::max(len, 0)); + return s; } } // namespace avro diff --git a/cpp/src/io/avro/avro.hpp b/cpp/src/io/avro/avro.hpp index fd2c781b8a1..aa438679c34 100644 --- a/cpp/src/io/avro/avro.hpp +++ b/cpp/src/io/avro/avro.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA 
CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -122,7 +122,7 @@ class schema_parser { public: schema_parser() {} - bool parse(std::vector& schema, std::string const& str); + bool parse(std::vector& schema, std::string_view str); protected: [[nodiscard]] bool more_data() const { return (m_cur < m_end); } diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 0d51526d925..6e049ef78b7 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -104,17 +105,31 @@ class selected_rows_offsets { }; /** - * @brief Removes the first and Last quote in the string + * @brief Discards any other characters found before the first quotechar and after the last + * quotechar in the string (if any quotechar exists) + * + * ``` + * Example: + * "column" => column + * \t"column"\t => column + * "column" => column + * ``` + * */ -string removeQuotes(string str, char quotechar) +std::string_view remove_quotes(std::string_view str, char quotechar) { // Exclude first and last quotation char - size_t const first_quote = str.find(quotechar); - if (first_quote != string::npos) { str.erase(first_quote, 1); } - size_t const last_quote = str.rfind(quotechar); - if (last_quote != string::npos) { str.erase(last_quote, 1); } + auto const first_quote = str.find(quotechar); + + if (first_quote == string::npos) { return str; } + + str = str.substr(first_quote + 1); + + auto const last_quote = str.rfind(quotechar); + + if (last_quote == string::npos) { return str; } - return str; + return str.substr(0, last_quote); } /** @@ -152,8 +167,9 @@ std::vector get_column_names(std::vector const& row, --col_name_len; } - string const new_col_name(row.data() + prev, col_name_len); - col_names.push_back(removeQuotes(new_col_name, 
parse_opts.quotechar)); + col_names.emplace_back( + remove_quotes(std::string_view{row.data() + prev, static_cast(col_name_len)}, + parse_opts.quotechar)); } else { // This is the first data row, add the automatically generated name col_names.push_back(prefix + std::to_string(col_names.size())); diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index 37b1608463b..c34f5fb314a 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -303,7 +303,7 @@ CUDF_KERNEL __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel( } // namespace std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, - std::string const& delimiter, + std::string_view delimiter, byte_range_info byte_range, bool strip_delimiters, rmm::cuda_stream_view stream, @@ -315,7 +315,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source auto device_delim = cudf::string_scalar(delimiter, true, stream, mr); - auto sorted_delim = delimiter; + std::string sorted_delim{delimiter}; std::sort(sorted_delim.begin(), sorted_delim.end()); auto [_last_char, _last_char_count, max_duplicate_tokens] = std::accumulate( sorted_delim.begin(), sorted_delim.end(), std::make_tuple('\0', 0, 0), [](auto acc, char c) { @@ -569,7 +569,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source } // namespace detail std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, - std::string const& delimiter, + std::string_view delimiter, parse_options options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 03233db6970..c5ca7014686 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -56,7 +56,7 @@ bool* scalar::validity_data() { return _is_valid.data(); } bool const* scalar::validity_data() const { return _is_valid.data(); } -string_scalar::string_scalar(std::string const& string, 
+string_scalar::string_scalar(std::string_view string, bool is_valid, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) diff --git a/cpp/tests/json/json_tests.cpp b/cpp/tests/json/json_tests.cpp index 53166e04173..f9fabb83663 100644 --- a/cpp/tests/json/json_tests.cpp +++ b/cpp/tests/json/json_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,7 +84,7 @@ TEST_F(JsonPathTests, GetJsonObjectRootOp) { // root cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$"); + std::string_view json_path("$"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -97,7 +97,7 @@ TEST_F(JsonPathTests, GetJsonObjectChildOp) { { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store"); + std::string_view json_path("$.store"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -146,7 +146,7 @@ TEST_F(JsonPathTests, GetJsonObjectChildOp) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book"); + std::string_view json_path("$.store.book"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -192,7 +192,7 @@ TEST_F(JsonPathTests, GetJsonObjectWildcardOp) { { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.*"); + std::string_view json_path("$.store.*"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -241,7 +241,7 @@ TEST_F(JsonPathTests, GetJsonObjectWildcardOp) { cudf::test::strings_column_wrapper 
input{json_string}; - std::string json_path("*"); + std::string_view json_path("*"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -296,7 +296,7 @@ TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[2]"); + std::string_view json_path("$.store.book[2]"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -318,7 +318,7 @@ TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store['bicycle']"); + std::string_view json_path("$.store['bicycle']"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -337,7 +337,7 @@ TEST_F(JsonPathTests, GetJsonObjectSubscriptOp) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[*]"); + std::string_view json_path("$.store.book[*]"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -386,7 +386,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[*]['isbn']"); + std::string_view json_path("$.store.book[*]['isbn']"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -398,7 +398,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[*].category"); + std::string_view json_path("$.store.book[*].category"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -411,7 
+411,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[*].title"); + std::string_view json_path("$.store.book[*].title"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -424,7 +424,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book.*.price"); + std::string_view json_path("$.store.book.*.price"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -439,7 +439,7 @@ TEST_F(JsonPathTests, GetJsonObjectFilter) // standard: "fiction" // spark: fiction cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[2].category"); + std::string_view json_path("$.store.book[2].category"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -456,7 +456,7 @@ TEST_F(JsonPathTests, GetJsonObjectNullInputs) std::string str(R"({"a" : "b"})"); cudf::test::strings_column_wrapper input({str, str, str, str}, {true, false, true, false}); - std::string json_path("$.a"); + std::string_view json_path("$.a"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path); auto result = drop_whitespace(*result_raw); @@ -472,7 +472,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyQuery) // empty query -> null { cudf::test::strings_column_wrapper input{R"({"a" : "b"})"}; - std::string json_path(""); + std::string_view json_path(""); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -486,7 +486,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) // empty string input -> null { cudf::test::strings_column_wrapper input{""}; 
- std::string json_path("$"); + std::string_view json_path("$"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -499,7 +499,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) // a valid, but empty row { cudf::test::strings_column_wrapper input{R"({"store": { "bicycle" : "" } })"}; - std::string json_path("$.store.bicycle"); + std::string_view json_path("$.store.bicycle"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {1}); @@ -511,7 +511,7 @@ TEST_F(JsonPathTests, GetJsonObjectEmptyInputsAndOutputs) TEST_F(JsonPathTests, GetJsonObjectEmptyInput) { cudf::test::strings_column_wrapper input{}; - std::string json_path("$"); + std::string_view json_path("$"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, input); } @@ -523,7 +523,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // than the beginning { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("$$"); + std::string_view json_path("$$"); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -533,7 +533,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // invalid index { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("$[auh46h-]"); + std::string_view json_path("$[auh46h-]"); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -543,7 +543,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // invalid index { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("$[[]]"); + std::string_view json_path("$[[]]"); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -553,7 +553,7 @@ 
TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // negative index { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("$[-1]"); + std::string_view json_path("$[-1]"); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -563,7 +563,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) // child operator with no name specified { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("."); + std::string_view json_path("."); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -572,7 +572,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("]["); + std::string_view json_path("]["); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -581,7 +581,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("6hw6,56i3"); + std::string_view json_path("6hw6,56i3"); auto query = [&]() { auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -590,7 +590,7 @@ TEST_F(JsonPathTests, GetJsonObjectIllegalQuery) { auto const input = cudf::test::strings_column_wrapper{R"({"a": "b"})"}; - auto const json_path = std::string{"${a}"}; + auto const json_path = std::string_view{"${a}"}; auto const query = [&]() { auto const result = cudf::get_json_object(cudf::strings_column_view(input), json_path); }; @@ -604,7 +604,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) // non-existent field { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("$[*].c"); + std::string_view json_path("$[*].c"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ 
-615,7 +615,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) // non-existent field { cudf::test::strings_column_wrapper input{R"({"a": "b"})"}; - std::string json_path("$[*].c[2]"); + std::string_view json_path("$[*].c[2]"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -626,7 +626,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) // non-existent field { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book.price"); + std::string_view json_path("$.store.book.price"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -637,7 +637,7 @@ TEST_F(JsonPathTests, GetJsonObjectInvalidQuery) // out of bounds index { cudf::test::strings_column_wrapper input{json_string}; - std::string json_path("$.store.book[4]"); + std::string_view json_path("$.store.book[4]"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); cudf::test::strings_column_wrapper expected({""}, {0}); @@ -680,7 +680,7 @@ TEST_F(JsonPathTests, MixedOutput) // clang-format on cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); { - std::string json_path("$.a"); + std::string_view json_path("$.a"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off @@ -702,7 +702,7 @@ TEST_F(JsonPathTests, MixedOutput) } { - std::string json_path("$.a[1]"); + std::string_view json_path("$.a[1]"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off @@ -721,7 +721,7 @@ TEST_F(JsonPathTests, MixedOutput) } { - std::string json_path("$.a.b"); + std::string_view json_path("$.a.b"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off @@ -739,7 +739,7 @@ TEST_F(JsonPathTests, MixedOutput) } { - 
std::string json_path("$.a[*]"); + std::string_view json_path("$.a[*]"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off @@ -760,7 +760,7 @@ TEST_F(JsonPathTests, MixedOutput) } { - std::string json_path("$.a.b[*]"); + std::string_view json_path("$.a.b[*]"); auto result = cudf::get_json_object(cudf::strings_column_view(input), json_path); // clang-format off @@ -791,7 +791,7 @@ TEST_F(JsonPathTests, StripQuotes) cudf::get_json_object_options options; options.set_strip_quotes_from_single_strings(false); - std::string json_path("$.a"); + std::string_view json_path("$.a"); auto result_raw = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); auto result = drop_whitespace(*result_raw); @@ -804,7 +804,7 @@ TEST_F(JsonPathTests, StripQuotes) // a valid, but empty row { cudf::test::strings_column_wrapper input{R"({"store": { "bicycle" : "" } })"}; - std::string json_path("$.store.bicycle"); + std::string_view json_path("$.store.bicycle"); cudf::get_json_object_options options; options.set_strip_quotes_from_single_strings(true); @@ -864,7 +864,7 @@ TEST_F(JsonPathTests, AllowSingleQuotes) cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); { - std::string json_path("$.a"); + std::string_view json_path("$.a"); cudf::get_json_object_options options; options.set_allow_single_quotes(true); @@ -907,7 +907,7 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); { - std::string json_path("$.item"); + std::string_view json_path("$.item"); cudf::get_json_object_options options; options.set_allow_single_quotes(true); @@ -932,7 +932,7 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); { - std::string json_path("$.a"); + std::string_view json_path("$.a"); cudf::get_json_object_options options; 
options.set_allow_single_quotes(true); @@ -964,7 +964,7 @@ TEST_F(JsonPathTests, EscapeSequences) cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end()); { - std::string json_path("$.a"); + std::string_view json_path("$.a"); cudf::get_json_object_options options; options.set_allow_single_quotes(true); @@ -1034,7 +1034,7 @@ TEST_F(JsonPathTests, QueriesContainingQuotes) auto const& expected_string, bool const& expect_null = false) { auto const input = cudf::test::strings_column_wrapper{input_string}; - auto const json_path = std::string{json_path_string}; + auto const json_path = std::string_view{json_path_string}; cudf::get_json_object_options options; options.set_allow_single_quotes(true); auto const result = cudf::get_json_object(cudf::strings_column_view(input), json_path, options); diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp index ce61a1bda8c..50af9a9d5a4 100644 --- a/cpp/tests/strings/case_tests.cpp +++ b/cpp/tests/strings/case_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -110,14 +110,14 @@ TEST_F(StringsCaseTest, Capitalize) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { - auto results = cudf::strings::capitalize(strings_view, std::string(" ")); + auto results = cudf::strings::capitalize(strings_view, std::string_view(" ")); cudf::test::strings_column_wrapper expected( {"Sⱥⱥnich Xyz", "Examples Abc", "Thesé", "", "Are\tthe", "Tést\tstrings", ""}, {true, true, true, false, true, true, true}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { - auto results = cudf::strings::capitalize(strings_view, std::string(" \t")); + auto results = cudf::strings::capitalize(strings_view, std::string_view(" \t")); cudf::test::strings_column_wrapper expected( {"Sⱥⱥnich Xyz", "Examples Abc", "Thesé", "", "Are\tThe", "Tést\tStrings", ""}, {true, true, true, false, true, true, true}); @@ -185,7 +185,7 @@ TEST_F(StringsCaseTest, MultiCharUpper) auto results = cudf::strings::to_upper(strings_view); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); - results = cudf::strings::capitalize(strings_view, std::string(" ")); + results = cudf::strings::capitalize(strings_view, std::string_view(" ")); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); results = cudf::strings::title(strings_view); diff --git a/cpp/tests/strings/like_tests.cpp b/cpp/tests/strings/like_tests.cpp index 6aedbdeb537..da1a87aab8d 100644 --- a/cpp/tests/strings/like_tests.cpp +++ b/cpp/tests/strings/like_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,7 @@ TEST_F(StringsLikeTests, Basic) {true, true, true, true, true, true, false, true}); auto const sv = cudf::strings_column_view(input); auto const pattern = std::string("abc"); - auto const results = cudf::strings::like(sv, pattern); + auto const results = cudf::strings::like(sv, std::string_view{pattern}); cudf::test::fixed_width_column_wrapper expected( {true, false, false, false, false, false, false, false}, {true, true, true, true, true, true, false, true}); @@ -41,19 +41,19 @@ TEST_F(StringsLikeTests, Leading) cudf::test::strings_column_wrapper input({"a", "aa", "aaa", "b", "bb", "bba", "", "áéêú"}); auto const sv = cudf::strings_column_view(input); { - auto const results = cudf::strings::like(sv, std::string("a%")); + auto const results = cudf::strings::like(sv, std::string_view("a%")); cudf::test::fixed_width_column_wrapper expected( {true, true, true, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("__a%")); + auto const results = cudf::strings::like(sv, std::string_view("__a%")); cudf::test::fixed_width_column_wrapper expected( {false, false, true, false, false, true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("á%")); + auto const results = cudf::strings::like(sv, std::string_view("á%")); cudf::test::fixed_width_column_wrapper expected( {false, false, false, false, false, false, false, true}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); @@ -65,21 +65,21 @@ TEST_F(StringsLikeTests, Trailing) cudf::test::strings_column_wrapper input({"a", "aa", "aaa", "b", "bb", "bba", "", "áéêú"}); auto const sv = cudf::strings_column_view(input); { - auto results = cudf::strings::like(sv, std::string("%a")); + auto results = cudf::strings::like(sv, std::string_view("%a")); cudf::test::fixed_width_column_wrapper expected( 
{true, true, true, false, false, true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); - results = cudf::strings::like(sv, std::string("%a%")); + results = cudf::strings::like(sv, std::string_view("%a%")); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("%_a")); + auto const results = cudf::strings::like(sv, std::string_view("%_a")); cudf::test::fixed_width_column_wrapper expected( {false, true, true, false, false, true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("%_êú")); + auto const results = cudf::strings::like(sv, std::string_view("%_êú")); cudf::test::fixed_width_column_wrapper expected( {false, false, false, false, false, false, false, true}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); @@ -91,25 +91,25 @@ TEST_F(StringsLikeTests, Place) cudf::test::strings_column_wrapper input({"a", "aa", "aaa", "bab", "ab", "aba", "", "éaé"}); auto const sv = cudf::strings_column_view(input); { - auto const results = cudf::strings::like(sv, std::string("a_")); + auto const results = cudf::strings::like(sv, std::string_view("a_")); cudf::test::fixed_width_column_wrapper expected( {false, true, false, false, true, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("_a_")); + auto const results = cudf::strings::like(sv, std::string_view("_a_")); cudf::test::fixed_width_column_wrapper expected( {false, false, true, true, false, false, false, true}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("__a")); + auto const results = cudf::strings::like(sv, std::string_view("__a")); cudf::test::fixed_width_column_wrapper expected( {false, false, true, false, false, true, false, 
false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const results = cudf::strings::like(sv, std::string("é_é")); + auto const results = cudf::strings::like(sv, std::string_view("é_é")); cudf::test::fixed_width_column_wrapper expected( {false, false, false, false, false, false, false, true}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); @@ -122,40 +122,40 @@ TEST_F(StringsLikeTests, Escape) {"10%-20%", "10-20", "10%%-20%", "a_b", "b_a", "___", "", "aéb"}); auto const sv = cudf::strings_column_view(input); { - auto const pattern = std::string("10\\%-20\\%"); - auto const escape = std::string("\\"); + auto const pattern = std::string_view("10\\%-20\\%"); + auto const escape = std::string_view("\\"); auto const results = cudf::strings::like(sv, pattern, escape); cudf::test::fixed_width_column_wrapper expected( {true, false, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const pattern = std::string("\\__\\_"); - auto const escape = std::string("\\"); + auto const pattern = std::string_view("\\__\\_"); + auto const escape = std::string_view("\\"); auto const results = cudf::strings::like(sv, pattern, escape); cudf::test::fixed_width_column_wrapper expected( {false, false, false, false, false, true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const pattern = std::string("10%%%%-20%%"); - auto const escape = std::string("%"); + auto const pattern = std::string_view("10%%%%-20%%"); + auto const escape = std::string_view("%"); auto const results = cudf::strings::like(sv, pattern, escape); cudf::test::fixed_width_column_wrapper expected( {false, false, true, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const pattern = std::string("_%__"); - auto const escape = std::string("%"); + auto const pattern = std::string_view("_%__"); + auto const 
escape = std::string_view("%"); auto const results = cudf::strings::like(sv, pattern, escape); cudf::test::fixed_width_column_wrapper expected( {false, false, false, true, true, true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } { - auto const pattern = std::string("a__b"); - auto const escape = std::string("_"); + auto const pattern = std::string_view("a__b"); + auto const escape = std::string_view("_"); auto const results = cudf::strings::like(sv, pattern, escape); cudf::test::fixed_width_column_wrapper expected( {false, false, false, true, false, false, false, false}); @@ -179,13 +179,13 @@ TEST_F(StringsLikeTests, Empty) { cudf::test::strings_column_wrapper input({"ooo", "20%", ""}); auto sv = cudf::strings_column_view(input); - auto results = cudf::strings::like(sv, std::string("")); + auto results = cudf::strings::like(sv, std::string_view("")); auto expected = cudf::test::fixed_width_column_wrapper({false, false, true}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); auto empty = cudf::make_empty_column(cudf::type_id::STRING); sv = cudf::strings_column_view(empty->view()); - results = cudf::strings::like(sv, std::string("20%")); + results = cudf::strings::like(sv, std::string_view("20%")); auto expected_empty = cudf::make_empty_column(cudf::type_id::BOOL8); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected_empty->view()); @@ -200,7 +200,7 @@ TEST_F(StringsLikeTests, Errors) auto const invalid_str = cudf::string_scalar("", false); EXPECT_THROW(cudf::strings::like(sv, invalid_str), cudf::logic_error); - EXPECT_THROW(cudf::strings::like(sv, std::string("3"), invalid_str), cudf::logic_error); + EXPECT_THROW(cudf::strings::like(sv, std::string_view("3"), invalid_str), cudf::logic_error); auto patterns = cudf::test::strings_column_wrapper({"3", ""}, {true, false}); auto const sv_patterns = cudf::strings_column_view(patterns); diff --git a/cpp/tests/text/bpe_tests.cpp b/cpp/tests/text/bpe_tests.cpp 
index 3b08439612b..cf8c569904d 100644 --- a/cpp/tests/text/bpe_tests.cpp +++ b/cpp/tests/text/bpe_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -83,7 +83,7 @@ TEST_F(TextBytePairEncoding, BytePairEncodingSeparator) {"Ġthe test sentence", "test Ġthe sentence", "Ġthetest sentence", "testĠthesentence"}); auto sv = cudf::strings_column_view(input); - auto results = nvtext::byte_pair_encoding(sv, *merge_pairs, std::string("$")); + auto results = nvtext::byte_pair_encoding(sv, *merge_pairs, std::string_view("$")); auto expected = cudf::test::strings_column_wrapper({"Ġthe$ $test$ $sent$ence", "test$ $Ġthe$ $sent$ence", diff --git a/cpp/tests/text/ngrams_tokenize_tests.cpp b/cpp/tests/text/ngrams_tokenize_tests.cpp index 998bddedd18..f03ffcea703 100644 --- a/cpp/tests/text/ngrams_tokenize_tests.cpp +++ b/cpp/tests/text/ngrams_tokenize_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -63,7 +63,8 @@ TEST_F(TextNgramsTokenizeTest, Tokenize) "mousé_ate", "ate_the", "the_cheese"}; - auto results = nvtext::ngrams_tokenize(strings_view, 2, std::string(), std::string("_")); + auto results = + nvtext::ngrams_tokenize(strings_view, 2, std::string_view(), std::string_view("_")); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { @@ -80,7 +81,8 @@ TEST_F(TextNgramsTokenizeTest, Tokenize) "the:mousé:ate", "mousé:ate:the", "ate:the:cheese"}; - auto results = nvtext::ngrams_tokenize(strings_view, 3, std::string{" "}, std::string{":"}); + auto results = + nvtext::ngrams_tokenize(strings_view, 3, std::string_view{" "}, std::string_view{":"}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } { @@ -93,7 +95,8 @@ TEST_F(TextNgramsTokenizeTest, Tokenize) "cat--chased--the--mouse", "the--mousé--ate--the", "mousé--ate--the--cheese"}; - auto results = nvtext::ngrams_tokenize(strings_view, 4, std::string{" "}, std::string{"--"}); + auto results = + nvtext::ngrams_tokenize(strings_view, 4, std::string_view{" "}, std::string_view{"--"}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } }