diff --git a/README.md b/README.md
index b65ab5a..52098cb 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ This extension provides a set of utility functions to work with JSON data, focus
 - **`json_flatten(json[, separator])`**: Recursively flattens nested JSON objects and arrays into a single-level object with path keys (default separator: `.`).
 - **`json_add_prefix(json, text)`**: Adds a string prefix to every top-level key in a JSON object.
+- **`json_extract_columns(json, columns[, separator])`**: Pulls selected root keys into a struct of `VARCHAR` fields using regex patterns.
 - **`json_group_merge(json [ORDER BY ...])`**: Streams JSON patches with RFC 7396 merge semantics without materializing intermediate lists.
 
 ## Quick Start
 
@@ -124,6 +125,33 @@ SELECT json_add_prefix('{"user": {"name": "Alice"}, "count": 5}', 'data_');
 
 **Note:** This function requires the input to be a JSON object. It will raise an error if given a JSON array or primitive value.
 
+### `json_extract_columns(json, columns[, separator]) -> struct`
+
+Extracts selected root-level fields into a struct of `VARCHAR` columns. The first argument must be a JSON object value (not an array or primitive). `columns` must be a constant JSON object mapping output column names to RE2 regex patterns evaluated against each top-level key (partial matches by default; add anchors to tighten). Patterns are case-sensitive unless you supply inline flags such as `(?i)`. Output columns follow the mapping order.
+
+`separator` defaults to `''` and is inserted between multiple matches for the same column in the order keys appear in the input object. It can be empty but cannot be `NULL` (even when the JSON input is `NULL`). Columns with no matches return `NULL`.
+
+Values are stringified: strings pass through unquoted; arrays, objects, numbers, booleans, and `null` become their JSON text.
+
+**Examples:**
+```sql
+SELECT (json_extract_columns('{"id": 5, "name": "duck"}',
+                             '{"id":"^id$","name":"^name$"}', ',')).id AS id;
+-- Result: 5
+
+SELECT (json_extract_columns('{"a":1,"a2":2,"b":3}',
+                             '{"a":"^a","b":"^b$"}', '|')).a AS a_values;
+-- Result: 1|2
+
+SELECT (json_extract_columns('{"Key": "Value"}',
+                             '{"k":"(?i)^key$"}', ',')).k AS case_insensitive;
+-- Result: Value
+
+SELECT (json_extract_columns('{"x":"a","xx":"b"}',
+                             '{"col":"x"}')).col AS default_separator;
+-- Result: ab
+```
+
 ### `json_group_merge(json_expr [, treat_null_values] [ORDER BY ...]) -> json`
 
 Applies a sequence of JSON patches using [RFC 7396](https://datatracker.ietf.org/doc/html/rfc7396) merge semantics. Inputs can be `JSON` values or `VARCHAR` text that parses as JSON. SQL `NULL` rows are skipped, and the aggregate returns `'{}'::json` when no non-null inputs are provided.
@@ -175,6 +203,7 @@ FROM (VALUES ('{"keep":1}'::json, 1), ('{"keep":null}'::json, 2)) AS t(patch, ts
 
 - `json_flatten()` returns an error for malformed JSON
 - `json_add_prefix()` requires a JSON object (not array or primitive value)
+- `json_extract_columns()` requires a JSON object input and a constant JSON object of string regex patterns; it raises on invalid regexes, NULL separators, non-string object keys, or mismatched input shapes
 - `json_group_merge()` surfaces DuckDB JSON parse errors for invalid text and raises on merge buffers that exceed DuckDB limits
 - Maximum nesting depth: 1000 levels
 - Empty objects (`{}`) and arrays (`[]`) are omitted from flattened output
diff --git a/src/json_tools_extension.cpp b/src/json_tools_extension.cpp
index 133e87f..2e49e91 100644
--- a/src/json_tools_extension.cpp
+++ b/src/json_tools_extension.cpp
@@ -5,11 +5,15 @@
 #include "duckdb/common/allocator.hpp"
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/string_util.hpp"
+#include "duckdb/common/types/vector.hpp"
+#include "duckdb/common/types/value.hpp"
+#include "duckdb/common/types.hpp"
 #include "duckdb/execution/expression_executor_state.hpp"
 #include "duckdb/execution/expression_executor.hpp"
 #include "duckdb/function/scalar_function.hpp"
 #include "duckdb/function/aggregate_function.hpp"
 #include "duckdb/function/function_set.hpp"
+#include "duckdb/function/scalar/regexp.hpp"
 #include "../duckdb/extension/json/include/json_common.hpp"
 #include "yyjson.hpp"
 #include <algorithm>
@@ -26,6 +30,7 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include <unordered_set>
 
 namespace duckdb {
@@ -719,9 +724,11 @@ using duckdb_yyjson::yyjson_arr_iter_with;
 using duckdb_yyjson::yyjson_doc;
 using duckdb_yyjson::yyjson_doc_free;
 using duckdb_yyjson::yyjson_doc_get_root;
+using duckdb_yyjson::yyjson_get_tag;
 using duckdb_yyjson::yyjson_is_arr;
 using duckdb_yyjson::yyjson_is_null;
 using duckdb_yyjson::yyjson_is_obj;
+using duckdb_yyjson::yyjson_is_str;
 using duckdb_yyjson::yyjson_mut_doc;
 using duckdb_yyjson::yyjson_mut_doc_free;
 using duckdb_yyjson::yyjson_mut_doc_new;
@@ -744,6 +751,297 @@ namespace duckdb {
 using duckdb_yyjson::yyjson_read_opts;
 using duckdb_yyjson::yyjson_val;
 using duckdb_yyjson::yyjson_val_mut_copy;
 
+// Bind-time data for json_extract_columns: output column names, their RE2
+// patterns (compiled once at bind time), and the struct child layout.
+struct JsonExtractColumnsBindData : public FunctionData {
+	JsonExtractColumnsBindData(vector<string> column_names_p, vector<string> patterns_p,
+	                           child_list_t<LogicalType> children_p, duckdb_re2::RE2::Options options_p)
+	    : column_names(std::move(column_names_p)), patterns(std::move(patterns_p)), children(std::move(children_p)),
+	      options(std::move(options_p)) {
+		CompilePatterns();
+	}
+
+	vector<string> column_names;
+	vector<string> patterns;
+	child_list_t<LogicalType> children;
+	duckdb_re2::RE2::Options options;
+	vector<unique_ptr<duckdb_re2::RE2>> compiled_patterns;
+
+	unique_ptr<FunctionData> Copy() const override {
+		return make_uniq<JsonExtractColumnsBindData>(column_names, patterns, children, options);
+	}
+
+	bool Equals(const FunctionData &other_p) const override {
+		const auto &other = other_p.Cast<JsonExtractColumnsBindData>();
+		return column_names == other.column_names && patterns == other.patterns &&
+		       options.case_sensitive() == other.options.case_sensitive();
+	}
+
+private:
+	// Compile every pattern eagerly; an invalid regex is a bind-time error.
+	void CompilePatterns() {
+		compiled_patterns.clear();
+		compiled_patterns.reserve(patterns.size());
+		for (auto &pattern : patterns) {
+			auto re = make_uniq<duckdb_re2::RE2>(pattern, options);
+			if (!re->ok()) {
+				throw BinderException("json_extract_columns: %s", re->error());
+			}
+			compiled_patterns.push_back(std::move(re));
+		}
+	}
+};
+
+// Per-thread scratch state: a resettable yyjson allocator plus one string
+// buffer and one match flag per output column (reused across rows).
+struct JsonExtractColumnsLocalState : public FunctionLocalState {
+	JsonExtractColumnsLocalState(Allocator &allocator, idx_t column_count)
+	    : json_allocator(std::make_shared<JSONAllocator>(allocator)), buffers(column_count),
+	      has_match(column_count, false) {
+	}
+
+	shared_ptr<JSONAllocator> json_allocator;
+	vector<string> buffers;
+	vector<bool> has_match;
+};
+
+static unique_ptr<FunctionLocalState>
+JsonExtractColumnsInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr, FunctionData *bind_data) {
+	auto &context = state.GetContext();
+	auto column_count = bind_data->Cast<JsonExtractColumnsBindData>().column_names.size();
+	return make_uniq<JsonExtractColumnsLocalState>(BufferAllocator::Get(context), column_count);
+}
+
+// Stringify a matched value: strings are appended unquoted, everything else
+// (numbers, booleans, null, arrays, objects) as its serialized JSON text.
+static void AppendJsonValue(string &target, yyjson_val *val, yyjson_alc *alc) {
+	switch (yyjson_get_tag(val)) {
+	case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
+		target.append("null");
+		break;
+	case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE:
+		target.append("true");
+		break;
+	case YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE:
+		target.append("false");
+		break;
+	case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC:
+	case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE: {
+		auto str = duckdb_yyjson::yyjson_get_str(val);
+		auto len = duckdb_yyjson::yyjson_get_len(val);
+		target.append(str, len);
+		break;
+	}
+	default: {
+		idx_t len;
+		auto data = JSONCommon::WriteVal(val, alc, len);
+		target.append(data, len);
+		break;
+	}
+	}
+}
+
+// Bind: the columns argument must be a constant JSON object of name->pattern
+// strings; the struct return type is derived from it in mapping order.
+static unique_ptr<FunctionData> JsonExtractColumnsBind(ClientContext &context, ScalarFunction &function,
+                                                       vector<unique_ptr<Expression>> &arguments) {
+	if (arguments.size() < 2 || arguments.size() > 3) {
+		throw BinderException("json_extract_columns expects json input, columns mapping, and optional separator");
+	}
+	auto &columns_arg = arguments[1];
+	if (columns_arg->return_type.id() == LogicalTypeId::UNKNOWN || columns_arg->HasParameter()) {
+		throw ParameterNotResolvedException();
+	}
+	if (!columns_arg->IsFoldable()) {
+		throw BinderException("json_extract_columns columns argument must be constant");
+	}
+
+	auto columns_value = ExpressionExecutor::EvaluateScalar(context, *columns_arg);
+	if (columns_value.IsNull()) {
+		throw BinderException("json_extract_columns: columns argument must be a JSON object");
+	}
+	auto type_id = columns_value.type().id();
+	if (type_id != LogicalTypeId::VARCHAR) {
+		throw BinderException("json_extract_columns columns argument must be a JSON string");
+	}
+	auto columns_input = StringValue::Get(columns_value);
+
+	duckdb_yyjson::yyjson_read_err err;
+	auto doc = yyjson_doc_ptr(yyjson_read_opts(const_cast<char *>(columns_input.c_str()), columns_input.size(),
+	                                           JSONCommon::READ_FLAG, nullptr, &err));
+	if (!doc) {
+		throw BinderException("json_extract_columns: %s",
+		                      JSONCommon::FormatParseError(columns_input.c_str(), columns_input.size(), err));
+	}
+	auto root = yyjson_doc_get_root(doc.get());
+	if (!root || !yyjson_is_obj(root)) {
+		throw BinderException("json_extract_columns: columns argument must be a JSON object");
+	}
+
+	vector<string> column_names;
+	vector<string> patterns;
+	child_list_t<LogicalType> children;
+	unordered_set<string> seen_columns;
+
+	yyjson_val *key = nullptr;
+	yyjson_obj_iter iter = yyjson_obj_iter_with(root);
+	while ((key = yyjson_obj_iter_next(&iter))) {
+		auto key_str = duckdb_yyjson::yyjson_get_str(key);
+		auto key_len = duckdb_yyjson::yyjson_get_len(key);
+		auto value = yyjson_obj_iter_get_val(key);
+		if (!yyjson_is_str(value)) {
+			throw BinderException("json_extract_columns: column patterns must be strings");
+		}
+		string column_name(key_str, key_len);
+		if (!seen_columns.insert(column_name).second) {
+			throw BinderException("json_extract_columns: duplicate output column name \"%s\"", column_name.c_str());
+		}
+		auto pattern_str = string(duckdb_yyjson::yyjson_get_str(value), duckdb_yyjson::yyjson_get_len(value));
+		column_names.push_back(std::move(column_name));
+		patterns.push_back(std::move(pattern_str));
+	}
+
+	children.reserve(column_names.size());
+	for (idx_t i = 0; i < column_names.size(); i++) {
+		children.emplace_back(column_names[i], LogicalType::VARCHAR);
+	}
+	function.return_type = LogicalType::STRUCT(children);
+
+	duckdb_re2::RE2::Options options;
+	options.set_log_errors(false);
+	return make_uniq<JsonExtractColumnsBindData>(std::move(column_names), std::move(patterns), std::move(children),
+	                                             options);
+}
+
+static void JsonExtractColumnsFunction(DataChunk &args, ExpressionState &state, Vector &result) {
+	auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
+	auto &bind_data = func_expr.bind_info->Cast<JsonExtractColumnsBindData>();
+	auto &local_state = ExecuteFunctionState::GetFunctionState(state)->Cast<JsonExtractColumnsLocalState>();
+
+	auto column_count = bind_data.column_names.size();
+	auto &children = StructVector::GetEntries(result);
+	D_ASSERT(children.size() == column_count);
+
+	result.SetVectorType(VectorType::FLAT_VECTOR);
+	auto &result_validity = FlatVector::Validity(result);
+	for (auto &child : children) {
+		child->SetVectorType(VectorType::FLAT_VECTOR);
+	}
+
+	vector<string_t *> child_data(column_count, nullptr);
+	vector<ValidityMask *> child_validities(column_count, nullptr);
+	for (idx_t i = 0; i < column_count; i++) {
+		child_data[i] = FlatVector::GetData<string_t>(*children[i]);
+		child_validities[i] = &FlatVector::Validity(*children[i]);
+	}
+
+	UnifiedVectorFormat json_data;
+	args.data[0].ToUnifiedFormat(args.size(), json_data);
+	auto json_inputs = UnifiedVectorFormat::GetData<string_t>(json_data);
+
+	// Resolve the separator once when it is a constant vector; per-row otherwise.
+	auto has_separator_argument = args.ColumnCount() == 3;
+	idx_t separator_column_index = has_separator_argument ? args.ColumnCount() - 1 : 0;
+	bool separator_is_constant = false;
+	std::string separator_constant_storage;
+	const char *separator_constant_ptr = nullptr;
+	idx_t separator_constant_len = 0;
+	if (has_separator_argument) {
+		auto &separator_vec = args.data[separator_column_index];
+		separator_is_constant = separator_vec.GetVectorType() == VectorType::CONSTANT_VECTOR;
+		if (separator_is_constant) {
+			if (ConstantVector::IsNull(separator_vec)) {
+				throw InvalidInputException("json_extract_columns: separator cannot be NULL");
+			}
+			auto separator_value = separator_vec.GetValue(0);
+			separator_constant_storage = StringValue::Get(separator_value);
+			separator_constant_ptr = separator_constant_storage.c_str();
+			separator_constant_len = separator_constant_storage.size();
+		}
+	}
+
+	for (idx_t row = 0; row < args.size(); row++) {
+		auto json_idx = json_data.sel->get_index(row);
+
+		// NULL separators are rejected before the JSON input is inspected, so
+		// the error fires even for NULL JSON rows (documented behavior).
+		std::string separator_storage;
+		const char *separator_data_ptr;
+		idx_t separator_len;
+		if (has_separator_argument) {
+			if (separator_is_constant) {
+				separator_data_ptr = separator_constant_ptr;
+				separator_len = separator_constant_len;
+			} else {
+				auto separator_value = args.GetValue(separator_column_index, row);
+				if (separator_value.IsNull()) {
+					throw InvalidInputException("json_extract_columns: separator cannot be NULL");
+				}
+				separator_storage = StringValue::Get(separator_value);
+				separator_data_ptr = separator_storage.c_str();
+				separator_len = separator_storage.size();
+			}
+		} else {
+			separator_data_ptr = "";
+			separator_len = 0;
+		}
+
+		if (!json_data.validity.RowIsValid(json_idx)) {
+			result_validity.SetInvalid(row);
+			for (auto &validity : child_validities) {
+				validity->SetInvalid(row);
+			}
+			continue;
+		}
+		result_validity.SetValid(row);
+
+		auto &allocator = *local_state.json_allocator;
+		allocator.Reset();
+		auto alc = allocator.GetYYAlc();
+
+		auto input = json_inputs[json_idx];
+		auto input_data = input.GetDataUnsafe();
+		auto input_length = input.GetSize();
+		duckdb_yyjson::yyjson_read_err err;
+		auto doc = yyjson_doc_ptr(
+		    yyjson_read_opts(const_cast<char *>(input_data), input_length, JSONCommon::READ_FLAG, alc, &err));
+		if (!doc) {
+			throw InvalidInputException("json_extract_columns: %s",
+			                            JSONCommon::FormatParseError(input_data, input_length, err));
+		}
+		auto root = yyjson_doc_get_root(doc.get());
+		if (!root || !yyjson_is_obj(root)) {
+			throw InvalidInputException("json_extract_columns: expected JSON object input");
+		}
+
+		for (auto &buffer : local_state.buffers) {
+			buffer.clear();
+		}
+		std::fill(local_state.has_match.begin(), local_state.has_match.end(), false);
+
+		// One pass over the object's keys; every column whose pattern matches
+		// the key appends the stringified value (separator between matches).
+		yyjson_val *key = nullptr;
+		yyjson_obj_iter iter = yyjson_obj_iter_with(root);
+		while ((key = yyjson_obj_iter_next(&iter))) {
+			auto key_str = duckdb_yyjson::yyjson_get_str(key);
+			auto key_len = duckdb_yyjson::yyjson_get_len(key);
+			if (!key_str) {
+				throw InvalidInputException("json_extract_columns: encountered non-string object key");
+			}
+			auto value = yyjson_obj_iter_get_val(key);
+			duckdb_re2::StringPiece key_piece(key_str, key_len);
+			for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+				auto &regex = *bind_data.compiled_patterns[col_idx];
+				if (!duckdb_re2::RE2::PartialMatch(key_piece, regex)) {
+					continue;
+				}
+				if (local_state.has_match[col_idx]) {
+					local_state.buffers[col_idx].append(separator_data_ptr, separator_len);
+				} else {
+					local_state.has_match[col_idx] = true;
+				}
+				AppendJsonValue(local_state.buffers[col_idx], value, alc);
+			}
+		}
+
+		for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
+			if (!local_state.has_match[col_idx]) {
+				child_validities[col_idx]->SetInvalid(row);
+				continue;
+			}
+			child_validities[col_idx]->SetValid(row);
+			child_data[col_idx][row] = StringVector::AddString(*children[col_idx], local_state.buffers[col_idx]);
+		}
+	}
+}
+
 // Default initial capacity for the key buffer
 constexpr idx_t DEFAULT_KEY_BUFFER_SIZE = 512;
@@ -825,31 +1123,30 @@ inline string_t JsonFlattenSingle(Vector &result, const string_t &input, JsonFla
 	auto input_data = input.GetDataUnsafe();
 	auto input_length = input.GetSize();
 	duckdb_yyjson::yyjson_read_err err;
-	auto doc = yyjson_read_opts(const_cast<char *>(input_data), input_length, JSONCommon::READ_FLAG, alc, &err);
+	auto doc = yyjson_doc_ptr(
+	    yyjson_read_opts(const_cast<char *>(input_data), input_length, JSONCommon::READ_FLAG, alc, &err));
 	if (!doc) {
 		throw InvalidInputException("json_flatten: %s", JSONCommon::FormatParseError(input_data, input_length, err));
 	}
-	std::unique_ptr<yyjson_doc, decltype(&yyjson_doc_free)> doc_handle(doc, yyjson_doc_free);
-	auto root = yyjson_doc_get_root(doc);
+	auto root = yyjson_doc_get_root(doc.get());
 	if (!root || yyjson_is_null(root) || (!yyjson_is_obj(root) && !yyjson_is_arr(root))) {
 		return StringVector::AddString(result, input);
 	}
-	auto out_doc = yyjson_mut_doc_new(alc);
+	auto out_doc = yyjson_mut_doc_ptr(yyjson_mut_doc_new(alc));
 	if (!out_doc) {
 		throw InternalException("json_flatten: failed to allocate output document");
 	}
-	std::unique_ptr<yyjson_mut_doc, decltype(&yyjson_mut_doc_free)> out_handle(out_doc, yyjson_mut_doc_free);
-	auto out_root = yyjson_mut_obj(out_doc);
+	auto out_root = yyjson_mut_obj(out_doc.get());
 	if (!out_root) {
 		throw InternalException("json_flatten: failed to allocate output object");
 	}
-	yyjson_mut_doc_set_root(out_doc, out_root);
+	yyjson_mut_doc_set_root(out_doc.get(), out_root);
 
 	auto &key_buffer = local_state.key_buffer;
 	key_buffer.clear();
 	key_buffer.reserve(static_cast<size_t>(std::min(input_length, DEFAULT_KEY_BUFFER_SIZE)));
-	FlattenIntoObject(root, out_doc, out_root, key_buffer, separator, 0);
+	FlattenIntoObject(root, out_doc.get(), out_root, key_buffer, separator, 0);
 
 	size_t output_length = 0;
-	auto output_cstr = yyjson_mut_write_opts(out_doc, JSONCommon::WRITE_FLAG, nullptr, &output_length, nullptr);
+	auto output_cstr = yyjson_mut_write_opts(out_doc.get(), JSONCommon::WRITE_FLAG, nullptr, &output_length, nullptr);
 	if (!output_cstr) {
 		throw InternalException("json_flatten: failed to serialize flattened JSON");
 	}
@@ -865,27 +1162,26 @@ inline string_t JsonAddPrefixSingle(Vector &result, const string_t &input, const
 	auto input_data = input.GetDataUnsafe();
 	auto input_length = input.GetSize();
 	duckdb_yyjson::yyjson_read_err err;
-	auto doc = yyjson_read_opts(const_cast<char *>(input_data), input_length, JSONCommon::READ_FLAG, alc, &err);
+	auto doc = yyjson_doc_ptr(
+	    yyjson_read_opts(const_cast<char *>(input_data), input_length, JSONCommon::READ_FLAG, alc, &err));
 	if (!doc) {
 		throw InvalidInputException("json_add_prefix: %s",
 		                            JSONCommon::FormatParseError(input_data, input_length, err));
 	}
-	std::unique_ptr<yyjson_doc, decltype(&yyjson_doc_free)> doc_handle(doc, yyjson_doc_free);
-	auto root = yyjson_doc_get_root(doc);
+	auto root = yyjson_doc_get_root(doc.get());
 	if (!root || !yyjson_is_obj(root)) {
 		throw InvalidInputException("json_add_prefix: expected JSON object input");
 	}
-	auto out_doc = yyjson_mut_doc_new(alc);
+	auto out_doc = yyjson_mut_doc_ptr(yyjson_mut_doc_new(alc));
 	if (!out_doc) {
 		throw InternalException("json_add_prefix: failed to allocate output document");
 	}
-	std::unique_ptr<yyjson_mut_doc, decltype(&yyjson_mut_doc_free)> out_handle(out_doc, yyjson_mut_doc_free);
-	auto out_root = yyjson_mut_obj(out_doc);
+	auto out_root = yyjson_mut_obj(out_doc.get());
 	if (!out_root) {
 		throw InternalException("json_add_prefix: failed to allocate output object");
 	}
-	yyjson_mut_doc_set_root(out_doc, out_root);
+	yyjson_mut_doc_set_root(out_doc.get(), out_root);
 
 	auto prefix_data = prefix.GetDataUnsafe();
 	auto prefix_length = prefix.GetSize();
@@ -906,14 +1202,14 @@ inline string_t JsonAddPrefixSingle(Vector &result, const string_t &input, const
 			// Use stack buffer for common case
 			memcpy(buffer, prefix_data, prefix_length);
 			memcpy(buffer + prefix_length, key_str, key_len);
-			new_key_val = yyjson_mut_strncpy(out_doc, buffer, prefixed_len);
+			new_key_val = yyjson_mut_strncpy(out_doc.get(), buffer, prefixed_len);
 		} else {
 			// Fallback to heap for large keys
 			std::string new_key;
 			new_key.reserve(prefixed_len);
 			new_key.append(prefix_data, prefix_length);
 			new_key.append(key_str, key_len);
-			new_key_val = yyjson_mut_strncpy(out_doc, new_key.c_str(), prefixed_len);
+			new_key_val = yyjson_mut_strncpy(out_doc.get(), new_key.c_str(), prefixed_len);
 		}
 
 		if (!new_key_val) {
@@ -921,18 +1217,18 @@ inline string_t JsonAddPrefixSingle(Vector &result, const string_t &input, const
 		}
 
 		auto new_key_ptr = duckdb_yyjson::yyjson_mut_get_str(new_key_val);
-		auto value_copy = yyjson_val_mut_copy(out_doc, value);
+		auto value_copy = yyjson_val_mut_copy(out_doc.get(), value);
 		if (!value_copy) {
 			throw InternalException("json_add_prefix: failed to allocate value storage");
 		}
-		if (!yyjson_mut_obj_add_val(out_doc, out_root, new_key_ptr, value_copy)) {
+		if (!yyjson_mut_obj_add_val(out_doc.get(), out_root, new_key_ptr, value_copy)) {
 			throw InternalException("json_add_prefix: failed to add prefixed key-value pair");
 		}
 	}
 
 	size_t output_length = 0;
-	auto output_cstr = yyjson_mut_write_opts(out_doc, JSONCommon::WRITE_FLAG, nullptr, &output_length, nullptr);
+	auto output_cstr = yyjson_mut_write_opts(out_doc.get(), JSONCommon::WRITE_FLAG, nullptr, &output_length, nullptr);
 	if (!output_cstr) {
 		throw InternalException("json_add_prefix: failed to serialize output JSON");
 	}
@@ -969,6 +1265,19 @@ inline void JsonAddPrefixScalarFun(DataChunk &args, ExpressionState &state, Vect
 
 static void LoadInternal(JsonToolsLoadContext &ctx) {
+	child_list_t<LogicalType> empty_children;
+	auto json_extract_columns_function =
+	    ScalarFunction("json_extract_columns", {LogicalType::JSON(), LogicalType::VARCHAR, LogicalType::VARCHAR},
+	                   LogicalType::STRUCT(empty_children), JsonExtractColumnsFunction, JsonExtractColumnsBind, nullptr,
+	                   nullptr, JsonExtractColumnsInitLocalState);
+	json_extract_columns_function.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
+	RegisterScalarFunction(ctx, json_extract_columns_function);
+	auto json_extract_columns_default_separator_function = ScalarFunction(
+	    "json_extract_columns", {LogicalType::JSON(), LogicalType::VARCHAR}, LogicalType::STRUCT(empty_children),
+	    JsonExtractColumnsFunction, JsonExtractColumnsBind, nullptr, nullptr, JsonExtractColumnsInitLocalState);
+	json_extract_columns_default_separator_function.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
+	RegisterScalarFunction(ctx, json_extract_columns_default_separator_function);
+
 	auto json_flatten_scalar_function =
 	    ScalarFunction("json_flatten", {LogicalType::JSON()}, LogicalType::JSON(),
 	                   JsonFlattenScalarFun, nullptr, nullptr, nullptr, JsonFlattenInitLocalState);
diff --git a/test/sql/json_extract_columns.test b/test/sql/json_extract_columns.test
new file mode 100644
index 0000000..9a8245a
--- /dev/null
+++ b/test/sql/json_extract_columns.test
@@ -0,0 +1,110 @@
+# name: test/sql/json_extract_columns.test
+# description: json_extract_columns extracts regex-selected root keys into a struct of VARCHAR fields
+# group: [sql]
+
+require json_tools
+
+# Basic extraction
+query I
+SELECT (json_extract_columns('{"id": 5, "name": "duck"}', '{"id": "^id$", "name": "^name$"}', ',')).id;
+----
+5
+
+query I
+SELECT (json_extract_columns('{"id": 5, "name": "duck"}', '{"id": "^id$", "name": "^name$"}', ',')).name;
+----
+duck
+
+# Multiple matches and separators
+query I
+SELECT (json_extract_columns('{"a":1,"a2":2,"b":3}', '{"a":"^a","b":"^b$"}', '|')).a;
+----
+1|2
+
+query I
+SELECT (json_extract_columns('{"x":"a","xx":"b"}', '{"col":"x"}', '')).col;
+----
+ab
+
+query I
+SELECT (json_extract_columns('{"x":"a","xx":"b"}', '{"col":"x"}')).col;
+----
+ab
+
+query I
+SELECT (json_extract_columns('{"abc":1,"xabcx":2}', '{"v":"abc"}', ',')).v;
+----
+1,2
+
+# No matches return NULL
+query I
+SELECT (json_extract_columns('{"a":1}', '{"missing":"z"}', ',')).missing IS NULL;
+----
+true
+
+# Mixed value types stringify
+query I
+WITH res AS (
+    SELECT json_extract_columns('{"arr":[1,2],"obj":{"k":1},"flag":true,"none":null}',
+                                '{"arr":"arr","obj":"obj","flag":"flag","none":"none"}', ',') AS r)
+SELECT r.arr = '[1,2]' AND r.obj = '{"k":1}' AND r.flag = 'true' AND r.none = 'null' AS ok
+FROM res;
+----
+true
+
+# Deterministic ordering and case sensitivity controls
+query I
+WITH cte AS (
+    SELECT json_extract_columns('{"b":1,"a":2,"ab":3}', '{"v":"a"}', '|') AS r1,
+           json_extract_columns('{"b":1,"a":2,"ab":3}', '{"v":"a"}', '|') AS r2)
+SELECT r1.v = r2.v AS stable
+FROM cte;
+----
+true
+
+query I
+SELECT (json_extract_columns('{"Key": "Value"}', '{"k":"(?i)^key$"}', ',')).k;
+----
+Value
+
+query I
+SELECT (json_extract_columns('{"Key": "Value"}', '{"k":"^key$"}', ',')).k IS NULL;
+----
+true
+
+# Separator NULL errors
+statement error
+SELECT json_extract_columns('{"a":1}', '{"a":"a"}', NULL);
+----
+Invalid Input Error: json_extract_columns: separator cannot be NULL
+
+statement error
+SELECT json_extract_columns(NULL, '{"a":"a"}', NULL);
+----
+Invalid Input Error: json_extract_columns: separator cannot be NULL
+
+# Input shape errors
+statement error
+SELECT json_extract_columns('[1,2]', '{"a":"a"}', ',');
+----
+Invalid Input Error: json_extract_columns: expected JSON object input
+
+statement error
+SELECT json_extract_columns('{"a":1}', '["a"]', ',');
+----
+Binder Error: json_extract_columns: columns argument must be a JSON object
+
+statement error
+SELECT json_extract_columns('{"a":1}', '{"a": 1}', ',');
+----
+Binder Error: json_extract_columns: column patterns must be strings
+
+statement error
+SELECT json_extract_columns('{"a":1}', '{"a":".*", "a":"again"}', ',');
+----
+Binder Error: json_extract_columns: duplicate output column name "a"
+
+statement error
+SELECT json_extract_columns('{"a":1}', '{"a":"["}', ',');
+----
+<REGEX>:.*json_extract_columns: missing ].*