
Commit 5bc4a5d

refactor(func): optimize function array_aggregate (#19005)
* refine
* fix
* update
* update
1 parent 51133e7

File tree

9 files changed, +479 -121 lines changed


Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.
src/query/formats/src/column_from_json.rs (new file; path inferred from the `pub mod column_from_json;` declaration added in src/query/formats/src/lib.rs below)

Lines changed: 92 additions & 0 deletions
@@ -0,0 +1,92 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use chrono_tz::UTC;
use databend_common_exception::ErrorCode;
use databend_common_exception::Result;
use databend_common_expression::types::DataType;
use databend_common_expression::Column;
use databend_common_expression::ColumnBuilder;
use databend_common_io::GeometryDataType;
use jiff::tz::TimeZone;
use serde_json::Value;

use crate::field_decoder::FieldJsonAstDecoder;
use crate::FileFormatOptionsExt;

fn default_json_options() -> FileFormatOptionsExt {
    FileFormatOptionsExt {
        ident_case_sensitive: false,
        headers: 0,
        json_compact: false,
        json_strings: false,
        disable_variant_check: false,
        timezone: UTC,
        jiff_timezone: TimeZone::UTC,
        is_select: false,
        is_clickhouse: false,
        is_rounding_mode: true,
        geometry_format: GeometryDataType::default(),
        enable_dst_hour_fix: false,
    }
}

pub fn column_from_json_value(data_type: &DataType, json: Value) -> Result<Column> {
    let rows = match json {
        Value::Array(values) => values,
        other => {
            return Err(ErrorCode::BadArguments(format!(
                "from_json! expects a json array to describe column values, got {other:?}"
            )))
        }
    };

    let options = default_json_options();
    let decoder = FieldJsonAstDecoder::create(&options);
    let mut builder = ColumnBuilder::with_capacity(data_type, rows.len());
    for value in rows {
        decoder.read_field(&mut builder, &value)?;
    }
    Ok(builder.build())
}

#[macro_export]
macro_rules! column_from_json {
    ($data_type:expr, $($json:tt)+) => {{
        $crate::column_from_json_value(&$data_type, ::serde_json::json!($($json)+))
            .expect("from_json! expects a valid json literal for the provided type")
    }};
}

#[cfg(test)]
mod tests {
    use databend_common_expression::types::*;
    use databend_common_expression::FromData;

    #[test]
    fn test_from_json_macro_strings() {
        let column = column_from_json!(DataType::String, ["a", "b", "c"]);
        assert_eq!(column, StringType::from_data(vec!["a", "b", "c"]));
    }

    #[test]
    fn test_from_json_nullable_booleans() {
        let data_type = DataType::Nullable(Box::new(DataType::Boolean));
        let column = column_from_json!(data_type, [true, null, false]);
        assert_eq!(
            column,
            BooleanType::from_data_with_validity(vec![true, false, false], vec![true, false, true])
        );
    }
}
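
For readers skimming the diff, a minimal usage sketch of the new helper follows. The function name and the Int64 column type are illustrative assumptions, not part of this commit; the crate name databend_common_formats comes from the Cargo.toml change below.

use databend_common_expression::types::{DataType, NumberDataType};
use databend_common_expression::Column;
use databend_common_formats::column_from_json_value;
use serde_json::json;

// Hypothetical helper: build a 3-row Int64 column from a JSON array literal.
// Each JSON element becomes one row; a non-array literal is rejected with
// ErrorCode::BadArguments by column_from_json_value.
fn int64_column_from_json() -> Column {
    column_from_json_value(&DataType::Number(NumberDataType::Int64), json!([1, 2, 3]))
        .expect("valid json literal for the provided type")
}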

src/query/formats/src/field_decoder/json_ast.rs

Lines changed: 14 additions & 3 deletions
@@ -113,9 +113,20 @@ impl FieldJsonAstDecoder {
             ColumnBuilder::Geography(c) => self.read_geography(c, value),
             ColumnBuilder::Interval(c) => self.read_interval(c, value),
             ColumnBuilder::Vector(c) => self.read_vector(c, value),
-            ColumnBuilder::EmptyArray { .. } | ColumnBuilder::EmptyMap { .. } => {
-                Err(ErrorCode::Unimplemented("empty array/map literal"))
-            }
+            ColumnBuilder::EmptyArray { len } => match value.as_array() {
+                Some(array) if array.is_empty() => {
+                    *len += 1;
+                    Ok(())
+                }
+                _ => Err(ErrorCode::BadBytes("Incorrect empty array value")),
+            },
+            ColumnBuilder::EmptyMap { len } => match value.as_object() {
+                Some(array) if array.is_empty() => {
+                    *len += 1;
+                    Ok(())
+                }
+                _ => Err(ErrorCode::BadBytes("Incorrect empty map value")),
+            },
             ColumnBuilder::Opaque(_) => Err(ErrorCode::Unimplemented(
                 "Opaque type not supported in json_ast",
             )),
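
One practical effect of this change, sketched below using the new column_from_json_value helper as the entry point (the function name and row count are illustrative assumptions): an empty JSON array now fills an EmptyArray builder instead of failing with Unimplemented, and any non-empty value is rejected as BadBytes.

use databend_common_expression::types::DataType;
use databend_common_formats::column_from_json_value;
use serde_json::json;

// Two rows, each the empty-array literal []; the decoder now bumps the
// EmptyArray builder's length for each one instead of returning Unimplemented.
fn empty_array_rows() {
    let column = column_from_json_value(&DataType::EmptyArray, json!([[], []]))
        .expect("empty array literals are accepted after this change");
    assert_eq!(column.len(), 2);
}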

src/query/formats/src/lib.rs

Lines changed: 2 additions & 3 deletions
@@ -12,14 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#![allow(clippy::uninlined_format_args)]
 #![feature(box_patterns)]
 #![feature(cursor_split)]

-extern crate core;
-
 mod binary;
 mod clickhouse;
+pub mod column_from_json;
 mod common_settings;
 mod delimiter;
 mod field_decoder;
@@ -28,6 +26,7 @@ mod file_format_type;
 pub mod output_format;

 pub use clickhouse::ClickhouseFormatType;
+pub use column_from_json::column_from_json_value;
 pub use delimiter::RecordDelimiter;
 pub use field_decoder::*;
 pub use file_format_type::parse_timezone;

src/query/functions/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ unicase = { workspace = true }
 [dev-dependencies]
 comfy-table = { workspace = true }
 databend-common-ast = { workspace = true }
+databend-common-formats = { workspace = true }
 divan = { workspace = true }
 goldenfile = { workspace = true }

src/query/functions/src/aggregates/aggregator_common.rs

Lines changed: 13 additions & 1 deletion
@@ -173,7 +173,7 @@ pub fn eval_aggr(
     rows: usize,
     sort_descs: Vec<AggregateFunctionSortDesc>,
 ) -> Result<(Column, DataType)> {
-    eval_aggr_for_test(name, params, entries, rows, false, sort_descs)
+    eval_aggr_inner(name, params, entries, rows, false, sort_descs)
 }

 pub fn eval_aggr_for_test(
@@ -183,6 +183,18 @@ pub fn eval_aggr_for_test(
     rows: usize,
     with_serialize: bool,
     sort_descs: Vec<AggregateFunctionSortDesc>,
+) -> Result<(Column, DataType)> {
+    eval_aggr_inner(name, params, entries, rows, with_serialize, sort_descs)
+}
+
+#[inline]
+fn eval_aggr_inner(
+    name: &str,
+    params: Vec<Scalar>,
+    entries: &[BlockEntry],
+    rows: usize,
+    with_serialize: bool,
+    sort_descs: Vec<AggregateFunctionSortDesc>,
 ) -> Result<(Column, DataType)> {
     let factory = AggregateFunctionFactory::instance();
     let arguments = entries.iter().map(BlockEntry::data_type).collect();
