Skip to content

Commit ae3ebcb

Browse files
perf: optimize octet_length for string arrays
1 parent d20c5d6 commit ae3ebcb

File tree

1 file changed

+44
-2
lines changed

1 file changed

+44
-2
lines changed

datafusion/functions/src/string/octet_length.rs

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::compute::kernels::length::length;
1918
use arrow::datatypes::DataType;
2019
use std::any::Any;
2120

2221
use crate::utils::utf8_to_int_type;
22+
use arrow::array::{
23+
Array, ArrayRef, Int32Builder, Int64Builder, LargeStringArray, StringArray,
24+
StringViewArray,
25+
};
2326
use datafusion_common::types::logical_string;
2427
use datafusion_common::utils::take_function_args;
2528
use datafusion_common::{Result, ScalarValue};
@@ -28,6 +31,7 @@ use datafusion_expr::{
2831
TypeSignatureClass, Volatility,
2932
};
3033
use datafusion_macros::user_doc;
34+
use std::sync::Arc;
3135

3236
#[user_doc(
3337
doc_section(label = "String Functions"),
@@ -90,7 +94,45 @@ impl ScalarUDFImpl for OctetLengthFunc {
9094
let [array] = take_function_args(self.name(), &args.args)?;
9195

9296
match array {
93-
ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
97+
ColumnarValue::Array(v) => {
98+
let arr: ArrayRef = v.clone();
99+
100+
if let Some(arr) = arr.as_any().downcast_ref::<StringArray>() {
101+
let mut builder = Int32Builder::with_capacity(arr.len());
102+
for i in 0..arr.len() {
103+
if arr.is_null(i) {
104+
builder.append_null();
105+
} else {
106+
builder.append_value(arr.value_length(i) as i32);
107+
}
108+
}
109+
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
110+
} else if let Some(arr) = arr.as_any().downcast_ref::<LargeStringArray>()
111+
{
112+
let mut builder = Int64Builder::with_capacity(arr.len());
113+
for i in 0..arr.len() {
114+
if arr.is_null(i) {
115+
builder.append_null();
116+
} else {
117+
builder.append_value(arr.value_length(i) as i64);
118+
}
119+
}
120+
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
121+
} else if let Some(arr) = arr.as_any().downcast_ref::<StringViewArray>() {
122+
let mut builder = Int32Builder::with_capacity(arr.len());
123+
for i in 0..arr.len() {
124+
if arr.is_null(i) {
125+
builder.append_null();
126+
} else {
127+
builder.append_value(arr.value(i).len() as i32);
128+
}
129+
}
130+
Ok(ColumnarValue::Array(Arc::new(builder.finish())))
131+
} else {
132+
unreachable!("octet_length expects string arrays")
133+
}
134+
}
135+
94136
ColumnarValue::Scalar(v) => match v {
95137
ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
96138
v.as_ref().map(|x| x.len() as i32),

0 commit comments

Comments
 (0)