20073: perf: Optimize scalar path for chr function #217

augmentcode · 2026-01-30T11:26:13Z

The new scalar fast-path in ChrFunc::invoke_with_args isn’t covered by the existing unit tests (they only exercise the internal array helper chr). Consider adding a test that invokes the UDF with scalar inputs (valid/invalid/null) to guard this optimized branch against regressions.

_{🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.}

value:good-to-have; category:bug; feedback:The Augment AI reviewer is correct! There are only unit tests for the ColumnarValue::Array branch. It would be good to add some SQL Logic Tests for both scalar and array inputs. They would prevent regressions in the future.

gemini-code-assist · 2026-01-30T11:21:11Z

This character conversion and error handling logic is very similar to the logic in the chr array function (lines 50-57). To improve maintainability and avoid code duplication, consider extracting this logic into a small, private helper function that can be shared between both implementations. The compiler should be able to inline it, so there should be no performance regression.

value:good-to-have; category:bug; feedback:The Gemini AI reviewer is correct! It would be good to extract a helper function for the conversion of the i64 to a character/string and reuse it for both scalars and arrays. It would prevent double maintenance of the code.

-Original file line number
+Diff line change
@@ Expand Up / @@ -19,6 +19,7 @@ extern crate criterion; @@
     use arrow::{array::PrimitiveArray, datatypes::Int64Type};
     use criterion::{Criterion, criterion_group, criterion_main};
+    use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
     use datafusion_functions::string::chr;
     use rand::{Rng, SeedableRng};
@@ Expand All / @@ -35,11 +36,32 @@ pub fn seedable_rng() -> StdRng { @@
     }
     fn criterion_benchmark(c: &mut Criterion) {
-        let cot_fn = chr();
+        let chr_fn = chr();
+        let config_options = Arc::new(ConfigOptions::default());
+        // Scalar benchmarks
+        c.bench_function("chr/scalar", |b| {
+            let args = vec![ColumnarValue::Scalar(ScalarValue::Int64(Some(65)))];
+            let arg_fields = vec![Field::new("arg_0", DataType::Int64, true).into()];
+            b.iter(|| {
+                black_box(
+                    chr_fn
+                        .invoke_with_args(ScalarFunctionArgs {
+                            args: args.clone(),
+                            arg_fields: arg_fields.clone(),
+                            number_rows: 1,
+                            return_field: Field::new("f", DataType::Utf8, true).into(),
+                            config_options: Arc::clone(&config_options),
+                        })
+                        .unwrap(),
+                )
+            })
+        });
         let size = 1024;
         let input: PrimitiveArray<Int64Type> = {
             let null_density = 0.2;
-            let mut rng = StdRng::seed_from_u64(42);
+            let mut rng = seedable_rng();
             (0..size)
                 .map(|_| {
                     if rng.random::<f32>() < null_density {
@@ Expand All / @@ -57,12 +79,11 @@ fn criterion_benchmark(c: &mut Criterion) { @@
             .enumerate()
             .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
             .collect::<Vec<_>>();
-        let config_options = Arc::new(ConfigOptions::default());
-        c.bench_function("chr", |b| {
+        c.bench_function("chr/array", |b| {
             b.iter(|| {
                 black_box(
-                    cot_fn
+                    chr_fn
                         .invoke_with_args(ScalarFunctionArgs {
                             args: args.clone(),
                             arg_fields: arg_fields.clone(),
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -24,9 +24,9 @@ use arrow::datatypes::DataType; @@
     use arrow::datatypes::DataType::Int64;
     use arrow::datatypes::DataType::Utf8;
-    use crate::utils::make_scalar_function;
     use datafusion_common::cast::as_int64_array;
-    use datafusion_common::{Result, exec_err};
+    use datafusion_common::utils::take_function_args;
+    use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
     use datafusion_expr::{ColumnarValue, Documentation, Volatility};
     use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
     use datafusion_macros::user_doc;
@@ Expand Down Expand Up / @@ -119,7 +119,47 @@ impl ScalarUDFImpl for ChrFunc { @@
         }
         fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
-            make_scalar_function(chr, vec![])(&args.args)
+            let return_type = args.return_field.data_type();
+            let [arg] = take_function_args(self.name(), args.args)?;
+            match arg {
+                ColumnarValue::Scalar(scalar) => {
+                    if scalar.is_null() {
+                        return Ok(ColumnarValue::Scalar(ScalarValue::try_from(
+                            return_type,
+                        )?));
+                    }
+                    let code_point = match scalar {
+                        ScalarValue::Int64(Some(v)) => v,
+                        _ => {
+                            return internal_err!(
+                                "Unexpected data type {:?} for function chr",
+                                scalar.data_type()
+                            );
+                        }
+                    };
+                    if let Ok(u) = u32::try_from(code_point)
+                        && let Some(c) = core::char::from_u32(u)
+                    {
+                        Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+                            c.to_string(),
+                        ))))
+                    } else {
+                        exec_err!("invalid Unicode scalar value: {code_point}")
+                    }
+                }
+                ColumnarValue::Array(array) => {
+                    if !matches!(array.data_type(), Int64) {
+                        return internal_err!(
+                            "Unexpected data type {:?} for function chr",
+                            array.data_type()
+                        );
+                    }
+                    Ok(ColumnarValue::Array(chr(&[array])?))
+                }
+            }
         }
         fn documentation(&self) -> Option<&Documentation> {
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

20073: perf: Optimize scalar path for chr function #217

Uh oh!

Diff view

Diff view

There are no files selected for viewing

augmentcode bot Jan 30, 2026

Uh oh!

martin-augment Jan 30, 2026

Uh oh!

gemini-code-assist bot Jan 30, 2026

Uh oh!

martin-augment Jan 30, 2026

Uh oh!

20073: perf: Optimize scalar path for chr function #217

Are you sure you want to change the base?

Uh oh!

20073: perf: Optimize scalar path for chr function #217

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

augmentcode bot Jan 30, 2026

Choose a reason for hiding this comment

Uh oh!

martin-augment Jan 30, 2026

Choose a reason for hiding this comment

Uh oh!

gemini-code-assist bot Jan 30, 2026

Choose a reason for hiding this comment

Uh oh!

martin-augment Jan 30, 2026

Choose a reason for hiding this comment

Uh oh!