diff --git a/datafusion/functions/benches/chr.rs b/datafusion/functions/benches/chr.rs
index 9a6342ca40bb6..fcb72771acc42 100644
--- a/datafusion/functions/benches/chr.rs
+++ b/datafusion/functions/benches/chr.rs
@@ -19,6 +19,7 @@ extern crate criterion;
 
 use arrow::{array::PrimitiveArray, datatypes::Int64Type};
 use criterion::{Criterion, criterion_group, criterion_main};
+use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::string::chr;
 use rand::{Rng, SeedableRng};
@@ -35,11 +36,32 @@ pub fn seedable_rng() -> StdRng {
 }
 
 fn criterion_benchmark(c: &mut Criterion) {
-    let cot_fn = chr();
+    let chr_fn = chr();
+    let config_options = Arc::new(ConfigOptions::default());
+
+    // Scalar benchmarks
+    c.bench_function("chr/scalar", |b| {
+        let args = vec![ColumnarValue::Scalar(ScalarValue::Int64(Some(65)))];
+        let arg_fields = vec![Field::new("arg_0", DataType::Int64, true).into()];
+        b.iter(|| {
+            black_box(
+                chr_fn
+                    .invoke_with_args(ScalarFunctionArgs {
+                        args: args.clone(),
+                        arg_fields: arg_fields.clone(),
+                        number_rows: 1,
+                        return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
+                    })
+                    .unwrap(),
+            )
+        })
+    });
+
     let size = 1024;
     let input: PrimitiveArray<Int64Type> = {
         let null_density = 0.2;
-        let mut rng = StdRng::seed_from_u64(42);
+        let mut rng = seedable_rng();
         (0..size)
             .map(|_| {
                 if rng.random::<f32>() < null_density {
@@ -57,12 +79,11 @@ fn criterion_benchmark(c: &mut Criterion) {
         .enumerate()
         .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
         .collect::<Vec<_>>();
-    let config_options = Arc::new(ConfigOptions::default());
 
-    c.bench_function("chr", |b| {
+    c.bench_function("chr/array", |b| {
         b.iter(|| {
             black_box(
-                cot_fn
+                chr_fn
                     .invoke_with_args(ScalarFunctionArgs {
                         args: args.clone(),
                         arg_fields: arg_fields.clone(),
diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs
index ba011b94367e3..a8bfe67fd21f4 100644
--- a/datafusion/functions/src/string/chr.rs
+++ b/datafusion/functions/src/string/chr.rs
@@ -24,9 +24,9 @@ use arrow::datatypes::DataType;
 use arrow::datatypes::DataType::Int64;
 use arrow::datatypes::DataType::Utf8;
 
-use crate::utils::make_scalar_function;
 use datafusion_common::cast::as_int64_array;
-use datafusion_common::{Result, exec_err};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
 use datafusion_expr::{ColumnarValue, Documentation, Volatility};
 use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
 use datafusion_macros::user_doc;
@@ -119,7 +119,47 @@ impl ScalarUDFImpl for ChrFunc {
     }
 
     fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
-        make_scalar_function(chr, vec![])(&args.args)
+        let return_type = args.return_field.data_type();
+        let [arg] = take_function_args(self.name(), args.args)?;
+
+        match arg {
+            ColumnarValue::Scalar(scalar) => {
+                if scalar.is_null() {
+                    return Ok(ColumnarValue::Scalar(ScalarValue::try_from(
+                        return_type,
+                    )?));
+                }
+
+                let code_point = match scalar {
+                    ScalarValue::Int64(Some(v)) => v,
+                    _ => {
+                        return internal_err!(
+                            "Unexpected data type {:?} for function chr",
+                            scalar.data_type()
+                        );
+                    }
+                };
+
+                if let Ok(u) = u32::try_from(code_point)
+                    && let Some(c) = core::char::from_u32(u)
+                {
+                    Ok(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
+                        c.to_string(),
+                    ))))
+                } else {
+                    exec_err!("invalid Unicode scalar value: {code_point}")
+                }
+            }
+            ColumnarValue::Array(array) => {
+                if !matches!(array.data_type(), Int64) {
+                    return internal_err!(
+                        "Unexpected data type {:?} for function chr",
+                        array.data_type()
+                    );
+                }
+                Ok(ColumnarValue::Array(chr(&[array])?))
+            }
+        }
     }
 
     fn documentation(&self) -> Option<&Documentation> {