From 386176add66b89df7bd0b85c2dc1b91afa89dc48 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 30 Dec 2025 15:01:34 -0700 Subject: [PATCH 1/3] optimize split_part --- datafusion/functions/Cargo.toml | 5 ++++ datafusion/functions/src/string/split_part.rs | 26 +++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 5ceeee57b0be4..bf1cc3a7fc802 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -294,3 +294,8 @@ required-features = ["unicode_expressions"] harness = false name = "levenshtein" required-features = ["unicode_expressions"] + +[[bench]] +harness = false +name = "split_part" +required-features = ["string_expressions"] diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index 8ac505bf360f6..c2da3791b06be 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -219,22 +219,22 @@ where .try_for_each(|((string, delimiter), n)| -> Result<(), DataFusionError> { match (string, delimiter, n) { (Some(string), Some(delimiter), Some(n)) => { - let split_string: Vec<&str> = string.split(delimiter).collect(); - let len = split_string.len(); - - let index = match n.cmp(&0) { - std::cmp::Ordering::Less => len as i64 + n, + let result = match n.cmp(&0) { + std::cmp::Ordering::Greater => { + // Positive index: use nth() to avoid collecting all parts + // This stops iteration as soon as we find the nth element + string.split(delimiter).nth((n - 1) as usize) + } + std::cmp::Ordering::Less => { + // Negative index: use rsplit().nth() to efficiently get from the end + // rsplit iterates in reverse, so -1 means first from rsplit (index 0) + string.rsplit(delimiter).nth((-n - 1) as usize) + } std::cmp::Ordering::Equal => { return exec_err!("field position must not be zero"); } - std::cmp::Ordering::Greater => n - 1, - } as usize; - - if index < len { - builder.append_value(split_string[index]); - } else { - builder.append_value(""); - } + }; + builder.append_value(result.unwrap_or("")); } _ => builder.append_null(), } From 36ea121c18dc61360e6c1b7e23dd8933e5c41ae7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 30 Dec 2025 15:02:11 -0700 Subject: [PATCH 2/3] optimize split_part --- datafusion/functions/benches/split_part.rs | 382 +++++++++++++++++++++ 1 file changed, 382 insertions(+) create mode 100644 datafusion/functions/benches/split_part.rs diff --git a/datafusion/functions/benches/split_part.rs b/datafusion/functions/benches/split_part.rs new file mode 100644 index 0000000000000..e524a68c2bf74 --- /dev/null +++ b/datafusion/functions/benches/split_part.rs @@ -0,0 +1,382 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +extern crate criterion; + +use arrow::array::{ArrayRef, Int64Array, StringArray, StringViewArray}; +use arrow::datatypes::{DataType, Field}; +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use datafusion_common::config::ConfigOptions; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; +use datafusion_functions::string::split_part; +use rand::distr::Alphanumeric; +use rand::prelude::StdRng; +use rand::{Rng, SeedableRng}; +use std::hint::black_box; +use std::sync::Arc; + +const N_ROWS: usize = 8192; + +/// Generate test data for split_part benchmarks +/// Creates strings with multiple parts separated by the delimiter +fn gen_split_part_data( + n_rows: usize, + num_parts: usize, // number of parts in each string (separated by delimiter) + part_len: usize, // length of each part + delimiter: &str, // the delimiter to use + use_string_view: bool, // false -> StringArray, true -> StringViewArray +) -> (ColumnarValue, ColumnarValue) { + let mut rng = StdRng::seed_from_u64(42); + + let mut strings: Vec = Vec::with_capacity(n_rows); + for _ in 0..n_rows { + let mut parts: Vec = Vec::with_capacity(num_parts); + for _ in 0..num_parts { + let part: String = (&mut rng) + .sample_iter(&Alphanumeric) + .take(part_len) + .map(char::from) + .collect(); + parts.push(part); + } + strings.push(parts.join(delimiter)); + } + + let delimiters: Vec = vec![delimiter.to_string(); n_rows]; + + if use_string_view { + let string_array: StringViewArray = strings.into_iter().map(Some).collect(); + let delimiter_array: StringViewArray = delimiters.into_iter().map(Some).collect(); + ( + ColumnarValue::Array(Arc::new(string_array) as ArrayRef), + ColumnarValue::Array(Arc::new(delimiter_array) as ArrayRef), + ) + } else { + let string_array: StringArray = strings.into_iter().map(Some).collect(); + let delimiter_array: StringArray = delimiters.into_iter().map(Some).collect(); + ( + ColumnarValue::Array(Arc::new(string_array) as ArrayRef), + ColumnarValue::Array(Arc::new(delimiter_array) as ArrayRef), + ) + } +} + +fn gen_positions(n_rows: usize, position: i64) -> ColumnarValue { + let positions: Vec = vec![position; n_rows]; + ColumnarValue::Array(Arc::new(Int64Array::from(positions)) as ArrayRef) +} + +fn criterion_benchmark(c: &mut Criterion) { + let split_part_func = split_part(); + let config_options = Arc::new(ConfigOptions::default()); + + let mut group = c.benchmark_group("split_part"); + + // Test different scenarios + // Scenario 1: Single-char delimiter, first position (should be fastest with optimization) + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); + let positions = gen_positions(N_ROWS, 1); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("single_char_delim", "pos_first"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 2: Single-char delimiter, middle position + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); + let positions = gen_positions(N_ROWS, 5); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("single_char_delim", "pos_middle"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 3: Single-char delimiter, last position + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); + let positions = gen_positions(N_ROWS, 10); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("single_char_delim", "pos_last"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 4: Single-char delimiter, negative position (last element) + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", false); + let positions = gen_positions(N_ROWS, -1); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("single_char_delim", "pos_negative"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 5: Multi-char delimiter, first position + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, "~@~", false); + let positions = gen_positions(N_ROWS, 1); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("multi_char_delim", "pos_first"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 6: Multi-char delimiter, middle position + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, "~@~", false); + let positions = gen_positions(N_ROWS, 5); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("multi_char_delim", "pos_middle"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 7: StringViewArray, single-char delimiter, first position + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 10, 8, ".", true); + let positions = gen_positions(N_ROWS, 1); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("string_view_single_char", "pos_first"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 8: Many parts (20), position near end - shows benefit of early termination + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 20, 8, ".", false); + let positions = gen_positions(N_ROWS, 2); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("many_parts_20", "pos_second"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + // Scenario 9: Long strings with many parts - worst case for old implementation + { + let (strings, delimiters) = gen_split_part_data(N_ROWS, 50, 16, "/", false); + let positions = gen_positions(N_ROWS, 1); + let args = vec![strings, delimiters, positions]; + let arg_fields: Vec<_> = args + .iter() + .enumerate() + .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .collect(); + let return_field = Field::new("f", DataType::Utf8, true).into(); + + group.bench_function( + BenchmarkId::new("long_strings_50_parts", "pos_first"), + |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }, + ); + } + + group.finish(); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); From 04fe9b46a7db848d77d4d02dd2bf9a8e07fab2ad Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 30 Dec 2025 15:04:46 -0700 Subject: [PATCH 3/3] cargo fmt --- datafusion/functions/benches/split_part.rs | 244 ++++++++++----------- 1 file changed, 122 insertions(+), 122 deletions(-) diff --git a/datafusion/functions/benches/split_part.rs b/datafusion/functions/benches/split_part.rs index e524a68c2bf74..e23610338d15c 100644 --- a/datafusion/functions/benches/split_part.rs +++ b/datafusion/functions/benches/split_part.rs @@ -19,7 +19,7 @@ extern crate criterion; use arrow::array::{ArrayRef, Int64Array, StringArray, StringViewArray}; use arrow::datatypes::{DataType, Field}; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string::split_part; @@ -35,10 +35,10 @@ const N_ROWS: usize = 8192; /// Creates strings with multiple parts separated by the delimiter fn gen_split_part_data( n_rows: usize, - num_parts: usize, // number of parts in each string (separated by delimiter) - part_len: usize, // length of each part - delimiter: &str, // the delimiter to use - use_string_view: bool, // false -> StringArray, true -> StringViewArray + num_parts: usize, // number of parts in each string (separated by delimiter) + part_len: usize, // length of each part + delimiter: &str, // the delimiter to use + use_string_view: bool, // false -> StringArray, true -> StringViewArray ) -> (ColumnarValue, ColumnarValue) { let mut rng = StdRng::seed_from_u64(42); @@ -95,28 +95,27 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); - group.bench_function( - BenchmarkId::new("single_char_delim", "pos_first"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, - ); + group.bench_function(BenchmarkId::new("single_char_delim", "pos_first"), |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }); } // Scenario 2: Single-char delimiter, middle position @@ -127,28 +126,27 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); - group.bench_function( - BenchmarkId::new("single_char_delim", "pos_middle"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, - ); + group.bench_function(BenchmarkId::new("single_char_delim", "pos_middle"), |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }); } // Scenario 3: Single-char delimiter, last position @@ -159,28 +157,27 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); - group.bench_function( - BenchmarkId::new("single_char_delim", "pos_last"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, - ); + group.bench_function(BenchmarkId::new("single_char_delim", "pos_last"), |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }); } // Scenario 4: Single-char delimiter, negative position (last element) @@ -191,7 +188,9 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); @@ -223,28 +222,27 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); - group.bench_function( - BenchmarkId::new("multi_char_delim", "pos_first"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, - ); + group.bench_function(BenchmarkId::new("multi_char_delim", "pos_first"), |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }); } // Scenario 6: Multi-char delimiter, middle position @@ -255,28 +253,27 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); - group.bench_function( - BenchmarkId::new("multi_char_delim", "pos_middle"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, - ); + group.bench_function(BenchmarkId::new("multi_char_delim", "pos_middle"), |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }); } // Scenario 7: StringViewArray, single-char delimiter, first position @@ -287,7 +284,9 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); @@ -319,28 +318,27 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into(); - group.bench_function( - BenchmarkId::new("many_parts_20", "pos_second"), - |b| { - b.iter(|| { - black_box( - split_part_func - .invoke_with_args(ScalarFunctionArgs { - args: args.clone(), - arg_fields: arg_fields.clone(), - number_rows: N_ROWS, - return_field: Arc::clone(&return_field), - config_options: Arc::clone(&config_options), - }) - .expect("split_part should work"), - ) - }) - }, - ); + group.bench_function(BenchmarkId::new("many_parts_20", "pos_second"), |b| { + b.iter(|| { + black_box( + split_part_func + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + arg_fields: arg_fields.clone(), + number_rows: N_ROWS, + return_field: Arc::clone(&return_field), + config_options: Arc::clone(&config_options), + }) + .expect("split_part should work"), + ) + }) + }); } // Scenario 9: Long strings with many parts - worst case for old implementation @@ -351,7 +349,9 @@ fn criterion_benchmark(c: &mut Criterion) { let arg_fields: Vec<_> = args .iter() .enumerate() - .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) + .map(|(idx, arg)| { + Field::new(format!("arg_{idx}"), arg.data_type(), true).into() + }) .collect(); let return_field = Field::new("f", DataType::Utf8, true).into();