From 981f87c73b0cba2dd642a2e0ed50f17c1ee29fe1 Mon Sep 17 00:00:00 2001 From: foskey51 Date: Thu, 20 Nov 2025 09:41:13 +0000 Subject: [PATCH 1/5] chore: enforce clippy lint needless_pass_by_value to datafusion-functions-nested --- datafusion/functions-nested/src/array_has.rs | 26 ++++----- datafusion/functions-nested/src/flatten.rs | 12 ++-- datafusion/functions-nested/src/lib.rs | 3 + datafusion/functions-nested/src/map.rs | 56 +++++++++--------- datafusion/functions-nested/src/position.rs | 4 +- datafusion/functions-nested/src/remove.rs | 10 ++-- datafusion/functions-nested/src/replace.rs | 14 ++--- datafusion/functions-nested/src/set_ops.rs | 18 +++--- datafusion/functions-nested/src/string.rs | 60 ++++++++++---------- 9 files changed, 103 insertions(+), 100 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 080b2f16d92f..a230d86b445f 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -231,12 +231,12 @@ fn array_has_inner_for_scalar( needle: &dyn Datum, ) -> Result { let haystack = haystack.as_ref().try_into()?; - array_has_dispatch_for_scalar(haystack, needle) + array_has_dispatch_for_scalar(&haystack, needle) } fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result { let haystack = haystack.as_ref().try_into()?; - array_has_dispatch_for_array(haystack, needle) + array_has_dispatch_for_array(&haystack, needle) } enum ArrayWrapper<'a> { @@ -317,8 +317,8 @@ impl<'a> ArrayWrapper<'a> { } } -fn array_has_dispatch_for_array( - haystack: ArrayWrapper<'_>, +fn array_has_dispatch_for_array<'a>( + haystack: &ArrayWrapper<'a>, needle: &ArrayRef, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); @@ -338,7 +338,7 @@ fn array_has_dispatch_for_array( } fn array_has_dispatch_for_scalar( - haystack: ArrayWrapper<'_>, + haystack: &ArrayWrapper<'_>, needle: &dyn Datum, ) -> Result { let values = haystack.values(); @@ -402,8 +402,8 @@ fn general_array_has_for_all_and_any<'a>( let arr_values = converter.convert_columns(&[arr])?; let sub_arr_values = converter.convert_columns(&[sub_arr])?; boolean_builder.append_value(general_array_has_all_and_any_kernel( - arr_values, - sub_arr_values, + &arr_values, + &sub_arr_values, comparison_type, )); } else { @@ -427,8 +427,8 @@ fn array_has_all_and_any_string_internal<'a>( let haystack_array = string_array_to_vec(&arr); let needle_array = string_array_to_vec(&sub_arr); boolean_builder.append_value(array_has_string_kernel( - haystack_array, - needle_array, + &haystack_array, + &needle_array, comparison_type, )); } @@ -633,8 +633,8 @@ enum ComparisonType { } fn array_has_string_kernel( - haystack: Vec>, - needle: Vec>, + haystack: &[Option<&str>], + needle: &[Option<&str>], comparison_type: ComparisonType, ) -> bool { match comparison_type { @@ -650,8 +650,8 @@ fn array_has_string_kernel( } fn general_array_has_all_and_any_kernel( - haystack_rows: Rows, - needle_rows: Rows, + haystack_rows: &Rows, + needle_rows: &Rows, comparison_type: ComparisonType, ) -> bool { match comparison_type { diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 49f4110faeaa..e84a942fab2a 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -145,7 +145,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let (inner_field, inner_offsets, inner_values, _) = as_list_array(&values)?.clone().into_parts(); let offsets = - get_offsets_for_flatten::(inner_offsets, offsets); + get_offsets_for_flatten::(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -159,7 +159,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let (inner_field, inner_offsets, inner_values, _) = as_large_list_array(&values)?.clone().into_parts(); let offsets = - get_offsets_for_flatten::(inner_offsets, offsets); + get_offsets_for_flatten::(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -180,7 +180,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { List(_) => { let (inner_field, inner_offsets, inner_values, _) = as_list_array(&values)?.clone().into_parts(); - let offsets = get_large_offsets_for_flatten(inner_offsets, offsets); + let offsets = get_large_offsets_for_flatten(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -194,7 +194,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let (inner_field, inner_offsets, inner_values, _) = as_large_list_array(&values)?.clone().into_parts(); let offsets = - get_offsets_for_flatten::(inner_offsets, offsets); + get_offsets_for_flatten::(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -217,7 +217,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { // Create new offsets that are equivalent to `flatten` the array. fn get_offsets_for_flatten( inner_offsets: OffsetBuffer, - outer_offsets: OffsetBuffer

, + outer_offsets: &OffsetBuffer

, ) -> OffsetBuffer { let buffer = inner_offsets.into_inner(); let offsets: Vec = outer_offsets @@ -230,7 +230,7 @@ fn get_offsets_for_flatten( // Create new large offsets that are equivalent to `flatten` the array. fn get_large_offsets_for_flatten( inner_offsets: OffsetBuffer, - outer_offsets: OffsetBuffer

, + outer_offsets: &OffsetBuffer

, ) -> OffsetBuffer { let buffer = inner_offsets.into_inner(); let offsets: Vec = outer_offsets diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 3a66e6569476..51210b9ae22d 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -23,6 +23,9 @@ // Make sure fast / cheap clones on Arc are explicit: // https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] //! Nested type Functions for [DataFusion]. //! diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index c3c11beaf967..fe9bc609c013 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -89,7 +89,7 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { let values = get_first_array_ref(values_arg)?; - make_map_batch_internal(keys, values, can_evaluate_to_const, keys_arg.data_type()) + make_map_batch_internal(&keys, &values, can_evaluate_to_const, &keys_arg.data_type()) } /// Validates that map keys are non-null and unique. @@ -126,10 +126,10 @@ fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { } fn make_map_batch_internal( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, can_evaluate_to_const: bool, - data_type: DataType, + data_type: &DataType, ) -> Result { if keys.len() != values.len() { return exec_err!("map requires key and value lists to have the same length"); @@ -159,8 +159,8 @@ fn make_map_batch_internal( let mut entry_offsets_buffer = VecDeque::new(); entry_offsets_buffer.push_back(0); - entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(&keys))); - entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(&values))); + entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(keys))); + entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(values))); entry_offsets_buffer.push_back(keys.len() as u32); let entry_struct: Vec<(Arc, ArrayRef)> = entry_struct_buffer.into(); @@ -368,8 +368,8 @@ fn get_element_type(data_type: &DataType) -> Result<&DataType> { /// +-----------+ +-----------+ /// ```text fn make_map_array_internal( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, ) -> Result { // Save original data types and array length before list_to_arrays transforms them let keys_data_type = keys.data_type().clone(); @@ -380,14 +380,14 @@ fn make_map_array_internal( // This tells us which MAP values are NULL (not which keys within maps are null) let nulls_bitmap = keys.nulls().cloned(); - let keys = list_to_arrays::(&keys); - let values = list_to_arrays::(&values); + let keys = list_to_arrays::(keys); + let values = list_to_arrays::(values); build_map_array( - keys, - values, - keys_data_type, - values_data_type, + &keys, + &values, + &keys_data_type, + &values_data_type, original_len, nulls_bitmap, ) @@ -396,8 +396,8 @@ fn make_map_array_internal( /// Helper function specifically for FixedSizeList inputs /// Similar to make_map_array_internal but uses fixed_size_list_to_arrays instead of list_to_arrays fn make_map_array_from_fixed_size_list( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, ) -> Result { // Save original data types and array length let keys_data_type = keys.data_type().clone(); @@ -407,14 +407,14 @@ fn make_map_array_from_fixed_size_list( // Save the nulls bitmap from the original keys array let nulls_bitmap = keys.nulls().cloned(); - let keys = fixed_size_list_to_arrays(&keys); - let values = fixed_size_list_to_arrays(&values); + let keys = fixed_size_list_to_arrays(keys); + let values = fixed_size_list_to_arrays(values); build_map_array( - keys, - values, - keys_data_type, - values_data_type, + &keys, + &values, + &keys_data_type, + &values_data_type, original_len, nulls_bitmap, ) @@ -422,10 +422,10 @@ fn make_map_array_from_fixed_size_list( /// Common logic to build a MapArray from decomposed list arrays fn build_map_array( - keys: Vec, - values: Vec, - keys_data_type: DataType, - values_data_type: DataType, + keys: &[ArrayRef], + values: &[ArrayRef], + keys_data_type: &DataType, + values_data_type: &DataType, original_len: usize, nulls_bitmap: Option, ) -> Result { @@ -470,8 +470,8 @@ fn build_map_array( let (flattened_keys, flattened_values) = if key_array_vec.is_empty() { // All maps are NULL - create empty arrays // We need to infer the data type from the original keys/values arrays - let key_type = get_element_type(&keys_data_type)?; - let value_type = get_element_type(&values_data_type)?; + let key_type = get_element_type(keys_data_type)?; + let value_type = get_element_type(values_data_type)?; ( arrow::array::new_empty_array(key_type), diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index a585da06102a..b390bf3c4226 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -181,13 +181,13 @@ fn general_position_dispatch(args: &[ArrayRef]) -> Result(list_array, element_array, arr_from) + generic_position::(list_array, element_array, &arr_from) } fn generic_position( list_array: &GenericListArray, element_array: &ArrayRef, - arr_from: Vec, // 0-indexed + arr_from: &[i64], // 0-indexed ) -> Result { let mut data = Vec::with_capacity(list_array.len()); diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index d330606cdd89..e1ebc9cda0bf 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -289,7 +289,7 @@ pub fn array_remove_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_remove", args)?; let arr_n = vec![1; array.len()]; - array_remove_internal(array, element, arr_n) + array_remove_internal(array, element, &arr_n) } /// Array_remove_n SQL function @@ -297,7 +297,7 @@ pub fn array_remove_n_inner(args: &[ArrayRef]) -> Result { let [array, element, max] = take_function_args("array_remove_n", args)?; let arr_n = as_int64_array(max)?.values().to_vec(); - array_remove_internal(array, element, arr_n) + array_remove_internal(array, element, &arr_n) } /// Array_remove_all SQL function @@ -305,13 +305,13 @@ pub fn array_remove_all_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_remove_all", args)?; let arr_n = vec![i64::MAX; array.len()]; - array_remove_internal(array, element, arr_n) + array_remove_internal(array, element, &arr_n) } fn array_remove_internal( array: &ArrayRef, element_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { match array.data_type() { DataType::List(_) => { @@ -348,7 +348,7 @@ fn array_remove_internal( fn general_remove( list_array: &GenericListArray, element_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { let data_type = list_array.value_type(); let mut new_values = vec![]; diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 4314d41419bc..079c28175d45 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -328,7 +328,7 @@ fn general_replace( list_array: &GenericListArray, from_array: &ArrayRef, to_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { // Build up the offsets for the final output array let mut offsets: Vec = vec![O::usize_as(0)]; @@ -426,11 +426,11 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::Null => Ok(new_null_array(array.data_type(), 1)), array_type => exec_err!("array_replace does not support type '{array_type}'."), @@ -445,11 +445,11 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::Null => Ok(new_null_array(array.data_type(), 1)), array_type => { @@ -466,11 +466,11 @@ pub(crate) fn array_replace_all_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::Null => Ok(new_null_array(array.data_type(), 1)), array_type => { diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 1e7ae5c147a8..5c185d946694 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -346,7 +346,7 @@ fn generic_set_lists( l: &GenericListArray, r: &GenericListArray, field: Arc, - set_op: SetOp, + set_op: &SetOp, ) -> Result { if l.is_empty() || l.value_type().is_null() { let field = Arc::new(Field::new_list_field(r.value_type(), true)); @@ -380,7 +380,7 @@ fn generic_set_lists( let l_iter = l_values.iter().sorted().dedup(); let values_set: HashSet<_> = l_iter.clone().collect(); - let mut rows = if set_op == SetOp::Union { + let mut rows = if *set_op == SetOp::Union { l_iter.collect() } else { vec![] @@ -428,7 +428,7 @@ fn generic_set_lists( fn general_set_op( array1: &ArrayRef, array2: &ArrayRef, - set_op: SetOp, + set_op: &SetOp, ) -> Result { fn empty_array(data_type: &DataType, len: usize, large: bool) -> Result { let field = Arc::new(Field::new_list_field(data_type.clone(), true)); @@ -456,28 +456,28 @@ fn general_set_op( array1.len(), ))), (Null, List(field)) => { - if set_op == SetOp::Intersect { + if *set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), false); } let array = as_list_array(&array2)?; general_array_distinct::(array, field) } (List(field), Null) => { - if set_op == SetOp::Intersect { + if *set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), false); } let array = as_list_array(&array1)?; general_array_distinct::(array, field) } (Null, LargeList(field)) => { - if set_op == SetOp::Intersect { + if *set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), true); } let array = as_large_list_array(&array2)?; general_array_distinct::(array, field) } (LargeList(field), Null) => { - if set_op == SetOp::Intersect { + if *set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), true); } let array = as_large_list_array(&array1)?; @@ -504,13 +504,13 @@ fn general_set_op( /// Array_union SQL function fn array_union_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_union", args)?; - general_set_op(array1, array2, SetOp::Union) + general_set_op(array1, array2, &SetOp::Union) } /// array_intersect SQL function fn array_intersect_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_intersect", args)?; - general_set_op(array1, array2, SetOp::Intersect) + general_set_op(array1, array2, &SetOp::Intersect) } fn general_array_distinct( diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index 61caa3ac7076..b87ac0f8c41d 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -358,13 +358,13 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { /// Creates a single string from single element of a ListArray (which is /// itself another Array) - fn compute_array_to_string( - arg: &mut String, - arr: ArrayRef, + fn compute_array_to_string<'a>( + arg: &'a mut String, + arr: &ArrayRef, delimiter: String, null_string: String, with_null_string: bool, - ) -> Result<&mut String> { + ) -> Result<&'a mut String> { match arr.data_type() { List(..) => { let list_array = as_list_array(&arr)?; @@ -372,7 +372,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if !list_array.is_null(i) { compute_array_to_string( arg, - list_array.value(i), + &list_array.value(i), delimiter.clone(), null_string.clone(), with_null_string, @@ -392,7 +392,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if !list_array.is_null(i) { compute_array_to_string( arg, - list_array.value(i), + &list_array.value(i), delimiter.clone(), null_string.clone(), with_null_string, @@ -411,7 +411,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if !list_array.is_null(i) { compute_array_to_string( arg, - list_array.value(i), + &list_array.value(i), delimiter.clone(), null_string.clone(), with_null_string, @@ -434,7 +434,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { })?; compute_array_to_string( arg, - values, + &values, delimiter, null_string, with_null_string, @@ -461,8 +461,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { fn generate_string_array( list_arr: &GenericListArray, - delimiters: Vec>, - null_string: String, + delimiters: &[Option<&str>], + null_string: &str, with_null_string: bool, ) -> Result { let mut res: Vec> = Vec::new(); @@ -471,9 +471,9 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let mut arg = String::from(""); let s = compute_array_to_string( &mut arg, - arr, + &arr, delimiter.to_string(), - null_string.clone(), + null_string.to_string(), with_null_string, )? .clone(); @@ -496,8 +496,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let list_array = as_list_array(&arr)?; generate_string_array::( list_array, - delimiters, - null_string, + &delimiters, + &null_string, with_null_string, )? } @@ -505,8 +505,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let list_array = as_large_list_array(&arr)?; generate_string_array::( list_array, - delimiters, - null_string, + &delimiters, + &null_string, with_null_string, )? } @@ -529,17 +529,17 @@ fn string_to_array_inner(args: &[ArrayRef]) -> Result { let string_array = args[0].as_string::(); let builder = StringBuilder::with_capacity(string_array.len(), string_array.get_buffer_memory_size()); - string_to_array_inner_2::<&GenericStringArray, StringBuilder>(args, string_array, builder) + string_to_array_inner_2::<&GenericStringArray, StringBuilder>(args, &string_array, builder) } Utf8View => { let string_array = args[0].as_string_view(); let builder = StringViewBuilder::with_capacity(string_array.len()); - string_to_array_inner_2::<&StringViewArray, StringViewBuilder>(args, string_array, builder) + string_to_array_inner_2::<&StringViewArray, StringViewBuilder>(args, &string_array, builder) } LargeUtf8 => { let string_array = args[0].as_string::(); let builder = LargeStringBuilder::with_capacity(string_array.len(), string_array.get_buffer_memory_size()); - string_to_array_inner_2::<&GenericStringArray, LargeStringBuilder>(args, string_array, builder) + string_to_array_inner_2::<&GenericStringArray, LargeStringBuilder>(args, &string_array, builder) } other => exec_err!("unsupported type for first argument to string_to_array function as {other:?}") } @@ -547,7 +547,7 @@ fn string_to_array_inner(args: &[ArrayRef]) -> Result( args: &'a [ArrayRef], - string_array: StringArrType, + string_array: &StringArrType, string_builder: StringBuilderType, ) -> Result where @@ -563,11 +563,11 @@ where &GenericStringArray, &StringViewArray, StringBuilderType, - >(string_array, delimiter_array, None, string_builder) + >(string_array, &delimiter_array, None, string_builder) } else { string_to_array_inner_3::, - StringBuilderType>(args, string_array, delimiter_array, string_builder) + StringBuilderType>(args, string_array, &delimiter_array, string_builder) } } Utf8View => { @@ -579,11 +579,11 @@ where &StringViewArray, &StringViewArray, StringBuilderType, - >(string_array, delimiter_array, None, string_builder) + >(string_array, &delimiter_array, None, string_builder) } else { string_to_array_inner_3::(args, string_array, delimiter_array, string_builder) + StringBuilderType>(args, string_array, &delimiter_array, string_builder) } } LargeUtf8 => { @@ -594,11 +594,11 @@ where &GenericStringArray, &StringViewArray, StringBuilderType, - >(string_array, delimiter_array, None, string_builder) + >(string_array, &delimiter_array, None, string_builder) } else { string_to_array_inner_3::, - StringBuilderType>(args, string_array, delimiter_array, string_builder) + StringBuilderType>(args, string_array, &delimiter_array, string_builder) } } other => exec_err!("unsupported type for second argument to string_to_array function as {other:?}") @@ -607,8 +607,8 @@ where fn string_to_array_inner_3<'a, StringArrType, DelimiterArrType, StringBuilderType>( args: &'a [ArrayRef], - string_array: StringArrType, - delimiter_array: DelimiterArrType, + string_array: &StringArrType, + delimiter_array: &DelimiterArrType, string_builder: StringBuilderType, ) -> Result where @@ -672,8 +672,8 @@ fn string_to_array_impl< NullValueArrType, StringBuilderType, >( - string_array: StringArrType, - delimiter_array: DelimiterArrType, + string_array: &StringArrType, + delimiter_array: &DelimiterArrType, null_value_array: Option, string_builder: StringBuilderType, ) -> Result From a220435349cd1620198b52c5f6ac25620aa68db8 Mon Sep 17 00:00:00 2001 From: foskey51 Date: Fri, 21 Nov 2025 16:28:42 +0000 Subject: [PATCH 2/5] feat: add Copy and Clone derives to and enums --- datafusion/functions-nested/src/array_has.rs | 5 +++-- datafusion/functions-nested/src/set_ops.rs | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index a230d86b445f..1c138f410f69 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -236,9 +236,10 @@ fn array_has_inner_for_scalar( fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result { let haystack = haystack.as_ref().try_into()?; - array_has_dispatch_for_array(&haystack, needle) + array_has_dispatch_for_array(haystack, needle) } +#[derive(Copy, Clone)] enum ArrayWrapper<'a> { FixedSizeList(&'a arrow::array::FixedSizeListArray), List(&'a arrow::array::GenericListArray), @@ -318,7 +319,7 @@ impl<'a> ArrayWrapper<'a> { } fn array_has_dispatch_for_array<'a>( - haystack: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, needle: &ArrayRef, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 5c185d946694..1a17dea9ca3a 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -327,7 +327,7 @@ fn array_distinct_inner(args: &[ArrayRef]) -> Result { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Copy, Clone)] enum SetOp { Union, Intersect, @@ -428,7 +428,7 @@ fn generic_set_lists( fn general_set_op( array1: &ArrayRef, array2: &ArrayRef, - set_op: &SetOp, + set_op: SetOp, ) -> Result { fn empty_array(data_type: &DataType, len: usize, large: bool) -> Result { let field = Arc::new(Field::new_list_field(data_type.clone(), true)); @@ -456,28 +456,28 @@ fn general_set_op( array1.len(), ))), (Null, List(field)) => { - if *set_op == SetOp::Intersect { + if set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), false); } let array = as_list_array(&array2)?; general_array_distinct::(array, field) } (List(field), Null) => { - if *set_op == SetOp::Intersect { + if set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), false); } let array = as_list_array(&array1)?; general_array_distinct::(array, field) } (Null, LargeList(field)) => { - if *set_op == SetOp::Intersect { + if set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), true); } let array = as_large_list_array(&array2)?; general_array_distinct::(array, field) } (LargeList(field), Null) => { - if *set_op == SetOp::Intersect { + if set_op == SetOp::Intersect { return empty_array(field.data_type(), array1.len(), true); } let array = as_large_list_array(&array1)?; @@ -486,12 +486,12 @@ fn general_set_op( (List(field), List(_)) => { let array1 = as_list_array(&array1)?; let array2 = as_list_array(&array2)?; - generic_set_lists::(array1, array2, Arc::clone(field), set_op) + generic_set_lists::(array1, array2, Arc::clone(field), &set_op) } (LargeList(field), LargeList(_)) => { let array1 = as_large_list_array(&array1)?; let array2 = as_large_list_array(&array2)?; - generic_set_lists::(array1, array2, Arc::clone(field), set_op) + generic_set_lists::(array1, array2, Arc::clone(field), &set_op) } (data_type1, data_type2) => { internal_err!( @@ -504,13 +504,13 @@ fn general_set_op( /// Array_union SQL function fn array_union_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_union", args)?; - general_set_op(array1, array2, &SetOp::Union) + general_set_op(array1, array2, SetOp::Union) } /// array_intersect SQL function fn array_intersect_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_intersect", args)?; - general_set_op(array1, array2, &SetOp::Intersect) + general_set_op(array1, array2, SetOp::Intersect) } fn general_array_distinct( From 8abe9143e59fc27d8bcba04eaed71c333e5096a9 Mon Sep 17 00:00:00 2001 From: foskey51 Date: Fri, 21 Nov 2025 17:31:59 +0000 Subject: [PATCH 3/5] refactor: remove unnecessary references from ArrayWrapper usage --- datafusion/functions-nested/src/array_has.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 1c138f410f69..d16f60e31aa4 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -391,8 +391,8 @@ fn array_has_all_inner(args: &[ArrayRef]) -> Result { // General row comparison for array_has_all and array_has_any fn general_array_has_for_all_and_any<'a>( - haystack: &ArrayWrapper<'a>, - needle: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, + needle: ArrayWrapper<'a>, comparison_type: ComparisonType, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); @@ -417,8 +417,8 @@ fn general_array_has_for_all_and_any<'a>( // String comparison for array_has_all and array_has_any fn array_has_all_and_any_string_internal<'a>( - haystack: &ArrayWrapper<'a>, - needle: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, + needle: ArrayWrapper<'a>, comparison_type: ComparisonType, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); @@ -443,8 +443,8 @@ fn array_has_all_and_any_string_internal<'a>( } fn array_has_all_and_any_dispatch<'a>( - haystack: &ArrayWrapper<'a>, - needle: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, + needle: ArrayWrapper<'a>, comparison_type: ComparisonType, ) -> Result { if needle.values().is_empty() { @@ -469,7 +469,7 @@ fn array_has_all_and_any_inner( ) -> Result { let haystack: ArrayWrapper = args[0].as_ref().try_into()?; let needle: ArrayWrapper = args[1].as_ref().try_into()?; - array_has_all_and_any_dispatch(&haystack, &needle, comparison_type) + array_has_all_and_any_dispatch(haystack, needle, comparison_type) } fn array_has_any_inner(args: &[ArrayRef]) -> Result { From a567be22489e8ab132367c829fbb55928df36010 Mon Sep 17 00:00:00 2001 From: foskey51 Date: Fri, 21 Nov 2025 17:36:48 +0000 Subject: [PATCH 4/5] refactor: remove redundant reference usage ArrayWrapper --- datafusion/functions-nested/src/array_has.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index d16f60e31aa4..8ae8c42b79d5 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -231,7 +231,7 @@ fn array_has_inner_for_scalar( needle: &dyn Datum, ) -> Result { let haystack = haystack.as_ref().try_into()?; - array_has_dispatch_for_scalar(&haystack, needle) + array_has_dispatch_for_scalar(haystack, needle) } fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result { @@ -339,7 +339,7 @@ fn array_has_dispatch_for_array<'a>( } fn array_has_dispatch_for_scalar( - haystack: &ArrayWrapper<'_>, + haystack: ArrayWrapper<'_>, needle: &dyn Datum, ) -> Result { let values = haystack.values(); From 1c4484ddf380ea0e9236333e9b04abb715aa10cb Mon Sep 17 00:00:00 2001 From: foskey51 Date: Sat, 22 Nov 2025 00:40:59 +0000 Subject: [PATCH 5/5] remove redundant &SetOps --- datafusion/functions-nested/src/set_ops.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 1a17dea9ca3a..71a42531f99e 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -346,7 +346,7 @@ fn generic_set_lists( l: &GenericListArray, r: &GenericListArray, field: Arc, - set_op: &SetOp, + set_op: SetOp, ) -> Result { if l.is_empty() || l.value_type().is_null() { let field = Arc::new(Field::new_list_field(r.value_type(), true)); @@ -380,7 +380,7 @@ fn generic_set_lists( let l_iter = l_values.iter().sorted().dedup(); let values_set: HashSet<_> = l_iter.clone().collect(); - let mut rows = if *set_op == SetOp::Union { + let mut rows = if set_op == SetOp::Union { l_iter.collect() } else { vec![] @@ -486,12 +486,12 @@ fn general_set_op( (List(field), List(_)) => { let array1 = as_list_array(&array1)?; let array2 = as_list_array(&array2)?; - generic_set_lists::(array1, array2, Arc::clone(field), &set_op) + generic_set_lists::(array1, array2, Arc::clone(field), set_op) } (LargeList(field), LargeList(_)) => { let array1 = as_large_list_array(&array1)?; let array2 = as_large_list_array(&array2)?; - generic_set_lists::(array1, array2, Arc::clone(field), &set_op) + generic_set_lists::(array1, array2, Arc::clone(field), set_op) } (data_type1, data_type2) => { internal_err!(