diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index 080b2f16d92f..8ae8c42b79d5 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -239,6 +239,7 @@ fn array_has_inner_for_array(haystack: &ArrayRef, needle: &ArrayRef) -> Result { FixedSizeList(&'a arrow::array::FixedSizeListArray), List(&'a arrow::array::GenericListArray), @@ -317,8 +318,8 @@ impl<'a> ArrayWrapper<'a> { } } -fn array_has_dispatch_for_array( - haystack: ArrayWrapper<'_>, +fn array_has_dispatch_for_array<'a>( + haystack: ArrayWrapper<'a>, needle: &ArrayRef, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); @@ -390,8 +391,8 @@ fn array_has_all_inner(args: &[ArrayRef]) -> Result { // General row comparison for array_has_all and array_has_any fn general_array_has_for_all_and_any<'a>( - haystack: &ArrayWrapper<'a>, - needle: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, + needle: ArrayWrapper<'a>, comparison_type: ComparisonType, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); @@ -402,8 +403,8 @@ fn general_array_has_for_all_and_any<'a>( let arr_values = converter.convert_columns(&[arr])?; let sub_arr_values = converter.convert_columns(&[sub_arr])?; boolean_builder.append_value(general_array_has_all_and_any_kernel( - arr_values, - sub_arr_values, + &arr_values, + &sub_arr_values, comparison_type, )); } else { @@ -416,8 +417,8 @@ fn general_array_has_for_all_and_any<'a>( // String comparison for array_has_all and array_has_any fn array_has_all_and_any_string_internal<'a>( - haystack: &ArrayWrapper<'a>, - needle: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, + needle: ArrayWrapper<'a>, comparison_type: ComparisonType, ) -> Result { let mut boolean_builder = BooleanArray::builder(haystack.len()); @@ -427,8 +428,8 @@ fn array_has_all_and_any_string_internal<'a>( let haystack_array = string_array_to_vec(&arr); let needle_array = string_array_to_vec(&sub_arr); boolean_builder.append_value(array_has_string_kernel( - haystack_array, - needle_array, + &haystack_array, + &needle_array, comparison_type, )); } @@ -442,8 +443,8 @@ fn array_has_all_and_any_string_internal<'a>( } fn array_has_all_and_any_dispatch<'a>( - haystack: &ArrayWrapper<'a>, - needle: &ArrayWrapper<'a>, + haystack: ArrayWrapper<'a>, + needle: ArrayWrapper<'a>, comparison_type: ComparisonType, ) -> Result { if needle.values().is_empty() { @@ -468,7 +469,7 @@ fn array_has_all_and_any_inner( ) -> Result { let haystack: ArrayWrapper = args[0].as_ref().try_into()?; let needle: ArrayWrapper = args[1].as_ref().try_into()?; - array_has_all_and_any_dispatch(&haystack, &needle, comparison_type) + array_has_all_and_any_dispatch(haystack, needle, comparison_type) } fn array_has_any_inner(args: &[ArrayRef]) -> Result { @@ -633,8 +634,8 @@ enum ComparisonType { } fn array_has_string_kernel( - haystack: Vec>, - needle: Vec>, + haystack: &[Option<&str>], + needle: &[Option<&str>], comparison_type: ComparisonType, ) -> bool { match comparison_type { @@ -650,8 +651,8 @@ fn array_has_string_kernel( } fn general_array_has_all_and_any_kernel( - haystack_rows: Rows, - needle_rows: Rows, + haystack_rows: &Rows, + needle_rows: &Rows, comparison_type: ComparisonType, ) -> bool { match comparison_type { diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 49f4110faeaa..e84a942fab2a 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -145,7 +145,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let (inner_field, inner_offsets, inner_values, _) = as_list_array(&values)?.clone().into_parts(); let offsets = - get_offsets_for_flatten::(inner_offsets, offsets); + get_offsets_for_flatten::(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -159,7 +159,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let (inner_field, inner_offsets, inner_values, _) = as_large_list_array(&values)?.clone().into_parts(); let offsets = - get_offsets_for_flatten::(inner_offsets, offsets); + get_offsets_for_flatten::(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -180,7 +180,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { List(_) => { let (inner_field, inner_offsets, inner_values, _) = as_list_array(&values)?.clone().into_parts(); - let offsets = get_large_offsets_for_flatten(inner_offsets, offsets); + let offsets = get_large_offsets_for_flatten(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -194,7 +194,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { let (inner_field, inner_offsets, inner_values, _) = as_large_list_array(&values)?.clone().into_parts(); let offsets = - get_offsets_for_flatten::(inner_offsets, offsets); + get_offsets_for_flatten::(inner_offsets, &offsets); let flattened_array = GenericListArray::::new( inner_field, offsets, @@ -217,7 +217,7 @@ pub fn flatten_inner(args: &[ArrayRef]) -> Result { // Create new offsets that are equivalent to `flatten` the array. fn get_offsets_for_flatten( inner_offsets: OffsetBuffer, - outer_offsets: OffsetBuffer

, + outer_offsets: &OffsetBuffer

, ) -> OffsetBuffer { let buffer = inner_offsets.into_inner(); let offsets: Vec = outer_offsets @@ -230,7 +230,7 @@ fn get_offsets_for_flatten( // Create new large offsets that are equivalent to `flatten` the array. fn get_large_offsets_for_flatten( inner_offsets: OffsetBuffer, - outer_offsets: OffsetBuffer

, + outer_offsets: &OffsetBuffer

, ) -> OffsetBuffer { let buffer = inner_offsets.into_inner(); let offsets: Vec = outer_offsets diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 3a66e6569476..51210b9ae22d 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -23,6 +23,9 @@ // Make sure fast / cheap clones on Arc are explicit: // https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] //! Nested type Functions for [DataFusion]. //! diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index c3c11beaf967..fe9bc609c013 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -89,7 +89,7 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result { let values = get_first_array_ref(values_arg)?; - make_map_batch_internal(keys, values, can_evaluate_to_const, keys_arg.data_type()) + make_map_batch_internal(&keys, &values, can_evaluate_to_const, &keys_arg.data_type()) } /// Validates that map keys are non-null and unique. @@ -126,10 +126,10 @@ fn get_first_array_ref(columnar_value: &ColumnarValue) -> Result { } fn make_map_batch_internal( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, can_evaluate_to_const: bool, - data_type: DataType, + data_type: &DataType, ) -> Result { if keys.len() != values.len() { return exec_err!("map requires key and value lists to have the same length"); @@ -159,8 +159,8 @@ fn make_map_batch_internal( let mut entry_offsets_buffer = VecDeque::new(); entry_offsets_buffer.push_back(0); - entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(&keys))); - entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(&values))); + entry_struct_buffer.push_back((Arc::clone(&key_field), Arc::clone(keys))); + entry_struct_buffer.push_back((Arc::clone(&value_field), Arc::clone(values))); entry_offsets_buffer.push_back(keys.len() as u32); let entry_struct: Vec<(Arc, ArrayRef)> = entry_struct_buffer.into(); @@ -368,8 +368,8 @@ fn get_element_type(data_type: &DataType) -> Result<&DataType> { /// +-----------+ +-----------+ /// ```text fn make_map_array_internal( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, ) -> Result { // Save original data types and array length before list_to_arrays transforms them let keys_data_type = keys.data_type().clone(); @@ -380,14 +380,14 @@ fn make_map_array_internal( // This tells us which MAP values are NULL (not which keys within maps are null) let nulls_bitmap = keys.nulls().cloned(); - let keys = list_to_arrays::(&keys); - let values = list_to_arrays::(&values); + let keys = list_to_arrays::(keys); + let values = list_to_arrays::(values); build_map_array( - keys, - values, - keys_data_type, - values_data_type, + &keys, + &values, + &keys_data_type, + &values_data_type, original_len, nulls_bitmap, ) @@ -396,8 +396,8 @@ fn make_map_array_internal( /// Helper function specifically for FixedSizeList inputs /// Similar to make_map_array_internal but uses fixed_size_list_to_arrays instead of list_to_arrays fn make_map_array_from_fixed_size_list( - keys: ArrayRef, - values: ArrayRef, + keys: &ArrayRef, + values: &ArrayRef, ) -> Result { // Save original data types and array length let keys_data_type = keys.data_type().clone(); @@ -407,14 +407,14 @@ fn make_map_array_from_fixed_size_list( // Save the nulls bitmap from the original keys array let nulls_bitmap = keys.nulls().cloned(); - let keys = fixed_size_list_to_arrays(&keys); - let values = fixed_size_list_to_arrays(&values); + let keys = fixed_size_list_to_arrays(keys); + let values = fixed_size_list_to_arrays(values); build_map_array( - keys, - values, - keys_data_type, - values_data_type, + &keys, + &values, + &keys_data_type, + &values_data_type, original_len, nulls_bitmap, ) @@ -422,10 +422,10 @@ fn make_map_array_from_fixed_size_list( /// Common logic to build a MapArray from decomposed list arrays fn build_map_array( - keys: Vec, - values: Vec, - keys_data_type: DataType, - values_data_type: DataType, + keys: &[ArrayRef], + values: &[ArrayRef], + keys_data_type: &DataType, + values_data_type: &DataType, original_len: usize, nulls_bitmap: Option, ) -> Result { @@ -470,8 +470,8 @@ fn build_map_array( let (flattened_keys, flattened_values) = if key_array_vec.is_empty() { // All maps are NULL - create empty arrays // We need to infer the data type from the original keys/values arrays - let key_type = get_element_type(&keys_data_type)?; - let value_type = get_element_type(&values_data_type)?; + let key_type = get_element_type(keys_data_type)?; + let value_type = get_element_type(values_data_type)?; ( arrow::array::new_empty_array(key_type), diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index a585da06102a..b390bf3c4226 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -181,13 +181,13 @@ fn general_position_dispatch(args: &[ArrayRef]) -> Result(list_array, element_array, arr_from) + generic_position::(list_array, element_array, &arr_from) } fn generic_position( list_array: &GenericListArray, element_array: &ArrayRef, - arr_from: Vec, // 0-indexed + arr_from: &[i64], // 0-indexed ) -> Result { let mut data = Vec::with_capacity(list_array.len()); diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index d330606cdd89..e1ebc9cda0bf 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -289,7 +289,7 @@ pub fn array_remove_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_remove", args)?; let arr_n = vec![1; array.len()]; - array_remove_internal(array, element, arr_n) + array_remove_internal(array, element, &arr_n) } /// Array_remove_n SQL function @@ -297,7 +297,7 @@ pub fn array_remove_n_inner(args: &[ArrayRef]) -> Result { let [array, element, max] = take_function_args("array_remove_n", args)?; let arr_n = as_int64_array(max)?.values().to_vec(); - array_remove_internal(array, element, arr_n) + array_remove_internal(array, element, &arr_n) } /// Array_remove_all SQL function @@ -305,13 +305,13 @@ pub fn array_remove_all_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_remove_all", args)?; let arr_n = vec![i64::MAX; array.len()]; - array_remove_internal(array, element, arr_n) + array_remove_internal(array, element, &arr_n) } fn array_remove_internal( array: &ArrayRef, element_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { match array.data_type() { DataType::List(_) => { @@ -348,7 +348,7 @@ fn array_remove_internal( fn general_remove( list_array: &GenericListArray, element_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { let data_type = list_array.value_type(); let mut new_values = vec![]; diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 4314d41419bc..079c28175d45 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -328,7 +328,7 @@ fn general_replace( list_array: &GenericListArray, from_array: &ArrayRef, to_array: &ArrayRef, - arr_n: Vec, + arr_n: &[i64], ) -> Result { // Build up the offsets for the final output array let mut offsets: Vec = vec![O::usize_as(0)]; @@ -426,11 +426,11 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::Null => Ok(new_null_array(array.data_type(), 1)), array_type => exec_err!("array_replace does not support type '{array_type}'."), @@ -445,11 +445,11 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::Null => Ok(new_null_array(array.data_type(), 1)), array_type => { @@ -466,11 +466,11 @@ pub(crate) fn array_replace_all_inner(args: &[ArrayRef]) -> Result { match array.data_type() { DataType::List(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::LargeList(_) => { let list_array = array.as_list::(); - general_replace::(list_array, from, to, arr_n) + general_replace::(list_array, from, to, &arr_n) } DataType::Null => Ok(new_null_array(array.data_type(), 1)), array_type => { diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index 1e7ae5c147a8..71a42531f99e 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -327,7 +327,7 @@ fn array_distinct_inner(args: &[ArrayRef]) -> Result { } } -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Copy, Clone)] enum SetOp { Union, Intersect, diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index 61caa3ac7076..b87ac0f8c41d 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -358,13 +358,13 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { /// Creates a single string from single element of a ListArray (which is /// itself another Array) - fn compute_array_to_string( - arg: &mut String, - arr: ArrayRef, + fn compute_array_to_string<'a>( + arg: &'a mut String, + arr: &ArrayRef, delimiter: String, null_string: String, with_null_string: bool, - ) -> Result<&mut String> { + ) -> Result<&'a mut String> { match arr.data_type() { List(..) => { let list_array = as_list_array(&arr)?; @@ -372,7 +372,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if !list_array.is_null(i) { compute_array_to_string( arg, - list_array.value(i), + &list_array.value(i), delimiter.clone(), null_string.clone(), with_null_string, @@ -392,7 +392,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if !list_array.is_null(i) { compute_array_to_string( arg, - list_array.value(i), + &list_array.value(i), delimiter.clone(), null_string.clone(), with_null_string, @@ -411,7 +411,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { if !list_array.is_null(i) { compute_array_to_string( arg, - list_array.value(i), + &list_array.value(i), delimiter.clone(), null_string.clone(), with_null_string, @@ -434,7 +434,7 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { })?; compute_array_to_string( arg, - values, + &values, delimiter, null_string, with_null_string, @@ -461,8 +461,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { fn generate_string_array( list_arr: &GenericListArray, - delimiters: Vec>, - null_string: String, + delimiters: &[Option<&str>], + null_string: &str, with_null_string: bool, ) -> Result { let mut res: Vec> = Vec::new(); @@ -471,9 +471,9 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let mut arg = String::from(""); let s = compute_array_to_string( &mut arg, - arr, + &arr, delimiter.to_string(), - null_string.clone(), + null_string.to_string(), with_null_string, )? .clone(); @@ -496,8 +496,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let list_array = as_list_array(&arr)?; generate_string_array::( list_array, - delimiters, - null_string, + &delimiters, + &null_string, with_null_string, )? } @@ -505,8 +505,8 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { let list_array = as_large_list_array(&arr)?; generate_string_array::( list_array, - delimiters, - null_string, + &delimiters, + &null_string, with_null_string, )? } @@ -529,17 +529,17 @@ fn string_to_array_inner(args: &[ArrayRef]) -> Result { let string_array = args[0].as_string::(); let builder = StringBuilder::with_capacity(string_array.len(), string_array.get_buffer_memory_size()); - string_to_array_inner_2::<&GenericStringArray, StringBuilder>(args, string_array, builder) + string_to_array_inner_2::<&GenericStringArray, StringBuilder>(args, &string_array, builder) } Utf8View => { let string_array = args[0].as_string_view(); let builder = StringViewBuilder::with_capacity(string_array.len()); - string_to_array_inner_2::<&StringViewArray, StringViewBuilder>(args, string_array, builder) + string_to_array_inner_2::<&StringViewArray, StringViewBuilder>(args, &string_array, builder) } LargeUtf8 => { let string_array = args[0].as_string::(); let builder = LargeStringBuilder::with_capacity(string_array.len(), string_array.get_buffer_memory_size()); - string_to_array_inner_2::<&GenericStringArray, LargeStringBuilder>(args, string_array, builder) + string_to_array_inner_2::<&GenericStringArray, LargeStringBuilder>(args, &string_array, builder) } other => exec_err!("unsupported type for first argument to string_to_array function as {other:?}") } @@ -547,7 +547,7 @@ fn string_to_array_inner(args: &[ArrayRef]) -> Result( args: &'a [ArrayRef], - string_array: StringArrType, + string_array: &StringArrType, string_builder: StringBuilderType, ) -> Result where @@ -563,11 +563,11 @@ where &GenericStringArray, &StringViewArray, StringBuilderType, - >(string_array, delimiter_array, None, string_builder) + >(string_array, &delimiter_array, None, string_builder) } else { string_to_array_inner_3::, - StringBuilderType>(args, string_array, delimiter_array, string_builder) + StringBuilderType>(args, string_array, &delimiter_array, string_builder) } } Utf8View => { @@ -579,11 +579,11 @@ where &StringViewArray, &StringViewArray, StringBuilderType, - >(string_array, delimiter_array, None, string_builder) + >(string_array, &delimiter_array, None, string_builder) } else { string_to_array_inner_3::(args, string_array, delimiter_array, string_builder) + StringBuilderType>(args, string_array, &delimiter_array, string_builder) } } LargeUtf8 => { @@ -594,11 +594,11 @@ where &GenericStringArray, &StringViewArray, StringBuilderType, - >(string_array, delimiter_array, None, string_builder) + >(string_array, &delimiter_array, None, string_builder) } else { string_to_array_inner_3::, - StringBuilderType>(args, string_array, delimiter_array, string_builder) + StringBuilderType>(args, string_array, &delimiter_array, string_builder) } } other => exec_err!("unsupported type for second argument to string_to_array function as {other:?}") @@ -607,8 +607,8 @@ where fn string_to_array_inner_3<'a, StringArrType, DelimiterArrType, StringBuilderType>( args: &'a [ArrayRef], - string_array: StringArrType, - delimiter_array: DelimiterArrType, + string_array: &StringArrType, + delimiter_array: &DelimiterArrType, string_builder: StringBuilderType, ) -> Result where @@ -672,8 +672,8 @@ fn string_to_array_impl< NullValueArrType, StringBuilderType, >( - string_array: StringArrType, - delimiter_array: DelimiterArrType, + string_array: &StringArrType, + delimiter_array: &DelimiterArrType, null_value_array: Option, string_builder: StringBuilderType, ) -> Result