diff --git a/datafusion/functions-nested/src/set_ops.rs b/datafusion/functions-nested/src/set_ops.rs index a8dd857947b12..69a220e125c04 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -22,7 +22,7 @@ use arrow::array::{ Array, ArrayRef, GenericListArray, LargeListArray, ListArray, OffsetSizeTrait, new_null_array, }; -use arrow::buffer::OffsetBuffer; +use arrow::buffer::{NullBuffer, OffsetBuffer}; use arrow::compute; use arrow::datatypes::DataType::{LargeList, List, Null}; use arrow::datatypes::{DataType, Field, FieldRef}; @@ -363,18 +363,23 @@ fn generic_set_lists( let mut offsets = vec![OffsetSize::usize_as(0)]; let mut new_arrays = vec![]; + let mut new_null_buf = vec![]; let converter = RowConverter::new(vec![SortField::new(l.value_type())])?; for (first_arr, second_arr) in l.iter().zip(r.iter()) { + let mut ele_should_be_null = false; + let l_values = if let Some(first_arr) = first_arr { converter.convert_columns(&[first_arr])? } else { - converter.convert_columns(&[])? + ele_should_be_null = true; + converter.empty_rows(0, 0) }; let r_values = if let Some(second_arr) = second_arr { converter.convert_columns(&[second_arr])? } else { - converter.convert_columns(&[])? + ele_should_be_null = true; + converter.empty_rows(0, 0) }; let l_iter = l_values.iter().sorted().dedup(); @@ -414,13 +419,19 @@ fn generic_set_lists( } }; + new_null_buf.push(!ele_should_be_null); new_arrays.push(array); } let offsets = OffsetBuffer::new(offsets.into()); let new_arrays_ref: Vec<_> = new_arrays.iter().map(|v| v.as_ref()).collect(); let values = compute::concat(&new_arrays_ref)?; - let arr = GenericListArray::::try_new(field, offsets, values, None)?; + let arr = GenericListArray::::try_new( + field, + offsets, + values, + Some(NullBuffer::new(new_null_buf.into())), + )?; Ok(Arc::new(arr)) } diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 35e25a15bebba..c31f3d0702358 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -503,6 +503,17 @@ AS FROM array_intersect_table_1D_UTF8 ; +statement ok +CREATE TABLE array_intersect_table_1D_NULL +AS VALUES + ([1, 2, 2, 3], [2, 3, 4]), + ([2, 3, 3], [3]), + ([3], [3, 3, 4]), + (null, [3, 4]), + ([1, 2], null), + (null, null) +; + statement ok CREATE TABLE array_intersect_table_2D AS VALUES @@ -4816,6 +4827,16 @@ select array_union(arrow_cast(['hello'], 'LargeList(Utf8)'), arrow_cast(['hello' ---- [hello, datafusion] +query ? +select array_union(column1, column2) +from array_intersect_table_1D_NULL; +---- +[1, 2, 3, 4] +[2, 3] +[3, 4] +NULL +NULL +NULL # list_to_string scalar function #4 (function alias `array_to_string`) query TTT @@ -6765,6 +6786,17 @@ from large_array_intersect_table_1D_UTF8; [bc] [arrow, rust] [] [] [arrow, datafusion, rust] [arrow, rust] +query ? +select array_intersect(column1, column2) +from array_intersect_table_1D_NULL; +---- +[2, 3] +[3] +[3] +NULL +NULL +NULL + query ?? select array_intersect(column1, column2), array_intersect(column3, column4)