diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index dc82488f6a3..64ea3fd71a7 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -41,6 +41,7 @@ #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_run_end.h" #include "arrow/array/builder_time.h" +#include "arrow/array/concatenate.h" #include "arrow/array/data.h" #include "arrow/array/util.h" #include "arrow/buffer.h" @@ -997,6 +998,123 @@ TEST_F(TestArray, TestAppendArraySlice) { } } +class TestBuilderAppendArraySlice : public TestArray { + public: + virtual void AssertResult(const Array& expected, const Array& actual) { + AssertArraysEqual(expected, actual, true); + } + + void CheckAppendArraySlice(const std::shared_ptr& type) { + auto rag = random::RandomArrayGenerator(0xdeadbeef); + const int64_t total_length = 100; + + for (auto null_probability : {0.0, 0.1, 0.5, 1.0}) { + auto array = rag.ArrayOf(type, total_length, null_probability); + + std::unique_ptr builder; + ASSERT_OK(MakeBuilder(pool_, type, &builder)); + + // Slice the array into multiple pieces + ArrayVector slices; + std::vector offsets = {0, 10, 10, 25, 60, 100}; + for (size_t i = 0; i < offsets.size() - 1; ++i) { + int64_t start = offsets[i]; + int64_t length = offsets[i + 1] - offsets[i]; + auto slice = array->Slice(start, length); + slices.push_back(slice); + + ArraySpan span(*slice->data()); + ASSERT_OK(builder->AppendArraySlice(span, 0, slice->length())); + } + + std::shared_ptr actual; + ASSERT_OK(builder->Finish(&actual)); + ASSERT_OK(actual->ValidateFull()); + + ASSERT_OK_AND_ASSIGN(auto expected, Concatenate(slices, pool_)); + AssertResult(*expected, *actual); + } + } + + void CheckAppendArraySlice(const std::vector>& types) { + for (const auto& type : types) { + ARROW_SCOPED_TRACE("type = ", type->ToString()); + CheckAppendArraySlice(type); + } + } +}; + +TEST_F(TestBuilderAppendArraySlice, Primitives) { + CheckAppendArraySlice(PrimitiveTypes()); +} + +TEST_F(TestBuilderAppendArraySlice, Temporals) { CheckAppendArraySlice(TemporalTypes()); } + +TEST_F(TestBuilderAppendArraySlice, Intervals) { CheckAppendArraySlice(IntervalTypes()); } + +TEST_F(TestBuilderAppendArraySlice, Durations) { CheckAppendArraySlice(DurationTypes()); } + +TEST_F(TestBuilderAppendArraySlice, Decimals) { + CheckAppendArraySlice( + {decimal32(7, 2), decimal64(12, 2), decimal128(10, 2), decimal256(10, 2)}); +} + +TEST_F(TestBuilderAppendArraySlice, Nested) { + CheckAppendArraySlice({list(int32()), large_list(int32()), list_view(int32()), + large_list_view(int32()), fixed_size_list(int32(), 3), + struct_({field("a", int32()), field("b", utf8())}), + sparse_union({field("a", int32()), field("b", utf8())}), + dense_union({field("a", int32()), field("b", utf8())})}); +} + +TEST_F(TestBuilderAppendArraySlice, FixedSizeBinary) { + CheckAppendArraySlice(fixed_size_binary(10)); +} + +TEST_F(TestBuilderAppendArraySlice, Float16) { CheckAppendArraySlice(float16()); } + +TEST_F(TestBuilderAppendArraySlice, RunEndEncoded) { + CheckAppendArraySlice(run_end_encoded(int32(), utf8())); + CheckAppendArraySlice(run_end_encoded(int32(), int64())); +} + +// Dictionary types require a custom AssertResult because DictionaryBuilder +// re-encodes values based on discovery order. This can change both the +// dictionary and the indices, causing standard physical equality checks to fail. +// +// Example: Slicing values ["b", "a"] from an array with dictionary ["a", "b"] +// (indices [1, 0]) and appending them to a fresh builder results in a new +// dictionary ["b", "a"] (indices [0, 1]). Both represent the same logical +// data but differ physically. +class TestBuilderAppendArraySliceDictionary : public TestBuilderAppendArraySlice { + public: + void AssertResult(const Array& expected, const Array& actual) override { + const auto& expected_dict = internal::checked_cast(expected); + const auto& actual_dict = internal::checked_cast(actual); + const auto& expected_values = *expected_dict.dictionary(); + const auto& actual_values = *actual_dict.dictionary(); + + ASSERT_EQ(expected.length(), actual.length()); + for (int64_t i = 0; i < expected.length(); ++i) { + if (expected.IsNull(i)) { + ASSERT_TRUE(actual.IsNull(i)); + } else { + ASSERT_FALSE(actual.IsNull(i)); + ASSERT_OK_AND_ASSIGN(auto expected_val, + expected_values.GetScalar(expected_dict.GetValueIndex(i))); + ASSERT_OK_AND_ASSIGN(auto actual_val, + actual_values.GetScalar(actual_dict.GetValueIndex(i))); + AssertScalarsEqual(*expected_val, *actual_val); + } + } + } +}; + +TEST_F(TestBuilderAppendArraySliceDictionary, Dictionary) { + CheckAppendArraySlice(dictionary(int8(), utf8())); + CheckAppendArraySlice(dictionary(int32(), utf8())); +} + // GH-39976: Test out-of-line data size calculation in // BinaryViewBuilder::AppendArraySlice. TEST_F(TestArray, TestBinaryViewAppendArraySlice) {