Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/paimon/common/data/binary_array_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ TEST(BinaryArrayTest, TestFromLongArray) {
R"([[123, null], [789], [12345], [12]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 2);

BinaryArray ret = BinaryArray::FromLongArray(&array, pool.get());

Expand Down Expand Up @@ -363,7 +363,7 @@ TEST(BinaryArrayTest, TestFromAllNullLongArray) {
R"([[null, null], [789], [12345], [12]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 2);

BinaryArray ret = BinaryArray::FromLongArray(&array, pool.get());

Expand Down
2 changes: 1 addition & 1 deletion src/paimon/common/data/columnar/columnar_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ std::shared_ptr<InternalArray> ColumnarArray::GetArray(int32_t pos) const {
assert(list_array);
int32_t offset = list_array->value_offset(offset_ + pos);
int32_t length = list_array->value_length(offset_ + pos);
return std::make_shared<ColumnarArray>(list_array->values(), pool_, offset, length);
return std::make_shared<ColumnarArray>(list_array->values().get(), pool_, offset, length);
}

std::shared_ptr<InternalMap> ColumnarArray::GetMap(int32_t pos) const {
Expand Down
11 changes: 8 additions & 3 deletions src/paimon/common/data/columnar/columnar_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,16 @@ class Bytes;
class MemoryPool;

/// Columnar array to support access to vector column data.
///
/// NOTE: This class holds a non-owning raw pointer to the underlying arrow::Array for efficiency.
/// The caller must ensure that the pointed-to Array outlives this ColumnarArray instance.
/// Typically, lifetime is guaranteed by the owning ColumnarBatchContext or the parent
/// arrow container (e.g., ListArray, MapArray) that holds the shared_ptr.
class ColumnarArray : public InternalArray {
public:
ColumnarArray(const std::shared_ptr<arrow::Array>& array,
const std::shared_ptr<MemoryPool>& pool, int32_t offset, int32_t length)
: pool_(pool), array_(array.get()), offset_(offset), length_(length) {
ColumnarArray(const arrow::Array* array, const std::shared_ptr<MemoryPool>& pool,
int32_t offset, int32_t length)
: pool_(pool), array_(array), offset_(offset), length_(length) {
assert(array_);
assert(array_->length() >= offset + length);
Comment thread
lxy-9602 marked this conversation as resolved.
}
Expand Down
48 changes: 24 additions & 24 deletions src/paimon/common/data/columnar/columnar_array_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ TEST(ColumnarArrayTest, TestSimple) {
arrow::list(arrow::boolean()), "[[true, false], [true], [false], [false, true]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/2, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/2, 1);
ASSERT_EQ(array.Size(), 1);
ASSERT_EQ(array.GetBoolean(0), true);
std::vector<char> expected_array = {static_cast<char>(1)};
Expand All @@ -51,7 +51,7 @@ TEST(ColumnarArrayTest, TestSimple) {
"[[1, 1, 2], [3], [2], [2]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/5, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/5, 1);
ASSERT_EQ(array.GetByte(0), 2);
std::vector<char> expected_array = {static_cast<char>(2)};
ASSERT_EQ(array.ToByteArray().value(), expected_array);
Expand All @@ -61,7 +61,7 @@ TEST(ColumnarArrayTest, TestSimple) {
"[[1, 1, 2], [3], [2], [-4]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 3);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 3);
ASSERT_EQ(array.GetShort(0), 1);
ASSERT_EQ(array.GetShort(1), 1);
ASSERT_EQ(array.GetShort(2), 2);
Expand All @@ -73,7 +73,7 @@ TEST(ColumnarArrayTest, TestSimple) {
"[[1, 1, 2], [3], [2], [-4]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/3, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/3, 1);
ASSERT_EQ(array.GetInt(0), 3);
std::vector<int32_t> expected_array = {3};
ASSERT_EQ(array.ToIntArray().value(), expected_array);
Expand All @@ -83,7 +83,7 @@ TEST(ColumnarArrayTest, TestSimple) {
"[[1, 1, 2], [3], [2], [-4]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/4, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/4, 1);
ASSERT_EQ(array.GetLong(0), 2);
std::vector<int64_t> expected_array = {2};
ASSERT_EQ(array.ToLongArray().value(), expected_array);
Expand All @@ -93,15 +93,15 @@ TEST(ColumnarArrayTest, TestSimple) {
"[[1, 1, 2], [3], [null], null]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/4, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/4, 1);
ASSERT_NOK_WITH_MSG(array.ToLongArray(), "is null");
}
{
auto f1 = arrow::ipc::internal::json::ArrayFromJSON(
arrow::list(arrow::float32()), "[[0.0, 1.1, 2.2], [3.3], [4.4], [5.5]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 3);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 3);
ASSERT_NEAR(array.GetFloat(1), 1.1, 0.001);
std::vector<float> expected_array = {0.0, 1.1, 2.2};
ASSERT_EQ(array.ToFloatArray().value(), expected_array);
Expand All @@ -111,7 +111,7 @@ TEST(ColumnarArrayTest, TestSimple) {
arrow::list(arrow::float64()), "[[0.0, 1.1, 2.2], [3.3], [4.4], [5.5]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/3, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/3, 1);
ASSERT_NEAR(array.GetDouble(0), 3.3, 0.001);
std::vector<double> expected_array = {3.3};
ASSERT_EQ(array.ToDoubleArray().value(), expected_array);
Expand All @@ -121,7 +121,7 @@ TEST(ColumnarArrayTest, TestSimple) {
arrow::list(arrow::utf8()), R"([["abc", "def"], ["efg"], ["hello"], ["hi"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/4, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/4, 1);
ASSERT_EQ(array.GetString(0).ToString(), "hi");
ASSERT_EQ(std::string(array.GetStringView(0)), "hi");
}
Expand All @@ -134,7 +134,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
"[[1, 1, 2], [3], [2], [-4]]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/3, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/3, 1);
ASSERT_EQ(array.GetDate(0), 3);
}
{
Expand All @@ -143,7 +143,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
R"([["1.234", "1234.000"], ["-9876.543"], ["666.888"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 2);
ASSERT_EQ(array.GetDecimal(0, 10, 3), Decimal(10, 3, 1234));
}
{
Expand All @@ -153,7 +153,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 1);
auto ts = array.GetTimestamp(0, 9);
ASSERT_EQ(ts, Timestamp(59000, 0));
}
Expand All @@ -162,7 +162,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
R"([["aaa", "bb"], ["ccc"], ["bbb"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 2);
ASSERT_EQ(*array.GetBinary(1), Bytes("bb", pool.get()));
ASSERT_EQ(std::string(array.GetStringView(1)), "bb");
}
Expand All @@ -181,7 +181,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 2);
auto result_row = array.GetRow(1, 4);
ASSERT_EQ(result_row->GetLong(0), 2);
ASSERT_EQ(result_row->GetLong(1), 2);
Expand All @@ -193,7 +193,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
arrow::list(arrow::list(arrow::int64())), "[[[1, 2, 3], [4, 5, 6]], []]")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 1);
auto result_array = array.GetArray(0);
auto inner_result_array = array.GetArray(0);
std::vector<int64_t> values = {1, 2, 3};
Expand All @@ -208,7 +208,7 @@ TEST(ColumnarArrayTest, TestComplexAndNestedType) {
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
ASSERT_TRUE(list_array);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/0, 1);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/0, 1);
auto result_key = array.GetMap(0)->KeyArray();
auto result_value = array.GetMap(0)->ValueArray();
ASSERT_EQ(result_key->ToIntArray().value(), std::vector<int32_t>({1, 4}));
Expand All @@ -225,7 +225,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 0);
ASSERT_EQ(ts, Timestamp(951866603000, 0)) << ts.GetMillisecond();
}
Expand All @@ -236,7 +236,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 3);
ASSERT_EQ(ts, Timestamp(951866603001, 0)) << ts.GetMillisecond();
}
Expand All @@ -247,7 +247,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 6);
ASSERT_EQ(ts, Timestamp(951866603001, 1000)) << ts.GetMillisecond();
}
Expand All @@ -258,7 +258,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 9);
ASSERT_EQ(ts, Timestamp(951866603001, 1001)) << ts.GetMillisecond();
}
Expand All @@ -269,7 +269,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 0);
ASSERT_EQ(ts, Timestamp(951866603000, 0)) << ts.GetMillisecond();
}
Expand All @@ -280,7 +280,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 3);
ASSERT_EQ(ts, Timestamp(951866603001, 0)) << ts.GetMillisecond();
}
Expand All @@ -291,7 +291,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 6);
ASSERT_EQ(ts, Timestamp(951866603001, 1000)) << ts.GetMillisecond();
}
Expand All @@ -302,7 +302,7 @@ TEST(ColumnarArrayTest, TestTimestampType) {
"1899-01-01T00:59:20"],["2033-05-18T03:33:20"]])")
.ValueOrDie();
auto list_array = arrow::internal::checked_pointer_cast<arrow::ListArray>(f1);
auto array = ColumnarArray(list_array->values(), pool, /*offset=*/1, 2);
auto array = ColumnarArray(list_array->values().get(), pool, /*offset=*/1, 2);
auto ts = array.GetTimestamp(0, 9);
ASSERT_EQ(ts, Timestamp(951866603001, 1001)) << ts.GetMillisecond();
}
Expand Down
4 changes: 2 additions & 2 deletions src/paimon/common/data/columnar/columnar_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ ColumnarMap::ColumnarMap(const std::shared_ptr<arrow::Array>& key_array,
length_(length) {}

std::shared_ptr<InternalArray> ColumnarMap::KeyArray() const {
return std::make_shared<ColumnarArray>(key_array_, pool_, offset_, length_);
return std::make_shared<ColumnarArray>(key_array_.get(), pool_, offset_, length_);
}
std::shared_ptr<InternalArray> ColumnarMap::ValueArray() const {
return std::make_shared<ColumnarArray>(value_array_, pool_, offset_, length_);
return std::make_shared<ColumnarArray>(value_array_.get(), pool_, offset_, length_);
}

} // namespace paimon
2 changes: 1 addition & 1 deletion src/paimon/common/data/columnar/columnar_row.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ std::shared_ptr<InternalArray> ColumnarRow::GetArray(int32_t pos) const {
assert(list_array);
int32_t offset = list_array->value_offset(row_id_);
int32_t length = list_array->value_length(row_id_);
return std::make_shared<ColumnarArray>(list_array->values(), pool_, offset, length);
return std::make_shared<ColumnarArray>(list_array->values().get(), pool_, offset, length);
}

std::shared_ptr<InternalMap> ColumnarRow::GetMap(int32_t pos) const {
Expand Down
2 changes: 1 addition & 1 deletion src/paimon/common/data/columnar/columnar_row_ref.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ std::shared_ptr<InternalArray> ColumnarRowRef::GetArray(int32_t pos) const {
assert(list_array);
int32_t offset = list_array->value_offset(row_id_);
int32_t length = list_array->value_length(row_id_);
return std::make_shared<ColumnarArray>(list_array->values(), ctx_->pool, offset, length);
return std::make_shared<ColumnarArray>(list_array->values().get(), ctx_->pool, offset, length);
}

std::shared_ptr<InternalMap> ColumnarRowRef::GetMap(int32_t pos) const {
Expand Down
Loading