Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 45 additions & 28 deletions src/paimon/common/global_index/btree/btree_file_meta_selector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ namespace paimon {
BTreeFileMetaSelector::BTreeFileMetaSelector(const std::vector<GlobalIndexIOMeta>& files,
const std::shared_ptr<arrow::DataType>& key_type,
const std::shared_ptr<MemoryPool>& pool)
: key_type_(key_type), pool_(pool) {
: key_type_(key_type),
pool_(pool),
comparator_(KeySerializer::CreateComparator(key_type, pool)) {
files_.reserve(files.size());
for (const auto& file : files) {
auto index_meta = BTreeIndexMeta::Deserialize(file.metadata, pool.get());
Expand All @@ -39,14 +41,15 @@ Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitIsNull() {
}

/// Selects the index files whose [first key, last key] range can contain `literal`.
///
/// The literal is serialized once, before the per-file predicate runs, and is
/// compared against each file's boundary keys in serialized form through
/// `comparator_`. Files that contain only nulls never match.
///
/// @param literal value the indexed key is compared for equality with.
/// @return the matching files, or the error raised while serializing the
///         literal or comparing keys.
Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitEqual(const Literal& literal) {
    PAIMON_ASSIGN_OR_RAISE(MemorySlice literal_slice, SerializeLiteral(literal));
    // `literal_slice` is captured by reference: the predicate is only used
    // inside this Filter() call, while the local is still alive.
    return Filter([this, &literal_slice](const BTreeIndexMeta& meta) -> Result<bool> {
        if (meta.OnlyNulls()) {
            return false;
        }
        // NOTE(review): the wrapped slices are views over the key bytes;
        // this assumes `meta` keeps those bytes alive - confirm.
        MemorySlice min_key_slice = WrapKeySlice(meta.FirstKey());
        MemorySlice max_key_slice = WrapKeySlice(meta.LastKey());
        PAIMON_ASSIGN_OR_RAISE(int32_t cmp_min, comparator_(literal_slice, min_key_slice));
        PAIMON_ASSIGN_OR_RAISE(int32_t cmp_max, comparator_(literal_slice, max_key_slice));
        // minKey <= literal <= maxKey
        return cmp_min >= 0 && cmp_max <= 0;
    });
}
Expand All @@ -59,66 +62,76 @@ Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitNotEqual(
/// Selects the index files that can hold a key strictly less than `literal`,
/// i.e. files whose first key compares below the literal.
///
/// The literal is serialized once and compared against each file's first key
/// in serialized form through `comparator_`. Files that contain only nulls
/// never match.
///
/// @param literal exclusive upper bound for the key.
/// @return the matching files, or the error raised while serializing the
///         literal or comparing keys.
Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitLessThan(
    const Literal& literal) {
    PAIMON_ASSIGN_OR_RAISE(MemorySlice literal_slice, SerializeLiteral(literal));
    // file.minKey < literal
    return Filter([this, &literal_slice](const BTreeIndexMeta& meta) -> Result<bool> {
        if (meta.OnlyNulls()) {
            return false;
        }
        MemorySlice min_key_slice = WrapKeySlice(meta.FirstKey());
        PAIMON_ASSIGN_OR_RAISE(int32_t cmp, comparator_(min_key_slice, literal_slice));
        return cmp < 0;
    });
}

/// Selects the index files that can hold a key less than or equal to
/// `literal`, i.e. files whose first key does not compare above the literal.
///
/// The literal is serialized once and compared against each file's first key
/// in serialized form through `comparator_`. Files that contain only nulls
/// never match.
///
/// @param literal inclusive upper bound for the key.
/// @return the matching files, or the error raised while serializing the
///         literal or comparing keys.
Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitLessOrEqual(
    const Literal& literal) {
    PAIMON_ASSIGN_OR_RAISE(MemorySlice literal_slice, SerializeLiteral(literal));
    // file.minKey <= literal
    return Filter([this, &literal_slice](const BTreeIndexMeta& meta) -> Result<bool> {
        if (meta.OnlyNulls()) {
            return false;
        }
        MemorySlice min_key_slice = WrapKeySlice(meta.FirstKey());
        PAIMON_ASSIGN_OR_RAISE(int32_t cmp, comparator_(min_key_slice, literal_slice));
        return cmp <= 0;
    });
}

/// Selects the index files that can hold a key strictly greater than
/// `literal`, i.e. files whose last key compares above the literal.
///
/// The literal is serialized once and compared against each file's last key
/// in serialized form through `comparator_`. Files that contain only nulls
/// never match.
///
/// @param literal exclusive lower bound for the key.
/// @return the matching files, or the error raised while serializing the
///         literal or comparing keys.
Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitGreaterThan(
    const Literal& literal) {
    PAIMON_ASSIGN_OR_RAISE(MemorySlice literal_slice, SerializeLiteral(literal));
    // file.maxKey > literal
    return Filter([this, &literal_slice](const BTreeIndexMeta& meta) -> Result<bool> {
        if (meta.OnlyNulls()) {
            return false;
        }
        MemorySlice max_key_slice = WrapKeySlice(meta.LastKey());
        PAIMON_ASSIGN_OR_RAISE(int32_t cmp, comparator_(max_key_slice, literal_slice));
        return cmp > 0;
    });
}

/// Selects the index files that can hold a key greater than or equal to
/// `literal`, i.e. files whose last key does not compare below the literal.
///
/// The literal is serialized once and compared against each file's last key
/// in serialized form through `comparator_`. Files that contain only nulls
/// never match.
///
/// @param literal inclusive lower bound for the key.
/// @return the matching files, or the error raised while serializing the
///         literal or comparing keys.
Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitGreaterOrEqual(
    const Literal& literal) {
    PAIMON_ASSIGN_OR_RAISE(MemorySlice literal_slice, SerializeLiteral(literal));
    // file.maxKey >= literal
    return Filter([this, &literal_slice](const BTreeIndexMeta& meta) -> Result<bool> {
        if (meta.OnlyNulls()) {
            return false;
        }
        MemorySlice max_key_slice = WrapKeySlice(meta.LastKey());
        PAIMON_ASSIGN_OR_RAISE(int32_t cmp, comparator_(max_key_slice, literal_slice));
        return cmp >= 0;
    });
}

Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::VisitIn(
const std::vector<Literal>& literals) {
return Filter([this, &literals](const BTreeIndexMeta& meta) -> Result<bool> {
std::vector<MemorySlice> literal_slices;
literal_slices.reserve(literals.size());
for (const auto& literal : literals) {
PAIMON_ASSIGN_OR_RAISE(MemorySlice slice, SerializeLiteral(literal));
literal_slices.push_back(std::move(slice));
}
return Filter([this, &literal_slices](const BTreeIndexMeta& meta) -> Result<bool> {
if (meta.OnlyNulls()) {
return false;
}
PAIMON_ASSIGN_OR_RAISE(Literal min_key, DeserializeKey(meta.FirstKey()));
PAIMON_ASSIGN_OR_RAISE(Literal max_key, DeserializeKey(meta.LastKey()));
for (const auto& literal : literals) {
PAIMON_ASSIGN_OR_RAISE(int32_t cmp_min, literal.CompareTo(min_key));
PAIMON_ASSIGN_OR_RAISE(int32_t cmp_max, literal.CompareTo(max_key));
MemorySlice min_key_slice = WrapKeySlice(meta.FirstKey());
MemorySlice max_key_slice = WrapKeySlice(meta.LastKey());
for (const auto& literal_slice : literal_slices) {
PAIMON_ASSIGN_OR_RAISE(int32_t cmp_min, comparator_(literal_slice, min_key_slice));
PAIMON_ASSIGN_OR_RAISE(int32_t cmp_max, comparator_(literal_slice, max_key_slice));
if (cmp_min >= 0 && cmp_max <= 0) {
return true;
}
Expand Down Expand Up @@ -163,10 +176,14 @@ Result<std::vector<GlobalIndexIOMeta>> BTreeFileMetaSelector::Filter(
return result;
}

Result<Literal> BTreeFileMetaSelector::DeserializeKey(
const std::shared_ptr<Bytes>& key_bytes) const {
MemorySlice slice = MemorySlice::Wrap(key_bytes);
return KeySerializer::DeserializeKey(slice, key_type_, pool_.get());
MemorySlice BTreeFileMetaSelector::WrapKeySlice(const std::shared_ptr<Bytes>& key) {
return MemorySlice::Wrap(MemorySegment::WrapView(key->data(), key->size()));
Comment thread
lxy-9602 marked this conversation as resolved.
Comment thread
lxy-9602 marked this conversation as resolved.
}

/// Serializes `literal` with KeySerializer for this selector's key type and
/// wraps the resulting bytes in a MemorySlice.
///
/// @param literal value to serialize.
/// @return the serialized key as a slice, or the serialization error.
Result<MemorySlice> BTreeFileMetaSelector::SerializeLiteral(const Literal& literal) const {
    PAIMON_ASSIGN_OR_RAISE(
        std::shared_ptr<Bytes> serialized_key,
        KeySerializer::SerializeKey(literal, key_type_, pool_.get()));
    return MemorySlice::Wrap(serialized_key);
}

} // namespace paimon
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "paimon/common/global_index/btree/btree_index_meta.h"
#include "paimon/common/global_index/btree/key_serializer.h"
#include "paimon/common/memory/memory_slice.h"
#include "paimon/global_index/global_index_io_meta.h"
#include "paimon/predicate/function_visitor.h"

Expand Down Expand Up @@ -55,11 +56,16 @@ class BTreeFileMetaSelector : public FunctionVisitor<std::vector<GlobalIndexIOMe

Result<std::vector<GlobalIndexIOMeta>> Filter(const MetaPredicate& predicate) const;

Comment thread
lxy-9602 marked this conversation as resolved.
Result<Literal> DeserializeKey(const std::shared_ptr<Bytes>& key_bytes) const;
Result<MemorySlice> SerializeLiteral(const Literal& literal) const;

/// Create a non-owning MemorySlice view over the raw bytes of a key,
/// avoiding shared_ptr reference-count overhead.
static MemorySlice WrapKeySlice(const std::shared_ptr<Bytes>& key);

std::vector<std::pair<GlobalIndexIOMeta, std::shared_ptr<BTreeIndexMeta>>> files_;
std::shared_ptr<arrow::DataType> key_type_;
std::shared_ptr<MemoryPool> pool_;
MemorySlice::SliceComparator comparator_;
};

} // namespace paimon
Loading