Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/paimon/common/utils/bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ Status BloomFilter::AddHash(int32_t hash1) {
auto hash2 = static_cast<int32_t>(static_cast<uint32_t>(hash1) >> 16);

for (int32_t i = 1; i <= num_hash_functions_; i++) {
- int32_t combined_hash = hash1 + (i * hash2);
+ // Use uint32_t arithmetic to avoid signed overflow UB (matches Java int wrap semantics)
+ auto combined_hash =
+ static_cast<int32_t>(static_cast<uint32_t>(hash1) +
+ (static_cast<uint32_t>(i) * static_cast<uint32_t>(hash2)));
// hashcode should be positive, flip all the bits if it's negative
if (combined_hash < 0) {
combined_hash = ~combined_hash;
Expand All @@ -74,7 +77,10 @@ bool BloomFilter::TestHash(int32_t hash1) const {
auto hash2 = static_cast<int32_t>(static_cast<uint32_t>(hash1) >> 16);

for (int32_t i = 1; i <= num_hash_functions_; i++) {
- int32_t combined_hash = hash1 + (i * hash2);
+ // Use uint32_t arithmetic to avoid signed overflow UB (matches Java int wrap semantics)
+ auto combined_hash =
+ static_cast<int32_t>(static_cast<uint32_t>(hash1) +
+ (static_cast<uint32_t>(i) * static_cast<uint32_t>(hash2)));
// hashcode should be positive, flip all the bits if it's negative
if (combined_hash < 0) {
combined_hash = ~combined_hash;
Expand Down
10 changes: 8 additions & 2 deletions src/paimon/common/utils/bloom_filter64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,10 @@ void BloomFilter64::AddHash(int64_t hash64) {
auto hash2 = static_cast<int32_t>(static_cast<uint64_t>(hash64) >> 32);

for (int32_t i = 1; i <= num_hash_functions_; i++) {
- int32_t combined_hash = hash1 + (i * hash2);
+ // Use uint32_t arithmetic to avoid signed overflow UB (matches Java int wrap semantics)
+ auto combined_hash =
+ static_cast<int32_t>(static_cast<uint32_t>(hash1) +
+ (static_cast<uint32_t>(i) * static_cast<uint32_t>(hash2)));
// hashcode should be positive, flip all the bits if it's negative
if (combined_hash < 0) {
combined_hash = ~combined_hash;
Expand All @@ -83,7 +86,10 @@ bool BloomFilter64::TestHash(int64_t hash64) const {
auto hash2 = static_cast<int32_t>(static_cast<uint64_t>(hash64) >> 32);

for (int32_t i = 1; i <= num_hash_functions_; i++) {
- int32_t combined_hash = hash1 + (i * hash2);
+ // Use uint32_t arithmetic to avoid signed overflow UB (matches Java int wrap semantics)
+ auto combined_hash =
+ static_cast<int32_t>(static_cast<uint32_t>(hash1) +
+ (static_cast<uint32_t>(i) * static_cast<uint32_t>(hash2)));
// hashcode should be positive, flip all the bits if it's negative
if (combined_hash < 0) {
combined_hash = ~combined_hash;
Expand Down
12 changes: 8 additions & 4 deletions src/paimon/common/utils/delta_varint_compressor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ std::vector<char> DeltaVarintCompressor::Compress(const std::vector<int64_t>& da
return {};
}

- // 1. Delta encoding
+ // 1. Delta encoding (use unsigned subtraction to avoid signed overflow UB)
std::vector<int64_t> deltas;
deltas.reserve(data.size());
deltas.push_back(data[0]);
for (size_t i = 1; i < data.size(); i++) {
- deltas.push_back(data[i] - data[i - 1]);
+ uint64_t unsigned_delta =
+ static_cast<uint64_t>(data[i]) - static_cast<uint64_t>(data[i - 1]);
+ deltas.push_back(static_cast<int64_t>(unsigned_delta));
}

// 2. ZigZag + Varint
Expand All @@ -61,11 +63,13 @@ Result<std::vector<int64_t>> DeltaVarintCompressor::Decompress(const std::vector
deltas.push_back(delta);
}

- // 2. Delta decoding
+ // 2. Delta decoding (use unsigned addition to avoid signed overflow UB)
std::vector<int64_t> result(deltas.size());
result[0] = deltas[0];
for (size_t i = 1; i < result.size(); i++) {
- result[i] = result[i - 1] + deltas[i];
+ uint64_t reconstructed =
+ static_cast<uint64_t>(result[i - 1]) + static_cast<uint64_t>(deltas[i]);
+ result[i] = static_cast<int64_t>(reconstructed);
}
return result;
}
Expand Down
Loading