Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions include/paimon/predicate/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,13 @@ class PAIMON_EXPORT Literal {
std::string ToString() const;

/// Gets the hash code for this literal.
/// @note HashCode() hashes the exact bit representation (including Decimal scale), while
/// operator== delegates to CompareTo() which uses numeric equality (e.g. decimals with
/// different scales can compare equal). This means the hash-equality contract (equal objects
/// must have equal hashes) may be violated for Decimal literals with different scales. In
/// practice this is safe because all current std::unordered_map<Literal, ...> usages (bitmap
/// file index) only store values from the same column, which guarantees a fixed precision and
/// scale.
size_t HashCode() const;

/// Compares this literal with another literal. The comparison follows SQL semantics for the
Expand Down
63 changes: 5 additions & 58 deletions src/paimon/common/predicate/literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,8 @@ Result<int32_t> Literal::CompareTo(const Literal& other) const {
case FieldType::BINARY: {
std::string_view v1(impl_->value_.Buffer, impl_->size_);
std::string_view v2(other.impl_->value_.Buffer, other.impl_->size_);
return (*this == other) ? 0 : (v1 < v2 ? -1 : 1);
int32_t cmp = v1.compare(v2);
return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
}
case FieldType::TIMESTAMP:
return impl_->value_.TimestampVal == other.impl_->value_.TimestampVal
Expand All @@ -361,65 +362,11 @@ bool Literal::operator==(const Literal& other) const {
if (this == &other) {
return true;
}
if (GetType() != other.GetType() || IsNull() != other.IsNull()) {
auto result = CompareTo(other);
if (!result.ok()) {
return false;
}
if (IsNull()) {
return true;
}
if (GetType() != FieldType::FLOAT && GetType() != FieldType::DOUBLE &&
HashCode() != other.HashCode()) {
return false;
}
switch (GetType()) {
case FieldType::BOOLEAN:
return impl_->value_.BooleanVal == other.impl_->value_.BooleanVal;
case FieldType::TINYINT:
return impl_->value_.TinyIntVal == other.impl_->value_.TinyIntVal;
case FieldType::SMALLINT:
return impl_->value_.SmallIntVal == other.impl_->value_.SmallIntVal;
case FieldType::INT:
return impl_->value_.IntVal == other.impl_->value_.IntVal;
case FieldType::BIGINT:
return impl_->value_.BigIntVal == other.impl_->value_.BigIntVal;
case FieldType::FLOAT: {
if (std::isnan(impl_->value_.FloatVal) && std::isnan(other.impl_->value_.FloatVal)) {
return true;
}
if (impl_->value_.FloatVal == INFINITY && other.impl_->value_.FloatVal == INFINITY) {
return true;
}
if (impl_->value_.FloatVal == -INFINITY && other.impl_->value_.FloatVal == -INFINITY) {
return true;
}
return std::fabs(impl_->value_.FloatVal - other.impl_->value_.FloatVal) < 1E-5;
}
case FieldType::DOUBLE: {
if (std::isnan(impl_->value_.DoubleVal) && std::isnan(other.impl_->value_.DoubleVal)) {
return true;
}
if (impl_->value_.DoubleVal == INFINITY && other.impl_->value_.DoubleVal == INFINITY) {
return true;
}
if (impl_->value_.DoubleVal == -INFINITY &&
other.impl_->value_.DoubleVal == -INFINITY) {
return true;
}
return std::fabs(impl_->value_.DoubleVal - other.impl_->value_.DoubleVal) < 1E-5;
}
case FieldType::STRING:
case FieldType::BINARY:
return impl_->size_ == other.impl_->size_ &&
memcmp(impl_->value_.Buffer, other.impl_->value_.Buffer, impl_->size_) == 0;
case FieldType::TIMESTAMP:
return impl_->value_.TimestampVal == other.impl_->value_.TimestampVal;
case FieldType::DECIMAL:
return impl_->value_.DecimalVal == other.impl_->value_.DecimalVal;
case FieldType::DATE:
return impl_->value_.IntVal == other.impl_->value_.IntVal;
default:
return false;
}
return result.value() == 0;
}

bool Literal::operator!=(const Literal& r) const {
Expand Down
19 changes: 17 additions & 2 deletions src/paimon/core/casting/cast_executor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,23 @@ class CastExecutorTest : public ::testing::Test {
for (size_t i = 0; i < src_data.size(); i++) {
ASSERT_OK_AND_ASSIGN(Literal result, cast_executor->Cast(src_literals[i], target_type));
ASSERT_FALSE(result.IsNull());
ASSERT_EQ(target_literals[i], result)
<< target_literals[i].ToString() << "->" << result.ToString();
if (target_literals[i] != result) {
// Fall back to approximate comparison for floating-point types, because
// different conversion paths (e.g. decimal->float vs float literal) may
// produce slightly different bit representations.
if (target_field_type == FieldType::FLOAT) {
ASSERT_NEAR(target_literals[i].GetValue<float>(), result.GetValue<float>(),
1E-5)
<< target_literals[i].ToString() << "->" << result.ToString();
} else if (target_field_type == FieldType::DOUBLE) {
ASSERT_NEAR(target_literals[i].GetValue<double>(), result.GetValue<double>(),
1E-5)
<< target_literals[i].ToString() << "->" << result.ToString();
} else {
ASSERT_EQ(target_literals[i], result)
<< target_literals[i].ToString() << "->" << result.ToString();
}
}
}
// test null
Literal null_literal(src_type);
Expand Down
Loading