From 3748b171c5309ed0b0b000bcf92376314c932aa4 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Thu, 28 May 2026 15:56:17 +0800 Subject: [PATCH 1/2] refact: Refactor Literal operator== to delegate to CompareTo for consistent equality semantics --- src/paimon/common/predicate/literal.cpp | 63 ++----------------- .../core/casting/cast_executor_test.cpp | 19 +++++- 2 files changed, 22 insertions(+), 60 deletions(-) diff --git a/src/paimon/common/predicate/literal.cpp b/src/paimon/common/predicate/literal.cpp index bc37a1b4d..bdc58458d 100644 --- a/src/paimon/common/predicate/literal.cpp +++ b/src/paimon/common/predicate/literal.cpp @@ -339,7 +339,8 @@ Result Literal::CompareTo(const Literal& other) const { case FieldType::BINARY: { std::string_view v1(impl_->value_.Buffer, impl_->size_); std::string_view v2(other.impl_->value_.Buffer, other.impl_->size_); - return (*this == other) ? 0 : (v1 < v2 ? -1 : 1); + int32_t cmp = v1.compare(v2); + return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0); } case FieldType::TIMESTAMP: return impl_->value_.TimestampVal == other.impl_->value_.TimestampVal @@ -361,65 +362,11 @@ bool Literal::operator==(const Literal& other) const { if (this == &other) { return true; } - if (GetType() != other.GetType() || IsNull() != other.IsNull()) { + auto result = CompareTo(other); + if (!result.ok()) { return false; } - if (IsNull()) { - return true; - } - if (GetType() != FieldType::FLOAT && GetType() != FieldType::DOUBLE && - HashCode() != other.HashCode()) { - return false; - } - switch (GetType()) { - case FieldType::BOOLEAN: - return impl_->value_.BooleanVal == other.impl_->value_.BooleanVal; - case FieldType::TINYINT: - return impl_->value_.TinyIntVal == other.impl_->value_.TinyIntVal; - case FieldType::SMALLINT: - return impl_->value_.SmallIntVal == other.impl_->value_.SmallIntVal; - case FieldType::INT: - return impl_->value_.IntVal == other.impl_->value_.IntVal; - case FieldType::BIGINT: - return impl_->value_.BigIntVal == other.impl_->value_.BigIntVal; - case FieldType::FLOAT: { - if (std::isnan(impl_->value_.FloatVal) && std::isnan(other.impl_->value_.FloatVal)) { - return true; - } - if (impl_->value_.FloatVal == INFINITY && other.impl_->value_.FloatVal == INFINITY) { - return true; - } - if (impl_->value_.FloatVal == -INFINITY && other.impl_->value_.FloatVal == -INFINITY) { - return true; - } - return std::fabs(impl_->value_.FloatVal - other.impl_->value_.FloatVal) < 1E-5; - } - case FieldType::DOUBLE: { - if (std::isnan(impl_->value_.DoubleVal) && std::isnan(other.impl_->value_.DoubleVal)) { - return true; - } - if (impl_->value_.DoubleVal == INFINITY && other.impl_->value_.DoubleVal == INFINITY) { - return true; - } - if (impl_->value_.DoubleVal == -INFINITY && - other.impl_->value_.DoubleVal == -INFINITY) { - return true; - } - return std::fabs(impl_->value_.DoubleVal - other.impl_->value_.DoubleVal) < 1E-5; - } - case FieldType::STRING: - case FieldType::BINARY: - return impl_->size_ == other.impl_->size_ && - memcmp(impl_->value_.Buffer, other.impl_->value_.Buffer, impl_->size_) == 0; - case FieldType::TIMESTAMP: - return impl_->value_.TimestampVal == other.impl_->value_.TimestampVal; - case FieldType::DECIMAL: - return impl_->value_.DecimalVal == other.impl_->value_.DecimalVal; - case FieldType::DATE: - return impl_->value_.IntVal == other.impl_->value_.IntVal; - default: - return false; - } + return result.value() == 0; } bool Literal::operator!=(const Literal& r) const { diff --git a/src/paimon/core/casting/cast_executor_test.cpp b/src/paimon/core/casting/cast_executor_test.cpp index 1ff9397e4..8c97d58cb 100644 --- a/src/paimon/core/casting/cast_executor_test.cpp +++ b/src/paimon/core/casting/cast_executor_test.cpp @@ -162,8 +162,23 @@ class CastExecutorTest : public ::testing::Test { for (size_t i = 0; i < src_data.size(); i++) { ASSERT_OK_AND_ASSIGN(Literal result, cast_executor->Cast(src_literals[i], target_type)); ASSERT_FALSE(result.IsNull()); - ASSERT_EQ(target_literals[i], result) - << target_literals[i].ToString() << "->" << result.ToString(); + if (target_literals[i] != result) { + // Fall back to approximate comparison for floating-point types, because + // different conversion paths (e.g. decimal->float vs float literal) may + // produce slightly different bit representations. + if (target_field_type == FieldType::FLOAT) { + ASSERT_NEAR(target_literals[i].GetValue(), result.GetValue(), + 1E-5) + << target_literals[i].ToString() << "->" << result.ToString(); + } else if (target_field_type == FieldType::DOUBLE) { + ASSERT_NEAR(target_literals[i].GetValue(), result.GetValue(), + 1E-5) + << target_literals[i].ToString() << "->" << result.ToString(); + } else { + ASSERT_EQ(target_literals[i], result) + << target_literals[i].ToString() << "->" << result.ToString(); + } + } } // test null Literal null_literal(src_type); From df0cfc27969e330d71e67ef7cfc2d8d99caa9803 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Thu, 28 May 2026 16:36:25 +0800 Subject: [PATCH 2/2] fix review --- include/paimon/predicate/literal.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/paimon/predicate/literal.h b/include/paimon/predicate/literal.h index 7c3447dbf..473137490 100644 --- a/include/paimon/predicate/literal.h +++ b/include/paimon/predicate/literal.h @@ -89,6 +89,13 @@ class PAIMON_EXPORT Literal { std::string ToString() const; /// Gets the hash code for this literal. + /// @note HashCode() hashes the exact bit representation (including Decimal scale), while + /// operator== delegates to CompareTo() which uses numeric equality (e.g. decimals with + /// different scales can compare equal). This means the hash-equality contract (equal objects + /// must have equal hashes) may be violated for Decimal literals with different scales. In + /// practice this is safe because all current std::unordered_map usages (bitmap + /// file index) only store values from the same column, which guarantees a fixed precision and + /// scale. size_t HashCode() const; /// Compares this literal with another literal. The comparison follows SQL semantics for the