diff --git a/be/src/formats/parquet/column_converter.cpp b/be/src/formats/parquet/column_converter.cpp index b70bd54e1544bf..5b2a7ee7fcdaed 100644 --- a/be/src/formats/parquet/column_converter.cpp +++ b/be/src/formats/parquet/column_converter.cpp @@ -912,6 +912,14 @@ Status Int64ToDateTimeConverter::convert(const Column* src, Column* dst) { if (!src_null_data[i]) { int64_t seconds = src_data[i] / _second_mask; int64_t nanoseconds = (src_data[i] % _second_mask) * _scale_to_nano_factor; + // Truncating division leaves a negative sub-second remainder for a pre-1970 tick; + // borrow a whole second so nanoseconds stays in [0, NANOSECS_PER_SEC), matching the + // floor split the FE boundary computation uses. Without this, of_epoch_second packs + // a negative microsecond into the timestamp and corrupts the value. + if (nanoseconds < 0) { + seconds -= 1; + nanoseconds += NANOSECS_PER_SEC; + } if constexpr (UTC_TO_TZ) { int offset = _offset; diff --git a/be/test/formats/parquet/column_converter_test.cpp b/be/test/formats/parquet/column_converter_test.cpp index 9452b2328c06b3..98779589aacb69 100644 --- a/be/test/formats/parquet/column_converter_test.cpp +++ b/be/test/formats/parquet/column_converter_test.cpp @@ -766,4 +766,25 @@ TEST_F(ColumnConverterTest, Int64_2_Timestamp) { } } } + +// A pre-1970 (negative epoch tick) timestamp with a nonzero sub-second component must decode to the +// correct wall clock. C++ truncating division splits a negative tick into a too-high second and a +// negative sub-second; without a floor-borrow that negative sub-second corrupts the packed DATETIME. 
+TEST_F(ColumnConverterTest, Int64PreEpochTimestampSubSecond) {
+    // Inputs handed to check(): the fixture file, the expected rendering, and the expected row count.
+    const std::string parquet_path =
+            "./be/test/formats/parquet/test_data/column_converter/int64_timestamp_pre_epoch.parquet";
+    const std::string wanted_text = "[1969-12-31 23:59:59.500000]";
+    const size_t wanted_rows = 5;
+    const TypeDescriptor datetime_type = TypeDescriptor::from_logical_type(LogicalType::TYPE_DATETIME);
+
+    // Both INT64 timestamp columns are read as DATETIME and must yield the same expected
+    // value; the millisecond column is verified first, then the microsecond column.
+    const char* const int64_columns[] = {"timestamp_millis", "timestamp_micros"};
+    for (const char* raw_name : int64_columns) {
+        // Pass the column name to check() as a named std::string.
+        const std::string column_name = raw_name;
+        check(parquet_path, datetime_type, column_name, wanted_text, wanted_rows);
+    }
+}
 } // namespace starrocks::parquet
diff --git a/be/test/formats/parquet/test_data/column_converter/int64_timestamp_pre_epoch.parquet b/be/test/formats/parquet/test_data/column_converter/int64_timestamp_pre_epoch.parquet
new file mode 100644
index 00000000000000..3d5ae745da8a49
Binary files /dev/null and b/be/test/formats/parquet/test_data/column_converter/int64_timestamp_pre_epoch.parquet differ