From d3389f90d385ee38e7a2deb35480da5e9370c294 Mon Sep 17 00:00:00 2001 From: duanyyyyyyy Date: Tue, 12 May 2026 12:07:20 +0800 Subject: [PATCH 1/2] Fix FieldMappingReader skipping mapping when only column names differ --- src/paimon/core/io/field_mapping_reader.cpp | 9 ++++ .../core/io/field_mapping_reader_test.cpp | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/src/paimon/core/io/field_mapping_reader.cpp b/src/paimon/core/io/field_mapping_reader.cpp index 286778b28..4125c82a1 100644 --- a/src/paimon/core/io/field_mapping_reader.cpp +++ b/src/paimon/core/io/field_mapping_reader.cpp @@ -65,6 +65,15 @@ FieldMappingReader::FieldMappingReader(int32_t field_count, if (non_partition_info_.cast_executors[i] != nullptr) { need_casting_ = true; } + // Field name change (RENAME COLUMN) also requires mapping: data schema + // carries the file's physical name while read schema carries the + // post-rename logical name. If we skipped mapping, the inner reader's + // batch would be passed through with the old physical name and the + // consumer's name-based lookup against the read schema would fail. + if (non_partition_info_.non_partition_data_schema[i].Name() != + non_partition_info_.non_partition_read_schema[i].Name()) { + need_mapping_ = true; + } } } diff --git a/src/paimon/core/io/field_mapping_reader_test.cpp b/src/paimon/core/io/field_mapping_reader_test.cpp index 4bb36adc8..b3e741ca8 100644 --- a/src/paimon/core/io/field_mapping_reader_test.cpp +++ b/src/paimon/core/io/field_mapping_reader_test.cpp @@ -597,6 +597,48 @@ TEST_F(FieldMappingReaderTest, TestReadWithSchemaEvolutionWithRenameAndModifyTyp CheckResult(read_schema, /*predicate=*/nullptr, expect_data); } +TEST_F(FieldMappingReaderTest, TestReadWithSchemaEvolutionPureRename) { + // Regression: pure RENAME (same field ids, same types, identity order, no + // partition / non-exist) used to leave need_mapping_ false, taking the + // FieldMappingReader PASSTHRU path. The inner reader's batch was emitted + // unchanged carrying the file's physical column names, so a consumer that + // looked columns up by name against the read schema (post-rename logical + // names) failed to find them. + + // File schema: physical names f0, f1 + std::vector data_fields = {DataField(0, arrow::field("f0", arrow::utf8())), + DataField(1, arrow::field("f1", arrow::int32()))}; + auto data_schema = DataField::ConvertDataFieldsToArrowSchema(data_fields); + auto data_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({data_schema->fields()}), + R"([ + ["Alice", 1], + ["Bob", 2], + ["Carol", 3] + ])") + .ValueOrDie()); + + // Read schema: same field ids, RENAMED names, same types, identity order + std::vector read_fields = { + DataField(0, arrow::field("name_new", arrow::utf8())), + DataField(1, arrow::field("age_new", arrow::int32()))}; + auto read_schema = DataField::ConvertDataFieldsToArrowSchema(read_fields); + + // Expected output uses the post-rename names; verifies mapping actually + // ran (PASSTHRU would keep f0/f1 and Equals would fail). + auto expected = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_({read_schema->fields()}), + R"([ + ["Alice", 1], + ["Bob", 2], + ["Carol", 3] + ])") + .ValueOrDie()); + + CheckResult(data_schema, data_array, read_schema, /*predicate=*/nullptr, + /*partition_keys=*/{}, BinaryRow::EmptyRow(), expected); +} + TEST_F(FieldMappingReaderTest, TestReadWithSchemaEvolutionWithRenameAndModifyTypeAndPredicate) { // field_0 and field_3 are rename and modify type // result is not filtered by predicate, as DOUBLE->STRING alter table does not support predicate From 6483e7ac5353d58fbb9dcb7da216e81cd16b9b21 Mon Sep 17 00:00:00 2001 From: duanyyyyyyy Date: Tue, 12 May 2026 13:34:09 +0800 Subject: [PATCH 2/2] fix pre-commit --- src/paimon/core/io/field_mapping_reader_test.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/paimon/core/io/field_mapping_reader_test.cpp b/src/paimon/core/io/field_mapping_reader_test.cpp index b3e741ca8..9ea5fa144 100644 --- a/src/paimon/core/io/field_mapping_reader_test.cpp +++ b/src/paimon/core/io/field_mapping_reader_test.cpp @@ -619,9 +619,8 @@ TEST_F(FieldMappingReaderTest, TestReadWithSchemaEvolutionPureRename) { .ValueOrDie()); // Read schema: same field ids, RENAMED names, same types, identity order - std::vector read_fields = { - DataField(0, arrow::field("name_new", arrow::utf8())), - DataField(1, arrow::field("age_new", arrow::int32()))}; + std::vector read_fields = {DataField(0, arrow::field("name_new", arrow::utf8())), + DataField(1, arrow::field("age_new", arrow::int32()))}; auto read_schema = DataField::ConvertDataFieldsToArrowSchema(read_fields); // Expected output uses the post-rename names; verifies mapping actually