From e8a1c215d866ff3e179e18137517164b6a96567c Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Tue, 12 May 2026 07:49:59 +0000 Subject: [PATCH 1/3] feat: remove global index external path while drop table --- .../core/catalog/file_system_catalog.cpp | 16 ++- .../core/catalog/file_system_catalog_test.cpp | 125 ++++++++++++++++++ 2 files changed, 138 insertions(+), 3 deletions(-) diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index c5bee3faf..a4e82f7b9 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -338,9 +338,10 @@ Result> FileSystemCatalog::GetSchemaExternalPaths( std::set external_paths_set; for (const auto& schema : schemas) { const auto& options = schema->Options(); - auto iter = options.find(Options::DATA_FILE_EXTERNAL_PATHS); - if (iter != options.end() && !iter->second.empty()) { - auto paths = StringUtils::Split(iter->second, ",", /*ignore_empty=*/true); + // collect external data file path + auto data_iter = options.find(Options::DATA_FILE_EXTERNAL_PATHS); + if (data_iter != options.end() && !data_iter->second.empty()) { + auto paths = StringUtils::Split(data_iter->second, ",", /*ignore_empty=*/true); for (const auto& path : paths) { std::string trimmed_path = path; StringUtils::Trim(&trimmed_path); @@ -349,6 +350,15 @@ Result> FileSystemCatalog::GetSchemaExternalPaths( } } } + // collect external global index path + auto index_iter = options.find(Options::GLOBAL_INDEX_EXTERNAL_PATH); + if (index_iter != options.end() && !index_iter->second.empty()) { + std::string trimmed_path = index_iter->second; + StringUtils::Trim(&trimmed_path); + if (!trimmed_path.empty()) { + external_paths_set.insert(trimmed_path); + } + } } return std::vector(external_paths_set.begin(), external_paths_set.end()); } diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index 1c320f30d..4e7b4b6d4 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -744,6 +744,131 @@ TEST(FileSystemCatalogTest, TestDropTableWithMultipleExternalPaths) { ArrowSchemaRelease(&schema); } +TEST(FileSystemCatalogTest, TestDropTableWithGlobalIndexExternalPathOnMainBranch) { + std::map options; + options[Options::FILE_SYSTEM] = "local"; + options[Options::FILE_FORMAT] = "orc"; + ASSERT_OK_AND_ASSIGN(auto core_options, CoreOptions::FromMap(options)); + auto dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(dir); + FileSystemCatalog catalog(core_options.GetFileSystem(), dir->Str()); + ASSERT_OK(catalog.CreateDatabase("test_db", options, /*ignore_if_exists=*/false)); + + // Create external path directory for global index + auto global_index_dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(global_index_dir); + std::string global_index_path = global_index_dir->Str(); + + // Create a file in external path to simulate global index data + ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); + std::string index_file = PathUtil::JoinPath(global_index_path, "index-file.bin"); + ASSERT_OK(fs->WriteFile(index_file, "index data", /*overwrite=*/true)); + + // Verify index file exists + ASSERT_OK_AND_ASSIGN(bool index_file_exists, fs->Exists(index_file)); + ASSERT_TRUE(index_file_exists); + + // Create table with global index external path on main branch + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), + arrow::field("f1", arrow::utf8()), + }; + arrow::Schema typed_schema(fields); + ::ArrowSchema schema; + ASSERT_TRUE(arrow::ExportSchema(typed_schema, &schema).ok()); + + std::map table_options = options; + table_options[Options::GLOBAL_INDEX_EXTERNAL_PATH] = global_index_path; + + ASSERT_OK(catalog.CreateTable(Identifier("test_db", "tbl_global_index"), &schema, {}, {}, + table_options, false)); + + // Verify table exists + ASSERT_OK_AND_ASSIGN(bool table_exists, + catalog.TableExists(Identifier("test_db", "tbl_global_index"))); + ASSERT_TRUE(table_exists); + + // Drop the table + ASSERT_OK(catalog.DropTable(Identifier("test_db", "tbl_global_index"), + /*ignore_if_not_exists=*/false)); + + // Verify table is dropped + ASSERT_OK_AND_ASSIGN(table_exists, + catalog.TableExists(Identifier("test_db", "tbl_global_index"))); + ASSERT_FALSE(table_exists); + + // Verify global index external path is also cleaned up + ASSERT_OK_AND_ASSIGN(bool global_index_exists, fs->Exists(global_index_path)); + ASSERT_FALSE(global_index_exists); + + ArrowSchemaRelease(&schema); +} + +TEST(FileSystemCatalogTest, TestDropTableWithGlobalIndexExternalPathOnBranch) { + // Copy the real append_table_with_rt_branch.db test data, then patch the + // branch-rt schema-1 to include a GLOBAL_INDEX_EXTERNAL_PATH option. + // DropTable should discover and clean up that external path. + std::map options; + options[Options::FILE_SYSTEM] = "local"; + options[Options::FILE_FORMAT] = "orc"; + ASSERT_OK_AND_ASSIGN(auto core_options, CoreOptions::FromMap(options)); + auto dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(dir); + + // Copy the real test_data db into a temp directory + std::string test_data_path = GetDataDir() + "/orc/append_table_with_rt_branch.db"; + std::string db_path = dir->Str() + "/test_db.db"; + ASSERT_TRUE(TestUtil::CopyDirectory(test_data_path, db_path)); + + // Create a temporary external directory that represents branch global index data + auto branch_external_dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(branch_external_dir); + std::string branch_external_path = branch_external_dir->Str(); + + ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); + ASSERT_OK(fs->WriteFile(PathUtil::JoinPath(branch_external_path, "index.bin"), + "branch global index data", /*overwrite=*/true)); + + // Patch branch-rt/schema/schema-1: add GLOBAL_INDEX_EXTERNAL_PATH to options + std::string branch_schema_path = + PathUtil::JoinPath(db_path, "append_table_with_rt_branch/branch/branch-rt/schema/schema-1"); + std::string schema_content; + ASSERT_OK(fs->ReadFile(branch_schema_path, &schema_content)); + + // Insert the global index external path option into the JSON options block. + // Original: "file.format" : "orc" + // Patched: "file.format" : "orc", + // "global-index.external-path" : "" + std::string search_str = "\"file.format\" : \"orc\""; + std::string replace_str = "\"file.format\" : \"orc\",\n \"" + + std::string(Options::GLOBAL_INDEX_EXTERNAL_PATH) + "\" : \"" + + branch_external_path + "\""; + auto pos = schema_content.rfind(search_str); + ASSERT_NE(pos, std::string::npos); + schema_content.replace(pos, search_str.length(), replace_str); + ASSERT_OK(fs->WriteFile(branch_schema_path, schema_content, /*overwrite=*/true)); + + // Verify external path exists before drop + ASSERT_OK_AND_ASSIGN(bool external_exists, fs->Exists(branch_external_path)); + ASSERT_TRUE(external_exists); + + // Drop the table via catalog + FileSystemCatalog catalog(core_options.GetFileSystem(), dir->Str()); + Identifier identifier("test_db", "append_table_with_rt_branch"); + ASSERT_OK_AND_ASSIGN(bool table_exists, catalog.TableExists(identifier)); + ASSERT_TRUE(table_exists); + + ASSERT_OK(catalog.DropTable(identifier, /*ignore_if_not_exists=*/false)); + + // Verify table is dropped + ASSERT_OK_AND_ASSIGN(table_exists, catalog.TableExists(identifier)); + ASSERT_FALSE(table_exists); + + // Verify the branch global index external path is cleaned up by DropTable + ASSERT_OK_AND_ASSIGN(external_exists, fs->Exists(branch_external_path)); + ASSERT_FALSE(external_exists); +} + TEST(FileSystemCatalogTest, TestListSnapshots) { std::map options; options[Options::FILE_SYSTEM] = "local"; From f954e4e88ea7288ddd146238bb85558ef7059e57 Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Wed, 13 May 2026 06:36:07 +0000 Subject: [PATCH 2/3] fix comment --- .../core/catalog/file_system_catalog.cpp | 27 +++++++------------ .../core/catalog/file_system_catalog_test.cpp | 15 ++++++++--- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index a4e82f7b9..5f8c0f4a1 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -29,6 +29,7 @@ #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/path_util.h" #include "paimon/common/utils/string_utils.h" +#include "paimon/core/core_options.h" #include "paimon/core/snapshot.h" #include "paimon/core/table/system/system_table.h" #include "paimon/core/table/system/system_table_schema.h" @@ -338,26 +339,18 @@ Result> FileSystemCatalog::GetSchemaExternalPaths( std::set external_paths_set; for (const auto& schema : schemas) { const auto& options = schema->Options(); + PAIMON_ASSIGN_OR_RAISE(CoreOptions core_options, CoreOptions::FromMap(options)); // collect external data file path - auto data_iter = options.find(Options::DATA_FILE_EXTERNAL_PATHS); - if (data_iter != options.end() && !data_iter->second.empty()) { - auto paths = StringUtils::Split(data_iter->second, ",", /*ignore_empty=*/true); - for (const auto& path : paths) { - std::string trimmed_path = path; - StringUtils::Trim(&trimmed_path); - if (!trimmed_path.empty()) { - external_paths_set.insert(trimmed_path); - } - } + PAIMON_ASSIGN_OR_RAISE(std::vector data_external_paths, + core_options.CreateExternalPaths()); + for (const auto& path : data_external_paths) { + external_paths_set.insert(path); } // collect external global index path - auto index_iter = options.find(Options::GLOBAL_INDEX_EXTERNAL_PATH); - if (index_iter != options.end() && !index_iter->second.empty()) { - std::string trimmed_path = index_iter->second; - StringUtils::Trim(&trimmed_path); - if (!trimmed_path.empty()) { - external_paths_set.insert(trimmed_path); - } + PAIMON_ASSIGN_OR_RAISE(std::optional index_external_path, + core_options.CreateGlobalIndexExternalPath()); + if (index_external_path != std::nullopt) { + external_paths_set.insert(index_external_path.value()); } } return std::vector(external_paths_set.begin(), external_paths_set.end()); diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index 4e7b4b6d4..f45cfeeb8 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -644,6 +644,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithExternalPath) { auto external_dir = UniqueTestDirectory::Create(); ASSERT_TRUE(external_dir); std::string external_path = external_dir->Str(); + external_path = "FILE://" + external_path; // Create a file in external path to simulate external data ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); @@ -665,6 +666,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithExternalPath) { std::map table_options = options; table_options[Options::DATA_FILE_EXTERNAL_PATHS] = external_path; + table_options[Options::DATA_FILE_EXTERNAL_PATHS_STRATEGY] = "round-robin"; ASSERT_OK(catalog.CreateTable(Identifier("test_db", "tbl_with_external"), &schema, {}, {}, table_options, false)); @@ -707,7 +709,9 @@ TEST(FileSystemCatalogTest, TestDropTableWithMultipleExternalPaths) { ASSERT_TRUE(external_dir2); std::string external_path1 = external_dir1->Str(); + external_path1 = "FILE://" + external_path1; std::string external_path2 = external_dir2->Str(); + external_path2 = "FILE://" + external_path2; // Create files in external paths ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); @@ -727,6 +731,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithMultipleExternalPaths) { std::map table_options = options; table_options[Options::DATA_FILE_EXTERNAL_PATHS] = external_path1 + "," + external_path2; + table_options[Options::DATA_FILE_EXTERNAL_PATHS_STRATEGY] = "round-robin"; ASSERT_OK(catalog.CreateTable(Identifier("test_db", "tbl_multi_external"), &schema, {}, {}, table_options, false)); @@ -758,6 +763,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithGlobalIndexExternalPathOnMainBranch auto global_index_dir = UniqueTestDirectory::Create(); ASSERT_TRUE(global_index_dir); std::string global_index_path = global_index_dir->Str(); + global_index_path = "FILE://" + global_index_path; // Create a file in external path to simulate global index data ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); @@ -824,6 +830,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithGlobalIndexExternalPathOnBranch) { auto branch_external_dir = UniqueTestDirectory::Create(); ASSERT_TRUE(branch_external_dir); std::string branch_external_path = branch_external_dir->Str(); + branch_external_path = "FILE://" + branch_external_path; ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); ASSERT_OK(fs->WriteFile(PathUtil::JoinPath(branch_external_path, "index.bin"), @@ -839,10 +846,10 @@ TEST(FileSystemCatalogTest, TestDropTableWithGlobalIndexExternalPathOnBranch) { // Original: "file.format" : "orc" // Patched: "file.format" : "orc", // "global-index.external-path" : "" - std::string search_str = "\"file.format\" : \"orc\""; - std::string replace_str = "\"file.format\" : \"orc\",\n \"" + - std::string(Options::GLOBAL_INDEX_EXTERNAL_PATH) + "\" : \"" + - branch_external_path + "\""; + std::string search_str = R"("file.format" : "orc")"; + std::string replace_str = R"("file.format" : "orc", + "global-index.external-path" : ")" + + branch_external_path + R"(")"; auto pos = schema_content.rfind(search_str); ASSERT_NE(pos, std::string::npos); schema_content.replace(pos, search_str.length(), replace_str); From ce16125cd225292d5bc0809527e657205ff68ec7 Mon Sep 17 00:00:00 2001 From: "lisizhuo.lsz" Date: Wed, 13 May 2026 07:55:02 +0000 Subject: [PATCH 3/3] fix ci --- src/paimon/core/catalog/file_system_catalog_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index 26a26dc70..d66fb1c1d 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -949,6 +949,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithBranchExternalPaths) { auto branch_external_dir = UniqueTestDirectory::Create(); ASSERT_TRUE(branch_external_dir); std::string branch_external_path = branch_external_dir->Str(); + branch_external_path = "FILE://" + branch_external_path; ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); ASSERT_OK(fs->WriteFile(PathUtil::JoinPath(branch_external_path, "data.orc"), @@ -966,6 +967,7 @@ TEST(FileSystemCatalogTest, TestDropTableWithBranchExternalPaths) { // "data-file.external-paths" : "" std::string search_str = R"("file.format" : "orc")"; std::string replace_str = R"("file.format" : "orc", + "data-file.external-paths.strategy" : "round-robin", "data-file.external-paths" : ")" + branch_external_path + R"(")"; auto pos = schema_content.rfind(search_str);