From 5d873733d74422b46b11cf306a03e129864ea07d Mon Sep 17 00:00:00 2001 From: "zhanghaobo@kanzhun.com" Date: Mon, 13 Oct 2025 20:12:49 +0800 Subject: [PATCH 1/3] feat: add an option to skip validate lance table. --- .../lance/namespace/hive2/Hive2Namespace.java | 6 +++--- .../lance/namespace/hive2/Hive2NamespaceConfig.java | 11 +++++++++++ .../com/lancedb/lance/namespace/hive2/Hive2Util.java | 11 +++++++++++ .../namespace/hive2/TestHive2NamespaceConfig.java | 4 +++- .../lance/namespace/hive3/Hive3Namespace.java | 6 +++--- .../lance/namespace/hive3/Hive3NamespaceConfig.java | 11 +++++++++++ .../com/lancedb/lance/namespace/hive3/Hive3Util.java | 12 ++++++++++++ .../namespace/hive3/TestHive3NamespaceConfig.java | 4 +++- 8 files changed, 57 insertions(+), 8 deletions(-) diff --git a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java index 7b43fa85..5ac3b5dd 100644 --- a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java +++ b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java @@ -254,7 +254,7 @@ public void tableExists(TableExistsRequest request) { CommonUtil.formatCurrentStackTrace()); } - Hive2Util.validateLanceTable(hmsTable.get()); + Hive2Util.validateLanceTable(hmsTable.get(), config.isSkipValidationLanceFormatTable()); } @Override @@ -461,7 +461,7 @@ protected Optional doDescribeTable(ObjectIdentifier id) { return Optional.empty(); } - Hive2Util.validateLanceTable(hmsTable.get()); + Hive2Util.validateLanceTable(hmsTable.get(), config.isSkipValidationLanceFormatTable()); return Optional.of(hmsTable.get().getSd().getLocation()); } @@ -594,7 +594,7 @@ protected String doDropTable(ObjectIdentifier id) { CommonUtil.formatCurrentStackTrace()); } - Hive2Util.validateLanceTable(hmsTable.get()); + Hive2Util.validateLanceTable(hmsTable.get(), config.isSkipValidationLanceFormatTable()); String location = hmsTable.get().getSd().getLocation(); clientPool.run( diff --git a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java index ffb94e52..fd24c9f6 100644 --- a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java +++ b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java @@ -36,9 +36,13 @@ public class Hive2NamespaceConfig { public static final String ROOT_DEFAULT = System.getProperty("user.dir"); + public static final String SKIP_LANCE_TABLE_VALIDATION = "skip.validate.lance.format.table"; + public static final boolean SKIP_LANCE_TABLE_VALIDATION_DEFAULT = false; + private final int clientPoolSize; private final Map storageOptions; private final String root; + private final boolean skipValidationLanceFormatTable; public Hive2NamespaceConfig(Map properties) { this.clientPoolSize = @@ -47,6 +51,9 @@ public Hive2NamespaceConfig(Map properties) { this.root = OpenDalUtil.stripTrailingSlash( PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); + this.skipValidationLanceFormatTable = + PropertyUtil.propertyAsBoolean(properties, + SKIP_LANCE_TABLE_VALIDATION, SKIP_LANCE_TABLE_VALIDATION_DEFAULT); } public int getClientPoolSize() { @@ -60,4 +67,8 @@ public Map getStorageOptions() { public String getRoot() { return root; } + + public boolean isSkipValidationLanceFormatTable() { + return skipValidationLanceFormatTable; + } } diff --git a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Util.java b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Util.java index e82e7cdc..6bf239e4 100644 --- a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Util.java +++ b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Util.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.HashMap; @@ -34,6 +36,7 @@ import static com.lancedb.lance.namespace.hive2.Hive2ErrorType.InvalidLanceTable; public class Hive2Util { + private static final Logger LOG = LoggerFactory.getLogger(Hive2Util.class); public static Database getDatabaseOrNull(Hive2ClientPool clientPool, String db) { try { @@ -109,6 +112,14 @@ public static Optional getTable(Hive2ClientPool clientPool, String db, St } public static void validateLanceTable(Table table) { + validateLanceTable(table, false); + } + + public static void validateLanceTable(Table table, boolean skipValidation) { + if (skipValidation) { + LOG.info("Skip validate lance format table procedure."); + return; + } Map params = table.getParameters(); if (params == null || !"lance".equalsIgnoreCase(params.get("table_type"))) { throw LanceNamespaceException.badRequest( diff --git a/java/lance-namespace-hive2/src/test/java/com/lancedb/lance/namespace/hive2/TestHive2NamespaceConfig.java b/java/lance-namespace-hive2/src/test/java/com/lancedb/lance/namespace/hive2/TestHive2NamespaceConfig.java index 1179a06d..f7d39513 100644 --- a/java/lance-namespace-hive2/src/test/java/com/lancedb/lance/namespace/hive2/TestHive2NamespaceConfig.java +++ b/java/lance-namespace-hive2/src/test/java/com/lancedb/lance/namespace/hive2/TestHive2NamespaceConfig.java @@ -58,11 +58,13 @@ public void testOtherConfigurationValues() { ImmutableMap.of( "root", "/custom/root", "client.pool-size", "5", - "storage.s3.region", "us-west-2"); + "storage.s3.region", "us-west-2", + "skip.validate.lance.format.table", "true"); Hive2NamespaceConfig config = new Hive2NamespaceConfig(properties); assertEquals("/custom/root", config.getRoot()); assertEquals(5, config.getClientPoolSize()); assertEquals("us-west-2", config.getStorageOptions().get("s3.region")); + assertEquals(true, config.isSkipValidationLanceFormatTable()); } } diff --git a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java index 5cf63b86..a655149e 100644 --- a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java +++ b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java @@ -270,7 +270,7 @@ public void tableExists(TableExistsRequest request) { CommonUtil.formatCurrentStackTrace()); } - Hive3Util.validateLanceTable(hmsTable.get()); + Hive3Util.validateLanceTable(hmsTable.get(), config.isSkipValidationLanceFormatTable()); } @Override @@ -550,7 +550,7 @@ protected Optional doDescribeTable(ObjectIdentifier id) { return Optional.empty(); } - Hive3Util.validateLanceTable(hmsTable.get()); + Hive3Util.validateLanceTable(hmsTable.get(), config.isSkipValidationLanceFormatTable()); return Optional.of(hmsTable.get().getSd().getLocation()); } @@ -691,7 +691,7 @@ protected String doDropTable(ObjectIdentifier id) { CommonUtil.formatCurrentStackTrace()); } - Hive3Util.validateLanceTable(hmsTable.get()); + Hive3Util.validateLanceTable(hmsTable.get(), config.isSkipValidationLanceFormatTable()); String location = hmsTable.get().getSd().getLocation(); clientPool.run( diff --git a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java index 4d647f3d..2e7aab62 100644 --- a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java +++ b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java @@ -37,9 +37,13 @@ public class Hive3NamespaceConfig { public static final String ROOT_DEFAULT = System.getProperty("user.dir"); + public static final String SKIP_LANCE_TABLE_VALIDATION = "skip.validate.lance.format.table"; + public static final boolean SKIP_LANCE_TABLE_VALIDATION_DEFAULT = false; + private final int clientPoolSize; private final Map storageOptions; private final String root; + private final boolean skipValidationLanceFormatTable; public Hive3NamespaceConfig(Map properties) { this.clientPoolSize = @@ -48,6 +52,9 @@ public Hive3NamespaceConfig(Map properties) { this.root = OpenDalUtil.stripTrailingSlash( PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); + this.skipValidationLanceFormatTable = + PropertyUtil.propertyAsBoolean(properties, + SKIP_LANCE_TABLE_VALIDATION, SKIP_LANCE_TABLE_VALIDATION_DEFAULT); } public int getClientPoolSize() { @@ -61,4 +68,8 @@ public Map getStorageOptions() { public String getRoot() { return root; } + + public boolean isSkipValidationLanceFormatTable() { + return skipValidationLanceFormatTable; + } } diff --git a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Util.java b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Util.java index 628d9599..40665e52 100644 --- a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Util.java +++ b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Util.java @@ -24,6 +24,8 @@ import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.security.UserGroupInformation; import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.HashMap; @@ -36,6 +38,8 @@ import static com.lancedb.lance.namespace.hive3.Hive3ErrorType.UnknownCatalog; public class Hive3Util { + private static final Logger LOG = LoggerFactory.getLogger(Hive3Util.class); + public static Catalog getCatalogOrNull(Hive3ClientPool clientPool, String catalog) { try { return clientPool.run(client -> client.getCatalog(catalog)); @@ -172,6 +176,14 @@ public static Optional
getTable( } public static void validateLanceTable(Table table) { + validateLanceTable(table, false); + } + + public static void validateLanceTable(Table table, boolean skipValidation) { + if (skipValidation) { + LOG.info("Skip validate lance format table procedure."); + return; + } Map params = table.getParameters(); if (params == null || !"lance".equalsIgnoreCase(params.get("table_type"))) { throw LanceNamespaceException.badRequest( diff --git a/java/lance-namespace-hive3/src/test/java/com/lancedb/lance/namespace/hive3/TestHive3NamespaceConfig.java b/java/lance-namespace-hive3/src/test/java/com/lancedb/lance/namespace/hive3/TestHive3NamespaceConfig.java index 81f95d4e..fe55c645 100644 --- a/java/lance-namespace-hive3/src/test/java/com/lancedb/lance/namespace/hive3/TestHive3NamespaceConfig.java +++ b/java/lance-namespace-hive3/src/test/java/com/lancedb/lance/namespace/hive3/TestHive3NamespaceConfig.java @@ -58,11 +58,13 @@ public void testOtherConfigurationValues() { ImmutableMap.of( "root", "/custom/root", "client.pool-size", "5", - "storage.s3.region", "us-west-2"); + "storage.s3.region", "us-west-2", + "skip.validate.lance.format.table", "true"); Hive3NamespaceConfig config = new Hive3NamespaceConfig(properties); assertEquals("/custom/root", config.getRoot()); assertEquals(5, config.getClientPoolSize()); assertEquals("us-west-2", config.getStorageOptions().get("s3.region")); + assertEquals(true, config.isSkipValidationLanceFormatTable()); } } From e9c921f709a8693c62afcb7a17ce677f8d9a06ef Mon Sep 17 00:00:00 2001 From: "zhanghaobo@kanzhun.com" Date: Mon, 13 Oct 2025 21:16:18 +0800 Subject: [PATCH 2/3] add getter method for clientPool field for lance-spark. --- .../com/lancedb/lance/namespace/hive2/Hive2Namespace.java | 4 ++++ .../com/lancedb/lance/namespace/hive3/Hive3Namespace.java | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java index 5ac3b5dd..b4d77e90 100644 --- a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java +++ b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2Namespace.java @@ -729,4 +729,8 @@ private String getDefaultTableLocation(String namespaceName, String tableName) { return String.format( "%s/%s/%s.lance", config.getRoot(), namespaceName.toLowerCase(), tableName.toLowerCase()); } + + public Hive2ClientPool getClientPool() { + return clientPool; + } } diff --git a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java index a655149e..a824f9b1 100644 --- a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java +++ b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3Namespace.java @@ -915,4 +915,8 @@ private String getDefaultTableLocation(String namespaceName, String tableName) { return String.format( "%s/%s/%s.lance", config.getRoot(), namespaceName.toLowerCase(), tableName.toLowerCase()); } + + public Hive3ClientPool getClientPool() { + return clientPool; + } } From f349c4daecefd1fb808f965649dbca0fcac09a34 Mon Sep 17 00:00:00 2001 From: "zhanghaobo@kanzhun.com" Date: Tue, 14 Oct 2025 15:17:02 +0800 Subject: [PATCH 3/3] fix format violations. --- .../lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java | 4 ++-- .../lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java index fd24c9f6..7eeaaec9 100644 --- a/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java +++ b/java/lance-namespace-hive2/src/main/java/com/lancedb/lance/namespace/hive2/Hive2NamespaceConfig.java @@ -52,8 +52,8 @@ public Hive2NamespaceConfig(Map properties) { OpenDalUtil.stripTrailingSlash( PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); this.skipValidationLanceFormatTable = - PropertyUtil.propertyAsBoolean(properties, - SKIP_LANCE_TABLE_VALIDATION, SKIP_LANCE_TABLE_VALIDATION_DEFAULT); + PropertyUtil.propertyAsBoolean( + properties, SKIP_LANCE_TABLE_VALIDATION, SKIP_LANCE_TABLE_VALIDATION_DEFAULT); } public int getClientPoolSize() { diff --git a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java index 2e7aab62..af7516ea 100644 --- a/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java +++ b/java/lance-namespace-hive3/src/main/java/com/lancedb/lance/namespace/hive3/Hive3NamespaceConfig.java @@ -53,8 +53,8 @@ public Hive3NamespaceConfig(Map properties) { OpenDalUtil.stripTrailingSlash( PropertyUtil.propertyAsString(properties, ROOT, ROOT_DEFAULT)); this.skipValidationLanceFormatTable = - PropertyUtil.propertyAsBoolean(properties, - SKIP_LANCE_TABLE_VALIDATION, SKIP_LANCE_TABLE_VALIDATION_DEFAULT); + PropertyUtil.propertyAsBoolean( + properties, SKIP_LANCE_TABLE_VALIDATION, SKIP_LANCE_TABLE_VALIDATION_DEFAULT); } public int getClientPoolSize() {