From 9bdeda5f51e9bd9d885ecec31c197bd46a3bd0e0 Mon Sep 17 00:00:00 2001 From: Akhil-Pathivada Date: Thu, 4 Dec 2025 19:39:03 +0530 Subject: [PATCH] feat(oss-opensearch): Add KNN derived source configuration option --- .../backend/clients/oss_opensearch/config.py | 3 +++ .../clients/oss_opensearch/oss_opensearch.py | 6 ++++++ vectordb_bench/frontend/config/dbCaseConfigs.py | 14 ++++++++++++++ vectordb_bench/models.py | 1 + 4 files changed, 24 insertions(+) diff --git a/vectordb_bench/backend/clients/oss_opensearch/config.py b/vectordb_bench/backend/clients/oss_opensearch/config.py index cc8097121..c891ce084 100644 --- a/vectordb_bench/backend/clients/oss_opensearch/config.py +++ b/vectordb_bench/backend/clients/oss_opensearch/config.py @@ -77,6 +77,7 @@ class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig): oversample_factor: float = 1.0 quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.fp32 replication_type: str | None = "DOCUMENT" + knn_derived_source_enabled: bool = False @root_validator def validate_engine_name(cls, values: dict): @@ -103,6 +104,7 @@ def __eq__(self, obj: any): and self.use_routing == obj.use_routing and self.quantization_type == obj.quantization_type and self.replication_type == obj.replication_type + and self.knn_derived_source_enabled == obj.knn_derived_source_enabled ) def __hash__(self) -> int: @@ -117,6 +119,7 @@ def __hash__(self) -> int: self.use_routing, self.quantization_type, self.replication_type, + self.knn_derived_source_enabled, ) ) diff --git a/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py b/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py index d0ad3a61f..99a545343 100644 --- a/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py +++ b/vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py @@ -27,6 +27,11 @@ "applies": lambda version, _: version >= Version("3.0"), "value": lambda _: "-1", }, + { + "name": "knn.derived_source.enabled", + "applies": lambda version, _: version >= Version("3.0"), + "value": lambda case_config: case_config.knn_derived_source_enabled, + }, ] @@ -270,6 +275,7 @@ def _create_index(self, client: OpenSearch) -> None: log.info(f"Creating index with ef_search: {ef_search_value}") log.info(f"Creating index with number_of_replicas: {self.case_config.number_of_replicas}") log.info(f"Creating index with replication_type: {self.case_config.replication_type}") + log.info(f"Creating index with knn_derived_source_enabled: {self.case_config.knn_derived_source_enabled}") log.info(f"Creating index with engine: {self.case_config.engine}") log.info(f"Creating index with metric type: {self.case_config.metric_type_name}") log.info(f"All case_config parameters: {self.case_config.__dict__}") diff --git a/vectordb_bench/frontend/config/dbCaseConfigs.py b/vectordb_bench/frontend/config/dbCaseConfigs.py index 75435e8e6..b56e26500 100644 --- a/vectordb_bench/frontend/config/dbCaseConfigs.py +++ b/vectordb_bench/frontend/config/dbCaseConfigs.py @@ -1848,6 +1848,16 @@ class CaseConfigInput(BaseModel): }, ) +CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch = CaseConfigInput( + label=CaseConfigParamType.knn_derived_source_enabled, + displayLabel="KNN Derived Source Enabled", + inputHelp="Enable KNN derived source (OpenSearch 3.x+ only). Ignored for 2.x versions.", + inputType=InputType.Bool, + inputConfig={ + "value": False, + }, +) + MilvusLoadConfig = [ CaseConfigParamInput_IndexType, CaseConfigParamInput_M, @@ -1929,12 +1939,14 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch, CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch, + CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch, ] AWSOpenSearchPerformanceConfig = [ CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch, CaseConfigParamInput_EF_SEARCH_AWSOpensearch, CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch, + CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch, ] AliyunOpensearchLoadingConfig = [] @@ -2274,6 +2286,7 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch, + CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch, CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch, CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch, @@ -2289,6 +2302,7 @@ class CaseConfigInput(BaseModel): CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch, + CaseConfigParamInput_KNN_DERIVED_SOURCE_ENABLED_AWSOpensearch, CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch, CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch, CaseConfigParamInput_REPLICATION_TYPE_AWSOpensearch, diff --git a/vectordb_bench/models.py b/vectordb_bench/models.py index acbf28dbb..b051575b5 100644 --- a/vectordb_bench/models.py +++ b/vectordb_bench/models.py @@ -127,6 +127,7 @@ class CaseConfigParamType(Enum): oversample_ratio = "oversample_ratio" use_routing = "use_routing" replication_type = "replication_type" + knn_derived_source_enabled = "knn_derived_source_enabled" # CockroachDB parameters min_partition_size = "min_partition_size"