From a9ec9c63c32d7732015936352eb213324dbce235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9?= Date: Sun, 17 May 2026 15:14:10 +0200 Subject: [PATCH 1/4] feat(logosdb): add LogosDB vector database integration - Add LogosDB embedded HNSW client (local file-based, mmap, hnswlib) - Config: LogosDBConfig (uri path) + LogosDBIndexConfig (metric type) - Supports COSINE, L2, and IP distance metrics - Uses put_batch for efficient bulk insert; metadata IDs stored as text - Register DB.LogosDB enum, init_cls, config_cls, case_config_cls - Register 'logosdb' CLI command in vectordbbench - Add logosdb optional extra in pyproject.toml Benchmark result (50K OpenAI 1536-dim, COSINE): recall@100=0.9347 ndcg=0.9464 p99=4.6ms p95=4.0ms --- pyproject.toml | 1 + vectordb_bench/backend/clients/__init__.py | 16 ++++ .../backend/clients/logosdb/__init__.py | 0 vectordb_bench/backend/clients/logosdb/cli.py | 40 +++++++++ .../backend/clients/logosdb/config.py | 29 +++++++ .../backend/clients/logosdb/logosdb.py | 85 +++++++++++++++++++ vectordb_bench/cli/vectordbbench.py | 2 + 7 files changed, 173 insertions(+) create mode 100644 vectordb_bench/backend/clients/logosdb/__init__.py create mode 100644 vectordb_bench/backend/clients/logosdb/cli.py create mode 100644 vectordb_bench/backend/clients/logosdb/config.py create mode 100644 vectordb_bench/backend/clients/logosdb/logosdb.py diff --git a/pyproject.toml b/pyproject.toml index 3bbba8ac0..9e4ee831e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ endee = [ "endee==0.1.10" ] lindorm = [ "opensearch-py" ] seekdb = [ "mysql-connector-python" ] pinot = [ "requests" ] +logosdb = [ "logosdb" ] [project.urls] Repository = "https://github.com/zilliztech/VectorDBBench" diff --git a/vectordb_bench/backend/clients/__init__.py b/vectordb_bench/backend/clients/__init__.py index 4be8d0424..0d39ccd69 100644 --- a/vectordb_bench/backend/clients/__init__.py +++ b/vectordb_bench/backend/clients/__init__.py @@ -63,6 +63,7 @@ class DB(Enum): PolarDB = "PolarDB" Pinot = "Pinot" SeekDB = "SeekDB" + LogosDB = "LogosDB" @property def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915 @@ -269,6 +270,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915 return SeekDB + if self == DB.LogosDB: + from .logosdb.logosdb import LogosDB + + return LogosDB + msg = f"Unknown DB: {self.name}" raise ValueError(msg) @@ -477,6 +483,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915 return SeekDBConfig + if self == DB.LogosDB: + from .logosdb.config import LogosDBConfig + + return LogosDBConfig + msg = f"Unknown DB: {self.name}" raise ValueError(msg) @@ -667,6 +678,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915 return _seekdb_case_config.get(index_type) + if self == DB.LogosDB: + from .logosdb.config import LogosDBIndexConfig + + return LogosDBIndexConfig + # DB.Pinecone, DB.Redis return EmptyDBCaseConfig diff --git a/vectordb_bench/backend/clients/logosdb/__init__.py b/vectordb_bench/backend/clients/logosdb/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/vectordb_bench/backend/clients/logosdb/cli.py b/vectordb_bench/backend/clients/logosdb/cli.py new file mode 100644 index 000000000..5cc76f843 --- /dev/null +++ b/vectordb_bench/backend/clients/logosdb/cli.py @@ -0,0 +1,40 @@ +from typing import Annotated, Unpack + +import click + +from vectordb_bench.backend.clients import DB +from vectordb_bench.cli.cli import ( + CommonTypedDict, + cli, + click_parameter_decorators_from_typed_dict, + run, +) + +DBTYPE = DB.LogosDB + + +class LogosDBTypedDict(CommonTypedDict): + uri: Annotated[ + str, + click.option( + "--uri", + type=str, + help="Path to LogosDB directory (local embedded DB)", + required=False, + default="/tmp/vectordbbench_logosdb", + show_default=True, + ), + ] + + +@cli.command() +@click_parameter_decorators_from_typed_dict(LogosDBTypedDict) +def LogosDB(**parameters: Unpack[LogosDBTypedDict]): + from .config import LogosDBConfig, LogosDBIndexConfig + + run( + db=DBTYPE, + db_config=LogosDBConfig(uri=parameters["uri"]), + db_case_config=LogosDBIndexConfig(), + **parameters, + ) diff --git a/vectordb_bench/backend/clients/logosdb/config.py b/vectordb_bench/backend/clients/logosdb/config.py new file mode 100644 index 000000000..0d6bc58cc --- /dev/null +++ b/vectordb_bench/backend/clients/logosdb/config.py @@ -0,0 +1,29 @@ +from pydantic import BaseModel + +from ..api import DBCaseConfig, DBConfig, MetricType + + +class LogosDBConfig(DBConfig): + uri: str = "/tmp/vectordbbench_logosdb" + + def to_dict(self) -> dict: + return {"uri": self.uri} + + +class LogosDBIndexConfig(BaseModel, DBCaseConfig): + metric_type: MetricType | None = None + + def parse_metric(self) -> int: + import logosdb + + if self.metric_type == MetricType.L2: + return logosdb.DIST_L2 + if self.metric_type == MetricType.IP: + return logosdb.DIST_IP + return logosdb.DIST_COSINE + + def index_param(self) -> dict: + return {} + + def search_param(self) -> dict: + return {} diff --git a/vectordb_bench/backend/clients/logosdb/logosdb.py b/vectordb_bench/backend/clients/logosdb/logosdb.py new file mode 100644 index 000000000..010da1aa1 --- /dev/null +++ b/vectordb_bench/backend/clients/logosdb/logosdb.py @@ -0,0 +1,85 @@ +import logging +import os +import shutil +from collections.abc import Iterable +from contextlib import contextmanager + +import numpy as np + +from ..api import VectorDB +from .config import LogosDBIndexConfig + +log = logging.getLogger(__name__) + + +class LogosDB(VectorDB): + def __init__( + self, + dim: int, + db_config: dict, + db_case_config: LogosDBIndexConfig, + collection_name: str = "LogosDBCollection", + drop_old: bool = False, + name: str = "LogosDB", + **kwargs, + ): + self.name = name + self.db_config = db_config + self.case_config = db_case_config + self.dim = dim + self.uri = db_config["uri"] + self.db = None + + if drop_old and os.path.exists(self.uri): + log.info(f"{self.name} drop_old: removing {self.uri}") + shutil.rmtree(self.uri) + + import logosdb as _logosdb + + distance = self.case_config.parse_metric() + db = _logosdb.DB(self.uri, dim=self.dim, distance=distance) + log.info(f"{self.name} initialized at {self.uri} dim={dim} distance={distance}") + del db + + @contextmanager + def init(self): + import logosdb as _logosdb + + distance = self.case_config.parse_metric() + self.db = _logosdb.DB(self.uri, dim=self.dim, distance=distance) + try: + yield + finally: + del self.db + self.db = None + + def insert_embeddings( + self, + embeddings: Iterable[list[float]], + metadata: list[int], + **kwargs, + ) -> tuple[int, Exception]: + assert self.db is not None + try: + embeddings_arr = np.array(list(embeddings), dtype=np.float32) + texts = [str(m) for m in metadata] + self.db.put_batch(embeddings_arr, texts=texts) + return len(metadata), None + except Exception as e: + log.warning(f"{self.name} insert_embeddings error: {e}") + return 0, e + + def search_embedding( + self, + query: list[float], + k: int = 100, + filters: dict | None = None, + timeout: int | None = None, + ) -> list[int]: + assert self.db is not None + q = np.array(query, dtype=np.float32) + hits = self.db.search(q, top_k=k) + return [int(h.text) for h in hits] + + def optimize(self, data_size: int | None = None): + log.info(f"{self.name} optimize: HNSW index is built incrementally, no explicit step needed") diff --git a/vectordb_bench/cli/vectordbbench.py b/vectordb_bench/cli/vectordbbench.py index eca3dbc52..dce579412 100644 --- a/vectordb_bench/cli/vectordbbench.py +++ b/vectordb_bench/cli/vectordbbench.py @@ -14,6 +14,7 @@ from ..backend.clients.endee.cli import Endee from ..backend.clients.hologres.cli import HologresHGraph from ..backend.clients.lancedb.cli import LanceDB +from ..backend.clients.logosdb.cli import LogosDB from ..backend.clients.lindorm.cli import LindormHNSW, LindormIVFBQ, LindormIVFPQ from ..backend.clients.mariadb.cli import MariaDBHNSW from ..backend.clients.memorydb.cli import MemoryDB @@ -97,6 +98,7 @@ cli.add_command(PolarDBHNSWPQ) cli.add_command(PolarDBHNSWSQ) cli.add_command(SeekDBHNSW) +cli.add_command(LogosDB) if __name__ == "__main__": From 01e0fd0cc0beedacf798f7d8e050547580d657ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9?= Date: Fri, 29 May 2026 16:49:00 +0200 Subject: [PATCH 2/4] fix(logosdb): replace os.path.exists with Path.exists to satisfy PTH110 ruff rule Co-authored-by: Cursor --- vectordb_bench/backend/clients/logosdb/logosdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vectordb_bench/backend/clients/logosdb/logosdb.py b/vectordb_bench/backend/clients/logosdb/logosdb.py index 010da1aa1..84bfee045 100644 --- a/vectordb_bench/backend/clients/logosdb/logosdb.py +++ b/vectordb_bench/backend/clients/logosdb/logosdb.py @@ -1,8 +1,8 @@ import logging -import os import shutil from collections.abc import Iterable from contextlib import contextmanager +from pathlib import Path import numpy as np @@ -30,7 +30,7 @@ def __init__( self.uri = db_config["uri"] self.db = None - if drop_old and os.path.exists(self.uri): + if drop_old and Path(self.uri).exists(): log.info(f"{self.name} drop_old: removing {self.uri}") shutil.rmtree(self.uri) From 812eea6f143c18233ed8d19177121ff0188245fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9?= Date: Fri, 29 May 2026 16:51:47 +0200 Subject: [PATCH 3/4] fix(logosdb): move import to correct alphabetical position to satisfy ruff I001 Co-authored-by: Cursor --- vectordb_bench/cli/vectordbbench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vectordb_bench/cli/vectordbbench.py b/vectordb_bench/cli/vectordbbench.py index dce579412..698a80805 100644 --- a/vectordb_bench/cli/vectordbbench.py +++ b/vectordb_bench/cli/vectordbbench.py @@ -14,8 +14,8 @@ from ..backend.clients.endee.cli import Endee from ..backend.clients.hologres.cli import HologresHGraph from ..backend.clients.lancedb.cli import LanceDB -from ..backend.clients.logosdb.cli import LogosDB from ..backend.clients.lindorm.cli import LindormHNSW, LindormIVFBQ, LindormIVFPQ +from ..backend.clients.logosdb.cli import LogosDB from ..backend.clients.mariadb.cli import MariaDBHNSW from ..backend.clients.memorydb.cli import MemoryDB from ..backend.clients.milvus.cli import MilvusAutoIndex From b932872c676af26819ee6f2fb8b8485d3e5e97a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9?= Date: Fri, 29 May 2026 16:55:58 +0200 Subject: [PATCH 4/4] fix(logosdb): disable concurrent search (single-process embedded DB) Co-authored-by: Cursor --- vectordb_bench/backend/clients/logosdb/cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vectordb_bench/backend/clients/logosdb/cli.py b/vectordb_bench/backend/clients/logosdb/cli.py index 5cc76f843..cfa18b889 100644 --- a/vectordb_bench/backend/clients/logosdb/cli.py +++ b/vectordb_bench/backend/clients/logosdb/cli.py @@ -32,6 +32,10 @@ class LogosDBTypedDict(CommonTypedDict): def LogosDB(**parameters: Unpack[LogosDBTypedDict]): from .config import LogosDBConfig, LogosDBIndexConfig + # LogosDB is documented as single-process; disable concurrent search + # until a thread-safe concurrent runner is available. + parameters["search_concurrent"] = False + run( db=DBTYPE, db_config=LogosDBConfig(uri=parameters["uri"]),