Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ endee = [ "endee==0.1.10" ]
lindorm = [ "opensearch-py" ]
seekdb = [ "mysql-connector-python" ]
pinot = [ "requests" ]
logosdb = [ "logosdb" ]

[project.urls]
Repository = "https://github.com/zilliztech/VectorDBBench"
Expand Down
16 changes: 16 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DB(Enum):
PolarDB = "PolarDB"
Pinot = "Pinot"
SeekDB = "SeekDB"
LogosDB = "LogosDB"

@property
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
Expand Down Expand Up @@ -269,6 +270,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDB

if self == DB.LogosDB:
from .logosdb.logosdb import LogosDB

return LogosDB

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -477,6 +483,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDBConfig

if self == DB.LogosDB:
from .logosdb.config import LogosDBConfig

return LogosDBConfig

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -667,6 +678,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915

return _seekdb_case_config.get(index_type)

if self == DB.LogosDB:
from .logosdb.config import LogosDBIndexConfig

return LogosDBIndexConfig

# DB.Pinecone, DB.Redis
return EmptyDBCaseConfig

Expand Down
Empty file.
44 changes: 44 additions & 0 deletions vectordb_bench/backend/clients/logosdb/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import Annotated, Unpack

import click

from vectordb_bench.backend.clients import DB
from vectordb_bench.cli.cli import (
CommonTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
run,
)

DBTYPE = DB.LogosDB


class LogosDBTypedDict(CommonTypedDict):
    """CLI parameters for the LogosDB command: everything in CommonTypedDict plus ``--uri``."""

    # Location of the local embedded LogosDB directory. The option is not
    # required; its default is displayed in the help output.
    uri: Annotated[
        str,
        click.option(
            "--uri",
            required=False,
            type=str,
            default="/tmp/vectordbbench_logosdb",
            show_default=True,
            help="Path to LogosDB directory (local embedded DB)",
        ),
    ]


@cli.command()
@click_parameter_decorators_from_typed_dict(LogosDBTypedDict)
def LogosDB(**parameters: Unpack[LogosDBTypedDict]):

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

must-change: LogosDB inherits search_concurrent=True from CommonTypedDict, but LogosDB documents one DB directory as single-process while VDBBench concurrent search starts multiple ProcessPoolExecutor workers against the same --uri. The default command can fail or report invalid concurrent-search results after loading. Set parameters["search_concurrent"] = False or reject --search-concurrent for LogosDB until a supported single-process concurrent runner exists.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this. Fixed in the latest commit by hard-setting parameters["search_concurrent"] = False in the CLI handler.

Quick note: I did test multi-process concurrent reads empirically (4 Pool workers opening the same DB path and running 50 searches each) and all succeeded without errors (LogosDB's memory-mapped storage appears safe for concurrent readers). That said, since the official docs declare it single-process, disabling concurrent search is the right conservative call for now. Can revisit if/when LogosDB formally documents multi-reader support.

Fixed here: b932872

from .config import LogosDBConfig, LogosDBIndexConfig

# LogosDB documents one DB directory as single-process, while concurrent
# search starts multiple worker processes against the same --uri; force it
# off until a supported single-process concurrent runner is available.
parameters["search_concurrent"] = False

run(
db=DBTYPE,
db_config=LogosDBConfig(uri=parameters["uri"]),
db_case_config=LogosDBIndexConfig(),
**parameters,
)
29 changes: 29 additions & 0 deletions vectordb_bench/backend/clients/logosdb/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pydantic import BaseModel

from ..api import DBCaseConfig, DBConfig, MetricType


class LogosDBConfig(DBConfig):
    """Connection settings for LogosDB: the path of the database directory."""

    # Filesystem path exposed to the client under the key "uri".
    uri: str = "/tmp/vectordbbench_logosdb"

    def to_dict(self) -> dict:
        """Return the settings as a plain dict holding the single key ``"uri"``."""
        settings = {"uri": self.uri}
        return settings


class LogosDBIndexConfig(BaseModel, DBCaseConfig):
    """Case configuration for LogosDB; the only setting is the distance metric."""

    # Metric requested by the benchmark case; None selects the cosine fallback
    # in parse_metric().
    metric_type: MetricType | None = None

    def parse_metric(self) -> int:
        """Translate ``metric_type`` into the matching ``logosdb.DIST_*`` constant.

        L2 and IP map to their own constants; any other value, including
        ``None``, maps to cosine distance.
        """
        # Imported lazily so this module can be loaded without logosdb installed.
        import logosdb

        requested = self.metric_type
        if requested == MetricType.L2:
            distance = logosdb.DIST_L2
        elif requested == MetricType.IP:
            distance = logosdb.DIST_IP
        else:
            distance = logosdb.DIST_COSINE
        return distance

    def index_param(self) -> dict:
        """Return the index-build parameters; LogosDB takes none here."""
        return {}

    def search_param(self) -> dict:
        """Return the search-time parameters; LogosDB takes none here."""
        return {}
85 changes: 85 additions & 0 deletions vectordb_bench/backend/clients/logosdb/logosdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import logging
import shutil
from collections.abc import Iterable
from contextlib import contextmanager
from pathlib import Path

import numpy as np

from ..api import VectorDB
from .config import LogosDBIndexConfig

log = logging.getLogger(__name__)


class LogosDB(VectorDB):
    """VectorDB client that reads and writes a local LogosDB directory.

    A database handle is held in ``self.db`` only while the ``init()``
    context is active; outside of it ``self.db`` is ``None``.
    """

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: LogosDBIndexConfig,
        collection_name: str = "LogosDBCollection",
        drop_old: bool = False,
        name: str = "LogosDB",
        **kwargs,
    ):
        """Store the configuration and open the database once at ``db_config["uri"]``.

        Args:
            dim: vector dimension passed to the LogosDB handle.
            db_config: dict that must contain the key ``"uri"``.
            db_case_config: supplies the distance constant via ``parse_metric()``.
            collection_name: accepted but not used by this client.
            drop_old: when true, remove an existing directory tree at ``uri`` first.
            name: label used as the prefix of log messages.
            **kwargs: accepted and ignored.
        """
        self.db = None
        self.name = name
        self.dim = dim
        self.case_config = db_case_config
        self.db_config = db_config
        self.uri = db_config["uri"]

        if drop_old:
            if Path(self.uri).exists():
                log.info(f"{self.name} drop_old: removing {self.uri}")
                shutil.rmtree(self.uri)

        # Open once and drop the handle right away; init() opens it again
        # when the database is actually used.
        handle, distance = self._open_handle()
        log.info(f"{self.name} initialized at {self.uri} dim={dim} distance={distance}")
        del handle

    def _open_handle(self):
        """Open a LogosDB handle for this instance and return ``(handle, distance)``."""
        import logosdb as _logosdb

        distance = self.case_config.parse_metric()
        handle = _logosdb.DB(self.uri, dim=self.dim, distance=distance)
        return handle, distance

    @contextmanager
    def init(self):
        """Keep an open database handle in ``self.db`` for the duration of the context."""
        self.db, _ = self._open_handle()
        try:
            yield
        finally:
            # Delete the attribute to drop the handle, then put back the
            # "not open" marker.
            del self.db
            self.db = None

    def insert_embeddings(
        self,
        embeddings: Iterable[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception]:
        """Write one batch of vectors, storing each metadata id as the record text.

        Returns:
            ``(len(metadata), None)`` on success, or ``(0, exc)`` when any step
            raised; the exception is logged as a warning and not re-raised.
        """
        assert self.db is not None
        try:
            vectors = np.array(list(embeddings), dtype=np.float32)
            labels = list(map(str, metadata))
            self.db.put_batch(vectors, texts=labels)
            inserted = len(metadata)
        except Exception as e:
            log.warning(f"{self.name} insert_embeddings error: {e}")
            return 0, e
        return inserted, None

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
        timeout: int | None = None,
    ) -> list[int]:
        """Search for the ``k`` nearest records and return their ids.

        Ids are recovered by converting each hit's ``text`` back to ``int``.
        ``filters`` and ``timeout`` are accepted but not used.
        """
        assert self.db is not None
        vector = np.array(query, dtype=np.float32)
        matches = self.db.search(vector, top_k=k)
        return [int(match.text) for match in matches]

    def optimize(self, data_size: int | None = None):
        """Log that no explicit optimize step is performed; ``data_size`` is unused."""
        log.info(f"{self.name} optimize: HNSW index is built incrementally, no explicit step needed")
2 changes: 2 additions & 0 deletions vectordb_bench/cli/vectordbbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ..backend.clients.hologres.cli import HologresHGraph
from ..backend.clients.lancedb.cli import LanceDB
from ..backend.clients.lindorm.cli import LindormHNSW, LindormIVFBQ, LindormIVFPQ
from ..backend.clients.logosdb.cli import LogosDB
from ..backend.clients.mariadb.cli import MariaDBHNSW
from ..backend.clients.memorydb.cli import MemoryDB
from ..backend.clients.milvus.cli import MilvusAutoIndex
Expand Down Expand Up @@ -97,6 +98,7 @@
cli.add_command(PolarDBHNSWPQ)
cli.add_command(PolarDBHNSWSQ)
cli.add_command(SeekDBHNSW)
cli.add_command(LogosDB)


if __name__ == "__main__":
Expand Down