Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ All the database client supported
| zvec | `pip install vectordb-bench[zvec]` |
| endee | `pip install vectordb-bench[endee]` |
| lindorm | `pip install vectordb-bench[lindorm]` |
| volc_mysql | `pip install vectordb-bench[volc_mysql]` |

### Run

Expand All @@ -87,6 +88,7 @@ Commands:
pgvectorhnsw
pgvectorivfflat
vectorchordrq
volcmysqlhnsw
test
weaviate
```
Expand Down Expand Up @@ -591,6 +593,45 @@ To list the options for PolarDB, execute `vectordbbench polardbhnswflat --help`.
Create index after load or inline at table creation
```

### Run VolcMySQL from command line

VolcMySQL is a MySQL-compatible service with a native `VECTOR` type and an HNSW vector index (created via `SECONDARY_ENGINE_ATTRIBUTE`). Optional quantization is configurable through `--quant-algorithm` (`NONE`, `SQ`, `PQ`) and `--quant-type` (`16_bit`, `8_bit`, `4_bit`, `binary`).

```shell
vectordbbench volcmysqlhnsw \
--case-type Performance1536D50K \
--username <db_user> \
--password '<db_password>' \
--host <db_host> \
--port 3306 \
--m 16 \
--ef-construction 128 \
--ef-search 100 \
--quant-algorithm SQ \
--quant-type 16_bit \
--num-concurrency '10,20,40,60,80' \
--concurrency-duration 30 \
--task-label <task_label> \
--db-label <db_label>
```

To list the options for VolcMySQL, execute `vectordbbench volcmysqlhnsw --help`. The following are some VolcMySQL-specific command-line options.

```text
--username TEXT Username [required]
--password TEXT Password [required]
--host TEXT Db host [default: 127.0.0.1]
--port INTEGER DB Port [default: 3306]
--m INTEGER M parameter in HNSW vector indexing
--ef-search INTEGER Session variable loose_hnsw_ef_search
--ef-construction INTEGER HNSW ef_construction
--quant-algorithm [NONE|SQ|PQ] Quantization algorithm
--quant-type [16_bit|8_bit|4_bit|binary]
Quantization type
```

> Note: by default, vectors are loaded and queried over the raw-binary `VECTOR` path. The client auto-probes the server for binary support and falls back to the `to_vector()` text path when the server does not support it. Set `VDB_BINARY_VEC=0` to force the text path, or `VDB_BINARY_VEC=1` to force the binary path.

#### Using a configuration file.

The vectordbbench command can optionally read some or all of the options from a YAML-formatted configuration file.
Expand Down Expand Up @@ -796,7 +837,7 @@ Now we can only run one task at the same time.
### Code Structure
![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
### Client
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, VectorChord, Redis, Chroma, CockroachDB, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, VectorChord, Redis, Chroma, CockroachDB, VolcMySQL, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
### Benchmark Cases
We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
#### Capacity Case
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ zvec = [ "zvec" ]
endee = [ "endee==0.1.10" ]
lindorm = [ "opensearch-py" ]
seekdb = [ "mysql-connector-python" ]
volc_mysql = [ "mysql-connector-python" ]
pinot = [ "requests" ]

[project.urls]
Expand Down
74 changes: 74 additions & 0 deletions tests/test_volc_mysql_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import struct

from vectordb_bench.backend.clients.volc_mysql.volc_mysql import _encode_batch_to_tsv


def test_encode_batch_to_tsv_sorts_by_id_and_hex_encodes(tmp_path):
    """Check ascending-id row order and the hex payload of the first row.

    The ids are passed out of order; the first output row must belong to the
    smallest id and decode (hex -> little-endian float32) to the embedding
    that was paired with that id on input.
    """
    out_file = tmp_path / "batch.tsv"
    ids = [42, 7, 99]
    vectors = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
    _encode_batch_to_tsv(metadata=ids, embeddings=vectors, dim=2, tsv_path=str(out_file))

    rows = out_file.read_text().splitlines()
    assert len(rows) == 3

    # Expected order after sorting by id: 7, 42, 99.
    for row, expected_id in zip(rows, (7, 42, 99)):
        assert row.startswith(f"{expected_id}\t")

    # id=7 was supplied alongside the (3.0, 4.0) embedding.
    _, payload = rows[0].split("\t")
    decoded = struct.unpack("<2f", bytes.fromhex(payload))
    assert decoded == (3.0, 4.0)


def test_encode_batch_to_tsv_writes_dim_floats_per_row(tmp_path):
    """Check that a single 1536-dim row is hex-encoded with exactly `dim` float32 values."""
    dim = 1536
    vector = [0.5] * dim
    out_file = tmp_path / "batch.tsv"
    _encode_batch_to_tsv(metadata=[1], embeddings=[vector], dim=dim, tsv_path=str(out_file))

    row = out_file.read_text().rstrip("\n")
    row_id, payload = row.split("\t")
    assert row_id == "1"

    # Each float32 occupies 4 bytes and each byte is two hex characters,
    # so 1536 floats -> 12288 characters.
    assert len(payload) == dim * 4 * 2
    assert bytes.fromhex(payload) == struct.pack(f"<{dim}f", *vector)


def test_encode_batch_to_tsv_empty_batch(tmp_path):
    """Check that encoding an empty batch leaves an empty output file."""
    out_file = tmp_path / "batch.tsv"
    _encode_batch_to_tsv(
        metadata=[],
        embeddings=[],
        dim=4,
        tsv_path=str(out_file),
    )
    written = out_file.read_text()
    assert written == ""


def test_encode_batch_to_tsv_text_mode_sorts_and_formats(tmp_path):
    """Check the text (binary=False) encoding: id-sorted rows with "[f1,f2,...]" vectors."""
    out_file = tmp_path / "batch.tsv"
    ids = [42, 7, 99]
    vectors = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
    _encode_batch_to_tsv(
        metadata=ids,
        embeddings=vectors,
        dim=2,
        tsv_path=str(out_file),
        binary=False,
    )

    rows = out_file.read_text().splitlines()
    assert len(rows) == 3

    # Ascending id order is 7, 42, 99; id=7 was supplied with (3.0, 4.0).
    expected_rows = ("7\t[3.0,4.0]", "42\t[1.0,2.0]", "99\t[5.0,6.0]")
    for position, expected in enumerate(expected_rows):
        assert rows[position] == expected

    # The vector field must be a bracketed literal that does not itself
    # contain the column delimiter.
    for row in rows:
        _, literal = row.split("\t")
        assert literal.startswith("[")
        assert literal.endswith("]")
        assert "\t" not in literal
16 changes: 16 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DB(Enum):
PolarDB = "PolarDB"
Pinot = "Pinot"
SeekDB = "SeekDB"
VolcMySQL = "VolcMySQL"

@property
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
Expand Down Expand Up @@ -269,6 +270,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDB

if self == DB.VolcMySQL:
from .volc_mysql.volc_mysql import VolcMySQL

return VolcMySQL

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -477,6 +483,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDBConfig

if self == DB.VolcMySQL:
from .volc_mysql.config import VolcMySQLConfig

return VolcMySQLConfig

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -667,6 +678,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915

return _seekdb_case_config.get(index_type)

if self == DB.VolcMySQL:
from .volc_mysql.config import _volcmysql_case_config

return _volcmysql_case_config.get(index_type)

# DB.Pinecone, DB.Redis
return EmptyDBCaseConfig

Expand Down
134 changes: 134 additions & 0 deletions vectordb_bench/backend/clients/volc_mysql/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
from typing import Annotated, Unpack

import click
from pydantic import SecretStr

from vectordb_bench.backend.clients import DB

from ....cli.cli import (
CommonTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
run,
)


class VolcMySQLTypedDict(CommonTypedDict):
    # Connection options shared by every VolcMySQL command, layered on top of
    # the common benchmark options inherited from CommonTypedDict.

    # Credentials: both must be supplied on the command line.
    user_name: Annotated[
        str,
        click.option("--username", required=True, type=str, help="Username"),
    ]
    password: Annotated[
        str,
        click.option("--password", required=True, type=str, help="Password"),
    ]

    # Endpoint: defaults to 127.0.0.1 on port 3306.
    host: Annotated[
        str,
        click.option("--host", default="127.0.0.1", type=str, help="Db host"),
    ]
    port: Annotated[
        int,
        click.option("--port", default=3306, type=int, help="DB Port"),
    ]


class VolcMySQLHNSWTypedDict(VolcMySQLTypedDict):
    # HNSW index and quantization options for VolcMySQL. Every option here is
    # optional on the command line.

    # --- HNSW parameters ---
    m: Annotated[
        int | None,
        click.option("--m", required=False, type=int, help="M parameter in HNSW vector indexing"),
    ]
    ef_search: Annotated[
        int | None,
        click.option("--ef-search", required=False, type=int, help="Session variable loose_hnsw_ef_search"),
    ]
    ef_construction: Annotated[
        int | None,
        click.option("--ef-construction", required=False, type=int, help="HNSW ef_construction"),
    ]

    # --- Quantization: values are restricted to the listed choices ---
    quant_algorithm: Annotated[
        str | None,
        click.option(
            "--quant-algorithm",
            required=False,
            type=click.Choice(["NONE", "SQ", "PQ"]),
            help="Quantization algorithm",
        ),
    ]
    quant_type: Annotated[
        str | None,
        click.option(
            "--quant-type",
            required=False,
            type=click.Choice(["16_bit", "8_bit", "4_bit", "binary"]),
            help="Quantization type",
        ),
    ]


@cli.command()
@click_parameter_decorators_from_typed_dict(VolcMySQLHNSWTypedDict)
def VolcMySQLHNSW(
    **parameters: Unpack[VolcMySQLHNSWTypedDict],
):
    # CLI entry point: builds the VolcMySQL connection config and the HNSW
    # case config from the parsed options, then hands both to `run` together
    # with the full option dict.
    # (Kept as comments rather than a docstring so the command's --help text
    # is not affected.)

    # Imported lazily, inside the command, rather than at module import time.
    from .config import VolcMySQLConfig, VolcMySQLHNSWConfig

    # Connection settings. The user name is read from the "username" key,
    # matching the --username option; the password is wrapped in SecretStr.
    connection = VolcMySQLConfig(
        db_label=parameters["db_label"],
        user_name=parameters["username"],
        password=SecretStr(parameters["password"]),
        host=parameters["host"],
        port=parameters["port"],
    )

    # Index build/search and quantization settings, passed through as given.
    index_settings = VolcMySQLHNSWConfig(
        M=parameters["m"],
        ef_search=parameters["ef_search"],
        ef_construction=parameters["ef_construction"],
        quant_algorithm=parameters["quant_algorithm"],
        quant_type=parameters["quant_type"],
    )

    run(
        db=DB.VolcMySQL,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )
Loading