diff --git a/benchmark/server/.env.example b/benchmark/server/.env.example deleted file mode 100644 index a9e3e254..00000000 --- a/benchmark/server/.env.example +++ /dev/null @@ -1,137 +0,0 @@ -# ============================================================================= -# PowerMem Benchmark Server Configuration -# ============================================================================= -# Copy this file to .env and modify the values according to your needs -# -# This configuration is specifically for benchmark testing scenarios. -# For production use, refer to the root .env.example file. -# ============================================================================= - -# ============================================================================= -# 1. LLM Configuration (Required) -# ============================================================================= -# Choose your LLM provider: openai, qwen, siliconflow, ollama, vllm, anthropic, deepseek -LLM_PROVIDER=openai - -LLM_API_KEY=your_api_key_here -# Adjust the model according to your provider -LLM_MODEL=gpt-4o -LLM_TEMPERATURE=0.2 - -## Keep the default settings, as modifications are generally not needed. -LLM_MAX_TOKENS=1000 -LLM_TOP_P=0.8 -LLM_TOP_K=50 - -# Default Base URLs for LLM providers, you can adjust if necessary -OPENAI_LLM_BASE_URL=https://api.openai.com/v1 -QWEN_LLM_BASE_URL=https://dashscope.aliyuncs.com/api/v1 -SILICONFLOW_LLM_BASE_URL=https://api.siliconflow.cn/v1 -OLLAMA_LLM_BASE_URL= -VLLM_LLM_BASE_URL= -ANTHROPIC_LLM_BASE_URL=https://api.anthropic.com -DEEPSEEK_LLM_BASE_URL=https://api.deepseek.com - -# ============================================================================= -# 2. Embedding Configuration (Required) -# ============================================================================= -# Choose your embedding provider: qwen, openai, siliconflow, huggingface, lmstudio, ollama -EMBEDDING_PROVIDER=qwen - -EMBEDDING_API_KEY=your_api_key_here -# Adjust the model according to your provider -EMBEDDING_MODEL=text-embedding-v4 -EMBEDDING_DIMS=1536 - -# Default Base URLs for embedding providers, you can adjust if necessary -QWEN_EMBEDDING_BASE_URL=https://dashscope.aliyuncs.com/api/v1 -OPENAI_EMBEDDING_BASE_URL=https://api.openai.com/v1 -SILICONFLOW_EMBEDDING_BASE_URL=https://api.siliconflow.cn/v1 -HUGGINFACE_EMBEDDING_BASE_URL= -LMSTUDIO_EMBEDDING_BASE_URL= -OLLAMA_EMBEDDING_BASE_URL= - -# ============================================================================= -# 3. Database Configuration (Required) -# ============================================================================= -# Choose your database provider: oceanbase, postgres -DATABASE_PROVIDER=oceanbase - -# Vector search weights (0.0 to 1.0) - Benchmark specific -VECTOR_WEIGHT=0.5 -FTS_WEIGHT=0.5 - -# ----------------------------------------------------------------------------- -# OceanBase Configuration (used when DATABASE_PROVIDER=oceanbase) -# ----------------------------------------------------------------------------- -OCEANBASE_HOST=127.0.0.1 -OCEANBASE_PORT=2881 -OCEANBASE_USER=root@sys -OCEANBASE_PASSWORD=your_password -OCEANBASE_DATABASE=ai_work -OCEANBASE_COLLECTION=powermem_collection - -## Keep the default settings, as modifications are generally not needed. -OCEANBASE_INDEX_TYPE=HNSW -OCEANBASE_VECTOR_METRIC_TYPE=l2 -OCEANBASE_TEXT_FIELD=document -OCEANBASE_VECTOR_FIELD=embedding -OCEANBASE_EMBEDDING_MODEL_DIMS=1536 -OCEANBASE_PRIMARY_FIELD=id -OCEANBASE_METADATA_FIELD=metadata -OCEANBASE_VIDX_NAME=memories_vidx - -# Sparse vector support (optional, only for OceanBase) -SPARSE_VECTOR_ENABLE=false - -# ----------------------------------------------------------------------------- -# PostgreSQL Configuration (used when DATABASE_PROVIDER=postgres) -# ----------------------------------------------------------------------------- -POSTGRES_HOST=127.0.0.1 -POSTGRES_PORT=5432 -POSTGRES_USER=postgres -POSTGRES_PASSWORD=your_password -POSTGRES_DATABASE=ai_work -POSTGRES_COLLECTION=memories - -## Keep the default settings, as modifications are generally not needed. -POSTGRES_EMBEDDING_MODEL_DIMS=1536 -POSTGRES_DISKANN=true -POSTGRES_HNSW=true - -# ============================================================================= -# 4. Application Configuration -# ============================================================================= -# History database path for benchmark tracking -HISTORY_DB_PATH=history.db - -# Configuration version -CONFIG_VERSION=v1.1 - -# ============================================================================= -# 5. Token Counting Configuration (Benchmark specific) -# ============================================================================= -# Enable token counting for benchmark metrics: "true" or "false" -TOKEN_COUNTING=true - -# ============================================================================= -# 6. Reranker Configuration (Optional) -# ============================================================================= -# Reranker settings for improved search results -RERANKER_ENABLED=true -RERANKER_PROVIDER=qwen -RERANKER_MODEL=qwen3-rerank -RERANKER_API_KEY=your_api_key_here -# Reranker base URL (for Qwen provider, uses DASHSCOPE_BASE_URL) -# If not set, defaults to https://dashscope.aliyuncs.com/api/v1 -# RERANKER_BASE_URL=https://dashscope.aliyuncs.com/api/v1 - -# ============================================================================= -# 7. Sparse Embedding Configuration (Optional) -# ============================================================================= -# Sparse embedding for hybrid search (only supported for OceanBase) -SPARSE_EMBEDDER_PROVIDER=qwen -SPARSE_EMBEDDER_API_KEY=your_api_key_here -SPARSE_EMBEDDER_MODEL=text-embedding-v4 -SPARSE_EMBEDDING_BASE_URL=https://dashscope.aliyuncs.com/api/v1 -SPARSE_EMBEDDER_DIMS=1536 diff --git a/benchmark/server/main.py b/benchmark/server/main.py index dc94ca13..37615d6f 100644 --- a/benchmark/server/main.py +++ b/benchmark/server/main.py @@ -7,27 +7,19 @@ This server provides REST APIs for managing and searching memories. Configuration: - All configuration is done through environment variables. - Copy .env.example to .env and modify the values as needed. - - Example: - cp benchmark/server/.env.example benchmark/server/.env - # Then edit benchmark/server/.env with your settings + Use the project root .env (same as PowerMem). Configure it first, then: + cd benchmark/server && uvicorn main:app --host 0.0.0.0 --port 8000 """ import asyncio import logging -import os import sys -from pathlib import Path from typing import Any, Dict, List, Optional -from dotenv import load_dotenv from fastapi import FastAPI, HTTPException, status from fastapi.responses import JSONResponse, RedirectResponse -from pydantic import BaseModel, Field, field_validator, model_validator -from pydantic_settings import BaseSettings, SettingsConfigDict -from powermem import Memory +from pydantic import BaseModel, Field, field_validator +from powermem import Memory, auto_config # ============================================================================ # Logging Configuration @@ -43,437 +35,12 @@ logger = logging.getLogger(__name__) # ============================================================================ -# Configuration - Using Pydantic Settings (similar to src/server/config.py) +# Configuration - from project root .env (user must configure before starting) # ============================================================================ -def _parse_boolish(value: object) -> object: - """ - Backward-compatible boolean parsing. - - Historically we accepted values like: true/1/yes/on/enabled. - `pydantic` already accepts many truthy strings, but "enabled"/"disabled" are not - guaranteed across versions, so we normalize explicitly. - """ - if value is None or isinstance(value, bool): - return value - - if isinstance(value, str): - text = value.strip().lower() - if text in {"1", "true", "t", "yes", "y", "on", "enabled"}: - return True - if text in {"0", "false", "f", "no", "n", "off", "disabled"}: - return False - - return value +DEFAULT_CONFIG = auto_config() -class BenchmarkSettings(BaseSettings): - """Configuration settings for PowerMem Benchmark Server.""" - - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - case_sensitive=False, - extra="ignore", - ) - - # LLM Configuration - llm_provider: str = Field(default="openai") - llm_api_key: str = Field(...) # Required - llm_model: str = Field(default="gpt-4o") - llm_temperature: float = Field(default=0.2) - llm_max_tokens: int = Field(default=1000) - llm_top_p: float = Field(default=0.8) - llm_top_k: int = Field(default=50) - - # LLM Base URLs - openai_llm_base_url: str = Field(default="https://api.openai.com/v1") - qwen_llm_base_url: str = Field(default="https://dashscope.aliyuncs.com/api/v1") - siliconflow_llm_base_url: str = Field(default="https://api.siliconflow.cn/v1") - ollama_llm_base_url: str = Field(default="") - vllm_llm_base_url: str = Field(default="") - anthropic_llm_base_url: str = Field(default="https://api.anthropic.com") - deepseek_llm_base_url: str = Field(default="https://api.deepseek.com") - - # Embedding Configuration - embedding_provider: str = Field(default="qwen") - embedding_api_key: str = Field(...) # Required - embedding_model: str = Field(default="text-embedding-v4") - embedding_dims: int = Field(default=1536) - - # Embedding Base URLs - qwen_embedding_base_url: str = Field(default="https://dashscope.aliyuncs.com/api/v1") - openai_embedding_base_url: str = Field(default="https://api.openai.com/v1") - siliconflow_embedding_base_url: str = Field(default="https://api.siliconflow.cn/v1") - huggingface_embedding_base_url: str = Field(default="") - lmstudio_embedding_base_url: str = Field(default="") - ollama_embedding_base_url: str = Field(default="") - - # Database Configuration - database_provider: str = Field(default="oceanbase") - vector_weight: float = Field(default=0.5) - fts_weight: float = Field(default=0.5) - - # OceanBase Configuration - oceanbase_host: str = Field(default="127.0.0.1") - oceanbase_port: str = Field(default="2881") - oceanbase_user: str = Field(default="root@sys") - oceanbase_password: str = Field(default="") - oceanbase_database: str = Field(default="ai_work") - oceanbase_collection: str = Field(default="powermem_collection") - oceanbase_embedding_model_dims: int = Field(default=1536) - oceanbase_index_type: str = Field(default="HNSW") - oceanbase_vector_metric_type: str = Field(default="l2") - oceanbase_text_field: str = Field(default="document") - oceanbase_vector_field: str = Field(default="embedding") - oceanbase_primary_field: str = Field(default="id") - oceanbase_metadata_field: str = Field(default="metadata") - oceanbase_vidx_name: str = Field(default="memories_vidx") - oceanbase_enable_native_hybrid: bool = Field(default=False) - - # PostgreSQL Configuration - postgres_host: str = Field(default="127.0.0.1") - postgres_port: str = Field(default="5432") - postgres_user: str = Field(default="postgres") - postgres_password: str = Field(default="") - postgres_database: str = Field(default="ai_work") - postgres_collection: str = Field(default="memories") - postgres_embedding_model_dims: int = Field(default=1536) - postgres_diskann: bool = Field(default=True) - postgres_hnsw: bool = Field(default=True) - - # Token Counting Configuration - token_counting: bool = Field(default=True) - - # Application Configuration - history_db_path: str = Field(default="history.db") - config_version: str = Field(default="v1.1") - - # Reranker Configuration - reranker_enabled: bool = Field(default=True) - reranker_provider: str = Field(default="qwen") - reranker_model: str = Field(default="qwen3-rerank") - reranker_api_key: Optional[str] = Field(default=None) # Falls back to embedding_api_key - reranker_base_url: str = Field(default="") - - # Sparse Embedding Configuration - sparse_vector_enable: bool = Field(default=False) - sparse_embedder_provider: str = Field(default="qwen") - sparse_embedder_api_key: Optional[str] = Field(default=None) # Falls back to embedding_api_key - sparse_embedder_model: Optional[str] = Field(default=None) # Falls back to embedding_model - sparse_embedding_base_url: str = Field(default="") - sparse_embedder_dims: int = Field(default=1536) - - @field_validator( - "llm_provider", - "embedding_provider", - "database_provider", - "reranker_provider", - "sparse_embedder_provider", - mode="before", - ) - @classmethod - def normalize_provider(cls, value: object) -> str: - """Normalize provider names to lowercase.""" - if isinstance(value, str): - return value.lower() - return value - - @field_validator( - "token_counting", - "reranker_enabled", - "sparse_vector_enable", - "postgres_diskann", - "postgres_hnsw", - mode="before", - ) - @classmethod - def normalize_bool_fields(cls, value: object) -> object: - """Normalize boolean fields.""" - return _parse_boolish(value) - - @model_validator(mode="after") - def set_defaults(self) -> "BenchmarkSettings": - """Set default values for fields that depend on other fields.""" - # Set reranker_api_key to embedding_api_key if not provided - if not self.reranker_api_key: - self.reranker_api_key = self.embedding_api_key - - # Set sparse_embedder_api_key to embedding_api_key if not provided - if not self.sparse_embedder_api_key: - self.sparse_embedder_api_key = self.embedding_api_key - - # Set sparse_embedder_model to embedding_model if not provided - if not self.sparse_embedder_model: - self.sparse_embedder_model = self.embedding_model - - # Set sparse_embedding_base_url based on provider if not provided - if not self.sparse_embedding_base_url: - base_url_map = { - "qwen": self.qwen_embedding_base_url, - "openai": self.openai_embedding_base_url, - "siliconflow": self.siliconflow_embedding_base_url, - "huggingface": self.huggingface_embedding_base_url, - "lmstudio": self.lmstudio_embedding_base_url, - "ollama": self.ollama_embedding_base_url, - } - self.sparse_embedding_base_url = base_url_map.get(self.sparse_embedder_provider, "") - - return self - - def model_post_init(self, __context: Any) -> None: - """Post-initialization hook to set DASHSCOPE_BASE_URL for Qwen reranker.""" - super().model_post_init(__context) - # For Qwen reranker, set DASHSCOPE_BASE_URL if RERANKER_BASE_URL is provided - if self.reranker_base_url and self.reranker_provider == "qwen": - os.environ["DASHSCOPE_BASE_URL"] = self.reranker_base_url - - def get_llm_base_url(self, provider: str) -> str: - """Get base URL for LLM provider.""" - url_map = { - "openai": self.openai_llm_base_url, - "qwen": self.qwen_llm_base_url, - "siliconflow": self.siliconflow_llm_base_url, - "ollama": self.ollama_llm_base_url, - "vllm": self.vllm_llm_base_url, - "anthropic": self.anthropic_llm_base_url, - "deepseek": self.deepseek_llm_base_url, - } - return url_map.get(provider, "") - - def get_embedding_base_url(self, provider: str) -> str: - """Get base URL for embedding provider.""" - url_map = { - "qwen": self.qwen_embedding_base_url, - "openai": self.openai_embedding_base_url, - "siliconflow": self.siliconflow_embedding_base_url, - "huggingface": self.huggingface_embedding_base_url, - "lmstudio": self.lmstudio_embedding_base_url, - "ollama": self.ollama_embedding_base_url, - } - return url_map.get(provider, "") - - -# Load configuration with custom env file path -def load_benchmark_settings() -> BenchmarkSettings: - """Load benchmark settings from .env file in the same directory as this script.""" - script_dir = Path(__file__).parent - env_path = script_dir / ".env" - - if env_path.exists(): - logger.info(f"Loading environment variables from {env_path}") - return BenchmarkSettings(_env_file=str(env_path)) - else: - logger.info("Loading environment variables from default locations") - return BenchmarkSettings() - - -# Initialize settings -settings = load_benchmark_settings() - -# Backward compatibility: Create aliases for existing code -LLM_PROVIDER = settings.llm_provider -LLM_API_KEY = settings.llm_api_key -LLM_MODEL = settings.llm_model -LLM_TEMPERATURE = settings.llm_temperature -LLM_MAX_TOKENS = settings.llm_max_tokens -LLM_TOP_P = settings.llm_top_p -LLM_TOP_K = settings.llm_top_k - -EMBEDDING_PROVIDER = settings.embedding_provider -EMBEDDING_API_KEY = settings.embedding_api_key -EMBEDDING_MODEL = settings.embedding_model -EMBEDDING_DIMS = settings.embedding_dims - -DATABASE_PROVIDER = settings.database_provider -VECTOR_WEIGHT = settings.vector_weight -FTS_WEIGHT = settings.fts_weight - -TOKEN_COUNTING = settings.token_counting -HISTORY_DB_PATH = settings.history_db_path -CONFIG_VERSION = settings.config_version - -RERANKER_ENABLED = settings.reranker_enabled -RERANKER_PROVIDER = settings.reranker_provider -RERANKER_MODEL = settings.reranker_model -RERANKER_API_KEY = settings.reranker_api_key -RERANKER_BASE_URL = settings.reranker_base_url - -SPARSE_VECTOR_ENABLE = settings.sparse_vector_enable -SPARSE_EMBEDDER_PROVIDER = settings.sparse_embedder_provider -SPARSE_EMBEDDER_API_KEY = settings.sparse_embedder_api_key -SPARSE_EMBEDDER_MODEL = settings.sparse_embedder_model -SPARSE_EMBEDDING_BASE_URL = settings.sparse_embedding_base_url -SPARSE_EMBEDDER_DIMS = settings.sparse_embedder_dims - -OCEANBASE_ENABLE_NATIVE_HYBRID = settings.oceanbase_enable_native_hybrid - - -def load_config() -> Dict[str, Any]: - """Load and build configuration dictionary from settings.""" - # Select vector store based on DATABASE_PROVIDER - if DATABASE_PROVIDER == "oceanbase": - vector_store_config = { - "host": settings.oceanbase_host, - "port": settings.oceanbase_port, - "user": settings.oceanbase_user, - "password": settings.oceanbase_password, - "db_name": settings.oceanbase_database, - "collection_name": settings.oceanbase_collection, - "embedding_model_dims": settings.oceanbase_embedding_model_dims, - "index_type": settings.oceanbase_index_type, - "vidx_metric_type": settings.oceanbase_vector_metric_type, - "vector_weight": VECTOR_WEIGHT, - "fts_weight": FTS_WEIGHT, - # Optional field names (use defaults if not explicitly set) - "primary_field": settings.oceanbase_primary_field, - "vector_field": settings.oceanbase_vector_field, - "text_field": settings.oceanbase_text_field, - "metadata_field": settings.oceanbase_metadata_field, - "vidx_name": settings.oceanbase_vidx_name, - } - - # Add sparse vector support if enabled - if SPARSE_VECTOR_ENABLE: - vector_store_config["include_sparse"] = True - if OCEANBASE_ENABLE_NATIVE_HYBRID: - vector_store_config['enable_native_hybrid'] = True - - vector_store = { - "provider": "oceanbase", - "config": vector_store_config, - } - elif DATABASE_PROVIDER == "postgres": - vector_store = { - "provider": "pgvector", - "config": { - "host": settings.postgres_host, - "port": settings.postgres_port, - "user": settings.postgres_user, - "password": settings.postgres_password, - "dbname": settings.postgres_database, - "collection_name": settings.postgres_collection, - "embedding_model_dims": settings.postgres_embedding_model_dims, - "diskann": settings.postgres_diskann, - "hnsw": settings.postgres_hnsw, - }, - } - else: - raise ValueError( - f"Unsupported DATABASE_PROVIDER: {DATABASE_PROVIDER}. " - f"Must be 'oceanbase' or 'postgres'" - ) - - # Build LLM configuration - llm_base_url = settings.get_llm_base_url(LLM_PROVIDER) - llm_config = { - "api_key": LLM_API_KEY, - "temperature": LLM_TEMPERATURE, - "model": LLM_MODEL, - } - - # Add provider-specific base URL if available - # Note: Each provider has its own specific field name for base URL - if llm_base_url: - if LLM_PROVIDER == "openai": - llm_config["openai_base_url"] = llm_base_url - elif LLM_PROVIDER == "qwen": - # Qwen uses dashscope_base_url, not qwen_base_url - llm_config["dashscope_base_url"] = llm_base_url - elif LLM_PROVIDER == "deepseek": - llm_config["deepseek_base_url"] = llm_base_url - elif LLM_PROVIDER == "vllm": - llm_config["vllm_base_url"] = llm_base_url - elif LLM_PROVIDER == "ollama": - llm_config["ollama_base_url"] = llm_base_url - elif LLM_PROVIDER == "anthropic": - llm_config["anthropic_base_url"] = llm_base_url - elif LLM_PROVIDER == "siliconflow": - # SiliconFlow may use a generic base_url or siliconflow_base_url - # Using siliconflow_base_url as fallback - llm_config["siliconflow_base_url"] = llm_base_url - - # Add optional parameters - if LLM_MAX_TOKENS: - llm_config["max_tokens"] = LLM_MAX_TOKENS - if LLM_TOP_P: - llm_config["top_p"] = LLM_TOP_P - if LLM_TOP_K: - llm_config["top_k"] = LLM_TOP_K - - # Build embedder configuration - embedding_base_url = settings.get_embedding_base_url(EMBEDDING_PROVIDER) - embedder_config = { - "api_key": EMBEDDING_API_KEY, - "model": EMBEDDING_MODEL, - "embedding_dims": EMBEDDING_DIMS, - } - - # Add provider-specific base URL if available - # Note: Each provider has its own specific field name for base URL - if embedding_base_url: - if EMBEDDING_PROVIDER == "openai": - embedder_config["openai_base_url"] = embedding_base_url - elif EMBEDDING_PROVIDER == "qwen": - # Qwen uses dashscope_base_url, not qwen_base_url - embedder_config["dashscope_base_url"] = embedding_base_url - elif EMBEDDING_PROVIDER == "siliconflow": - embedder_config["siliconflow_base_url"] = embedding_base_url - elif EMBEDDING_PROVIDER == "huggingface": - embedder_config["huggingface_base_url"] = embedding_base_url - elif EMBEDDING_PROVIDER == "lmstudio": - embedder_config["lmstudio_base_url"] = embedding_base_url - elif EMBEDDING_PROVIDER == "ollama": - embedder_config["ollama_base_url"] = embedding_base_url - - # Build configuration dictionary - config = { - "version": CONFIG_VERSION, - "vector_store": vector_store, - "llm": { - "provider": LLM_PROVIDER, - "config": llm_config, - }, - "embedder": { - "provider": EMBEDDING_PROVIDER, - "config": embedder_config, - }, - "history_db_path": HISTORY_DB_PATH, - } - - # Add reranker if enabled - if RERANKER_ENABLED: - reranker_config = { - "api_key": RERANKER_API_KEY, - "model": RERANKER_MODEL, - } - # Add base URL for Qwen reranker (uses dashscope_base_url) - if RERANKER_BASE_URL and RERANKER_PROVIDER == "qwen": - reranker_config["dashscope_base_url"] = RERANKER_BASE_URL - - config["reranker"] = { - "enabled": True, - "provider": RERANKER_PROVIDER, - "config": reranker_config, - } - - # Add sparse embedder if enabled - if SPARSE_VECTOR_ENABLE: - sparse_config = { - "api_key": SPARSE_EMBEDDER_API_KEY, - "model": SPARSE_EMBEDDER_MODEL, - "embedding_dims": SPARSE_EMBEDDER_DIMS, - } - # Sparse embedder uses generic base_url field, not provider-specific - if SPARSE_EMBEDDING_BASE_URL: - sparse_config["base_url"] = SPARSE_EMBEDDING_BASE_URL - - config["sparse_embedder"] = { - "provider": SPARSE_EMBEDDER_PROVIDER, - "config": sparse_config, - } - - return config # ============================================================================ @@ -529,11 +96,7 @@ def reset(self) -> None: def setup_token_counting() -> None: - """Setup token counting for OpenAI API if enabled.""" - if not TOKEN_COUNTING: - logger.info("Token counting is disabled") - return - + """Setup token counting for OpenAI API (always on for benchmark server).""" try: logger.info("Token counting enabled") from openai.resources.chat.completions.completions import Completions @@ -574,7 +137,6 @@ def create(self, *args: Any, **kwargs: Any) -> Any: # ============================================================================ try: - DEFAULT_CONFIG = load_config() MEMORY_INSTANCE = Memory.from_config(DEFAULT_CONFIG) logger.info("Memory instance initialized successfully") except Exception as e: @@ -910,10 +472,13 @@ def reset_token_count() -> Dict[str, str]: @app.get("/health", summary="Health check", status_code=status.HTTP_200_OK) def health_check() -> Dict[str, str]: """Health check endpoint.""" + vs = DEFAULT_CONFIG.get("vector_store") or {} + llm = DEFAULT_CONFIG.get("llm") or {} + embedder = DEFAULT_CONFIG.get("embedder") or {} return { "status": "healthy", - "version": CONFIG_VERSION, - "database_provider": DATABASE_PROVIDER, - "llm_provider": LLM_PROVIDER, - "embedding_provider": EMBEDDING_PROVIDER, + "version": DEFAULT_CONFIG.get("version", "v1.1"), + "database_provider": vs.get("provider", ""), + "llm_provider": llm.get("provider", ""), + "embedding_provider": embedder.get("provider", ""), } diff --git a/docs/benchmark/overview.md b/docs/benchmark/overview.md index b8f2654a..09470bd0 100644 --- a/docs/benchmark/overview.md +++ b/docs/benchmark/overview.md @@ -27,9 +27,9 @@ The PowerMem Benchmark suite consists of two main components: # Install dependencies pip install -e . -# Configure environment -cp benchmark/server/.env.example benchmark/server/.env -# Edit benchmark/server/.env with your settings +# Configure environment (use project root .env) +cp .env.example .env +# Edit .env at project root with your settings # Start the server uvicorn benchmark.server.main:app --host 0.0.0.0 --port 8000 --reload @@ -56,8 +56,8 @@ bash run.sh results - Python 3.10 or higher - pip or poetry for dependency management -- Access to OpenAI API (or compatible API endpoint) -- Database: OceanBase or PostgreSQL (depending on your configuration) +- LLM and embedding API keys (OpenAI, Qwen, etc. — see root `.env.example`) +- Database: OceanBase, PostgreSQL, or SQLite (depending on your configuration) ### Installation @@ -70,43 +70,41 @@ bash run.sh results Or install specific dependencies: ```bash - pip install fastapi uvicorn python-dotenv powermem + pip install fastapi uvicorn powermem ``` 2. **Configure environment variables** - Copy the example environment file: + Copy the example environment file at project root: ```bash - cp benchmark/server/.env.example benchmark/server/.env + cp .env.example .env ``` - Edit `benchmark/server/.env` and configure: - - `OPENAI_API_KEY`: Your OpenAI API key (required) - - `EMBEDDER_API_KEY`: Optional, separate API key for embeddings (defaults to `OPENAI_API_KEY`) - - Database configuration (OceanBase or PostgreSQL) - - Other settings as needed + Edit `.env` at project root and configure: + - `LLM_API_KEY` and `EMBEDDING_API_KEY` (required) + - `DATABASE_PROVIDER` and database connection settings (OceanBase, PostgreSQL, or SQLite) + - Other options as in the root `.env.example` - See `benchmark/server/.env.example` for all available configuration options. + See the root `.env.example` for all available configuration options. ### Configuration -All configuration is done through environment variables. The server automatically loads a `.env` file from the `benchmark/server/` directory. +All configuration is done through environment variables. The server loads the `.env` file from the project root (same as the main PowerMem app). #### Required Environment Variables -- `OPENAI_API_KEY`: Your OpenAI API key +- `LLM_API_KEY`: Your LLM API key (or set `OPENAI_API_KEY` for OpenAI) +- `EMBEDDING_API_KEY`: Your embedding API key (or set `OPENAI_API_KEY` for OpenAI) #### Optional Environment Variables -- `EMBEDDER_API_KEY`: Separate API key for embeddings (defaults to `OPENAI_API_KEY`) -- `DB_TYPE`: Database type - `oceanbase` or `postgres` (default: `oceanbase`) -- `TOKEN_COUNTING`: Enable token counting - `true` or `false` (default: `true`) -- `LLM_MODEL`: LLM model name (default: `gpt-4o`) -- `LLM_TEMPERATURE`: LLM temperature (default: `0.2`) -- `EMBEDDER_MODEL`: Embedding model name (default: `text-embedding-3-small`) -- `EMBEDDER_DIMS`: Embedding dimensions (default: `1536`) +All options are the same as the main PowerMem app. See the root `.env.example` for the full list. Examples: + +- `DATABASE_PROVIDER`: `oceanbase`, `postgres`, or `sqlite` (default: `sqlite`) +- `LLM_PROVIDER` / `LLM_MODEL` / `LLM_TEMPERATURE`: LLM settings +- `EMBEDDING_PROVIDER` / `EMBEDDING_MODEL` / `EMBEDDING_DIMS`: Embedding settings -For database-specific configuration, see `benchmark/server/.env.example`. +Token counting is **always enabled** on the benchmark server (no env to disable it). ### Starting the Server @@ -337,10 +335,10 @@ The benchmark evaluates performance using multiple metrics: ### Server Issues -#### "OPENAI_API_KEY environment variable is required" -- **Solution**: Create a `.env` file in `benchmark/server/` directory -- Verify that `OPENAI_API_KEY` is set in the `.env` file -- Check that the file is being loaded (server logs will show the path) +#### "OPENAI_API_KEY environment variable is required" (or missing LLM/embedding keys) +- **Solution**: Create or edit the `.env` file at **project root** (same as PowerMem) +- Verify that `LLM_API_KEY` and `EMBEDDING_API_KEY` (or `OPENAI_API_KEY`) are set in the project root `.env` +- Ensure you have configured the root `.env` before starting the server #### Database connection errors - **Solution**: diff --git a/src/powermem/integrations/embeddings/config/base.py b/src/powermem/integrations/embeddings/config/base.py index e7ab793b..2a9166ff 100644 --- a/src/powermem/integrations/embeddings/config/base.py +++ b/src/powermem/integrations/embeddings/config/base.py @@ -27,7 +27,6 @@ def _register_provider(cls) -> None: def __init_subclass__(cls, **kwargs) -> None: super().__init_subclass__(**kwargs) - cls._register_provider() @classmethod def __pydantic_init_subclass__(cls, **kwargs) -> None: diff --git a/src/powermem/integrations/embeddings/config/sparse_base.py b/src/powermem/integrations/embeddings/config/sparse_base.py index 2f8ae33e..cf371a09 100644 --- a/src/powermem/integrations/embeddings/config/sparse_base.py +++ b/src/powermem/integrations/embeddings/config/sparse_base.py @@ -27,7 +27,6 @@ def _register_provider(cls) -> None: def __init_subclass__(cls, **kwargs) -> None: super().__init_subclass__(**kwargs) - cls._register_provider() @classmethod def __pydantic_init_subclass__(cls, **kwargs) -> None: diff --git a/src/powermem/integrations/llm/config/base.py b/src/powermem/integrations/llm/config/base.py index 2a5ac4ea..c37ea778 100644 --- a/src/powermem/integrations/llm/config/base.py +++ b/src/powermem/integrations/llm/config/base.py @@ -100,7 +100,6 @@ def _register_provider(cls) -> None: def __init_subclass__(cls, **kwargs) -> None: """Called when a class inherits from BaseLLMConfig.""" super().__init_subclass__(**kwargs) - cls._register_provider() @classmethod def __pydantic_init_subclass__(cls, **kwargs) -> None: diff --git a/src/powermem/integrations/rerank/config/base.py b/src/powermem/integrations/rerank/config/base.py index 61997364..b2cd90fd 100644 --- a/src/powermem/integrations/rerank/config/base.py +++ b/src/powermem/integrations/rerank/config/base.py @@ -80,7 +80,6 @@ def _register_provider(cls) -> None: def __init_subclass__(cls, **kwargs) -> None: """Called when a class inherits from BaseRerankConfig.""" super().__init_subclass__(**kwargs) - cls._register_provider() @classmethod def __pydantic_init_subclass__(cls, **kwargs) -> None: diff --git a/src/powermem/storage/config/base.py b/src/powermem/storage/config/base.py index dd89b701..3a99bede 100644 --- a/src/powermem/storage/config/base.py +++ b/src/powermem/storage/config/base.py @@ -45,7 +45,6 @@ def _register_provider(cls) -> None: def __init_subclass__(cls, **kwargs) -> None: """Called when a class inherits from BaseVectorStoreConfig.""" super().__init_subclass__(**kwargs) - cls._register_provider() @classmethod def __pydantic_init_subclass__(cls, **kwargs) -> None: @@ -236,7 +235,6 @@ def _register_provider(cls) -> None: def __init_subclass__(cls, **kwargs) -> None: """Called when a class inherits from BaseGraphStoreConfig.""" super().__init_subclass__(**kwargs) - cls._register_provider() @classmethod def __pydantic_init_subclass__(cls, **kwargs) -> None: