12 changes: 12 additions & 0 deletions .env.example
@@ -2,11 +2,23 @@
# Copy this file to .env and fill in your values

# LLM Configuration (Required)
# Supported providers: openai, groq, ollama, gemini, anthropic, lmstudio
HINDSIGHT_API_LLM_PROVIDER=openai
HINDSIGHT_API_LLM_API_KEY=your-api-key-here
HINDSIGHT_API_LLM_MODEL=o3-mini
HINDSIGHT_API_LLM_BASE_URL=https://api.openai.com/v1

# Example: Anthropic Claude configuration
# HINDSIGHT_API_LLM_PROVIDER=anthropic
# HINDSIGHT_API_LLM_API_KEY=your-anthropic-api-key
# HINDSIGHT_API_LLM_MODEL=claude-sonnet-4-20250514

# Example: LM Studio local configuration (Qwen 2.5 32B recommended)
# HINDSIGHT_API_LLM_PROVIDER=lmstudio
# HINDSIGHT_API_LLM_API_KEY=lmstudio
# HINDSIGHT_API_LLM_BASE_URL=http://localhost:1234/v1
# HINDSIGHT_API_LLM_MODEL=qwen2.5-32b-instruct

# API Configuration (Optional)
HINDSIGHT_API_HOST=0.0.0.0
HINDSIGHT_API_PORT=8888
125 changes: 125 additions & 0 deletions CLAUDE.md
@@ -0,0 +1,125 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

Hindsight is a memory system that gives AI agents long-term memory using biomimetic data structures. It stores memories as World facts, Experiences, Opinions, and Observations, organized into isolated memory banks.

## Development Commands

### API Server (Python/FastAPI)
```bash
# Start API server (loads .env automatically)
./scripts/dev/start-api.sh

# Run tests
cd hindsight-api && uv run pytest tests/

# Run specific test file
cd hindsight-api && uv run pytest tests/test_http_api_integration.py -v

# Lint
cd hindsight-api && uv run ruff check .
```

### Control Plane (Next.js)
```bash
./scripts/dev/start-control-plane.sh
# Or manually:
cd hindsight-control-plane && npm run dev
```

### Documentation Site (Docusaurus)
```bash
./scripts/dev/start-docs.sh
```

### Generating Clients/OpenAPI
```bash
# Regenerate OpenAPI spec after API changes
./scripts/generate-openapi.sh

# Regenerate all client SDKs (Python, TypeScript, Rust)
./scripts/generate-clients.sh
```

### Benchmarks
```bash
./scripts/benchmarks/run-longmemeval.sh
./scripts/benchmarks/run-locomo.sh
./scripts/benchmarks/start-visualizer.sh # View results at localhost:8001
```

## Architecture

### Monorepo Structure
- **hindsight-api/**: Core FastAPI server with memory engine (Python, uv)
- **hindsight/**: Embedded Python bundle (hindsight-all package)
- **hindsight-control-plane/**: Admin UI (Next.js, npm)
- **hindsight-cli/**: CLI tool (Rust, cargo)
- **hindsight-clients/**: Generated SDK clients (Python, TypeScript, Rust)
- **hindsight-docs/**: Docusaurus documentation site
- **hindsight-integrations/**: Framework integrations (LiteLLM, OpenAI)
- **hindsight-dev/**: Development tools and benchmarks

### Core Engine (hindsight-api/hindsight_api/engine/)
- `memory_engine.py`: Main orchestrator for retain/recall/reflect operations
- `llm_wrapper.py`: LLM abstraction supporting OpenAI, Anthropic, Gemini, Groq, Ollama, LM Studio
- `embeddings.py`: Embedding generation (local or TEI)
- `cross_encoder.py`: Reranking (local or TEI)
- `entity_resolver.py`: Entity extraction and normalization
- `query_analyzer.py`: Query intent analysis
- `retain/`: Memory ingestion pipeline
- `search/`: Multi-strategy retrieval (semantic, BM25, graph, temporal); a conceptual sketch follows this list
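
The recall path is the least obvious piece here, so a conceptual sketch may help: run the strategies concurrently, pool the candidates, and hand them to the reranker. The stub functions below are illustrative stand-ins, not the engine's real signatures.

```python
# Conceptual sketch of multi-strategy recall; stubs stand in for the
# real implementations under engine/search/.
import asyncio

async def semantic_search(q: str) -> list[str]:   # pgvector similarity
    return [f"semantic hit for {q!r}"]

async def bm25_search(q: str) -> list[str]:       # lexical match
    return [f"bm25 hit for {q!r}"]

async def graph_search(q: str) -> list[str]:      # entity-link traversal
    return [f"graph hit for {q!r}"]

async def temporal_search(q: str) -> list[str]:   # time-anchored lookup
    return [f"temporal hit for {q!r}"]

async def recall(q: str) -> list[str]:
    per_strategy = await asyncio.gather(
        semantic_search(q), bm25_search(q), graph_search(q), temporal_search(q)
    )
    # Flatten; the cross-encoder reranker would order these before returning.
    return [hit for hits in per_strategy for hit in hits]

print(asyncio.run(recall("what does Ada prefer?")))
```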

### API Layer (hindsight-api/hindsight_api/api/)
FastAPI routers for all endpoints. Main operations (a hedged request sketch follows the list):
- **Retain**: Store memories; extract facts, entities, and relationships
- **Recall**: Retrieve memories via parallel search strategies + reranking
- **Reflect**: Run deep analysis that forms new opinions and observations
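
As a rough illustration of the request flow, a hedged httpx sketch. The route paths and payload shapes here are assumptions, not the actual API; check the routers or the generated OpenAPI spec for the real contract.

```python
# Hypothetical request flow; route paths and JSON bodies are assumptions.
import asyncio
import httpx

async def demo() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8888") as client:
        # Retain: the server extracts facts, entities, and relationships.
        await client.post("/banks/demo/retain",
                          json={"content": "Ada prefers async Python."})
        # Recall: parallel search strategies plus reranking behind one call.
        resp = await client.post("/banks/demo/recall",
                                 json={"query": "What does Ada prefer?"})
        print(resp.json())

asyncio.run(demo())
```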

### Database
PostgreSQL with pgvector. Schema managed via Alembic migrations in `hindsight-api/hindsight_api/alembic/`. Migrations run automatically on API startup.

Key tables: `banks`, `memory_units`, `documents`, `entities`, `entity_links`
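
For intuition, a minimal pgvector similarity query against `memory_units`. The column names (`bank_id`, `content`, `embedding`) are assumptions; the Alembic migrations define the real schema.

```python
# Illustrative only: column names are assumed, not the actual schema.
import asyncpg

async def nearest_memories(pool: asyncpg.Pool, bank_id: str,
                           embedding: list[float], k: int = 5):
    vec = "[" + ",".join(str(x) for x in embedding) + "]"
    # <=> is pgvector's cosine-distance operator; smaller means closer.
    return await pool.fetch(
        """
        SELECT content, embedding <=> $2::vector AS distance
        FROM memory_units
        WHERE bank_id = $1
        ORDER BY distance
        LIMIT $3
        """,
        bank_id, vec, k,
    )
```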

## Key Conventions

### Memory Banks
- Each bank is isolated (no cross-bank data access)
- Banks have dispositions (skepticism, literalism, and empathy traits, each rated 1-5) that affect reflect; a hypothetical model sketch follows this list
- Banks can have background context
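
A hypothetical sketch of the disposition shape (field names and defaults are assumptions; the real Pydantic models live in hindsight-api):

```python
# Hypothetical model; field names and defaults are assumptions.
from pydantic import BaseModel, Field

class Disposition(BaseModel):
    skepticism: int = Field(3, ge=1, le=5)  # 1 = trusting, 5 = doubting
    literalism: int = Field(3, ge=1, le=5)  # 1 = figurative, 5 = literal
    empathy: int = Field(3, ge=1, le=5)     # 1 = detached, 5 = empathic

print(Disposition(skepticism=5).model_dump())  # pydantic v2 API
```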

### API Design
- All endpoints operate on a single bank per request
- Multi-bank queries are client responsibility
- Disposition traits only affect reflect, not recall

### Python Style
- Python 3.11+, type hints required
- Async throughout (asyncpg, async FastAPI)
- Pydantic models for request/response
- Ruff for linting (line-length 120)

### TypeScript Style
- Next.js App Router for control plane
- Tailwind CSS with shadcn/ui components

## Environment Setup

```bash
cp .env.example .env
# Edit .env with LLM API key

# Python deps
uv sync --directory hindsight-api/

# Node deps (workspace)
npm install
```

Required env vars:
- `HINDSIGHT_API_LLM_PROVIDER`: openai, anthropic, gemini, groq, ollama, lmstudio
- `HINDSIGHT_API_LLM_API_KEY`: Your API key
- `HINDSIGHT_API_LLM_MODEL`: Model name (e.g., o3-mini, claude-sonnet-4-20250514)
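
In code these land on `HindsightConfig.from_env()` (see the config.py diff below); a minimal usage sketch:

```python
# Minimal sketch: read settings from the environment, with the defaults
# from config.py filling any gaps.
from hindsight_api.config import HindsightConfig

cfg = HindsightConfig.from_env()
print(cfg.llm_model, cfg.llm_max_concurrent, cfg.llm_timeout)
```
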
10 changes: 10 additions & 0 deletions hindsight-api/hindsight_api/config.py
@@ -16,6 +16,8 @@
ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"

ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
@@ -36,6 +38,8 @@
DEFAULT_DATABASE_URL = "pg0"
DEFAULT_LLM_PROVIDER = "openai"
DEFAULT_LLM_MODEL = "gpt-5-mini"
DEFAULT_LLM_MAX_CONCURRENT = 32
DEFAULT_LLM_TIMEOUT = 120.0 # seconds

DEFAULT_EMBEDDINGS_PROVIDER = "local"
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
@@ -66,6 +70,8 @@ class HindsightConfig:
llm_api_key: str | None
llm_model: str
llm_base_url: str | None
llm_max_concurrent: int
llm_timeout: float

# Embeddings
embeddings_provider: str
@@ -97,6 +103,8 @@ def from_env(cls) -> "HindsightConfig":
llm_api_key=os.getenv(ENV_LLM_API_KEY),
llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
# Embeddings
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
@@ -124,6 +132,8 @@ def get_llm_base_url(self) -> str:
return "https://api.groq.com/openai/v1"
elif provider == "ollama":
return "http://localhost:11434/v1"
elif provider == "lmstudio":
return "http://localhost:1234/v1"
else:
return ""
