diff --git a/.gitignore b/.gitignore
index 84529a98..7dcfa594 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,8 @@ build/
 dist/
 wheels/
 *.egg-info
-
+.mcp.json
+.osgrep
 
 # Virtual environments
 .venv
@@ -41,4 +42,8 @@ hindsight-docs/static/llms-full.txt
 hindsight-dev/benchmarks/locomo/results/
 hindsight-dev/benchmarks/longmemeval/results/
 hindsight-cli/target
-hindsight-clients/rust/target
\ No newline at end of file
+hindsight-clients/rust/target
+.claude
+whats-next.md
+TASK.md
+CHANGELOG.md
\ No newline at end of file
diff --git a/CLAUDE.md b/CLAUDE.md
index 17316e44..9293fabc 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,7 +4,11 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## Project Overview
 
-Hindsight is an agent memory system that provides long-term memory for AI agents using biomimetic data structures. It stores memories as World facts, Experiences, Opinions, and Observations across memory banks.
+Hindsight is an agent memory system that provides long-term memory for AI agents using biomimetic data structures. Memories are organized as:
+- **World facts**: General knowledge ("The sky is blue")
+- **Experience facts**: Personal experiences ("I visited Paris in 2023")
+- **Opinion facts**: Beliefs with confidence scores ("Paris is beautiful" - 0.9 confidence)
+- **Observations**: Complex mental models derived from reflection
 
 ## Development Commands
 
@@ -13,14 +17,21 @@ Hindsight is an agent memory system that provides long-term memory for AI agents
 ```bash
 # Start API server (loads .env automatically)
 ./scripts/dev/start-api.sh
-# Run tests
+# Run all tests (parallelized with pytest-xdist)
 cd hindsight-api && uv run pytest tests/
 
 # Run specific test file
 cd hindsight-api && uv run pytest tests/test_http_api_integration.py -v
 
-# Lint
+# Run single test function
+cd hindsight-api && uv run pytest tests/test_retain.py::test_retain_simple -v
+
+# Lint and format
 cd hindsight-api && uv run ruff check .
+cd hindsight-api && uv run ruff format .
+
+# Type checking (uses ty - extremely fast type checker from Astral)
+cd hindsight-api && uv run ty check hindsight_api/
 ```
 
 ### Control Plane (Next.js)
@@ -37,7 +48,7 @@ cd hindsight-control-plane && npm run dev
 ### Generating Clients/OpenAPI
 
 ```bash
-# Regenerate OpenAPI spec after API changes
+# Regenerate OpenAPI spec after API changes (REQUIRED after changing endpoints)
 ./scripts/generate-openapi.sh
 
 # Regenerate all client SDKs (Python, TypeScript, Rust)
@@ -57,27 +68,40 @@ cd hindsight-control-plane && npm run dev
 - **hindsight-api/**: Core FastAPI server with memory engine (Python, uv)
 - **hindsight/**: Embedded Python bundle (hindsight-all package)
 - **hindsight-control-plane/**: Admin UI (Next.js, npm)
-- **hindsight-cli/**: CLI tool (Rust, cargo)
+- **hindsight-cli/**: CLI tool (Rust, cargo, uses progenitor for API client)
 - **hindsight-clients/**: Generated SDK clients (Python, TypeScript, Rust)
 - **hindsight-docs/**: Docusaurus documentation site
 - **hindsight-integrations/**: Framework integrations (LiteLLM, OpenAI)
 - **hindsight-dev/**: Development tools and benchmarks
 
 ### Core Engine (hindsight-api/hindsight_api/engine/)
-- `memory_engine.py`: Main orchestrator for retain/recall/reflect operations
+- `memory_engine.py`: Main orchestrator (~170KB) for retain/recall/reflect operations
 - `llm_wrapper.py`: LLM abstraction supporting OpenAI, Anthropic, Gemini, Groq, Ollama, LM Studio
-- `embeddings.py`: Embedding generation (local or TEI)
+- `embeddings.py`: Embedding generation (local sentence-transformers or TEI)
 - `cross_encoder.py`: Reranking (local or TEI)
 - `entity_resolver.py`: Entity extraction and normalization
 - `query_analyzer.py`: Query intent analysis
-- `retain/`: Memory ingestion pipeline
-- `search/`: Multi-strategy retrieval (semantic, BM25, graph, temporal)
+
+**retain/**: Memory ingestion pipeline
+- `orchestrator.py`: Coordinates the retain flow
+- `fact_extraction.py`: LLM-based fact extraction from content
+- `link_utils.py`: Entity link creation and management
+
+**search/**: Multi-strategy retrieval
+- `retrieval.py`: Main retrieval orchestrator
+- `graph_retrieval.py`: Entity/relationship graph traversal
+- `mpfp_retrieval.py`: Multi-Path Fact Propagation retrieval
+- `fusion.py`: Reciprocal rank fusion for combining results
+- `reranking.py`: Cross-encoder reranking
 
 ### API Layer (hindsight-api/hindsight_api/api/)
-FastAPI routers for all endpoints. Main operations:
+- `http.py`: FastAPI HTTP routers (~80KB) for all REST endpoints
+- `mcp.py`: Model Context Protocol server implementation
+
+Main operations:
 - **Retain**: Store memories, extracts facts/entities/relationships
-- **Recall**: Retrieve memories via parallel search strategies + reranking
-- **Reflect**: Deep analysis forming new opinions/observations
+- **Recall**: Retrieve memories via 4 parallel strategies (semantic, BM25, graph, temporal) + reranking
+- **Reflect**: Deep analysis forming new opinions/observations (disposition-aware)
 
 ### Database
 PostgreSQL with pgvector. Schema managed via Alembic migrations in `hindsight-api/hindsight_api/alembic/`. Migrations run automatically on API startup.
@@ -94,13 +118,14 @@ Key tables: `banks`, `memory_units`, `documents`, `entities`, `entity_links`
 This runs the same checks as the pre-commit hook (Ruff for Python, ESLint/Prettier for TypeScript).
 
 ### Memory Banks
-- Each bank is isolated (no cross-bank data access)
+- Each bank is an isolated memory store (like a "brain" for one user/agent)
 - Banks have dispositions (skepticism, literalism, empathy traits 1-5) affecting reflect
 - Banks can have background context
+- Bank isolation is strict - no cross-bank data leakage
 
 ### API Design
 - All endpoints operate on a single bank per request
-- Multi-bank queries are client responsibility
+- Multi-bank queries are client responsibility to orchestrate
 - Disposition traits only affect reflect, not recall
 
 ### Python Style
@@ -108,6 +133,7 @@ This runs the same checks as the pre-commit hook (Ruff for Python, ESLint/Pretti
 - Async throughout (asyncpg, async FastAPI)
 - Pydantic models for request/response
 - Ruff for linting (line-length 120)
+- No Python files at project root - maintain clean directory structure
 
 ### TypeScript Style
 - Next.js App Router for control plane
@@ -145,7 +171,7 @@ cp .env.example .env
 # Python deps
 uv sync --directory hindsight-api/
 
-# Node deps (workspace)
+# Node deps (uses npm workspaces)
 npm install
 ```
 
@@ -153,3 +179,8 @@ Required env vars:
 - `HINDSIGHT_API_LLM_PROVIDER`: openai, anthropic, gemini, groq, ollama, lmstudio
 - `HINDSIGHT_API_LLM_API_KEY`: Your API key
 - `HINDSIGHT_API_LLM_MODEL`: Model name (e.g., o3-mini, claude-sonnet-4-20250514)
+
+Optional (uses local models by default):
+- `HINDSIGHT_API_EMBEDDINGS_PROVIDER`: local (default) or tei
+- `HINDSIGHT_API_RERANKER_PROVIDER`: local (default) or tei
+- `HINDSIGHT_API_DATABASE_URL`: External PostgreSQL (uses embedded pg0 by default)
diff --git a/hindsight-api/hindsight_api/api/mcp.py b/hindsight-api/hindsight_api/api/mcp.py
index f0b0429d..f8efad44 100644
--- a/hindsight-api/hindsight_api/api/mcp.py
+++ b/hindsight-api/hindsight_api/api/mcp.py
@@ -8,7 +8,6 @@
 from fastmcp import FastMCP
 
 from hindsight_api import MemoryEngine
-from hindsight_api.api.http import BankListItem, BankListResponse, BankProfileResponse, DispositionTraits
 from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES
 from hindsight_api.models import RequestContext
 
@@ -54,7 +53,12 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
     mcp = FastMCP("hindsight-mcp-server", stateless_http=True)
 
     @mcp.tool()
-    async def retain(content: str, context: str = "general", bank_id: str | None = None) -> str:
+    async def retain(
+        content: str,
+        context: str = "general",
+        async_processing: bool = True,
+        bank_id: str | None = None,
+    ) -> str:
         """
         Store important information to long-term memory.
 
@@ -70,18 +74,28 @@ async def retain(content: str, context: str = "general", bank_id: str | None = N
         Args:
             content: The fact/memory to store (be specific and include relevant details)
             context: Category for the memory (e.g., 'preferences', 'work', 'hobbies', 'family'). Default: 'general'
+            async_processing: If True, queue for background processing and return immediately. If False, wait for completion. Default: True
             bank_id: Optional bank to store in (defaults to session bank). Use for cross-bank operations.
""" try: target_bank = bank_id or get_current_bank_id() if target_bank is None: return "Error: No bank_id configured" - await memory.retain_batch_async( - bank_id=target_bank, - contents=[{"content": content, "context": context}], - request_context=RequestContext(), - ) - return f"Memory stored successfully in bank '{target_bank}'" + contents = [{"content": content, "context": context}] + if async_processing: + # Queue for background processing and return immediately + result = await memory.submit_async_retain( + bank_id=target_bank, contents=contents, request_context=RequestContext() + ) + return f"Memory queued for background processing (operation_id: {result.get('operation_id', 'N/A')})" + else: + # Wait for completion + await memory.retain_batch_async( + bank_id=target_bank, + contents=contents, + request_context=RequestContext(), + ) + return f"Memory stored successfully in bank '{target_bank}'" except Exception as e: logger.error(f"Error storing memory: {e}", exc_info=True) return f"Error: {str(e)}" @@ -173,79 +187,6 @@ async def reflect(query: str, context: str | None = None, budget: str = "low", b logger.error(f"Error reflecting: {e}", exc_info=True) return f'{{"error": "{e}", "text": ""}}' - @mcp.tool() - async def list_banks() -> str: - """ - List all available memory banks. - - Use this to discover banks for orchestration or to find - the correct bank_id for cross-bank operations. - - Returns: - JSON object with banks array containing bank_id, name, disposition, background, and timestamps - """ - try: - banks = await memory.list_banks(request_context=RequestContext()) - bank_items = [ - BankListItem( - bank_id=b.get("bank_id") or b.get("id"), - name=b.get("name"), - disposition=DispositionTraits( - **b.get("disposition", {"skepticism": 3, "literalism": 3, "empathy": 3}) - ), - background=b.get("background"), - created_at=str(b.get("created_at")) if b.get("created_at") else None, - updated_at=str(b.get("updated_at")) if b.get("updated_at") else None, - ) - for b in banks - ] - return BankListResponse(banks=bank_items).model_dump_json(indent=2) - except Exception as e: - logger.error(f"Error listing banks: {e}", exc_info=True) - return f'{{"error": "{e}", "banks": []}}' - - @mcp.tool() - async def create_bank(bank_id: str, name: str | None = None, background: str | None = None) -> str: - """ - Create or update a memory bank. - - Use this to create new banks for different agents, sessions, or purposes. - Banks are isolated memory stores - each bank has its own memories and personality. 
-
-        Args:
-            bank_id: Unique identifier for the bank (e.g., 'orchestrator-memory', 'agent-1')
-            name: Human-readable name for the bank
-            background: Context about what this bank stores or its purpose
-        """
-        try:
-            # Get or create the bank profile (auto-creates with defaults)
-            await memory.get_bank_profile(bank_id, request_context=RequestContext())
-
-            # Update name and/or background if provided
-            if name is not None or background is not None:
-                await memory.update_bank(bank_id, name=name, background=background, request_context=RequestContext())
-
-            # Get final profile and return using BankProfileResponse model
-            profile = await memory.get_bank_profile(bank_id, request_context=RequestContext())
-            disposition = profile.get("disposition")
-            if hasattr(disposition, "model_dump"):
-                disposition_traits = DispositionTraits(**disposition.model_dump())
-            else:
-                disposition_traits = DispositionTraits(
-                    **dict(disposition or {"skepticism": 3, "literalism": 3, "empathy": 3})
-                )
-
-            response = BankProfileResponse(
-                bank_id=bank_id,
-                name=profile.get("name") or "",
-                disposition=disposition_traits,
-                background=profile.get("background") or "",
-            )
-            return response.model_dump_json(indent=2)
-        except Exception as e:
-            logger.error(f"Error creating bank: {e}", exc_info=True)
-            return json.dumps({"error": str(e)})
-
     return mcp
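
A minimal sketch of exercising the updated `retain` tool in both of its modes. This is illustrative only: it assumes fastmcp 2.x's in-memory `Client` and an already-configured `MemoryEngine` (construction and provider/database configuration are not shown in this diff), and the bank id and contents below are hypothetical.

```python
# Illustrative sketch: assumes fastmcp 2.x in-memory Client and a configured MemoryEngine.
import asyncio

from fastmcp import Client

from hindsight_api import MemoryEngine
from hindsight_api.api.mcp import create_mcp_server


async def main() -> None:
    memory = MemoryEngine()  # assumption: real configuration (LLM provider, database) omitted here
    mcp = create_mcp_server(memory)

    async with Client(mcp) as client:
        # Default mode (async_processing=True): queue for background processing,
        # returns an operation_id message immediately.
        queued = await client.call_tool(
            "retain",
            {"content": "User prefers dark mode", "context": "preferences", "bank_id": "agent-1"},
        )
        print(queued)

        # Blocking mode (async_processing=False): wait until facts/entities are fully stored.
        stored = await client.call_tool(
            "retain",
            {
                "content": "User visited Paris in 2023",
                "context": "travel",
                "async_processing": False,
                "bank_id": "agent-1",
            },
        )
        print(stored)


asyncio.run(main())
```

With `async_processing=True` (the default) the tool hands the batch to `submit_async_retain` and returns as soon as it is queued, which keeps MCP tool calls fast; passing `False` falls back to the original blocking `retain_batch_async` path.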