vectorize-io
diff --git a/‎AGENTS.md‎
Lines changed: 4 additions & 0 deletions b/‎AGENTS.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 9 additions & 21 deletions b/‎README.md‎
Lines changed: 9 additions & 21 deletions
diff --git a/‎docker/standalone/start-all.sh‎
Lines changed: 2 additions & 7 deletions b/‎docker/standalone/start-all.sh‎
Lines changed: 2 additions & 7 deletions
diff --git a/‎hindsight-api/hindsight_api/api/http.py‎
Lines changed: 30 additions & 1 deletion b/‎hindsight-api/hindsight_api/api/http.py‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎hindsight-api/hindsight_api/banner.py‎
Lines changed: 89 additions & 0 deletions b/‎hindsight-api/hindsight_api/banner.py‎
Lines changed: 89 additions & 0 deletions
diff --git a/‎hindsight-api/hindsight_api/config.py‎
Lines changed: 2 additions & 2 deletions b/‎hindsight-api/hindsight_api/config.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎hindsight-api/hindsight_api/engine/llm_wrapper.py‎
Lines changed: 35 additions & 6 deletions b/‎hindsight-api/hindsight_api/engine/llm_wrapper.py‎
Lines changed: 35 additions & 6 deletions
diff --git a/‎hindsight-api/hindsight_api/engine/memory_engine.py‎
Lines changed: 13 additions & 3 deletions b/‎hindsight-api/hindsight_api/engine/memory_engine.py‎
Lines changed: 13 additions & 3 deletions
@@ -145,3 +145,7 @@ Note: The maintained wrapper `hindsight_client.py` and `README.md` are preserved
 - PostgreSQL with pgvector extension
 - Schema managed via Alembic migrations in `hindsight-api/alembic/`, db migrations happen during api startup, no manual commands
 - Key tables: `banks`, `memory_units`, `documents`, `entities`, `entity_links`
+
+# Branding
+## Colors
+- Primary: gradient from #0074d9 to #009296  
@@ -4,10 +4,11 @@
 
 [Documentation](https://vectorize-io.github.io/hindsight) • [Paper](#coming-soon) • [Examples](https://github.com/vectorize-io/hindsight-cookbook)
 
-[![CI](https://github.com/vectorize-io/hindsight/actions/workflows/test.yml/badge.svg)](https://github.com/vectorize-io/hindsight/actions/workflows/test.yml)
+[![CI](https://github.com/vectorize-io/hindsight/actions/workflows/release.yml/badge.svg)](https://github.com/vectorize-io/hindsight/actions/workflows/release.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+[![PyPI - hindsight-api](https://img.shields.io/pypi/v/hindsight-api?label=hindsight-api)](https://pypi.org/project/hindsight-api/)
 [![PyPI - hindsight-client](https://img.shields.io/pypi/v/hindsight-client?label=hindsight-client)](https://pypi.org/project/hindsight-client/)
-[![npm](https://img.shields.io/npm/v/@vectorize-io/hindsight-client)](https://www.npmjs.com/package/@vectorize-io/hindsight-client)
+[![npm - @vectorize-io/hindsight-client](https://img.shields.io/npm/v/@vectorize-io/hindsight-client)](https://www.npmjs.com/package/@vectorize-io/hindsight-client)
 [![Slack Community](https://img.shields.io/badge/Slack-Join%20Community-4A154B?logo=slack)](https://join.slack.com/t/hindsight-space/shared_invite/zt-3klo21kua-VUCC_zHP5rIcXFB1_5yw6A)
 
 
@@ -53,12 +54,10 @@ Memories in Hindsight are stored in banks (e.g. memory banks). When memories are
 ```bash
 export OPENAI_API_KEY=your-key
 
-docker run -p 8888:8888 -p 9999:9999 \
-  -e HINDSIGHT_API_LLM_PROVIDER=openai \
+docker run --rm -it --pull always -p 8888:8888 -p 9999:9999 \
   -e HINDSIGHT_API_LLM_API_KEY=$OPENAI_API_KEY \
-  -e HINDSIGHT_API_LLM_MODEL=gpt-4o-mini \
   -v $HOME/.hindsight-docker:/home/hindsight/.pg0 \
-  ghcr.io/vectorize-io/hindsight
+  ghcr.io/vectorize-io/hindsight:latest
 ```
 
 API: http://localhost:8888  
@@ -208,29 +207,18 @@ client.reflect(bank_id="my-bank", query="What should I know about Alice?")
 
 ![Reflect Operation](hindsight-docs/static/img/reflect-operation.webp)
 
-## Integrations
-
-### Examples
-
-[Examples Repo]([./examples](https://github.com/vectorize-io/hindsight-cookbook)) includes:
-
-- Basic usage
-- Multi-session conversations
-- Temporal queries
-- Entity reasoning
-- Opinion tracking
-- Production setup (Docker Compose + monitoring)
-
 ---
 
 ## Resources
 
-**Documentation:** [vectorize-io.github.io/hindsight](https://vectorize-io.github.io/hindsight)
+**Documentation:** 
+- [https://hindsight.vectorize.io](https://hindsight.vectorize.io)
 
 **Clients:**
 - [Python](https://hindsight.vectorize.io/sdks/python)
 - [Node.js](https://hindsight.vectorize.io/sdks/nodejs)
-- [REST API](http://hindsight.vectorize.io/api-reference)
+- [REST API](https://hindsight.vectorize.io/api-reference)
+- [CLI](https://hindsight.vectorize.io/sdks/cli)
 
 **Community:**
 - [Slack](https://join.slack.com/t/hindsight-space/shared_invite/zt-3klo21kua-VUCC_zHP5rIcXFB1_5yw6A)
 
@@ -1,9 +1,6 @@
 #!/bin/bash
 set -e
 
-echo "🚀 Starting Hindsight..."
-echo ""
-
 # Service flags (default to true if not set)
 ENABLE_API="${HINDSIGHT_ENABLE_API:-true}"
 ENABLE_CP="${HINDSIGHT_ENABLE_CP:-true}"
@@ -31,16 +28,14 @@ if [ "$ENABLE_API" = "true" ]; then
     PIDS+=($API_PID)
 
     # Wait for API to be ready
-    echo "⏳ Waiting for API..."
     for i in {1..60}; do
         if curl -sf http://localhost:8888/health &>/dev/null; then
-            echo "✅ API is ready"
             break
         fi
         sleep 1
     done
 else
-    echo "⏭️  API disabled (HINDSIGHT_ENABLE_API=false)"
+    echo "API disabled (HINDSIGHT_ENABLE_API=false)"
 fi
 
 # Start Control Plane if enabled
@@ -51,7 +46,7 @@ if [ "$ENABLE_CP" = "true" ]; then
     CP_PID=$!
     PIDS+=($CP_PID)
 else
-    echo "⏭️  Control Plane disabled (HINDSIGHT_ENABLE_CP=false)"
+    echo "Control Plane disabled (HINDSIGHT_ENABLE_CP=false)"
 fi
 
 # Print status
 
@@ -672,11 +672,15 @@ class DeleteResponse(BaseModel):
     """Response model for delete operations."""
     model_config = ConfigDict(json_schema_extra={
         "example": {
-            "success": True
+            "success": True,
+            "message": "Deleted successfully",
+            "deleted_count": 10
         }
     })
 
     success: bool
+    message: Optional[str] = None
+    deleted_count: Optional[int] = None
 
 
 def create_app(memory: MemoryEngine, initialize_memory: bool = True) -> FastAPI:
@@ -1696,6 +1700,31 @@ async def api_create_or_update_bank(bank_id: str,
             raise HTTPException(status_code=500, detail=str(e))
 
 
+    @app.delete(
+        "/v1/default/banks/{bank_id}",
+        response_model=DeleteResponse,
+        summary="Delete memory bank",
+        description="Delete an entire memory bank including all memories, entities, documents, and the bank profile itself. "
+        "This is a destructive operation that cannot be undone.",
+        operation_id="delete_bank",
+        tags=["Banks"]
+    )
+    async def api_delete_bank(bank_id: str):
+        """Remove a memory bank and report how many rows went with it.
+
+        Delegates to ``app.state.memory.delete_bank`` and sums the memory-unit,
+        entity and document counters from its result into ``deleted_count``.
+        Any exception is logged with its traceback and surfaced as HTTP 500.
+        """
+        try:
+            stats = await app.state.memory.delete_bank(bank_id)
+            # Missing counters are treated as zero.
+            removed_total = (
+                stats.get("memory_units_deleted", 0)
+                + stats.get("entities_deleted", 0)
+                + stats.get("documents_deleted", 0)
+            )
+            return DeleteResponse(
+                success=True,
+                message=f"Bank '{bank_id}' and all associated data deleted successfully",
+                deleted_count=removed_total,
+            )
+        except Exception as exc:
+            import traceback
+            error_detail = f"{str(exc)}\n\nTraceback:\n{traceback.format_exc()}"
+            logger.error(f"Error in DELETE /v1/default/banks/{bank_id}: {error_detail}")
+            raise HTTPException(status_code=500, detail=str(exc))
+
+
     @app.post(
         "/v1/default/banks/{bank_id}/memories",
         response_model=RetainResponse,
 
@@ -0,0 +1,89 @@
+"""
+Banner display for Hindsight API startup.
+
+Shows the logo and tagline with gradient colors.
+"""
+
+# Gradient colors: #0074d9 -> #009296
+# Each endpoint is an (R, G, B) tuple of 0-255 channel values matching the
+# hex code noted beside it.
+GRADIENT_START = (0, 116, 217)  # #0074d9
+GRADIENT_END = (0, 146, 150)    # #009296
+
+# Pre-rendered logo: Unicode half-block characters wrapped in 24-bit ANSI color escapes (generated by test-logo.py)
+LOGO = """\
+  \033[38;2;9;127;184m\u2584\033[0m\033[48;2;8;130;178m\033[38;2;5;133;186m\u2584\033[0m       \033[48;2;10;143;160m\033[38;2;10;143;165m\u2584\033[0m\033[38;2;7;140;156m\u2584\033[0m
+ \033[38;2;8;125;192m\u2584\033[0m \033[38;2;3;132;191m\u2580\033[0m\033[38;2;2;133;192m\u2584\033[0m \033[38;2;3;132;180m\u2584\033[0m\033[38;2;1;137;184m\u2584\033[0m\033[38;2;3;133;174m\u2584\033[0m \033[38;2;3;142;176m\u2584\033[0m\033[38;2;4;142;169m\u2580\033[0m \033[38;2;10;144;164m\u2584\033[0m
+\033[38;2;6;121;195m\u2580\033[0m\033[38;2;5;128;203m\u2580\033[0m\033[48;2;5;124;195m\033[38;2;3;125;200m\u2584\033[0m\033[38;2;2;126;196m\u2584\033[0m\033[48;2;3;128;188m\033[38;2;1;131;196m\u2584\033[0m\033[48;2;0;152;219m\033[38;2;2;131;191m\u2584\033[0m\033[38;2;1;141;196m\u2580\033[0m\033[38;2;1;135;183m\u2580\033[0m\033[38;2;1;148;198m\u2580\033[0m\033[48;2;1;156;202m\033[38;2;2;135;180m\u2584\033[0m\033[48;2;4;134;169m\033[38;2;1;137;177m\u2584\033[0m\033[38;2;3;138;173m\u2584\033[0m\033[48;2;6;137;165m\033[38;2;2;140;170m\u2584\033[0m\033[38;2;7;144;169m\u2580\033[0m\033[38;2;7;139;158m\u2580\033[0m
+   \033[48;2;2;128;202m\033[38;2;2;124;201m\u2584\033[0m\033[48;2;1;130;201m\033[38;2;0;135;212m\u2584\033[0m\033[38;2;2;128;196m\u2584\033[0m \033[48;2;2;142;204m\033[38;2;7;138;199m\u2584\033[0m \033[38;2;1;135;186m\u2584\033[0m\033[48;2;1;142;186m\033[38;2;2;144;194m\u2584\033[0m\033[48;2;3;138;176m\033[38;2;2;134;176m\u2584\033[0m
+ \033[48;2;8;118;200m\033[38;2;8;121;209m\u2584\033[0m\033[38;2;3;121;203m\u2580\033[0m \033[38;2;3;122;192m\u2580\033[0m\033[38;2;1;138;216m\u2580\033[0m\033[48;2;0;138;210m\033[38;2;3;128;198m\u2584\033[0m\033[48;2;0;126;188m\033[38;2;2;131;198m\u2584\033[0m\033[48;2;0;142;205m\033[38;2;3;132;193m\u2584\033[0m\033[38;2;1;140;196m\u2580\033[0m  \033[38;2;4;134;175m\u2580\033[0m\033[48;2;13;135;167m\033[38;2;8;136;174m\u2584\033[0m """
+
+
+def _interpolate_color(start: tuple, end: tuple, t: float) -> tuple:
+    """Blend two RGB triples linearly.
+
+    Each of the three channels is ``start + (end - start) * t`` truncated with
+    ``int()``, so ``t = 0`` returns ``start`` and ``t = 1`` returns ``end``.
+    """
+    return tuple(
+        int(start[channel] + (end[channel] - start[channel]) * t)
+        for channel in range(3)
+    )
+
+
+def gradient_text(text: str, start: tuple = GRADIENT_START, end: tuple = GRADIENT_END) -> str:
+    """Colorize ``text`` character by character along a gradient.
+
+    Every character except a plain space gets its own 24-bit foreground
+    escape, positioned by its index across the full string (spaces still
+    advance the position). A single reset sequence is appended at the end.
+    """
+    # Guard the divisor so empty and one-character strings do not divide by zero.
+    span = max(len(text) - 1, 1)
+    pieces = []
+    for position, glyph in enumerate(text):
+        if glyph != ' ':
+            r, g, b = _interpolate_color(start, end, position / span)
+            glyph = f"\033[38;2;{r};{g};{b}m{glyph}"
+        pieces.append(glyph)
+    return "".join(pieces) + "\033[0m"
+
+
+def print_banner():
+    """Write the logo, then the gradient-colored tagline, to stdout."""
+    print(LOGO)
+    slogan = "Hindsight: Agent Memory That Works Like Human Memory"
+    # The tagline is indented two spaces and framed by blank lines.
+    print("\n  " + gradient_text(slogan) + "\n")
+
+
+def color(text: str, t: float = 0.0) -> str:
+    """Wrap ``text`` in one gradient color picked by ``t`` (0.0 = start, 1.0 = end)."""
+    red, green, blue = _interpolate_color(GRADIENT_START, GRADIENT_END, t)
+    foreground = f"\033[38;2;{red};{green};{blue}m"
+    return f"{foreground}{text}\033[0m"
+
+
+def color_start(text: str) -> str:
+    """Render ``text`` in the gradient's first color (#0074d9)."""
+    return color(text, t=0.0)
+
+
+def color_end(text: str) -> str:
+    """Render ``text`` in the gradient's last color (#009296)."""
+    return color(text, t=1.0)
+
+
+def color_mid(text: str) -> str:
+    """Render ``text`` in the color halfway along the gradient."""
+    return color(text, t=0.5)
+
+
+def dim(text: str) -> str:
+    """Render ``text`` in mid-gray (RGB 128, 128, 128) followed by a reset."""
+    gray = "\033[38;2;128;128;128m"
+    reset = "\033[0m"
+    return f"{gray}{text}{reset}"
+
+
+def _mask_url_password(url: str) -> str:
+    """Return ``url`` with any password in its userinfo replaced by ``***``.
+
+    Values that carry no password (including non-URL values such as ``pg0``)
+    or that cannot be parsed are returned unchanged.
+    """
+    from urllib.parse import urlsplit, urlunsplit
+
+    try:
+        parts = urlsplit(url)
+        if parts.password is None:
+            return url
+    except (TypeError, ValueError, AttributeError):
+        # Display-only helper: never let an odd value break startup output.
+        return url
+    # Split on the last "@" so an "@" inside the password cannot confuse the host part.
+    userinfo, _, hostinfo = parts.netloc.rpartition("@")
+    username = userinfo.partition(":")[0]
+    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))
+
+
+def print_startup_info(host: str, port: int, database_url: str, llm_provider: str,
+                       llm_model: str, embeddings_provider: str, reranker_provider: str,
+                       mcp_enabled: bool = False):
+    """Print styled startup information.
+
+    Writes one heading line and one colored line per setting to stdout,
+    stepping through the gradient from start to end. The MCP line is printed
+    only when ``mcp_enabled`` is true.
+
+    The database URL is shown with its password masked, because this output
+    typically ends up in terminal scrollback and container logs.
+    """
+    print(color_start("Starting Hindsight API..."))
+    print(f"  {dim('URL:')} {color(f'http://{host}:{port}', 0.2)}")
+    print(f"  {dim('Database:')} {color(_mask_url_password(database_url), 0.4)}")
+    print(f"  {dim('LLM:')} {color(f'{llm_provider} / {llm_model}', 0.6)}")
+    print(f"  {dim('Embeddings:')} {color(embeddings_provider, 0.8)}")
+    print(f"  {dim('Reranker:')} {color(reranker_provider, 1.0)}")
+    if mcp_enabled:
+        print(f"  {dim('MCP:')} {color_end('enabled at /mcp')}")
+    print()
@@ -32,8 +32,8 @@
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
-DEFAULT_LLM_PROVIDER = "groq"
-DEFAULT_LLM_MODEL = "openai/gpt-oss-20b"
+DEFAULT_LLM_PROVIDER = "openai"
+DEFAULT_LLM_MODEL = "gpt-5-mini"
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
 
@@ -91,12 +91,35 @@ def __init__(
             self._client = AsyncOpenAI(api_key="ollama", base_url=self.base_url, max_retries=0)
             self._gemini_client = None
         else:
-            self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
+            # Only pass base_url if it's set (OpenAI uses default URL otherwise)
+            client_kwargs = {"api_key": self.api_key, "max_retries": 0}
+            if self.base_url:
+                client_kwargs["base_url"] = self.base_url
+            self._client = AsyncOpenAI(**client_kwargs)
             self._gemini_client = None
 
-        logger.info(
-            f"Initialized LLM: provider={self.provider}, model={self.model}, base_url={self.base_url}"
-        )
+    async def verify_connection(self) -> None:
+        """
+        Probe the configured LLM with one minimal request to confirm it responds.
+
+        The probe goes through ``self.call`` with a 10-token completion cap and
+        a reduced retry/backoff budget. Progress is logged before and after.
+
+        Raises:
+            RuntimeError: If the probe raises; the original exception is chained as the cause.
+        """
+        target = f"{self.provider}/{self.model}"
+        try:
+            endpoint = self.base_url or 'default'
+            logger.info(f"Verifying LLM: provider={self.provider}, model={self.model}, base_url={endpoint}...")
+            probe_messages = [{"role": "user", "content": "Say 'ok'"}]
+            await self.call(
+                messages=probe_messages,
+                max_completion_tokens=10,
+                max_retries=2,
+                initial_backoff=0.5,
+                max_backoff=2.0,
+            )
+            # Reaching this line means the call returned without raising.
+            logger.info(f"LLM verified: {target}")
+        except Exception as exc:
+            raise RuntimeError(
+                f"LLM connection verification failed for {target}: {exc}"
+            ) from exc
 
     async def call(
         self,
@@ -149,7 +172,12 @@ async def call(
 
             if max_completion_tokens is not None:
                 call_params["max_completion_tokens"] = max_completion_tokens
-            if temperature is not None:
+            # Check if model supports reasoning parameter (o1, o3, gpt-5 families)
+            model_lower = self.model.lower()
+            is_reasoning_model = any(x in model_lower for x in ["gpt-5", "o1", "o3"])
+
+            # GPT-5/o1/o3 family doesn't support custom temperature (only default 1)
+            if temperature is not None and not is_reasoning_model:
                 call_params["temperature"] = temperature
 
             # Provider-specific parameters
@@ -216,7 +244,8 @@ async def call(
                 except APIConnectionError as e:
                     last_exception = e
                     if attempt < max_retries:
-                        logger.warning(f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1})")
+                        status_code = getattr(e, 'status_code', None) or getattr(getattr(e, 'response', None), 'status_code', None)
+                        logger.warning(f"Connection error, retrying... (attempt {attempt + 1}/{max_retries + 1}) - status_code={status_code}, message={e}")
                         backoff = min(initial_backoff * (2 ** attempt), max_backoff)
                         await asyncio.sleep(backoff)
                         continue
 
@@ -453,12 +453,17 @@ async def init_query_analyzer():
             # Query analyzer load is sync and CPU-bound
             await loop.run_in_executor(None, self.query_analyzer.load)
 
+        async def verify_llm():
+            """Await the LLM config's connection check as its own coroutine."""
+            llm_config = self._llm_config
+            await llm_config.verify_connection()
+
         # Run pg0 and all model initializations in parallel
         await asyncio.gather(
             start_pg0(),
             init_embeddings(),
             init_cross_encoder(),
             init_query_analyzer(),
+            verify_llm(),
         )
 
         # Run database migrations if enabled
@@ -1791,10 +1796,14 @@ async def delete_bank(self, bank_id: str, fact_type: Optional[str] = None) -> Di
                         # Delete entities (cascades to unit_entities, entity_cooccurrences, memory_links with entity_id)
                         await conn.execute("DELETE FROM entities WHERE bank_id = $1", bank_id)
 
+                        # Delete the bank profile itself
+                        await conn.execute("DELETE FROM banks WHERE bank_id = $1", bank_id)
+
                         return {
                             "memory_units_deleted": units_count,
                             "entities_deleted": entities_count,
-                            "documents_deleted": documents_count
+                            "documents_deleted": documents_count,
+                            "bank_deleted": True
                         }
 
                 except Exception as e:
@@ -1839,10 +1848,11 @@ async def get_graph_data(self, bank_id: Optional[str] = None, fact_type: Optiona
             """, *query_params)
 
             # Get links, filtering to only include links between units of the selected agent
+            # Use DISTINCT ON with LEAST/GREATEST to deduplicate bidirectional links
             unit_ids = [row['id'] for row in units]
             if unit_ids:
                 links = await conn.fetch("""
-                    SELECT
+                    SELECT DISTINCT ON (LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid))
                         ml.from_unit_id,
                         ml.to_unit_id,
                         ml.link_type,
@@ -1851,7 +1861,7 @@ async def get_graph_data(self, bank_id: Optional[str] = None, fact_type: Optiona
                     FROM memory_links ml
                     LEFT JOIN entities e ON ml.entity_id = e.id
                     WHERE ml.from_unit_id = ANY($1::uuid[]) AND ml.to_unit_id = ANY($1::uuid[])
-                    ORDER BY ml.link_type, ml.weight DESC
+                    ORDER BY LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid), ml.weight DESC
                 """, unit_ids)
             else:
                 links = []