diff --git a/hindsight-api/hindsight_api/api/__init__.py b/hindsight-api/hindsight_api/api/__init__.py index 1351847a..c33b29f7 100644 --- a/hindsight-api/hindsight_api/api/__init__.py +++ b/hindsight-api/hindsight_api/api/__init__.py @@ -52,6 +52,7 @@ def create_app( if mcp_api_enabled: try: from .mcp import create_mcp_app + mcp_app = create_mcp_app(memory=memory) except ImportError as e: logger.error(f"MCP server requested but dependencies not available: {e}") diff --git a/hindsight-api/hindsight_api/api/http.py b/hindsight-api/hindsight_api/api/http.py index 8d98a4ba..c27acdd0 100644 --- a/hindsight-api/hindsight_api/api/http.py +++ b/hindsight-api/hindsight_api/api/http.py @@ -14,6 +14,8 @@ from fastapi import Depends, FastAPI, Header, HTTPException, Query +from hindsight_api.extensions import AuthenticationError + def _parse_metadata(metadata: Any) -> dict[str, Any]: """Parse metadata that may be a dict, JSON string, or None.""" @@ -35,7 +37,7 @@ def _parse_metadata(metadata: Any) -> dict[str, Any]: from hindsight_api.engine.db_utils import acquire_with_retry from hindsight_api.engine.memory_engine import Budget, fq_table from hindsight_api.engine.response_models import VALID_RECALL_FACT_TYPES -from hindsight_api.extensions import HttpExtension, load_extension +from hindsight_api.extensions import HttpExtension, OperationValidationError, load_extension from hindsight_api.metrics import create_metrics_collector, get_metrics_collector, initialize_metrics from hindsight_api.models import RequestContext @@ -989,6 +991,16 @@ def get_request_context(authorization: str | None = Header(default=None)) -> Req api_key = authorization.strip() return RequestContext(api_key=api_key) + # Global exception handler for authentication errors + @app.exception_handler(AuthenticationError) + async def authentication_error_handler(request, exc: AuthenticationError): + from fastapi.responses import JSONResponse + + return JSONResponse( + status_code=401, + content={"detail": str(exc)}, + ) + @app.get( "/health", summary="Health check endpoint", @@ -1036,6 +1048,8 @@ async def api_graph( try: data = await app.state.memory.get_graph_data(bank_id, type, request_context=request_context) return data + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1082,6 +1096,8 @@ async def api_list( request_context=request_context, ) return data + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1198,6 +1214,10 @@ async def api_recall( ) except HTTPException: raise + except OperationValidationError as e: + raise HTTPException(status_code=e.status_code, detail=e.reason) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1260,6 +1280,10 @@ async def api_reflect( structured_output=core_result.structured_output, ) + except OperationValidationError as e: + raise HTTPException(status_code=e.status_code, detail=e.reason) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1280,6 +1304,8 @@ async def api_list_banks(request_context: RequestContext = Depends(get_request_c try: banks = await app.state.memory.list_banks(request_context=request_context) return BankListResponse(banks=banks) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1403,6 +1429,8 @@ async def api_stats(bank_id: str): failed_operations=failed_operations, ) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1427,6 +1455,8 @@ async def api_list_entities( try: entities = await app.state.memory.list_entities(bank_id, limit=limit, request_context=request_context) return EntityListResponse(items=[EntityListItem(**e) for e in entities]) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1464,7 +1494,7 @@ async def api_get_entity( for obs in entity["observations"] ], ) - except HTTPException: + except (AuthenticationError, HTTPException): raise except Exception as e: import traceback @@ -1517,7 +1547,7 @@ async def api_regenerate_entity_observations( for obs in entity["observations"] ], ) - except HTTPException: + except (AuthenticationError, HTTPException): raise except Exception as e: import traceback @@ -1555,6 +1585,8 @@ async def api_list_documents( bank_id=bank_id, search_query=q, limit=limit, offset=offset, request_context=request_context ) return data + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1585,7 +1617,7 @@ async def api_get_document( if not document: raise HTTPException(status_code=404, detail="Document not found") return document - except HTTPException: + except (AuthenticationError, HTTPException): raise except Exception as e: import traceback @@ -1614,7 +1646,7 @@ async def api_get_chunk(chunk_id: str, request_context: RequestContext = Depends if not chunk: raise HTTPException(status_code=404, detail="Chunk not found") return chunk - except HTTPException: + except (AuthenticationError, HTTPException): raise except Exception as e: import traceback @@ -1658,7 +1690,7 @@ async def api_delete_document( document_id=document_id, memory_units_deleted=result["memory_units_deleted"], ) - except HTTPException: + except (AuthenticationError, HTTPException): raise except Exception as e: import traceback @@ -1683,6 +1715,8 @@ async def api_list_operations(bank_id: str, request_context: RequestContext = De bank_id=bank_id, operations=[OperationResponse(**op) for op in operations], ) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1713,6 +1747,8 @@ async def api_cancel_operation( return CancelOperationResponse(**result) except ValueError as e: raise HTTPException(status_code=404, detail=str(e)) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1744,6 +1780,8 @@ async def api_get_bank_profile(bank_id: str, request_context: RequestContext = D disposition=DispositionTraits(**disposition_dict), background=profile["background"], ) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1782,6 +1820,8 @@ async def api_update_bank_disposition( disposition=DispositionTraits(**disposition_dict), background=profile["background"], ) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1811,6 +1851,8 @@ async def api_add_bank_background( response.disposition = DispositionTraits(**result["disposition"]) return response + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1862,6 +1904,8 @@ async def api_create_or_update_bank( disposition=DispositionTraits(**disposition_dict), background=final_profile["background"], ) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1889,6 +1933,8 @@ async def api_delete_bank(bank_id: str, request_context: RequestContext = Depend + result.get("entities_deleted", 0) + result.get("documents_deleted", 0), ) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -1963,6 +2009,10 @@ async def api_retain( return RetainResponse.model_validate( {"success": True, "bank_id": bank_id, "items_count": len(contents), "async": False} ) + except OperationValidationError as e: + raise HTTPException(status_code=e.status_code, detail=e.reason) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback @@ -2001,6 +2051,8 @@ async def api_clear_bank_memories( await app.state.memory.delete_bank(bank_id, fact_type=type, request_context=request_context) return DeleteResponse(success=True) + except (AuthenticationError, HTTPException): + raise except Exception as e: import traceback diff --git a/hindsight-api/hindsight_api/config.py b/hindsight-api/hindsight_api/config.py index 9f20b14a..34a68f0e 100644 --- a/hindsight-api/hindsight_api/config.py +++ b/hindsight-api/hindsight_api/config.py @@ -166,7 +166,9 @@ def from_env(cls) -> "HindsightConfig": lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true", # Observation thresholds observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))), - observation_top_entities=int(os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))), + observation_top_entities=int( + os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES)) + ), ) def get_llm_base_url(self) -> str: diff --git a/hindsight-api/hindsight_api/engine/llm_wrapper.py b/hindsight-api/hindsight_api/engine/llm_wrapper.py index d3e11f9b..8b9a32c7 100644 --- a/hindsight-api/hindsight_api/engine/llm_wrapper.py +++ b/hindsight-api/hindsight_api/engine/llm_wrapper.py @@ -120,11 +120,7 @@ def __init__( elif self.provider in ("ollama", "lmstudio"): # Use dummy key if not provided for local api_key = self.api_key or "local" - client_kwargs = { - "api_key": api_key, - "base_url": self.base_url, - "max_retries": 0 - } + client_kwargs = {"api_key": api_key, "base_url": self.base_url, "max_retries": 0} if self.timeout: client_kwargs["timeout"] = self.timeout self._client = AsyncOpenAI(**client_kwargs) @@ -207,7 +203,14 @@ async def call( # Handle Anthropic provider separately if self.provider == "anthropic": return await self._call_anthropic( - messages, response_format, max_completion_tokens, max_retries, initial_backoff, max_backoff, skip_validation, start_time + messages, + response_format, + max_completion_tokens, + max_retries, + initial_backoff, + max_backoff, + skip_validation, + start_time, ) # Handle Ollama with native API for structured output (better schema enforcement) @@ -297,8 +300,8 @@ async def call( schema_msg + "\n\n" + call_params["messages"][0]["content"] ) if self.provider not in ("lmstudio", "ollama"): - call_params["response_format"] = {"type": "json_object"} - + call_params["response_format"] = {"type": "json_object"} + logger.debug(f"Sending request to {self.provider}/{self.model} (timeout={self.timeout})") response = await self._client.chat.completions.create(**call_params) logger.debug(f"Received response from {self.provider}/{self.model}") diff --git a/hindsight-api/hindsight_api/engine/memory_engine.py b/hindsight-api/hindsight_api/engine/memory_engine.py index d2bbacce..4b411442 100644 --- a/hindsight-api/hindsight_api/engine/memory_engine.py +++ b/hindsight-api/hindsight_api/engine/memory_engine.py @@ -374,7 +374,7 @@ async def _validate_operation(self, validation_coro) -> None: result = await validation_coro if not result.allowed: - raise OperationValidationError(result.reason or "Operation not allowed") + raise OperationValidationError(result.reason or "Operation not allowed", result.status_code) async def _authenticate_tenant(self, request_context: "RequestContext | None") -> str: """ @@ -401,7 +401,9 @@ async def _authenticate_tenant(self, request_context: "RequestContext | None") - if request_context is None: raise AuthenticationError("RequestContext is required when tenant extension is configured") + # Let AuthenticationError propagate - HTTP layer will convert to 401 tenant_context = await self._tenant_extension.authenticate(request_context) + _current_schema.set(tenant_context.schema_name) return tenant_context.schema_name @@ -2827,13 +2829,16 @@ async def _handle_form_opinion(self, task_dict: dict[str, Any]): Handler for form opinion tasks. Args: - task_dict: Dict with keys: 'bank_id', 'answer_text', 'query' + task_dict: Dict with keys: 'bank_id', 'answer_text', 'query', 'tenant_id' """ bank_id = task_dict["bank_id"] answer_text = task_dict["answer_text"] query = task_dict["query"] + tenant_id = task_dict.get("tenant_id") - await self._extract_and_store_opinions_async(bank_id=bank_id, answer_text=answer_text, query=query) + await self._extract_and_store_opinions_async( + bank_id=bank_id, answer_text=answer_text, query=query, tenant_id=tenant_id + ) async def _handle_reinforce_opinion(self, task_dict: dict[str, Any]): """ @@ -3222,8 +3227,15 @@ def model_json_schema(self): answer_text = result.strip() # Submit form_opinion task for background processing + # Pass tenant_id from request context for internal authentication in background task await self._task_backend.submit_task( - {"type": "form_opinion", "bank_id": bank_id, "answer_text": answer_text, "query": query} + { + "type": "form_opinion", + "bank_id": bank_id, + "answer_text": answer_text, + "query": query, + "tenant_id": getattr(request_context, "tenant_id", None) if request_context else None, + } ) total_time = time.time() - reflect_start @@ -3261,7 +3273,9 @@ def model_json_schema(self): return result - async def _extract_and_store_opinions_async(self, bank_id: str, answer_text: str, query: str): + async def _extract_and_store_opinions_async( + self, bank_id: str, answer_text: str, query: str, tenant_id: str | None = None + ): """ Background task to extract and store opinions from think response. @@ -3271,6 +3285,7 @@ async def _extract_and_store_opinions_async(self, bank_id: str, answer_text: str bank_id: bank IDentifier answer_text: The generated answer text query: The original query + tenant_id: Tenant identifier for internal authentication """ try: # Extract opinions from the answer @@ -3281,10 +3296,11 @@ async def _extract_and_store_opinions_async(self, bank_id: str, answer_text: str from datetime import datetime current_time = datetime.now(UTC) - # Use internal request context for background tasks + # Use internal context with tenant_id for background authentication + # Extension can check internal=True to bypass normal auth from hindsight_api.models import RequestContext - internal_context = RequestContext() + internal_context = RequestContext(tenant_id=tenant_id, internal=True) for opinion in new_opinions: await self.retain_async( bank_id=bank_id, diff --git a/hindsight-api/hindsight_api/extensions/context.py b/hindsight-api/hindsight_api/extensions/context.py index 03e5726a..d229d48d 100644 --- a/hindsight-api/hindsight_api/extensions/context.py +++ b/hindsight-api/hindsight_api/extensions/context.py @@ -98,7 +98,14 @@ async def run_migration(self, schema: str) -> None: """Run migrations for a specific schema.""" from hindsight_api.migrations import run_migrations - run_migrations(self._database_url, schema=schema) + # Prefer getting URL from memory engine (handles pg0 case where URL is set after init) + db_url = self._database_url + if self._memory_engine is not None: + engine_url = getattr(self._memory_engine, "db_url", None) + if engine_url: + db_url = engine_url + + run_migrations(db_url, schema=schema) def get_memory_engine(self) -> "MemoryEngineInterface": """Get the memory engine interface.""" diff --git a/hindsight-api/hindsight_api/extensions/operation_validator.py b/hindsight-api/hindsight_api/extensions/operation_validator.py index cd862ce5..a1dec0eb 100644 --- a/hindsight-api/hindsight_api/extensions/operation_validator.py +++ b/hindsight-api/hindsight_api/extensions/operation_validator.py @@ -17,8 +17,9 @@ class OperationValidationError(Exception): """Raised when an operation fails validation.""" - def __init__(self, reason: str): + def __init__(self, reason: str, status_code: int = 403): self.reason = reason + self.status_code = status_code super().__init__(f"Operation validation failed: {reason}") @@ -28,6 +29,7 @@ class ValidationResult: allowed: bool reason: str | None = None + status_code: int = 403 # Default to Forbidden @classmethod def accept(cls) -> "ValidationResult": @@ -35,9 +37,9 @@ def accept(cls) -> "ValidationResult": return cls(allowed=True) @classmethod - def reject(cls, reason: str) -> "ValidationResult": - """Create a rejected validation result with a reason.""" - return cls(allowed=False, reason=reason) + def reject(cls, reason: str, status_code: int = 403) -> "ValidationResult": + """Create a rejected validation result with a reason and HTTP status code.""" + return cls(allowed=False, reason=reason, status_code=status_code) # ============================================================================= diff --git a/hindsight-api/hindsight_api/main.py b/hindsight-api/hindsight_api/main.py index 04a2296a..398fb73f 100644 --- a/hindsight-api/hindsight_api/main.py +++ b/hindsight-api/hindsight_api/main.py @@ -31,7 +31,7 @@ IdleTimeoutMiddleware, daemonize, ) -from .extensions import OperationValidatorExtension, load_extension +from .extensions import DefaultExtensionContext, OperationValidatorExtension, TenantExtension, load_extension # Filter deprecation warnings from third-party libraries warnings.filterwarnings("ignore", message="websockets.legacy is deprecated") @@ -169,6 +169,8 @@ def release_lock(): llm_api_key=config.llm_api_key, llm_model=config.llm_model, llm_base_url=config.llm_base_url, + llm_max_concurrent=config.llm_max_concurrent, + llm_timeout=config.llm_timeout, embeddings_provider=config.embeddings_provider, embeddings_local_model=config.embeddings_local_model, embeddings_tei_url=config.embeddings_tei_url, @@ -180,6 +182,8 @@ def release_lock(): log_level=args.log_level, mcp_enabled=config.mcp_enabled, graph_retriever=config.graph_retriever, + observation_min_facts=config.observation_min_facts, + observation_top_entities=config.observation_top_entities, skip_llm_verification=config.skip_llm_verification, lazy_reranker=config.lazy_reranker, ) @@ -196,10 +200,27 @@ def release_lock(): operation_validator = load_extension("OPERATION_VALIDATOR", OperationValidatorExtension) if operation_validator: import logging + logging.info(f"Loaded operation validator: {operation_validator.__class__.__name__}") + # Load tenant extension if configured + tenant_extension = load_extension("TENANT", TenantExtension) + if tenant_extension: + import logging + + logging.info(f"Loaded tenant extension: {tenant_extension.__class__.__name__}") + # Create MemoryEngine (reads configuration from environment) - _memory = MemoryEngine(operation_validator=operation_validator) + _memory = MemoryEngine(operation_validator=operation_validator, tenant_extension=tenant_extension) + + # Set extension context on tenant extension (needed for schema provisioning) + if tenant_extension: + extension_context = DefaultExtensionContext( + database_url=config.database_url, + memory_engine=_memory, + ) + tenant_extension.set_context(extension_context) + logging.info("Extension context set on tenant extension") # Create FastAPI app app = create_app( diff --git a/hindsight-api/hindsight_api/models.py b/hindsight-api/hindsight_api/models.py index de5eddc4..50eb1b09 100644 --- a/hindsight-api/hindsight_api/models.py +++ b/hindsight-api/hindsight_api/models.py @@ -18,6 +18,9 @@ class RequestContext: """ api_key: str | None = None + api_key_id: str | None = None # UUID of the API key used for authentication + tenant_id: str | None = None # Tenant identifier (set by extension after auth) + internal: bool = False # True for background/internal operations (not user-visible) from pgvector.sqlalchemy import Vector diff --git a/package-lock.json b/package-lock.json index c1e93547..84f4c332 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,7 +13,7 @@ }, "hindsight-clients/typescript": { "name": "@vectorize-io/hindsight-client", - "version": "0.1.14", + "version": "0.1.16", "license": "MIT", "devDependencies": { "@hey-api/openapi-ts": "^0.88.0", @@ -26,7 +26,7 @@ }, "hindsight-control-plane": { "name": "@vectorize-io/hindsight-control-plane", - "version": "0.1.14", + "version": "0.1.16", "license": "ISC", "dependencies": { "@radix-ui/react-alert-dialog": "^1.1.15",