diff --git a/hindsight-api/hindsight_api/api/http.py b/hindsight-api/hindsight_api/api/http.py index c6a608f8..8d98a4ba 100644 --- a/hindsight-api/hindsight_api/api/http.py +++ b/hindsight-api/hindsight_api/api/http.py @@ -385,7 +385,16 @@ class ReflectRequest(BaseModel): "query": "What do you think about artificial intelligence?", "budget": "low", "context": "This is for a research paper on AI ethics", + "max_tokens": 4096, "include": {"facts": {}}, + "response_schema": { + "type": "object", + "properties": { + "summary": {"type": "string"}, + "key_points": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["summary", "key_points"], + }, } } ) @@ -393,9 +402,14 @@ class ReflectRequest(BaseModel): query: str budget: Budget = Budget.LOW context: str | None = None + max_tokens: int = Field(default=4096, description="Maximum tokens for the response") include: ReflectIncludeOptions = Field( default_factory=ReflectIncludeOptions, description="Options for including additional data (disabled by default)" ) + response_schema: dict | None = Field( + default=None, + description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.", + ) class OpinionItem(BaseModel): @@ -440,12 +454,20 @@ class ReflectResponse(BaseModel): {"id": "123", "text": "AI is used in healthcare", "type": "world"}, {"id": "456", "text": "I discussed AI applications last week", "type": "experience"}, ], + "structured_output": { + "summary": "AI is transformative", + "key_points": ["Used in healthcare", "Discussed recently"], + }, } } ) text: str based_on: list[ReflectFact] = [] # Facts used to generate the response + structured_output: dict | None = Field( + default=None, + description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.", + ) class BanksResponse(BaseModel): @@ -1211,6 +1233,8 @@ async def api_reflect( query=request.query, budget=request.budget, context=request.context, + max_tokens=request.max_tokens, + response_schema=request.response_schema, request_context=request_context, ) @@ -1233,6 +1257,7 @@ async def api_reflect( return ReflectResponse( text=core_result.text, based_on=based_on_facts, + structured_output=core_result.structured_output, ) except Exception as e: diff --git a/hindsight-api/hindsight_api/engine/interface.py b/hindsight-api/hindsight_api/engine/interface.py index 937f8ab3..a861b5b5 100644 --- a/hindsight-api/hindsight_api/engine/interface.py +++ b/hindsight-api/hindsight_api/engine/interface.py @@ -110,6 +110,8 @@ async def reflect_async( *, budget: "Budget | None" = None, context: str | None = None, + max_tokens: int = 4096, + response_schema: dict | None = None, request_context: "RequestContext", ) -> "ReflectResult": """ @@ -120,6 +122,8 @@ async def reflect_async( query: The question to reflect on. budget: Search budget for retrieving context. context: Additional context for the reflection. + max_tokens: Maximum tokens for the response. + response_schema: Optional JSON Schema for structured output. request_context: Request context for authentication. 
Returns: diff --git a/hindsight-api/hindsight_api/engine/llm_wrapper.py b/hindsight-api/hindsight_api/engine/llm_wrapper.py index b107c674..ece9d9ae 100644 --- a/hindsight-api/hindsight_api/engine/llm_wrapper.py +++ b/hindsight-api/hindsight_api/engine/llm_wrapper.py @@ -135,6 +135,7 @@ async def call( initial_backoff: float = 1.0, max_backoff: float = 60.0, skip_validation: bool = False, + strict_schema: bool = False, ) -> Any: """ Make an LLM API call with retry logic. @@ -149,6 +150,7 @@ async def call( initial_backoff: Initial backoff time in seconds. max_backoff: Maximum backoff time in seconds. skip_validation: Return raw JSON without Pydantic validation. + strict_schema: Use strict JSON schema enforcement (OpenAI only). Guarantees all required fields. Returns: Parsed response if response_format is provided, otherwise text content. @@ -226,19 +228,35 @@ async def call( for attempt in range(max_retries + 1): try: if response_format is not None: - # Add schema to system message for JSON mode + schema = None if hasattr(response_format, "model_json_schema"): schema = response_format.model_json_schema() - schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}" - if call_params["messages"] and call_params["messages"][0].get("role") == "system": - call_params["messages"][0]["content"] += schema_msg - elif call_params["messages"]: - call_params["messages"][0]["content"] = ( - schema_msg + "\n\n" + call_params["messages"][0]["content"] - ) + if strict_schema and schema is not None: + # Use OpenAI's strict JSON schema enforcement + # This guarantees all required fields are returned + call_params["response_format"] = { + "type": "json_schema", + "json_schema": { + "name": "response", + "strict": True, + "schema": schema, + }, + } + else: + # Soft enforcement: add schema to prompt and use json_object mode + if schema is not None: + schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}" + + if call_params["messages"] and call_params["messages"][0].get("role") == "system": + call_params["messages"][0]["content"] += schema_msg + elif call_params["messages"]: + call_params["messages"][0]["content"] = ( + schema_msg + "\n\n" + call_params["messages"][0]["content"] + ) + + call_params["response_format"] = {"type": "json_object"} - call_params["response_format"] = {"type": "json_object"} response = await self._client.chat.completions.create(**call_params) content = response.choices[0].message.content diff --git a/hindsight-api/hindsight_api/engine/memory_engine.py b/hindsight-api/hindsight_api/engine/memory_engine.py index d7d417d8..966e5951 100644 --- a/hindsight-api/hindsight_api/engine/memory_engine.py +++ b/hindsight-api/hindsight_api/engine/memory_engine.py @@ -3076,6 +3076,8 @@ async def reflect_async( *, budget: Budget | None = None, context: str | None = None, + max_tokens: int = 4096, + response_schema: dict | None = None, request_context: "RequestContext", ) -> ReflectResult: """ @@ -3087,19 +3089,22 @@ async def reflect_async( 3. Retrieves existing opinions (bank's formed perspectives) 4. Uses LLM to formulate an answer 5. Extracts and stores any new opinions formed during reflection - 6. Returns plain text answer and the facts used + 6. Optionally generates structured output based on response_schema + 7. 
Returns plain text answer and the facts used Args: bank_id: bank identifier query: Question to answer budget: Budget level for memory exploration (low=100, mid=300, high=600 units) context: Additional context string to include in LLM prompt (not used in recall) + max_tokens: Maximum tokens for the LLM response + response_schema: Optional JSON Schema for structured output Returns: ReflectResult containing: - text: Plain text answer (no markdown) - based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects) - new_opinions: List of newly formed opinions + - structured_output: Optional dict if response_schema was provided """ # Use cached LLM config if self._llm_config is None: @@ -3177,17 +3182,40 @@ async def reflect_async( log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars") system_message = think_utils.get_system_message(disposition) + messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}] + + # Prepare response_format if schema provided + response_format = None + if response_schema is not None: + # Wrapper class to provide Pydantic-like interface for raw JSON schemas + class JsonSchemaWrapper: + def __init__(self, schema: dict): + self._schema = schema + + def model_json_schema(self): + return self._schema + + response_format = JsonSchemaWrapper(response_schema) llm_start = time.time() - answer_text = await self._llm_config.call( - messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}], - scope="memory_think", - temperature=0.9, - max_completion_tokens=1000, + result = await self._llm_config.call( + messages=messages, + scope="memory_reflect", + max_completion_tokens=max_tokens, + response_format=response_format, + skip_validation=response_format is not None, + strict_schema=response_format is not None, ) llm_time = time.time() - llm_start - answer_text = answer_text.strip() + # Handle response based on whether structured output was requested + if response_schema is not None: + structured_output = result + answer_text = "" # Empty for backward compatibility + log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated") + else: + structured_output = None + answer_text = result.strip() # Submit form_opinion task for background processing await self._task_backend.submit_task( @@ -3205,6 +3233,7 @@ async def reflect_async( text=answer_text, based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results}, new_opinions=[], # Opinions are being extracted asynchronously + structured_output=structured_output, ) # Call post-operation hook if validator is configured diff --git a/hindsight-api/hindsight_api/engine/response_models.py index 452228ad..9607848b 100644 --- a/hindsight-api/hindsight_api/engine/response_models.py +++ b/hindsight-api/hindsight_api/engine/response_models.py @@ -123,7 +123,8 @@ class ReflectResult(BaseModel): Result from a reflect operation. Contains the formulated answer, the facts it was based on (organized by type), - and any new opinions that were formed during the reflection process. + any new opinions that were formed during the reflection process, and optionally + structured output if a response schema was provided.
""" model_config = ConfigDict( @@ -145,6 +146,7 @@ class ReflectResult(BaseModel): "opinion": [], }, "new_opinions": ["Machine learning has great potential in healthcare"], + "structured_output": {"summary": "ML in healthcare", "confidence": 0.9}, } } ) @@ -154,6 +156,10 @@ class ReflectResult(BaseModel): description="Facts used to formulate the answer, organized by type (world, experience, opinion)" ) new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection") + structured_output: dict[str, Any] | None = Field( + default=None, + description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.", + ) class Opinion(BaseModel): diff --git a/hindsight-api/tests/test_http_api_integration.py b/hindsight-api/tests/test_http_api_integration.py index 1707be5c..5c0b7db0 100644 --- a/hindsight-api/tests/test_http_api_integration.py +++ b/hindsight-api/tests/test_http_api_integration.py @@ -608,3 +608,167 @@ async def submit_async_retain(doc): assert response.status_code == 200 results = response.json()["results"] assert len(results) > 0, f"Should find memories for document {i}" + + +@pytest.mark.asyncio +async def test_reflect_structured_output(api_client): + """Test reflect endpoint with structured output via response_schema. + + When response_schema is provided, the reflect endpoint should return + both the natural language text response and a structured_output field + containing the response parsed according to the provided JSON schema. + """ + test_bank_id = f"reflect_structured_test_{datetime.now().timestamp()}" + + # Store some memories to reflect on + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/memories", + json={ + "items": [ + { + "content": "Alice is a senior machine learning engineer with 8 years of experience.", + "context": "team member info" + }, + { + "content": "Bob is a junior data scientist who joined last month.", + "context": "team member info" + }, + { + "content": "The team uses Python and TensorFlow for most projects.", + "context": "tech stack" + } + ] + } + ) + assert response.status_code == 200 + + # Define a JSON schema for structured output + response_schema = { + "type": "object", + "properties": { + "team_members": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "role": {"type": "string"}, + "experience_level": {"type": "string"} + } + } + }, + "technologies": { + "type": "array", + "items": {"type": "string"} + }, + "summary": {"type": "string"} + }, + "required": ["team_members", "summary"] + } + + # Call reflect with response_schema + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/reflect", + json={ + "query": "Give me an overview of the team and their tech stack", + "response_schema": response_schema + } + ) + assert response.status_code == 200 + result = response.json() + + # Verify text field exists (empty when using structured output) + assert "text" in result + assert result["text"] == "" + + # Verify structured output exists and has expected structure + assert "structured_output" in result + assert result["structured_output"] is not None + + structured = result["structured_output"] + assert "team_members" in structured + assert "summary" in structured + assert isinstance(structured["team_members"], list) + assert isinstance(structured["summary"], str) + + # Verify team members have the expected fields + if len(structured["team_members"]) 
> 0: + member = structured["team_members"][0] + assert "name" in member or "role" in member # At least some fields should be present + + +@pytest.mark.asyncio +async def test_reflect_without_structured_output(api_client): + """Test that reflect works normally without response_schema. + + When response_schema is not provided, the structured_output field + should be null/None in the response. + """ + test_bank_id = f"reflect_no_structured_test_{datetime.now().timestamp()}" + + # Store a memory + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/memories", + json={ + "items": [ + { + "content": "The project deadline is next Friday.", + "context": "project timeline" + } + ] + } + ) + assert response.status_code == 200 + + # Call reflect without response_schema + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/reflect", + json={ + "query": "When is the project deadline?" + } + ) + assert response.status_code == 200 + result = response.json() + + # Verify response has text but structured_output is null + assert "text" in result + assert len(result["text"]) > 0 + assert result.get("structured_output") is None + + +@pytest.mark.asyncio +async def test_reflect_with_max_tokens(api_client): + """Test reflect endpoint with custom max_tokens parameter. + + The max_tokens parameter controls the maximum tokens for the LLM response. + """ + test_bank_id = f"reflect_max_tokens_test_{datetime.now().timestamp()}" + + # Store a memory + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/memories", + json={ + "items": [ + { + "content": "Python is a popular programming language for data science and machine learning.", + "context": "tech" + } + ] + } + ) + assert response.status_code == 200 + + # Call reflect with custom max_tokens + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/reflect", + json={ + "query": "What is Python used for?", + "max_tokens": 500 + } + ) + assert response.status_code == 200 + result = response.json() + + # Verify response has text + assert "text" in result + assert len(result["text"]) > 0 diff --git a/hindsight-cli/src/commands/explore.rs index 1d2d42ce..283d4366 100644 --- a/hindsight-cli/src/commands/explore.rs +++ b/hindsight-cli/src/commands/explore.rs @@ -354,7 +354,9 @@ impl App { query: query_text, budget: Some(query_budget), context: None, + max_tokens: 4096, include: None, + response_schema: None, }; let result = client.reflect(&bank_id, &request, false) diff --git a/hindsight-cli/src/commands/memory.rs index 600a3295..c1d3995a 100644 --- a/hindsight-cli/src/commands/memory.rs +++ b/hindsight-cli/src/commands/memory.rs @@ -10,6 +10,7 @@ use crate::ui; // Import types from generated client use hindsight_client::types::{Budget, ChunkIncludeOptions, IncludeOptions}; +use serde_json; // Helper function to parse budget string to Budget enum fn parse_budget(budget: &str) -> Budget { @@ -86,6 +87,8 @@ pub fn reflect( query: String, budget: String, context: Option<String>, + max_tokens: Option<u32>, + schema_path: Option<PathBuf>, verbose: bool, output_format: OutputFormat, ) -> Result<()> { @@ -95,11 +98,24 @@ pub fn reflect( None }; + // Load and parse schema if provided + let response_schema = if let Some(path) = schema_path { + let schema_content = fs::read_to_string(&path) + .with_context(|| format!("Failed to read schema file: {}", path.display()))?; + let schema: serde_json::Map<String, serde_json::Value> = serde_json::from_str(&schema_content)
.with_context(|| format!("Failed to parse JSON schema from: {}", path.display()))?; + Some(schema) + } else { + None + }; + let request = ReflectRequest { query, budget: Some(parse_budget(&budget)), context, + max_tokens: max_tokens.unwrap_or(4096), include: None, + response_schema, }; let response = client.reflect(agent_id, &request, verbose); diff --git a/hindsight-cli/src/main.rs index 1eba251a..250b9f3c 100644 --- a/hindsight-cli/src/main.rs +++ b/hindsight-cli/src/main.rs @@ -206,6 +206,14 @@ enum MemoryCommands { /// Additional context #[arg(short = 'c', long)] context: Option<String>, + + /// Maximum tokens for the response (server default: 4096) + #[arg(short = 'm', long)] + max_tokens: Option<u32>, + + /// Path to JSON schema file for structured output + #[arg(short = 's', long)] + schema: Option<PathBuf>, }, /// Store (retain) a single memory @@ -421,8 +429,8 @@ fn run() -> Result<()> { MemoryCommands::Recall { bank_id, query, fact_type, budget, max_tokens, trace, include_chunks, chunk_max_tokens } => { commands::memory::recall(&client, &bank_id, query, fact_type, budget, max_tokens, trace, include_chunks, chunk_max_tokens, verbose, output_format) } - MemoryCommands::Reflect { bank_id, query, budget, context } => { - commands::memory::reflect(&client, &bank_id, query, budget, context, verbose, output_format) + MemoryCommands::Reflect { bank_id, query, budget, context, max_tokens, schema } => { + commands::memory::reflect(&client, &bank_id, query, budget, context, max_tokens, schema, verbose, output_format) } MemoryCommands::Retain { bank_id, content, doc_id, context, r#async } => { commands::memory::retain(&client, &bank_id, content, doc_id, context, r#async, verbose, output_format) diff --git a/hindsight-cli/src/ui.rs index 5fa43ae3..185e87a9 100644 --- a/hindsight-cli/src/ui.rs +++ b/hindsight-cli/src/ui.rs @@ -175,6 +175,16 @@ pub fn print_think_response(response: &ReflectResponse) { if !response.based_on.is_empty() { println!("{}", dim(&format!("Based on {} memory units", response.based_on.len()))); } + + // Display structured output if present + if let Some(structured) = &response.structured_output { + println!(); + println!("{}", gradient_text("─── Structured Output ───")); + println!(); + if let Ok(json) = serde_json::to_string_pretty(structured) { + println!("{}", json); + } + } } pub fn print_trace_info(trace: &serde_json::Map<String, serde_json::Value>) { diff --git a/hindsight-clients/python/.openapi-generator/FILES index a16951aa..478ff38b 100644 --- a/hindsight-clients/python/.openapi-generator/FILES +++ b/hindsight-clients/python/.openapi-generator/FILES @@ -102,53 +102,4 @@ hindsight_client_api/models/update_disposition_request.py hindsight_client_api/models/validation_error.py hindsight_client_api/models/validation_error_loc_inner.py hindsight_client_api/rest.py -hindsight_client_api/test/__init__.py -hindsight_client_api/test/test_add_background_request.py -hindsight_client_api/test/test_background_response.py -hindsight_client_api/test/test_bank_list_item.py -hindsight_client_api/test/test_bank_list_response.py -hindsight_client_api/test/test_bank_profile_response.py -hindsight_client_api/test/test_bank_stats_response.py -hindsight_client_api/test/test_banks_api.py -hindsight_client_api/test/test_budget.py -hindsight_client_api/test/test_cancel_operation_response.py -hindsight_client_api/test/test_chunk_data.py -hindsight_client_api/test/test_chunk_include_options.py
-hindsight_client_api/test/test_chunk_response.py -hindsight_client_api/test/test_create_bank_request.py -hindsight_client_api/test/test_delete_document_response.py -hindsight_client_api/test/test_delete_response.py -hindsight_client_api/test/test_disposition_traits.py -hindsight_client_api/test/test_document_response.py -hindsight_client_api/test/test_documents_api.py -hindsight_client_api/test/test_entities_api.py -hindsight_client_api/test/test_entity_detail_response.py -hindsight_client_api/test/test_entity_include_options.py -hindsight_client_api/test/test_entity_list_item.py -hindsight_client_api/test/test_entity_list_response.py -hindsight_client_api/test/test_entity_observation_response.py -hindsight_client_api/test/test_entity_state_response.py -hindsight_client_api/test/test_graph_data_response.py -hindsight_client_api/test/test_http_validation_error.py -hindsight_client_api/test/test_include_options.py -hindsight_client_api/test/test_list_documents_response.py -hindsight_client_api/test/test_list_memory_units_response.py -hindsight_client_api/test/test_memory_api.py -hindsight_client_api/test/test_memory_item.py -hindsight_client_api/test/test_monitoring_api.py -hindsight_client_api/test/test_operation_response.py -hindsight_client_api/test/test_operations_api.py -hindsight_client_api/test/test_operations_list_response.py -hindsight_client_api/test/test_recall_request.py -hindsight_client_api/test/test_recall_response.py -hindsight_client_api/test/test_recall_result.py -hindsight_client_api/test/test_reflect_fact.py -hindsight_client_api/test/test_reflect_include_options.py -hindsight_client_api/test/test_reflect_request.py -hindsight_client_api/test/test_reflect_response.py -hindsight_client_api/test/test_retain_request.py -hindsight_client_api/test/test_retain_response.py -hindsight_client_api/test/test_update_disposition_request.py -hindsight_client_api/test/test_validation_error.py -hindsight_client_api/test/test_validation_error_loc_inner.py hindsight_client_api_README.md diff --git a/hindsight-clients/python/hindsight_client/hindsight_client.py b/hindsight-clients/python/hindsight_client/hindsight_client.py index dc8312cf..6af56599 100644 --- a/hindsight-clients/python/hindsight_client/hindsight_client.py +++ b/hindsight-clients/python/hindsight_client/hindsight_client.py @@ -229,6 +229,8 @@ def reflect( query: str, budget: str = "low", context: Optional[str] = None, + max_tokens: Optional[int] = None, + response_schema: Optional[Dict[str, Any]] = None, ) -> ReflectResponse: """ Generate a contextual answer based on bank identity and memories. @@ -238,14 +240,21 @@ def reflect( query: The question or prompt budget: Budget level for reflection - "low", "mid", or "high" (default: "low") context: Optional additional context + max_tokens: Maximum tokens for the response (server default: 4096) + response_schema: Optional JSON Schema for structured output. When provided, + the response will include a 'structured_output' field with the LLM + response parsed according to this schema. 
Returns: - ReflectResponse with answer text and optionally facts used + ReflectResponse with answer text, optionally facts used, and optionally + structured_output if response_schema was provided """ request_obj = reflect_request.ReflectRequest( query=query, budget=budget, context=context, + max_tokens=max_tokens, + response_schema=response_schema, ) return _run_async(self._memory_api.reflect(bank_id, request_obj)) diff --git a/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md b/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md index 19b80147..afb95e23 100644 --- a/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md +++ b/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md @@ -9,7 +9,9 @@ Name | Type | Description | Notes **query** | **str** | | **budget** | [**Budget**](Budget.md) | | [optional] **context** | **str** | | [optional] +**max_tokens** | **int** | Maximum tokens for the response | [optional] [default to 4096] **include** | [**ReflectIncludeOptions**](ReflectIncludeOptions.md) | Options for including additional data (disabled by default) | [optional] +**response_schema** | **Dict[str, object]** | | [optional] ## Example diff --git a/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md b/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md index 266029a1..27f28498 100644 --- a/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md +++ b/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md @@ -8,6 +8,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **text** | **str** | | **based_on** | [**List[ReflectFact]**](ReflectFact.md) | | [optional] [default to []] +**structured_output** | **Dict[str, object]** | | [optional] ## Example diff --git a/hindsight-clients/python/hindsight_client_api/models/reflect_request.py b/hindsight-clients/python/hindsight_client_api/models/reflect_request.py index d45aacd4..dca346cb 100644 --- a/hindsight-clients/python/hindsight_client_api/models/reflect_request.py +++ b/hindsight-clients/python/hindsight_client_api/models/reflect_request.py @@ -17,7 +17,7 @@ import re # noqa: F401 import json -from pydantic import BaseModel, ConfigDict, Field, StrictStr +from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr from typing import Any, ClassVar, Dict, List, Optional from hindsight_client_api.models.budget import Budget from hindsight_client_api.models.reflect_include_options import ReflectIncludeOptions @@ -31,8 +31,10 @@ class ReflectRequest(BaseModel): query: StrictStr budget: Optional[Budget] = None context: Optional[StrictStr] = None + max_tokens: Optional[StrictInt] = Field(default=4096, description="Maximum tokens for the response") include: Optional[ReflectIncludeOptions] = Field(default=None, description="Options for including additional data (disabled by default)") - __properties: ClassVar[List[str]] = ["query", "budget", "context", "include"] + response_schema: Optional[Dict[str, Any]] = None + __properties: ClassVar[List[str]] = ["query", "budget", "context", "max_tokens", "include", "response_schema"] model_config = ConfigDict( populate_by_name=True, @@ -81,6 +83,11 @@ def to_dict(self) -> Dict[str, Any]: if self.context is None and "context" in self.model_fields_set: _dict['context'] = None + # set to None if response_schema (nullable) is None + # and model_fields_set contains the field + if self.response_schema is None and "response_schema" 
in self.model_fields_set: + _dict['response_schema'] = None + return _dict @classmethod @@ -96,7 +103,9 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "query": obj.get("query"), "budget": obj.get("budget"), "context": obj.get("context"), - "include": ReflectIncludeOptions.from_dict(obj["include"]) if obj.get("include") is not None else None + "max_tokens": obj.get("max_tokens") if obj.get("max_tokens") is not None else 4096, + "include": ReflectIncludeOptions.from_dict(obj["include"]) if obj.get("include") is not None else None, + "response_schema": obj.get("response_schema") }) return _obj diff --git a/hindsight-clients/python/hindsight_client_api/models/reflect_response.py index d9b32bb6..74773142 100644 --- a/hindsight-clients/python/hindsight_client_api/models/reflect_response.py +++ b/hindsight-clients/python/hindsight_client_api/models/reflect_response.py @@ -29,7 +29,8 @@ class ReflectResponse(BaseModel): """ # noqa: E501 text: StrictStr based_on: Optional[List[ReflectFact]] = None - __properties: ClassVar[List[str]] = ["text", "based_on"] + structured_output: Optional[Dict[str, Any]] = None + __properties: ClassVar[List[str]] = ["text", "based_on", "structured_output"] model_config = ConfigDict( populate_by_name=True, @@ -77,6 +78,11 @@ def to_dict(self) -> Dict[str, Any]: if _item_based_on: _items.append(_item_based_on.to_dict()) _dict['based_on'] = _items + # set to None if structured_output (nullable) is None + # and model_fields_set contains the field + if self.structured_output is None and "structured_output" in self.model_fields_set: + _dict['structured_output'] = None + return _dict @classmethod @@ -90,7 +96,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: _obj = cls.model_validate({ "text": obj.get("text"), - "based_on": [ReflectFact.from_dict(_item) for _item in obj["based_on"]] if obj.get("based_on") is not None else None + "based_on": [ReflectFact.from_dict(_item) for _item in obj["based_on"]] if obj.get("based_on") is not None else None, + "structured_output": obj.get("structured_output") }) return _obj diff --git a/hindsight-clients/python/tests/test_main_operations.py index b1457b12..56861b9f 100644 --- a/hindsight-clients/python/tests/test_main_operations.py +++ b/hindsight-clients/python/tests/test_main_operations.py @@ -173,6 +173,51 @@ def test_reflect_with_context(self, client, bank_id): assert response.text is not None assert len(response.text) > 0 + def test_reflect_with_max_tokens(self, client, bank_id): + """Test reflect with custom max_tokens parameter.""" + response = client.reflect( + bank_id=bank_id, + query="What do you think about Python?", + max_tokens=500, + ) + + assert response is not None + assert response.text is not None + assert len(response.text) > 0 + + def test_reflect_with_structured_output(self, client, bank_id): + """Test reflect with structured output via response_schema. + + When response_schema is provided, the response includes a structured_output + field parsed according to the provided JSON schema. The text field is empty + since only a single LLM call is made for structured output.
+ """ + from typing import Optional + from pydantic import BaseModel + + # Define schema using Pydantic model + class RecommendationResponse(BaseModel): + recommendation: str + reasons: list[str] + confidence: Optional[str] = None # Optional for LLM flexibility + + response = client.reflect( + bank_id=bank_id, + query="What programming language should I learn for data science?", + response_schema=RecommendationResponse.model_json_schema(), + max_tokens=10000, + ) + + assert response is not None + # Text is empty when using structured output (single LLM call) + assert response.text == "" + + # Verify structured output is present and can be parsed into model + assert response.structured_output is not None + result = RecommendationResponse.model_validate(response.structured_output) + assert result.recommendation + assert isinstance(result.reasons, list) + class TestListMemories: """Tests for listing memories.""" diff --git a/hindsight-clients/rust/src/lib.rs b/hindsight-clients/rust/src/lib.rs index 4d727c24..f1f71062 100644 --- a/hindsight-clients/rust/src/lib.rs +++ b/hindsight-clients/rust/src/lib.rs @@ -101,7 +101,9 @@ mod tests { query: "What do you know about Alice?".to_string(), budget: None, context: None, + max_tokens: 4096, include: None, + response_schema: None, }; let reflect_response = client .reflect(&bank_id, None, &reflect_request) diff --git a/hindsight-clients/typescript/generated/types.gen.ts b/hindsight-clients/typescript/generated/types.gen.ts index e4629425..9ae183ba 100644 --- a/hindsight-clients/typescript/generated/types.gen.ts +++ b/hindsight-clients/typescript/generated/types.gen.ts @@ -898,10 +898,24 @@ export type ReflectRequest = { * Context */ context?: string | null; + /** + * Max Tokens + * + * Maximum tokens for the response + */ + max_tokens?: number; /** * Options for including additional data (disabled by default) */ include?: ReflectIncludeOptions; + /** + * Response Schema + * + * Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema. + */ + response_schema?: { + [key: string]: unknown; + } | null; }; /** @@ -918,6 +932,14 @@ export type ReflectResponse = { * Based On */ based_on?: Array; + /** + * Structured Output + * + * Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request. 
+ */ + structured_output?: { + [key: string]: unknown; + } | null; }; /** diff --git a/hindsight-control-plane/package.json b/hindsight-control-plane/package.json index bd6af0b4..90aabd0e 100644 --- a/hindsight-control-plane/package.json +++ b/hindsight-control-plane/package.json @@ -11,7 +11,7 @@ "public" ], "scripts": { - "dev": "next dev", + "dev": "next dev --turbopack -p $(node -e \"const net=require('net');const s=net.createServer();s.listen(0,()=>{console.log(s.address().port);s.close()})\")", "build": "next build && npm run build:standalone", "build:standalone": "rm -rf standalone && STANDALONE_ROOT=$(find .next/standalone -path '*/node_modules' -prune -o -name 'server.js' -print | head -1 | xargs dirname) && cp -r \"$STANDALONE_ROOT\" standalone && cp -r .next/standalone/node_modules standalone/node_modules && mkdir -p standalone/.next && cp -r .next/static standalone/.next/static && mkdir -p standalone/public && cp -r public/* standalone/public/ 2>/dev/null || true", "start": "next start", diff --git a/hindsight-docs/docs/developer/api/reflect.mdx b/hindsight-docs/docs/developer/api/reflect.mdx index c5f771fb..751ea08f 100644 --- a/hindsight-docs/docs/developer/api/reflect.mdx +++ b/hindsight-docs/docs/developer/api/reflect.mdx @@ -52,6 +52,8 @@ Make sure you've completed the [Quick Start](./quickstart) to install the client | `query` | string | required | Question or prompt | | `budget` | string | "low" | Budget level: "low", "mid", "high" | | `context` | string | None | Additional context for the query | +| `max_tokens` | int | 4096 | Maximum tokens for the response | +| `response_schema` | object | None | JSON Schema for [structured output](#structured-output) | @@ -127,3 +129,107 @@ This enables: - **Transparency** — users see why the bank said something - **Verification** — check if the response is grounded in facts - **Debugging** — understand retrieval quality + +## Structured Output + +For applications that need to process responses programmatically, you can request structured output by providing a JSON Schema via `response_schema`. When provided, the response includes a `structured_output` field with the LLM response parsed according to the schema. The `text` field will be empty since only a single LLM call is made for efficiency. 
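+Under the hood, every client below makes a single POST to the bank's reflect endpoint. A minimal raw-HTTP sketch (the host, port, and bank name are placeholders; adjust them to your deployment):
+
+```python
+import requests  # assumes the `requests` package is installed
+
+schema = {
+    "type": "object",
+    "properties": {
+        "summary": {"type": "string"},
+        "key_points": {"type": "array", "items": {"type": "string"}},
+    },
+    "required": ["summary", "key_points"],
+}
+
+resp = requests.post(
+    "http://localhost:8000/v1/default/banks/my-bank/reflect",  # placeholder host and bank
+    json={"query": "Summarize what you know about the team", "response_schema": schema},
+)
+body = resp.json()
+assert body["text"] == ""         # text is empty when response_schema is set
+print(body["structured_output"])  # dict matching the schema
+```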
+ +The easiest way to define a schema is using **Pydantic models**: + + + + +```python +from pydantic import BaseModel +from hindsight_client import Hindsight + +# Define your response structure with Pydantic +class HiringRecommendation(BaseModel): + recommendation: str + confidence: str # "low", "medium", "high" + key_factors: list[str] + risks: list[str] = [] + +with Hindsight() as client: + response = client.reflect( + bank_id="hiring-team", + query="Should we hire Alice for the ML team lead position?", + response_schema=HiringRecommendation.model_json_schema(), + ) + + # Parse structured output into Pydantic model + result = HiringRecommendation.model_validate(response.structured_output) + print(f"Recommendation: {result.recommendation}") + print(f"Confidence: {result.confidence}") + print(f"Key factors: {result.key_factors}") +``` + + + + +```javascript +import { Hindsight } from "@anthropic-ai/hindsight"; + +const client = new Hindsight(); + +// Define JSON schema directly +const responseSchema = { + type: "object", + properties: { + recommendation: { type: "string" }, + confidence: { type: "string", enum: ["low", "medium", "high"] }, + key_factors: { type: "array", items: { type: "string" } }, + risks: { type: "array", items: { type: "string" } }, + }, + required: ["recommendation", "confidence", "key_factors"], +}; + +const response = await client.reflect({ + bankId: "hiring-team", + query: "Should we hire Alice for the ML team lead position?", + responseSchema: responseSchema, +}); + +// Structured output +console.log(response.structuredOutput.recommendation); +console.log(response.structuredOutput.keyFactors); +``` + + + + +First, create a JSON schema file `schema.json`: +```json +{ + "type": "object", + "properties": { + "recommendation": {"type": "string"}, + "confidence": {"type": "string", "enum": ["low", "medium", "high"]}, + "key_factors": {"type": "array", "items": {"type": "string"}} + }, + "required": ["recommendation", "confidence", "key_factors"] +} +``` + +Then use the `--schema` flag: +```bash +hindsight memory reflect hiring-team \ + "Should we hire Alice for the ML team lead position?" \ + --schema schema.json +```
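+
+Strict schema enforcement is delegated to the LLM provider and, per the server's LLM wrapper, is guaranteed only for OpenAI-style backends; if your deployment runs on a different provider, validating `structured_output` client-side is a cheap safeguard. A sketch using the third-party `jsonschema` package (an assumption; it is not a client dependency), reusing `response` and `HiringRecommendation` from the Python example above:
+
+```python
+# pip install jsonschema  (third-party; illustrative, not required by the client)
+from jsonschema import ValidationError, validate
+
+try:
+    validate(
+        instance=response.structured_output,
+        schema=HiringRecommendation.model_json_schema(),
+    )
+except ValidationError as exc:
+    # The schema is not guaranteed on all backends: retry, fall back, or log.
+    print(f"Schema validation failed: {exc.message}")
+```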
+ + + + +| Use Case | Why Structured Output Helps | +|----------|----------------------------| +| **Decision pipelines** | Parse recommendations into workflow systems | +| **Dashboards** | Extract confidence scores, risk factors for visualization | +| **Multi-agent systems** | Pass structured data between agents | +| **Auditing** | Log structured decisions with clear reasoning | + +**Tips:** +- Use Pydantic's `model_json_schema()` for type-safe schema generation +- Use `model_validate()` to parse the response back into your Pydantic model +- Keep schemas focused — extract only what you need +- Use `Optional` fields for data that may not always be available diff --git a/hindsight-docs/openapi.json index 2eec09fd..792b296d 100644 --- a/hindsight-docs/openapi.json +++ b/hindsight-docs/openapi.json @@ -3269,9 +3269,28 @@ ], "title": "Context" }, + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "Maximum tokens for the response", + "default": 4096 + }, "include": { "$ref": "#/components/schemas/ReflectIncludeOptions", "description": "Options for including additional data (disabled by default)" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "Optional JSON Schema for structured output.
When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema." } }, "type": "object", @@ -3286,7 +3305,26 @@ "include": { "facts": {} }, - "query": "What do you think about artificial intelligence?" + "max_tokens": 4096, + "query": "What do you think about artificial intelligence?", + "response_schema": { + "properties": { + "summary": { + "type": "string" + }, + "key_points": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "summary", + "key_points" + ], + "type": "object" + } } }, "ReflectResponse": { @@ -3302,6 +3340,19 @@ "type": "array", "title": "Based On", "default": [] + }, + "structured_output": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Structured Output", + "description": "Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request." } }, "type": "object", @@ -3323,6 +3374,13 @@ "type": "experience" } ], + "structured_output": { + "key_points": [ + "Used in healthcare", + "Discussed recently" + ], + "summary": "AI is transformative" + }, "text": "Based on my understanding, AI is a transformative technology..." } }, diff --git a/openapi.json b/openapi.json index 2eec09fd..792b296d 100644 --- a/openapi.json +++ b/openapi.json @@ -3269,9 +3269,28 @@ ], "title": "Context" }, + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "Maximum tokens for the response", + "default": 4096 + }, "include": { "$ref": "#/components/schemas/ReflectIncludeOptions", "description": "Options for including additional data (disabled by default)" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema." } }, "type": "object", @@ -3286,7 +3305,26 @@ "include": { "facts": {} }, - "query": "What do you think about artificial intelligence?" + "max_tokens": 4096, + "query": "What do you think about artificial intelligence?", + "response_schema": { + "properties": { + "summary": { + "type": "string" + }, + "key_points": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "summary", + "key_points" + ], + "type": "object" + } } }, "ReflectResponse": { @@ -3302,6 +3340,19 @@ "type": "array", "title": "Based On", "default": [] + }, + "structured_output": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Structured Output", + "description": "Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request." } }, "type": "object", @@ -3323,6 +3374,13 @@ "type": "experience" } ], + "structured_output": { + "key_points": [ + "Used in healthcare", + "Discussed recently" + ], + "summary": "AI is transformative" + }, "text": "Based on my understanding, AI is a transformative technology..." } }, diff --git a/scripts/hooks/lint.sh b/scripts/hooks/lint.sh index c7058043..34bd2a5e 100755 --- a/scripts/hooks/lint.sh +++ b/scripts/hooks/lint.sh @@ -31,6 +31,11 @@ run_task() { NAMES+=("$name") } +echo " Syncing Python dependencies..." 
+# Run uv sync first to avoid race conditions when multiple uv run commands +# try to reinstall local packages in parallel (e.g., after version bump) +uv sync --quiet + echo " Running lints in parallel..." # Node/TypeScript tasks
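A note on the two schema-enforcement modes added in `llm_wrapper.py`: with `strict_schema=True` the wrapper hands the JSON Schema to the provider's structured-output API, otherwise it embeds the schema in the system prompt and falls back to plain JSON mode. A condensed sketch of that branch (illustrative only; names mirror the diff, this is not the real module):

```python
import json


def build_response_format(schema: dict | None, strict_schema: bool, messages: list[dict]) -> dict:
    """Sketch of the response_format selection in llm_wrapper.py."""
    if strict_schema and schema is not None:
        # Strict mode (OpenAI only): the API guarantees all required fields.
        return {
            "type": "json_schema",
            "json_schema": {"name": "response", "strict": True, "schema": schema},
        }
    # Soft mode: splice the schema into the system message, then request generic JSON.
    if schema is not None:
        schema_msg = (
            "\n\nYou must respond with valid JSON matching this schema:\n"
            + json.dumps(schema, indent=2)
        )
        if messages and messages[0].get("role") == "system":
            messages[0]["content"] += schema_msg
        elif messages:
            messages[0]["content"] = schema_msg + "\n\n" + messages[0]["content"]
    return {"type": "json_object"}
```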