diff --git a/hindsight-api/hindsight_api/api/http.py b/hindsight-api/hindsight_api/api/http.py index c6a608f8..8d98a4ba 100644 --- a/hindsight-api/hindsight_api/api/http.py +++ b/hindsight-api/hindsight_api/api/http.py @@ -385,7 +385,16 @@ class ReflectRequest(BaseModel): "query": "What do you think about artificial intelligence?", "budget": "low", "context": "This is for a research paper on AI ethics", + "max_tokens": 4096, "include": {"facts": {}}, + "response_schema": { + "type": "object", + "properties": { + "summary": {"type": "string"}, + "key_points": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["summary", "key_points"], + }, } } ) @@ -393,9 +402,14 @@ class ReflectRequest(BaseModel): query: str budget: Budget = Budget.LOW context: str | None = None + max_tokens: int = Field(default=4096, description="Maximum tokens for the response") include: ReflectIncludeOptions = Field( default_factory=ReflectIncludeOptions, description="Options for including additional data (disabled by default)" ) + response_schema: dict | None = Field( + default=None, + description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.", + ) class OpinionItem(BaseModel): @@ -440,12 +454,20 @@ class ReflectResponse(BaseModel): {"id": "123", "text": "AI is used in healthcare", "type": "world"}, {"id": "456", "text": "I discussed AI applications last week", "type": "experience"}, ], + "structured_output": { + "summary": "AI is transformative", + "key_points": ["Used in healthcare", "Discussed recently"], + }, } } ) text: str based_on: list[ReflectFact] = [] # Facts used to generate the response + structured_output: dict | None = Field( + default=None, + description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.", + ) class BanksResponse(BaseModel): @@ -1211,6 +1233,8 @@ async def api_reflect( query=request.query, budget=request.budget, context=request.context, + max_tokens=request.max_tokens, + response_schema=request.response_schema, request_context=request_context, ) @@ -1233,6 +1257,7 @@ async def api_reflect( return ReflectResponse( text=core_result.text, based_on=based_on_facts, + structured_output=core_result.structured_output, ) except Exception as e: diff --git a/hindsight-api/hindsight_api/engine/interface.py b/hindsight-api/hindsight_api/engine/interface.py index 937f8ab3..a861b5b5 100644 --- a/hindsight-api/hindsight_api/engine/interface.py +++ b/hindsight-api/hindsight_api/engine/interface.py @@ -110,6 +110,8 @@ async def reflect_async( *, budget: "Budget | None" = None, context: str | None = None, + max_tokens: int = 4096, + response_schema: dict | None = None, request_context: "RequestContext", ) -> "ReflectResult": """ @@ -120,6 +122,8 @@ async def reflect_async( query: The question to reflect on. budget: Search budget for retrieving context. context: Additional context for the reflection. + max_tokens: Maximum tokens for the response. + response_schema: Optional JSON Schema for structured output. request_context: Request context for authentication. 
Returns: diff --git a/hindsight-api/hindsight_api/engine/llm_wrapper.py b/hindsight-api/hindsight_api/engine/llm_wrapper.py index b107c674..ece9d9ae 100644 --- a/hindsight-api/hindsight_api/engine/llm_wrapper.py +++ b/hindsight-api/hindsight_api/engine/llm_wrapper.py @@ -135,6 +135,7 @@ async def call( initial_backoff: float = 1.0, max_backoff: float = 60.0, skip_validation: bool = False, + strict_schema: bool = False, ) -> Any: """ Make an LLM API call with retry logic. @@ -149,6 +150,7 @@ async def call( initial_backoff: Initial backoff time in seconds. max_backoff: Maximum backoff time in seconds. skip_validation: Return raw JSON without Pydantic validation. + strict_schema: Use strict JSON schema enforcement (OpenAI only). Guarantees all required fields. Returns: Parsed response if response_format is provided, otherwise text content. @@ -226,19 +228,35 @@ async def call( for attempt in range(max_retries + 1): try: if response_format is not None: - # Add schema to system message for JSON mode + schema = None if hasattr(response_format, "model_json_schema"): schema = response_format.model_json_schema() - schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}" - if call_params["messages"] and call_params["messages"][0].get("role") == "system": - call_params["messages"][0]["content"] += schema_msg - elif call_params["messages"]: - call_params["messages"][0]["content"] = ( - schema_msg + "\n\n" + call_params["messages"][0]["content"] - ) + if strict_schema and schema is not None: + # Use OpenAI's strict JSON schema enforcement + # This guarantees all required fields are returned + call_params["response_format"] = { + "type": "json_schema", + "json_schema": { + "name": "response", + "strict": True, + "schema": schema, + }, + } + else: + # Soft enforcement: add schema to prompt and use json_object mode + if schema is not None: + schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}" + + if call_params["messages"] and call_params["messages"][0].get("role") == "system": + call_params["messages"][0]["content"] += schema_msg + elif call_params["messages"]: + call_params["messages"][0]["content"] = ( + schema_msg + "\n\n" + call_params["messages"][0]["content"] + ) + + call_params["response_format"] = {"type": "json_object"} - call_params["response_format"] = {"type": "json_object"} response = await self._client.chat.completions.create(**call_params) content = response.choices[0].message.content diff --git a/hindsight-api/hindsight_api/engine/memory_engine.py b/hindsight-api/hindsight_api/engine/memory_engine.py index d7d417d8..966e5951 100644 --- a/hindsight-api/hindsight_api/engine/memory_engine.py +++ b/hindsight-api/hindsight_api/engine/memory_engine.py @@ -3076,6 +3076,8 @@ async def reflect_async( *, budget: Budget | None = None, context: str | None = None, + max_tokens: int = 4096, + response_schema: dict | None = None, request_context: "RequestContext", ) -> ReflectResult: """ @@ -3087,19 +3089,22 @@ async def reflect_async( 3. Retrieves existing opinions (bank's formed perspectives) 4. Uses LLM to formulate an answer 5. Extracts and stores any new opinions formed during reflection - 6. Returns plain text answer and the facts used + 6. Optionally generates structured output based on response_schema + 7. 
Returns plain text answer and the facts used Args: bank_id: bank identifier query: Question to answer budget: Budget level for memory exploration (low=100, mid=300, high=600 units) context: Additional context string to include in LLM prompt (not used in recall) + max_tokens: Maximum tokens for the LLM response + response_schema: Optional JSON Schema for structured output Returns: ReflectResult containing: - text: Plain text answer (no markdown) - based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects) - new_opinions: List of newly formed opinions + - structured_output: Optional dict if response_schema was provided """ # Use cached LLM config if self._llm_config is None: @@ -3177,17 +3182,40 @@ async def reflect_async( log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars") system_message = think_utils.get_system_message(disposition) + messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}] + + # Prepare response_format if schema provided + response_format = None + if response_schema is not None: + # Wrapper class to provide Pydantic-like interface for raw JSON schemas + class JsonSchemaWrapper: + def __init__(self, schema: dict): + self._schema = schema + + def model_json_schema(self): + return self._schema + + response_format = JsonSchemaWrapper(response_schema) llm_start = time.time() - answer_text = await self._llm_config.call( - messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}], - scope="memory_think", - temperature=0.9, - max_completion_tokens=1000, + result = await self._llm_config.call( + messages=messages, + scope="memory_reflect", + max_completion_tokens=max_tokens, + response_format=response_format, + skip_validation=response_format is not None, + strict_schema=response_format is not None, ) llm_time = time.time() - llm_start - answer_text = answer_text.strip() + # Handle response based on whether structured output was requested + if response_schema is not None: + structured_output = result + answer_text = "" # Empty for backward compatibility + log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated") + else: + structured_output = None + answer_text = result.strip() # Submit form_opinion task for background processing await self._task_backend.submit_task( @@ -3205,6 +3233,7 @@ async def reflect_async( text=answer_text, based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results}, new_opinions=[], # Opinions are being extracted asynchronously + structured_output=structured_output, ) # Call post-operation hook if validator is configured diff --git a/hindsight-api/hindsight_api/engine/response_models.py index 452228ad..9607848b 100644 --- a/hindsight-api/hindsight_api/engine/response_models.py +++ b/hindsight-api/hindsight_api/engine/response_models.py @@ -123,7 +123,8 @@ class ReflectResult(BaseModel): Result from a reflect operation. Contains the formulated answer, the facts it was based on (organized by type), - and any new opinions that were formed during the reflection process. + any new opinions that were formed during the reflection process, and optionally + structured output if a response schema was provided.
""" model_config = ConfigDict( @@ -145,6 +146,7 @@ class ReflectResult(BaseModel): "opinion": [], }, "new_opinions": ["Machine learning has great potential in healthcare"], + "structured_output": {"summary": "ML in healthcare", "confidence": 0.9}, } } ) @@ -154,6 +156,10 @@ class ReflectResult(BaseModel): description="Facts used to formulate the answer, organized by type (world, experience, opinion)" ) new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection") + structured_output: dict[str, Any] | None = Field( + default=None, + description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.", + ) class Opinion(BaseModel): diff --git a/hindsight-api/tests/test_http_api_integration.py b/hindsight-api/tests/test_http_api_integration.py index 1707be5c..5c0b7db0 100644 --- a/hindsight-api/tests/test_http_api_integration.py +++ b/hindsight-api/tests/test_http_api_integration.py @@ -608,3 +608,167 @@ async def submit_async_retain(doc): assert response.status_code == 200 results = response.json()["results"] assert len(results) > 0, f"Should find memories for document {i}" + + +@pytest.mark.asyncio +async def test_reflect_structured_output(api_client): + """Test reflect endpoint with structured output via response_schema. + + When response_schema is provided, the reflect endpoint should return + both the natural language text response and a structured_output field + containing the response parsed according to the provided JSON schema. + """ + test_bank_id = f"reflect_structured_test_{datetime.now().timestamp()}" + + # Store some memories to reflect on + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/memories", + json={ + "items": [ + { + "content": "Alice is a senior machine learning engineer with 8 years of experience.", + "context": "team member info" + }, + { + "content": "Bob is a junior data scientist who joined last month.", + "context": "team member info" + }, + { + "content": "The team uses Python and TensorFlow for most projects.", + "context": "tech stack" + } + ] + } + ) + assert response.status_code == 200 + + # Define a JSON schema for structured output + response_schema = { + "type": "object", + "properties": { + "team_members": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "role": {"type": "string"}, + "experience_level": {"type": "string"} + } + } + }, + "technologies": { + "type": "array", + "items": {"type": "string"} + }, + "summary": {"type": "string"} + }, + "required": ["team_members", "summary"] + } + + # Call reflect with response_schema + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/reflect", + json={ + "query": "Give me an overview of the team and their tech stack", + "response_schema": response_schema + } + ) + assert response.status_code == 200 + result = response.json() + + # Verify text field exists (empty when using structured output) + assert "text" in result + assert result["text"] == "" + + # Verify structured output exists and has expected structure + assert "structured_output" in result + assert result["structured_output"] is not None + + structured = result["structured_output"] + assert "team_members" in structured + assert "summary" in structured + assert isinstance(structured["team_members"], list) + assert isinstance(structured["summary"], str) + + # Verify team members have the expected fields + if len(structured["team_members"]) 
> 0: + member = structured["team_members"][0] + assert "name" in member or "role" in member # At least some fields should be present + + +@pytest.mark.asyncio +async def test_reflect_without_structured_output(api_client): + """Test that reflect works normally without response_schema. + + When response_schema is not provided, the structured_output field + should be null/None in the response. + """ + test_bank_id = f"reflect_no_structured_test_{datetime.now().timestamp()}" + + # Store a memory + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/memories", + json={ + "items": [ + { + "content": "The project deadline is next Friday.", + "context": "project timeline" + } + ] + } + ) + assert response.status_code == 200 + + # Call reflect without response_schema + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/reflect", + json={ + "query": "When is the project deadline?" + } + ) + assert response.status_code == 200 + result = response.json() + + # Verify response has text but structured_output is null + assert "text" in result + assert len(result["text"]) > 0 + assert result.get("structured_output") is None + + +@pytest.mark.asyncio +async def test_reflect_with_max_tokens(api_client): + """Test reflect endpoint with custom max_tokens parameter. + + The max_tokens parameter controls the maximum tokens for the LLM response. + """ + test_bank_id = f"reflect_max_tokens_test_{datetime.now().timestamp()}" + + # Store a memory + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/memories", + json={ + "items": [ + { + "content": "Python is a popular programming language for data science and machine learning.", + "context": "tech" + } + ] + } + ) + assert response.status_code == 200 + + # Call reflect with custom max_tokens + response = await api_client.post( + f"/v1/default/banks/{test_bank_id}/reflect", + json={ + "query": "What is Python used for?", + "max_tokens": 500 + } + ) + assert response.status_code == 200 + result = response.json() + + # Verify response has text + assert "text" in result + assert len(result["text"]) > 0 diff --git a/hindsight-cli/src/commands/explore.rs index 1d2d42ce..283d4366 100644 --- a/hindsight-cli/src/commands/explore.rs +++ b/hindsight-cli/src/commands/explore.rs @@ -354,7 +354,9 @@ impl App { query: query_text, budget: Some(query_budget), context: None, + max_tokens: 4096, include: None, + response_schema: None, }; let result = client.reflect(&bank_id, &request, false) diff --git a/hindsight-cli/src/commands/memory.rs index 600a3295..c1d3995a 100644 --- a/hindsight-cli/src/commands/memory.rs +++ b/hindsight-cli/src/commands/memory.rs @@ -10,6 +10,7 @@ use crate::ui; // Import types from generated client use hindsight_client::types::{Budget, ChunkIncludeOptions, IncludeOptions}; +use serde_json; // Helper function to parse budget string to Budget enum fn parse_budget(budget: &str) -> Budget { @@ -86,6 +87,8 @@ pub fn reflect( query: String, budget: String, context: Option<String>, + max_tokens: Option<u32>, + schema_path: Option<PathBuf>, verbose: bool, output_format: OutputFormat, ) -> Result<()> { @@ -95,11 +98,24 @@ pub fn reflect( None }; + // Load and parse schema if provided + let response_schema = if let Some(path) = schema_path { + let schema_content = fs::read_to_string(&path) + .with_context(|| format!("Failed to read schema file: {}", path.display()))?; + let schema: serde_json::Map<String, serde_json::Value> = serde_json::from_str(&schema_content)
.with_context(|| format!("Failed to parse JSON schema from: {}", path.display()))?; + Some(schema) + } else { + None + }; + let request = ReflectRequest { query, budget: Some(parse_budget(&budget)), context, + max_tokens: max_tokens.unwrap_or(4096), include: None, + response_schema, }; let response = client.reflect(agent_id, &request, verbose); diff --git a/hindsight-cli/src/main.rs index 1eba251a..250b9f3c 100644 --- a/hindsight-cli/src/main.rs +++ b/hindsight-cli/src/main.rs @@ -206,6 +206,14 @@ enum MemoryCommands { /// Additional context #[arg(short = 'c', long)] context: Option<String>, + + /// Maximum tokens for the response (server default: 4096) + #[arg(short = 'm', long)] + max_tokens: Option<u32>, + + /// Path to JSON schema file for structured output + #[arg(short = 's', long)] + schema: Option<PathBuf>, }, /// Store (retain) a single memory @@ -421,8 +429,8 @@ fn run() -> Result<()> { MemoryCommands::Recall { bank_id, query, fact_type, budget, max_tokens, trace, include_chunks, chunk_max_tokens } => { commands::memory::recall(&client, &bank_id, query, fact_type, budget, max_tokens, trace, include_chunks, chunk_max_tokens, verbose, output_format) } - MemoryCommands::Reflect { bank_id, query, budget, context } => { - commands::memory::reflect(&client, &bank_id, query, budget, context, verbose, output_format) + MemoryCommands::Reflect { bank_id, query, budget, context, max_tokens, schema } => { + commands::memory::reflect(&client, &bank_id, query, budget, context, max_tokens, schema, verbose, output_format) } MemoryCommands::Retain { bank_id, content, doc_id, context, r#async } => { commands::memory::retain(&client, &bank_id, content, doc_id, context, r#async, verbose, output_format) diff --git a/hindsight-cli/src/ui.rs index 5fa43ae3..185e87a9 100644 --- a/hindsight-cli/src/ui.rs +++ b/hindsight-cli/src/ui.rs @@ -175,6 +175,16 @@ pub fn print_think_response(response: &ReflectResponse) { if !response.based_on.is_empty() { println!("{}", dim(&format!("Based on {} memory units", response.based_on.len()))); } + + // Display structured output if present + if let Some(structured) = &response.structured_output { + println!(); + println!("{}", gradient_text("─── Structured Output ───")); + println!(); + if let Ok(json) = serde_json::to_string_pretty(structured) { + println!("{}", json); + } + } } pub fn print_trace_info(trace: &serde_json::Map<String, serde_json::Value>) { diff --git a/hindsight-clients/python/.openapi-generator/FILES index a16951aa..478ff38b 100644 --- a/hindsight-clients/python/.openapi-generator/FILES +++ b/hindsight-clients/python/.openapi-generator/FILES @@ -102,53 +102,4 @@ hindsight_client_api/models/update_disposition_request.py hindsight_client_api/models/validation_error.py hindsight_client_api/models/validation_error_loc_inner.py hindsight_client_api/rest.py -hindsight_client_api/test/__init__.py -hindsight_client_api/test/test_add_background_request.py -hindsight_client_api/test/test_background_response.py -hindsight_client_api/test/test_bank_list_item.py -hindsight_client_api/test/test_bank_list_response.py -hindsight_client_api/test/test_bank_profile_response.py -hindsight_client_api/test/test_bank_stats_response.py -hindsight_client_api/test/test_banks_api.py -hindsight_client_api/test/test_budget.py -hindsight_client_api/test/test_cancel_operation_response.py -hindsight_client_api/test/test_chunk_data.py -hindsight_client_api/test/test_chunk_include_options.py
-hindsight_client_api/test/test_chunk_response.py -hindsight_client_api/test/test_create_bank_request.py -hindsight_client_api/test/test_delete_document_response.py -hindsight_client_api/test/test_delete_response.py -hindsight_client_api/test/test_disposition_traits.py -hindsight_client_api/test/test_document_response.py -hindsight_client_api/test/test_documents_api.py -hindsight_client_api/test/test_entities_api.py -hindsight_client_api/test/test_entity_detail_response.py -hindsight_client_api/test/test_entity_include_options.py -hindsight_client_api/test/test_entity_list_item.py -hindsight_client_api/test/test_entity_list_response.py -hindsight_client_api/test/test_entity_observation_response.py -hindsight_client_api/test/test_entity_state_response.py -hindsight_client_api/test/test_graph_data_response.py -hindsight_client_api/test/test_http_validation_error.py -hindsight_client_api/test/test_include_options.py -hindsight_client_api/test/test_list_documents_response.py -hindsight_client_api/test/test_list_memory_units_response.py -hindsight_client_api/test/test_memory_api.py -hindsight_client_api/test/test_memory_item.py -hindsight_client_api/test/test_monitoring_api.py -hindsight_client_api/test/test_operation_response.py -hindsight_client_api/test/test_operations_api.py -hindsight_client_api/test/test_operations_list_response.py -hindsight_client_api/test/test_recall_request.py -hindsight_client_api/test/test_recall_response.py -hindsight_client_api/test/test_recall_result.py -hindsight_client_api/test/test_reflect_fact.py -hindsight_client_api/test/test_reflect_include_options.py -hindsight_client_api/test/test_reflect_request.py -hindsight_client_api/test/test_reflect_response.py -hindsight_client_api/test/test_retain_request.py -hindsight_client_api/test/test_retain_response.py -hindsight_client_api/test/test_update_disposition_request.py -hindsight_client_api/test/test_validation_error.py -hindsight_client_api/test/test_validation_error_loc_inner.py hindsight_client_api_README.md diff --git a/hindsight-clients/python/hindsight_client/hindsight_client.py b/hindsight-clients/python/hindsight_client/hindsight_client.py index dc8312cf..6af56599 100644 --- a/hindsight-clients/python/hindsight_client/hindsight_client.py +++ b/hindsight-clients/python/hindsight_client/hindsight_client.py @@ -229,6 +229,8 @@ def reflect( query: str, budget: str = "low", context: Optional[str] = None, + max_tokens: Optional[int] = None, + response_schema: Optional[Dict[str, Any]] = None, ) -> ReflectResponse: """ Generate a contextual answer based on bank identity and memories. @@ -238,14 +240,21 @@ def reflect( query: The question or prompt budget: Budget level for reflection - "low", "mid", or "high" (default: "low") context: Optional additional context + max_tokens: Maximum tokens for the response (server default: 4096) + response_schema: Optional JSON Schema for structured output. When provided, + the response will include a 'structured_output' field with the LLM + response parsed according to this schema. 
Returns: - ReflectResponse with answer text and optionally facts used + ReflectResponse with answer text, optionally facts used, and optionally + structured_output if response_schema was provided """ request_obj = reflect_request.ReflectRequest( query=query, budget=budget, context=context, + max_tokens=max_tokens, + response_schema=response_schema, ) return _run_async(self._memory_api.reflect(bank_id, request_obj)) diff --git a/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md b/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md index 19b80147..afb95e23 100644 --- a/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md +++ b/hindsight-clients/python/hindsight_client_api/docs/ReflectRequest.md @@ -9,7 +9,9 @@ Name | Type | Description | Notes **query** | **str** | | **budget** | [**Budget**](Budget.md) | | [optional] **context** | **str** | | [optional] +**max_tokens** | **int** | Maximum tokens for the response | [optional] [default to 4096] **include** | [**ReflectIncludeOptions**](ReflectIncludeOptions.md) | Options for including additional data (disabled by default) | [optional] +**response_schema** | **Dict[str, object]** | | [optional] ## Example diff --git a/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md b/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md index 266029a1..27f28498 100644 --- a/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md +++ b/hindsight-clients/python/hindsight_client_api/docs/ReflectResponse.md @@ -8,6 +8,7 @@ Name | Type | Description | Notes ------------ | ------------- | ------------- | ------------- **text** | **str** | | **based_on** | [**List[ReflectFact]**](ReflectFact.md) | | [optional] [default to []] +**structured_output** | **Dict[str, object]** | | [optional] ## Example diff --git a/hindsight-clients/python/hindsight_client_api/models/reflect_request.py b/hindsight-clients/python/hindsight_client_api/models/reflect_request.py index d45aacd4..dca346cb 100644 --- a/hindsight-clients/python/hindsight_client_api/models/reflect_request.py +++ b/hindsight-clients/python/hindsight_client_api/models/reflect_request.py @@ -17,7 +17,7 @@ import re # noqa: F401 import json -from pydantic import BaseModel, ConfigDict, Field, StrictStr +from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr from typing import Any, ClassVar, Dict, List, Optional from hindsight_client_api.models.budget import Budget from hindsight_client_api.models.reflect_include_options import ReflectIncludeOptions @@ -31,8 +31,10 @@ class ReflectRequest(BaseModel): query: StrictStr budget: Optional[Budget] = None context: Optional[StrictStr] = None + max_tokens: Optional[StrictInt] = Field(default=4096, description="Maximum tokens for the response") include: Optional[ReflectIncludeOptions] = Field(default=None, description="Options for including additional data (disabled by default)") - __properties: ClassVar[List[str]] = ["query", "budget", "context", "include"] + response_schema: Optional[Dict[str, Any]] = None + __properties: ClassVar[List[str]] = ["query", "budget", "context", "max_tokens", "include", "response_schema"] model_config = ConfigDict( populate_by_name=True, @@ -81,6 +83,11 @@ def to_dict(self) -> Dict[str, Any]: if self.context is None and "context" in self.model_fields_set: _dict['context'] = None + # set to None if response_schema (nullable) is None + # and model_fields_set contains the field + if self.response_schema is None and "response_schema" 
in self.model_fields_set: + _dict['response_schema'] = None + return _dict @classmethod @@ -96,7 +103,9 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: "query": obj.get("query"), "budget": obj.get("budget"), "context": obj.get("context"), - "include": ReflectIncludeOptions.from_dict(obj["include"]) if obj.get("include") is not None else None + "max_tokens": obj.get("max_tokens") if obj.get("max_tokens") is not None else 4096, + "include": ReflectIncludeOptions.from_dict(obj["include"]) if obj.get("include") is not None else None, + "response_schema": obj.get("response_schema") }) return _obj diff --git a/hindsight-clients/python/hindsight_client_api/models/reflect_response.py index d9b32bb6..74773142 100644 --- a/hindsight-clients/python/hindsight_client_api/models/reflect_response.py +++ b/hindsight-clients/python/hindsight_client_api/models/reflect_response.py @@ -29,7 +29,8 @@ class ReflectResponse(BaseModel): """ # noqa: E501 text: StrictStr based_on: Optional[List[ReflectFact]] = None - __properties: ClassVar[List[str]] = ["text", "based_on"] + structured_output: Optional[Dict[str, Any]] = None + __properties: ClassVar[List[str]] = ["text", "based_on", "structured_output"] model_config = ConfigDict( populate_by_name=True, @@ -77,6 +78,11 @@ def to_dict(self) -> Dict[str, Any]: if _item_based_on: _items.append(_item_based_on.to_dict()) _dict['based_on'] = _items + # set to None if structured_output (nullable) is None + # and model_fields_set contains the field + if self.structured_output is None and "structured_output" in self.model_fields_set: + _dict['structured_output'] = None + return _dict @classmethod @@ -90,7 +96,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]: _obj = cls.model_validate({ "text": obj.get("text"), - "based_on": [ReflectFact.from_dict(_item) for _item in obj["based_on"]] if obj.get("based_on") is not None else None + "based_on": [ReflectFact.from_dict(_item) for _item in obj["based_on"]] if obj.get("based_on") is not None else None, + "structured_output": obj.get("structured_output") }) return _obj diff --git a/hindsight-clients/python/tests/test_main_operations.py index b1457b12..56861b9f 100644 --- a/hindsight-clients/python/tests/test_main_operations.py +++ b/hindsight-clients/python/tests/test_main_operations.py @@ -173,6 +173,51 @@ def test_reflect_with_context(self, client, bank_id): assert response.text is not None assert len(response.text) > 0 + def test_reflect_with_max_tokens(self, client, bank_id): + """Test reflect with custom max_tokens parameter.""" + response = client.reflect( + bank_id=bank_id, + query="What do you think about Python?", + max_tokens=500, + ) + + assert response is not None + assert response.text is not None + assert len(response.text) > 0 + + def test_reflect_with_structured_output(self, client, bank_id): + """Test reflect with structured output via response_schema. + + When response_schema is provided, the response includes a structured_output + field parsed according to the provided JSON schema. The text field is empty + since only a single LLM call is made for structured output.
+ """ + from typing import Optional + from pydantic import BaseModel + + # Define schema using Pydantic model + class RecommendationResponse(BaseModel): + recommendation: str + reasons: list[str] + confidence: Optional[str] = None # Optional for LLM flexibility + + response = client.reflect( + bank_id=bank_id, + query="What programming language should I learn for data science?", + response_schema=RecommendationResponse.model_json_schema(), + max_tokens=10000, + ) + + assert response is not None + # Text is empty when using structured output (single LLM call) + assert response.text == "" + + # Verify structured output is present and can be parsed into model + assert response.structured_output is not None + result = RecommendationResponse.model_validate(response.structured_output) + assert result.recommendation + assert isinstance(result.reasons, list) + class TestListMemories: """Tests for listing memories.""" diff --git a/hindsight-clients/rust/src/lib.rs b/hindsight-clients/rust/src/lib.rs index 4d727c24..f1f71062 100644 --- a/hindsight-clients/rust/src/lib.rs +++ b/hindsight-clients/rust/src/lib.rs @@ -101,7 +101,9 @@ mod tests { query: "What do you know about Alice?".to_string(), budget: None, context: None, + max_tokens: 4096, include: None, + response_schema: None, }; let reflect_response = client .reflect(&bank_id, None, &reflect_request) diff --git a/hindsight-clients/typescript/generated/types.gen.ts b/hindsight-clients/typescript/generated/types.gen.ts index e4629425..9ae183ba 100644 --- a/hindsight-clients/typescript/generated/types.gen.ts +++ b/hindsight-clients/typescript/generated/types.gen.ts @@ -898,10 +898,24 @@ export type ReflectRequest = { * Context */ context?: string | null; + /** + * Max Tokens + * + * Maximum tokens for the response + */ + max_tokens?: number; /** * Options for including additional data (disabled by default) */ include?: ReflectIncludeOptions; + /** + * Response Schema + * + * Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema. + */ + response_schema?: { + [key: string]: unknown; + } | null; }; /** @@ -918,6 +932,14 @@ export type ReflectResponse = { * Based On */ based_on?: Array; + /** + * Structured Output + * + * Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request. 
+ */ + structured_output?: { + [key: string]: unknown; + } | null; }; /** diff --git a/hindsight-control-plane/package.json b/hindsight-control-plane/package.json index bd6af0b4..90aabd0e 100644 --- a/hindsight-control-plane/package.json +++ b/hindsight-control-plane/package.json @@ -11,7 +11,7 @@ "public" ], "scripts": { - "dev": "next dev", + "dev": "next dev --turbopack -p $(node -e \"const net=require('net');const s=net.createServer();s.listen(0,()=>{console.log(s.address().port);s.close()})\")", "build": "next build && npm run build:standalone", "build:standalone": "rm -rf standalone && STANDALONE_ROOT=$(find .next/standalone -path '*/node_modules' -prune -o -name 'server.js' -print | head -1 | xargs dirname) && cp -r \"$STANDALONE_ROOT\" standalone && cp -r .next/standalone/node_modules standalone/node_modules && mkdir -p standalone/.next && cp -r .next/static standalone/.next/static && mkdir -p standalone/public && cp -r public/* standalone/public/ 2>/dev/null || true", "start": "next start", diff --git a/hindsight-docs/docs/developer/api/reflect.mdx b/hindsight-docs/docs/developer/api/reflect.mdx index c5f771fb..751ea08f 100644 --- a/hindsight-docs/docs/developer/api/reflect.mdx +++ b/hindsight-docs/docs/developer/api/reflect.mdx @@ -52,6 +52,8 @@ Make sure you've completed the [Quick Start](./quickstart) to install the client | `query` | string | required | Question or prompt | | `budget` | string | "low" | Budget level: "low", "mid", "high" | | `context` | string | None | Additional context for the query | +| `max_tokens` | int | 4096 | Maximum tokens for the response | +| `response_schema` | object | None | JSON Schema for [structured output](#structured-output) | @@ -127,3 +129,107 @@ This enables: - **Transparency** — users see why the bank said something - **Verification** — check if the response is grounded in facts - **Debugging** — understand retrieval quality + +## Structured Output + +For applications that need to process responses programmatically, you can request structured output by providing a JSON Schema via `response_schema`. When provided, the response includes a `structured_output` field with the LLM response parsed according to the schema. The `text` field will be empty since only a single LLM call is made for efficiency. 
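+Under the hood, every client below makes a single POST to the bank's reflect endpoint. A minimal raw-HTTP sketch (the host, port, and bank name are placeholders; adjust them to your deployment):
+
+```python
+import requests  # assumes the `requests` package is installed
+
+schema = {
+    "type": "object",
+    "properties": {
+        "summary": {"type": "string"},
+        "key_points": {"type": "array", "items": {"type": "string"}},
+    },
+    "required": ["summary", "key_points"],
+}
+
+resp = requests.post(
+    "http://localhost:8000/v1/default/banks/my-bank/reflect",  # placeholder host and bank
+    json={"query": "Summarize what you know about the team", "response_schema": schema},
+)
+body = resp.json()
+assert body["text"] == ""         # text is empty when response_schema is set
+print(body["structured_output"])  # dict matching the schema
+```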
+ +The easiest way to define a schema is using **Pydantic models**: + + + + +```python +from pydantic import BaseModel +from hindsight_client import Hindsight + +# Define your response structure with Pydantic +class HiringRecommendation(BaseModel): + recommendation: str + confidence: str # "low", "medium", "high" + key_factors: list[str] + risks: list[str] = [] + +with Hindsight() as client: + response = client.reflect( + bank_id="hiring-team", + query="Should we hire Alice for the ML team lead position?", + response_schema=HiringRecommendation.model_json_schema(), + ) + + # Parse structured output into Pydantic model + result = HiringRecommendation.model_validate(response.structured_output) + print(f"Recommendation: {result.recommendation}") + print(f"Confidence: {result.confidence}") + print(f"Key factors: {result.key_factors}") +``` + + + + +```javascript +import { Hindsight } from "@anthropic-ai/hindsight"; + +const client = new Hindsight(); + +// Define JSON schema directly +const responseSchema = { + type: "object", + properties: { + recommendation: { type: "string" }, + confidence: { type: "string", enum: ["low", "medium", "high"] }, + key_factors: { type: "array", items: { type: "string" } }, + risks: { type: "array", items: { type: "string" } }, + }, + required: ["recommendation", "confidence", "key_factors"], +}; + +const response = await client.reflect({ + bankId: "hiring-team", + query: "Should we hire Alice for the ML team lead position?", + responseSchema: responseSchema, +}); + +// Structured output +console.log(response.structuredOutput.recommendation); +console.log(response.structuredOutput.keyFactors); +``` + + + + +First, create a JSON schema file `schema.json`: +```json +{ + "type": "object", + "properties": { + "recommendation": {"type": "string"}, + "confidence": {"type": "string", "enum": ["low", "medium", "high"]}, + "key_factors": {"type": "array", "items": {"type": "string"}} + }, + "required": ["recommendation", "confidence", "key_factors"] +} +``` + +Then use the `--schema` flag: +```bash +hindsight memory reflect hiring-team \ + "Should we hire Alice for the ML team lead position?" \ + --schema schema.json +```
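+
+Strict schema enforcement is delegated to the LLM provider and, per the server's LLM wrapper, is guaranteed only for OpenAI-style backends; if your deployment runs on a different provider, validating `structured_output` client-side is a cheap safeguard. A sketch using the third-party `jsonschema` package (an assumption; it is not a client dependency), reusing `response` and `HiringRecommendation` from the Python example above:
+
+```python
+# pip install jsonschema  (third-party; illustrative, not required by the client)
+from jsonschema import ValidationError, validate
+
+try:
+    validate(
+        instance=response.structured_output,
+        schema=HiringRecommendation.model_json_schema(),
+    )
+except ValidationError as exc:
+    # The schema is not guaranteed on all backends: retry, fall back, or log.
+    print(f"Schema validation failed: {exc.message}")
+```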
+ + + + +| Use Case | Why Structured Output Helps | +|----------|----------------------------| +| **Decision pipelines** | Parse recommendations into workflow systems | +| **Dashboards** | Extract confidence scores, risk factors for visualization | +| **Multi-agent systems** | Pass structured data between agents | +| **Auditing** | Log structured decisions with clear reasoning | + +**Tips:** +- Use Pydantic's `model_json_schema()` for type-safe schema generation +- Use `model_validate()` to parse the response back into your Pydantic model +- Keep schemas focused — extract only what you need +- Use `Optional` fields for data that may not always be available diff --git a/hindsight-docs/openapi.json index 2eec09fd..792b296d 100644 --- a/hindsight-docs/openapi.json +++ b/hindsight-docs/openapi.json @@ -3269,9 +3269,28 @@ ], "title": "Context" }, + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "Maximum tokens for the response", + "default": 4096 + }, "include": { "$ref": "#/components/schemas/ReflectIncludeOptions", "description": "Options for including additional data (disabled by default)" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "Optional JSON Schema for structured output.
When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema." } }, "type": "object", @@ -3286,7 +3305,26 @@ "include": { "facts": {} }, - "query": "What do you think about artificial intelligence?" + "max_tokens": 4096, + "query": "What do you think about artificial intelligence?", + "response_schema": { + "properties": { + "summary": { + "type": "string" + }, + "key_points": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "summary", + "key_points" + ], + "type": "object" + } } }, "ReflectResponse": { @@ -3302,6 +3340,19 @@ "type": "array", "title": "Based On", "default": [] + }, + "structured_output": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Structured Output", + "description": "Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request." } }, "type": "object", @@ -3323,6 +3374,13 @@ "type": "experience" } ], + "structured_output": { + "key_points": [ + "Used in healthcare", + "Discussed recently" + ], + "summary": "AI is transformative" + }, "text": "Based on my understanding, AI is a transformative technology..." } }, diff --git a/openapi.json b/openapi.json index 2eec09fd..792b296d 100644 --- a/openapi.json +++ b/openapi.json @@ -3269,9 +3269,28 @@ ], "title": "Context" }, + "max_tokens": { + "type": "integer", + "title": "Max Tokens", + "description": "Maximum tokens for the response", + "default": 4096 + }, "include": { "$ref": "#/components/schemas/ReflectIncludeOptions", "description": "Options for including additional data (disabled by default)" + }, + "response_schema": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Response Schema", + "description": "Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema." } }, "type": "object", @@ -3286,7 +3305,26 @@ "include": { "facts": {} }, - "query": "What do you think about artificial intelligence?" + "max_tokens": 4096, + "query": "What do you think about artificial intelligence?", + "response_schema": { + "properties": { + "summary": { + "type": "string" + }, + "key_points": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "summary", + "key_points" + ], + "type": "object" + } } }, "ReflectResponse": { @@ -3302,6 +3340,19 @@ "type": "array", "title": "Based On", "default": [] + }, + "structured_output": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "title": "Structured Output", + "description": "Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request." } }, "type": "object", @@ -3323,6 +3374,13 @@ "type": "experience" } ], + "structured_output": { + "key_points": [ + "Used in healthcare", + "Discussed recently" + ], + "summary": "AI is transformative" + }, "text": "Based on my understanding, AI is a transformative technology..." } }, diff --git a/scripts/hooks/lint.sh b/scripts/hooks/lint.sh index c7058043..34bd2a5e 100755 --- a/scripts/hooks/lint.sh +++ b/scripts/hooks/lint.sh @@ -31,6 +31,11 @@ run_task() { NAMES+=("$name") } +echo " Syncing Python dependencies..." 
+# Run uv sync first to avoid race conditions when multiple uv run commands +# try to reinstall local packages in parallel (e.g., after version bump) +uv sync --quiet + echo " Running lints in parallel..." # Node/TypeScript tasks
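A note on the two schema-enforcement modes added in `llm_wrapper.py`: with `strict_schema=True` the wrapper hands the JSON Schema to the provider's structured-output API, otherwise it embeds the schema in the system prompt and falls back to plain JSON mode. A condensed sketch of that branch (illustrative only; names mirror the diff, this is not the real module):

```python
import json


def build_response_format(schema: dict | None, strict_schema: bool, messages: list[dict]) -> dict:
    """Sketch of the response_format selection in llm_wrapper.py."""
    if strict_schema and schema is not None:
        # Strict mode (OpenAI only): the API guarantees all required fields.
        return {
            "type": "json_schema",
            "json_schema": {"name": "response", "strict": True, "schema": schema},
        }
    # Soft mode: splice the schema into the system message, then request generic JSON.
    if schema is not None:
        schema_msg = (
            "\n\nYou must respond with valid JSON matching this schema:\n"
            + json.dumps(schema, indent=2)
        )
        if messages and messages[0].get("role") == "system":
            messages[0]["content"] += schema_msg
        elif messages:
            messages[0]["content"] = schema_msg + "\n\n" + messages[0]["content"]
    return {"type": "json_object"}
```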