25 changes: 25 additions & 0 deletions hindsight-api/hindsight_api/api/http.py
@@ -385,17 +385,31 @@ class ReflectRequest(BaseModel):
"query": "What do you think about artificial intelligence?",
"budget": "low",
"context": "This is for a research paper on AI ethics",
"max_tokens": 4096,
"include": {"facts": {}},
"response_schema": {
"type": "object",
"properties": {
"summary": {"type": "string"},
"key_points": {"type": "array", "items": {"type": "string"}},
},
"required": ["summary", "key_points"],
},
}
}
)

query: str
budget: Budget = Budget.LOW
context: str | None = None
max_tokens: int = Field(default=4096, description="Maximum tokens for the response")
include: ReflectIncludeOptions = Field(
default_factory=ReflectIncludeOptions, description="Options for including additional data (disabled by default)"
)
response_schema: dict | None = Field(
default=None,
description="Optional JSON Schema for structured output. When provided, the response will include a 'structured_output' field with the LLM response parsed according to this schema.",
)


class OpinionItem(BaseModel):
@@ -440,12 +454,20 @@ class ReflectResponse(BaseModel):
{"id": "123", "text": "AI is used in healthcare", "type": "world"},
{"id": "456", "text": "I discussed AI applications last week", "type": "experience"},
],
"structured_output": {
"summary": "AI is transformative",
"key_points": ["Used in healthcare", "Discussed recently"],
},
}
}
)

text: str
based_on: list[ReflectFact] = [] # Facts used to generate the response
structured_output: dict | None = Field(
default=None,
description="Structured output parsed according to the request's response_schema. Only present when response_schema was provided in the request.",
)


class BanksResponse(BaseModel):
Expand Down Expand Up @@ -1211,6 +1233,8 @@ async def api_reflect(
query=request.query,
budget=request.budget,
context=request.context,
max_tokens=request.max_tokens,
response_schema=request.response_schema,
request_context=request_context,
)

@@ -1233,6 +1257,7 @@ async def api_reflect(
return ReflectResponse(
text=core_result.text,
based_on=based_on_facts,
structured_output=core_result.structured_output,
)

except Exception as e:
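A minimal client-side sketch of the new request and response shape, assuming the handler above is mounted at `/reflect` on a locally running instance; the base URL, port, and path are illustrative, not taken from this diff:

```python
# Hypothetical client call; base URL, port, and the /reflect path are assumptions.
import requests

payload = {
    "query": "What do you think about artificial intelligence?",
    "budget": "low",
    "max_tokens": 4096,
    "response_schema": {
        "type": "object",
        "properties": {
            "summary": {"type": "string"},
            "key_points": {"type": "array", "items": {"type": "string"}},
        },
        "required": ["summary", "key_points"],
    },
}

resp = requests.post("http://localhost:8000/reflect", json=payload, timeout=60)
resp.raise_for_status()
body = resp.json()

# structured_output is only populated when a response_schema was sent.
print(body["structured_output"]["summary"])
print(body["text"])  # empty string in the structured case, per the engine change
```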
4 changes: 4 additions & 0 deletions hindsight-api/hindsight_api/engine/interface.py
@@ -110,6 +110,8 @@ async def reflect_async(
*,
budget: "Budget | None" = None,
context: str | None = None,
max_tokens: int = 4096,
response_schema: dict | None = None,
request_context: "RequestContext",
) -> "ReflectResult":
"""
@@ -120,6 +122,8 @@
query: The question to reflect on.
budget: Search budget for retrieving context.
context: Additional context for the reflection.
max_tokens: Maximum tokens for the response.
response_schema: Optional JSON Schema for structured output.
request_context: Request context for authentication.

Returns:
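For callers of the engine interface, the new keyword-only arguments slot in as sketched below; how `engine`, the bank identifier, and the request context are obtained is assumed to come from existing application wiring rather than this diff:

```python
# Sketch of a caller using the extended interface; `engine`, `bank_id`, and
# `request_context` are assumed to be provided by existing wiring.
async def reflect_with_schema(engine, bank_id: str, request_context):
    result = await engine.reflect_async(
        bank_id,
        "Summarize recent AI discussions",
        budget=None,                      # let the engine pick its default
        max_tokens=2048,                  # new parameter from this PR
        response_schema={
            "type": "object",
            "properties": {"summary": {"type": "string"}},
            "required": ["summary"],
        },
        request_context=request_context,
    )
    # structured_output is set only when a response_schema was passed.
    return result.structured_output or result.text
```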
36 changes: 27 additions & 9 deletions hindsight-api/hindsight_api/engine/llm_wrapper.py
@@ -135,6 +135,7 @@ async def call(
initial_backoff: float = 1.0,
max_backoff: float = 60.0,
skip_validation: bool = False,
strict_schema: bool = False,
) -> Any:
"""
Make an LLM API call with retry logic.
@@ -149,6 +150,7 @@
initial_backoff: Initial backoff time in seconds.
max_backoff: Maximum backoff time in seconds.
skip_validation: Return raw JSON without Pydantic validation.
strict_schema: Use strict JSON schema enforcement (OpenAI only). Guarantees all required fields.

Returns:
Parsed response if response_format is provided, otherwise text content.
@@ -226,19 +228,35 @@ async def call(
for attempt in range(max_retries + 1):
try:
if response_format is not None:
# Add schema to system message for JSON mode
schema = None
if hasattr(response_format, "model_json_schema"):
schema = response_format.model_json_schema()
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"

if call_params["messages"] and call_params["messages"][0].get("role") == "system":
call_params["messages"][0]["content"] += schema_msg
elif call_params["messages"]:
call_params["messages"][0]["content"] = (
schema_msg + "\n\n" + call_params["messages"][0]["content"]
)
if strict_schema and schema is not None:
# Use OpenAI's strict JSON schema enforcement
# This guarantees all required fields are returned
call_params["response_format"] = {
"type": "json_schema",
"json_schema": {
"name": "response",
"strict": True,
"schema": schema,
},
}
else:
# Soft enforcement: add schema to prompt and use json_object mode
if schema is not None:
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"

if call_params["messages"] and call_params["messages"][0].get("role") == "system":
call_params["messages"][0]["content"] += schema_msg
elif call_params["messages"]:
call_params["messages"][0]["content"] = (
schema_msg + "\n\n" + call_params["messages"][0]["content"]
)

call_params["response_format"] = {"type": "json_object"}

call_params["response_format"] = {"type": "json_object"}
response = await self._client.chat.completions.create(**call_params)

content = response.choices[0].message.content
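A hypothetical caller of the wrapper showing the new `strict_schema` flag alongside an ordinary Pydantic `response_format`; the `Answer` model and the `llm_config` object are illustrative assumptions, not part of this PR:

```python
# Hypothetical caller of call() using the new flag; the Answer model and the
# llm_config object are illustrative, not part of this PR.
from pydantic import BaseModel


class Answer(BaseModel):
    summary: str
    confidence: float


async def ask_structured(llm_config) -> dict:
    # strict_schema=True switches response_format to OpenAI's json_schema mode,
    # so fields marked required in Answer's schema are guaranteed to come back.
    return await llm_config.call(
        messages=[{"role": "user", "content": "Summarize the findings."}],
        scope="memory_reflect",
        response_format=Answer,
        skip_validation=True,   # return the parsed dict rather than an Answer instance
        strict_schema=True,
    )
```

With `strict_schema=False` the wrapper falls back to the prompt-embedded schema plus `json_object` mode, so the same call still works against providers without structured-output support, just without the required-field guarantee.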
43 changes: 36 additions & 7 deletions hindsight-api/hindsight_api/engine/memory_engine.py
@@ -3076,6 +3076,8 @@ async def reflect_async(
*,
budget: Budget | None = None,
context: str | None = None,
max_tokens: int = 4096,
response_schema: dict | None = None,
request_context: "RequestContext",
) -> ReflectResult:
"""
Expand All @@ -3087,19 +3089,22 @@ async def reflect_async(
3. Retrieves existing opinions (bank's formed perspectives)
4. Uses LLM to formulate an answer
5. Extracts and stores any new opinions formed during reflection
6. Returns plain text answer and the facts used
6. Optionally generates structured output based on response_schema
7. Returns plain text answer and the facts used

Args:
bank_id: bank identifier
query: Question to answer
budget: Budget level for memory exploration (low=100, mid=300, high=600 units)
context: Additional context string to include in LLM prompt (not used in recall)
response_schema: Optional JSON Schema for structured output

Returns:
ReflectResult containing:
- text: Plain text answer (no markdown)
- based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects)
- new_opinions: List of newly formed opinions
- structured_output: Optional dict if response_schema was provided
"""
# Use cached LLM config
if self._llm_config is None:
@@ -3177,17 +3182,40 @@
log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars")

system_message = think_utils.get_system_message(disposition)
messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]

# Prepare response_format if schema provided
response_format = None
if response_schema is not None:
# Wrapper class to provide Pydantic-like interface for raw JSON schemas
class JsonSchemaWrapper:
def __init__(self, schema: dict):
self._schema = schema

def model_json_schema(self):
return self._schema

response_format = JsonSchemaWrapper(response_schema)

llm_start = time.time()
answer_text = await self._llm_config.call(
messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}],
scope="memory_think",
temperature=0.9,
max_completion_tokens=1000,
result = await self._llm_config.call(
messages=messages,
scope="memory_reflect",
max_completion_tokens=max_tokens,
response_format=response_format,
skip_validation=True if response_format else False,
strict_schema=True if response_format else False,
)
llm_time = time.time() - llm_start

answer_text = answer_text.strip()
# Handle response based on whether structured output was requested
if response_schema is not None:
structured_output = result
answer_text = "" # Empty for backward compatibility
log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated")
else:
structured_output = None
answer_text = result.strip()

# Submit form_opinion task for background processing
await self._task_backend.submit_task(
@@ -3205,6 +3233,7 @@
text=answer_text,
based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results},
new_opinions=[], # Opinions are being extracted asynchronously
structured_output=structured_output,
)

# Call post-operation hook if validator is configured
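The `JsonSchemaWrapper` adapter above relies on a small duck-typing contract; here is a standalone sketch of that contract (names mirror the diff, but nothing is imported from it):

```python
# Standalone sketch of the duck-typing contract: llm_wrapper only needs a
# model_json_schema() hook, so a raw JSON Schema dict can be adapted without
# building a dynamic Pydantic model.
class JsonSchemaWrapper:
    def __init__(self, schema: dict):
        self._schema = schema

    def model_json_schema(self) -> dict:
        return self._schema


raw_schema = {"type": "object", "properties": {"summary": {"type": "string"}}}
wrapped = JsonSchemaWrapper(raw_schema)

# Same hook a Pydantic model exposes, so call(response_format=wrapped, ...) works.
assert hasattr(wrapped, "model_json_schema")
assert wrapped.model_json_schema() == raw_schema
```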
8 changes: 7 additions & 1 deletion hindsight-api/hindsight_api/engine/response_models.py
@@ -123,7 +123,8 @@ class ReflectResult(BaseModel):
Result from a reflect operation.

Contains the formulated answer, the facts it was based on (organized by type),
and any new opinions that were formed during the reflection process.
any new opinions that were formed during the reflection process, and optionally
structured output if a response schema was provided.
"""

model_config = ConfigDict(
@@ -145,6 +146,7 @@ class ReflectResult(BaseModel):
"opinion": [],
},
"new_opinions": ["Machine learning has great potential in healthcare"],
"structured_output": {"summary": "ML in healthcare", "confidence": 0.9},
}
}
)
@@ -154,6 +156,10 @@
description="Facts used to formulate the answer, organized by type (world, experience, opinion)"
)
new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection")
structured_output: dict[str, Any] | None = Field(
default=None,
description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.",
)


class Opinion(BaseModel):
Expand Down