diff --git a/.gitignore b/.gitignore
index 82f772956..ac44d8108 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,52 +1,61 @@
-/venv/
-/_pycache/
-*.pyc*
-__pycache__/
-.venv/
-.vscode/
-.env
-.env.local
-.env.development.local
-.env.test.local
-.env.production.local
-.env.development
-.env.test
-export*
-__pycache__/model_tools.cpython-310.pyc
-__pycache__/web_tools.cpython-310.pyc
-logs/
-data/
-.pytest_cache/
-tmp/
-temp_vision_images/
-hermes-*/*
-examples/
-tests/quick_test_dataset.jsonl
-tests/sample_dataset.jsonl
-run_datagen_kimik2-thinking.sh
-run_datagen_megascience_glm4-6.sh
-run_datagen_sonnet.sh
-source-data/*
-run_datagen_megascience_glm4-6.sh
-data/*
-node_modules/
-browser-use/
-agent-browser/
-# Private keys
-*.ppk
-*.pem
-privvy*
-images/
-__pycache__/
-hermes_agent.egg-info/
-wandb/
-testlogs
-
-# CLI config (may contain sensitive SSH paths)
-cli-config.yaml
-
-# Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case)
-skills/.hub/
+/venv/
+/_pycache/
+*.pyc*
+__pycache__/
+.venv/
+.vscode/
+.env
+.env.local
+.env.development.local
+.env.test.local
+.env.production.local
+.env.development
+.env.test
+export*
+__pycache__/model_tools.cpython-310.pyc
+__pycache__/web_tools.cpython-310.pyc
+logs/
+data/
+.pytest_cache/
+tmp/
+temp_vision_images/
+hermes-*/*
+examples/
+tests/quick_test_dataset.jsonl
+tests/sample_dataset.jsonl
+run_datagen_kimik2-thinking.sh
+run_datagen_megascience_glm4-6.sh
+run_datagen_sonnet.sh
+source-data/*
+run_datagen_megascience_glm4-6.sh
+data/*
+node_modules/
+browser-use/
+agent-browser/
+# Private keys
+*.ppk
+*.pem
+privvy*
+images/
+__pycache__/
+hermes_agent.egg-info/
+wandb/
+testlogs
+
+# CLI config (may contain sensitive SSH paths)
+cli-config.yaml
+config.yaml
+
+# Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case)
+skills/.hub/
ignored/
.worktrees/
environments/benchmarks/evals/
+
+# Session logs and user data
+sessions/
+*.log
+
+# OS files
+.DS_Store
+Thumbs.db
\ No newline at end of file
diff --git a/run_agent.py b/run_agent.py
index 6adbe14d2..b3db48986 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -15,7 +15,7 @@
Usage:
from run_agent import AIAgent
-
+
agent = AIAgent(base_url="http://localhost:30000/v1", model="claude-opus-4-20250514")
response = agent.run_conversation("Tell me about the latest Python updates")
"""
@@ -24,6 +24,7 @@
import hashlib
import json
import logging
+
logger = logging.getLogger(__name__)
import os
import random
@@ -44,7 +45,7 @@
_hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
_user_env = _hermes_home / ".env"
-_project_env = Path(__file__).parent / '.env'
+_project_env = Path(__file__).parent / ".env"
if _user_env.exists():
try:
load_dotenv(dotenv_path=_user_env, encoding="utf-8")
@@ -65,7 +66,11 @@
os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
# Import our tool system
-from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
+from model_tools import (
+ get_tool_definitions,
+ handle_function_call,
+ check_toolset_requirements,
+)
from tools.terminal_tool import cleanup_vm
from tools.interrupt import set_interrupt as _set_interrupt
from tools.browser_tool import cleanup_browser
@@ -76,25 +81,33 @@
# Agent internals extracted to agent/ package for modularity
from agent.prompt_builder import (
- DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
- MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
+ DEFAULT_AGENT_IDENTITY,
+ PLATFORM_HINTS,
+ MEMORY_GUIDANCE,
+ SESSION_SEARCH_GUIDANCE,
+ SKILLS_GUIDANCE,
)
from agent.model_metadata import (
- fetch_model_metadata, get_model_context_length,
- estimate_tokens_rough, estimate_messages_tokens_rough,
- get_next_probe_tier, parse_context_limit_from_error,
+ fetch_model_metadata,
+ get_model_context_length,
+ estimate_tokens_rough,
+ estimate_messages_tokens_rough,
+ get_next_probe_tier,
+ parse_context_limit_from_error,
save_context_length,
)
from agent.context_compressor import ContextCompressor
from agent.prompt_caching import apply_anthropic_cache_control
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt
from agent.display import (
- KawaiiSpinner, build_tool_preview as _build_tool_preview,
+ KawaiiSpinner,
+ build_tool_preview as _build_tool_preview,
get_cute_tool_message as _get_cute_tool_message_impl,
_detect_tool_failure,
)
from agent.trajectory import (
- convert_scratchpad_to_think, has_incomplete_scratchpad,
+ convert_scratchpad_to_think,
+ has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
)
@@ -187,11 +200,11 @@ def remaining(self) -> int:
class AIAgent:
"""
AI Agent with tool calling capabilities.
-
+
This class manages the conversation flow, tool execution, and response handling
for AI models that support function calling.
"""
-
+
def __init__(
self,
base_url: str = None,
@@ -292,13 +305,19 @@ def __init__(
# Store effective base URL for feature detection (prompt caching, reasoning, etc.)
# When no base_url is provided, the client defaults to OpenRouter, so reflect that here.
self.base_url = base_url or OPENROUTER_BASE_URL
- provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
+ provider_name = (
+ provider.strip().lower()
+ if isinstance(provider, str) and provider.strip()
+ else None
+ )
self.provider = provider_name or "openrouter"
if api_mode in {"chat_completions", "codex_responses"}:
self.api_mode = api_mode
elif self.provider == "openai-codex":
self.api_mode = "codex_responses"
- elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
+ elif (
+ provider_name is None
+ ) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
self.api_mode = "codex_responses"
self.provider = "openai-codex"
else:
@@ -310,15 +329,15 @@ def __init__(
self.clarify_callback = clarify_callback
self.step_callback = step_callback
self._last_reported_tool = None # Track for "new tool" mode
-
+
# Interrupt mechanism for breaking out of tool loops
self._interrupt_requested = False
self._interrupt_message = None # Optional message that triggered interrupt
-
+
# Subagent delegation state
- self._delegate_depth = 0 # 0 = top-level agent, incremented for children
- self._active_children = [] # Running child AIAgents (for interrupt propagation)
-
+ self._delegate_depth = 0 # 0 = top-level agent, incremented for children
+ self._active_children = [] # Running child AIAgents (for interrupt propagation)
+
# Store OpenRouter provider preferences
self.providers_allowed = providers_allowed
self.providers_ignored = providers_ignored
@@ -330,17 +349,20 @@ def __init__(
# Store toolset filtering options
self.enabled_toolsets = enabled_toolsets
self.disabled_toolsets = disabled_toolsets
-
+
# Model response configuration
self.max_tokens = max_tokens # None = use model default
- self.reasoning_config = reasoning_config # None = use default (medium for OpenRouter)
+ self.reasoning_config = (
+ reasoning_config # None = use default (medium for OpenRouter)
+ )
self.prefill_messages = prefill_messages or [] # Prefilled conversation turns
-
+
# Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
# Reduces input costs by ~75% on multi-turn conversations by caching the
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
is_openrouter = "openrouter" in self.base_url.lower()
is_claude = "claude" in self.model.lower()
+
self._use_prompt_caching = is_openrouter and is_claude
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
@@ -354,86 +376,96 @@ def __init__(
# Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
# so tool failures, API errors, etc. are inspectable after the fact.
from agent.redact import RedactingFormatter
+
_error_log_dir = Path.home() / ".hermes" / "logs"
_error_log_dir.mkdir(parents=True, exist_ok=True)
_error_log_path = _error_log_dir / "errors.log"
from logging.handlers import RotatingFileHandler
+
_error_file_handler = RotatingFileHandler(
- _error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2,
+ _error_log_path,
+ maxBytes=2 * 1024 * 1024,
+ backupCount=2,
)
_error_file_handler.setLevel(logging.WARNING)
- _error_file_handler.setFormatter(RedactingFormatter(
- '%(asctime)s %(levelname)s %(name)s: %(message)s',
- ))
+ _error_file_handler.setFormatter(
+ RedactingFormatter(
+ "%(asctime)s %(levelname)s %(name)s: %(message)s",
+ )
+ )
logging.getLogger().addHandler(_error_file_handler)
if self.verbose_logging:
logging.basicConfig(
level=logging.DEBUG,
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
- datefmt='%H:%M:%S'
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ datefmt="%H:%M:%S",
)
for handler in logging.getLogger().handlers:
- handler.setFormatter(RedactingFormatter(
- '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
- datefmt='%H:%M:%S',
- ))
+ handler.setFormatter(
+ RedactingFormatter(
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+ datefmt="%H:%M:%S",
+ )
+ )
# Keep third-party libraries at WARNING level to reduce noise
# We have our own retry and error logging that's more informative
- logging.getLogger('openai').setLevel(logging.WARNING)
- logging.getLogger('openai._base_client').setLevel(logging.WARNING)
- logging.getLogger('httpx').setLevel(logging.WARNING)
- logging.getLogger('httpcore').setLevel(logging.WARNING)
- logging.getLogger('asyncio').setLevel(logging.WARNING)
+ logging.getLogger("openai").setLevel(logging.WARNING)
+ logging.getLogger("openai._base_client").setLevel(logging.WARNING)
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
+ logging.getLogger("asyncio").setLevel(logging.WARNING)
# Suppress Modal/gRPC related debug spam
- logging.getLogger('hpack').setLevel(logging.WARNING)
- logging.getLogger('hpack.hpack').setLevel(logging.WARNING)
- logging.getLogger('grpc').setLevel(logging.WARNING)
- logging.getLogger('modal').setLevel(logging.WARNING)
- logging.getLogger('rex-deploy').setLevel(logging.INFO) # Keep INFO for sandbox status
+ logging.getLogger("hpack").setLevel(logging.WARNING)
+ logging.getLogger("hpack.hpack").setLevel(logging.WARNING)
+ logging.getLogger("grpc").setLevel(logging.WARNING)
+ logging.getLogger("modal").setLevel(logging.WARNING)
+ logging.getLogger("rex-deploy").setLevel(
+ logging.INFO
+ ) # Keep INFO for sandbox status
logger.info("Verbose logging enabled (third-party library logs suppressed)")
else:
# Set logging to INFO level for important messages only
logging.basicConfig(
level=logging.INFO,
- format='%(asctime)s - %(levelname)s - %(message)s',
- datefmt='%H:%M:%S'
+ format="%(asctime)s - %(levelname)s - %(message)s",
+ datefmt="%H:%M:%S",
)
# Suppress noisy library logging
- logging.getLogger('openai').setLevel(logging.ERROR)
- logging.getLogger('openai._base_client').setLevel(logging.ERROR)
- logging.getLogger('httpx').setLevel(logging.ERROR)
- logging.getLogger('httpcore').setLevel(logging.ERROR)
+ logging.getLogger("openai").setLevel(logging.ERROR)
+ logging.getLogger("openai._base_client").setLevel(logging.ERROR)
+ logging.getLogger("httpx").setLevel(logging.ERROR)
+ logging.getLogger("httpcore").setLevel(logging.ERROR)
if self.quiet_mode:
# In quiet mode (CLI default), suppress all tool/infra log
# noise. The TUI has its own rich display for status; logger
# INFO/WARNING messages just clutter it.
for quiet_logger in [
- 'tools', # all tools.* (terminal, browser, web, file, etc.)
- 'minisweagent', # mini-swe-agent execution backend
- 'run_agent', # agent runner internals
- 'trajectory_compressor',
- 'cron', # scheduler (only relevant in daemon mode)
- 'hermes_cli', # CLI helpers
+ "tools", # all tools.* (terminal, browser, web, file, etc.)
+ "minisweagent", # mini-swe-agent execution backend
+ "run_agent", # agent runner internals
+ "trajectory_compressor",
+ "cron", # scheduler (only relevant in daemon mode)
+ "hermes_cli", # CLI helpers
]:
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
-
+
# Initialize OpenAI client - defaults to OpenRouter
client_kwargs = {}
-
+
# Default to OpenRouter if no base_url provided
if base_url:
client_kwargs["base_url"] = base_url
else:
client_kwargs["base_url"] = OPENROUTER_BASE_URL
-
+
# Handle API key - OpenRouter is the primary provider
if api_key:
client_kwargs["api_key"] = api_key
else:
# Primary: OPENROUTER_API_KEY, fallback to direct provider keys
client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")
-
+
# OpenRouter app attribution โ shows hermes-agent in rankings/analytics
effective_base = client_kwargs.get("base_url", "")
if "openrouter" in effective_base.lower():
@@ -448,7 +480,7 @@ def __init__(
client_kwargs["default_headers"] = {
"User-Agent": "KimiCLI/1.0",
}
-
+
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
try:
self.client = OpenAI(**client_kwargs)
@@ -461,14 +493,18 @@ def __init__(
if key_used and key_used != "dummy-key" and len(key_used) > 12:
print(f"๐ Using API key: {key_used[:8]}...{key_used[-4:]}")
else:
- print(f"โ ๏ธ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
+ print(
+ f"โ ๏ธ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')"
+ )
except Exception as e:
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
-
+
# Provider fallback โ a single backup model/provider tried when the
# primary is exhausted (rate-limit, overload, connection failure).
# Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}
- self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None
+ self._fallback_model = (
+ fallback_model if isinstance(fallback_model, dict) else None
+ )
self._fallback_activated = False
if self._fallback_model:
fb_p = self._fallback_model.get("provider", "")
@@ -482,7 +518,7 @@ def __init__(
disabled_toolsets=disabled_toolsets,
quiet_mode=self.quiet_mode,
)
-
+
# Show tool configuration and store valid tool names for validation
self.valid_tool_names = set()
if self.tools:
@@ -490,7 +526,7 @@ def __init__(
tool_names = sorted(self.valid_tool_names)
if not self.quiet_mode:
print(f"๐ ๏ธ Loaded {len(self.tools)} tools: {', '.join(tool_names)}")
-
+
# Show filtering info if applied
if enabled_toolsets:
print(f" โ
Enabled toolsets: {', '.join(enabled_toolsets)}")
@@ -498,27 +534,39 @@ def __init__(
print(f" โ Disabled toolsets: {', '.join(disabled_toolsets)}")
elif not self.quiet_mode:
print("๐ ๏ธ No tools loaded (all tools filtered out or unavailable)")
-
+
# Check tool requirements
if self.tools and not self.quiet_mode:
requirements = check_toolset_requirements()
- missing_reqs = [name for name, available in requirements.items() if not available]
+ missing_reqs = [
+ name for name, available in requirements.items() if not available
+ ]
if missing_reqs:
- print(f"โ ๏ธ Some tools may not work due to missing requirements: {missing_reqs}")
-
+ print(
+ f"โ ๏ธ Some tools may not work due to missing requirements: {missing_reqs}"
+ )
+
# Show trajectory saving status
if self.save_trajectories and not self.quiet_mode:
print("๐ Trajectory saving enabled")
-
+
# Show ephemeral system prompt status
if self.ephemeral_system_prompt and not self.quiet_mode:
- prompt_preview = self.ephemeral_system_prompt[:60] + "..." if len(self.ephemeral_system_prompt) > 60 else self.ephemeral_system_prompt
- print(f"๐ Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")
-
+ prompt_preview = (
+ self.ephemeral_system_prompt[:60] + "..."
+ if len(self.ephemeral_system_prompt) > 60
+ else self.ephemeral_system_prompt
+ )
+ print(
+ f"๐ Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)"
+ )
+
# Show prompt caching status
if self._use_prompt_caching and not self.quiet_mode:
- print(f"๐พ Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")
-
+ print(
+ f"๐พ Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)"
+ )
+
# Session logging setup - auto-save conversation trajectories for debugging
self.session_start = datetime.now()
if session_id:
@@ -529,26 +577,27 @@ def __init__(
timestamp_str = self.session_start.strftime("%Y%m%d_%H%M%S")
short_uuid = uuid.uuid4().hex[:6]
self.session_id = f"{timestamp_str}_{short_uuid}"
-
+
# Session logs go into ~/.hermes/sessions/ alongside gateway sessions
hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
self.logs_dir = hermes_home / "sessions"
self.logs_dir.mkdir(parents=True, exist_ok=True)
self.session_log_file = self.logs_dir / f"session_{self.session_id}.json"
-
+
# Track conversation messages for session logging
self._session_messages: List[Dict[str, Any]] = []
-
+
# Cached system prompt -- built once per session, only rebuilt on compression
self._cached_system_prompt: Optional[str] = None
-
+
# Filesystem checkpoint manager (transparent โ not a tool)
from tools.checkpoint_manager import CheckpointManager
+
self._checkpoint_mgr = CheckpointManager(
enabled=checkpoints_enabled,
max_snapshots=checkpoint_max_snapshots,
)
-
+
# SQLite session store (optional -- provided by CLI or gateway)
self._session_db = session_db
self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes
@@ -567,11 +616,12 @@ def __init__(
)
except Exception as e:
logger.debug("Session DB create_session failed: %s", e)
-
+
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore
+
self._todo_store = TodoStore()
-
+
# Persistent memory (MEMORY.md + USER.md) -- loaded from disk
self._memory_store = None
self._memory_enabled = False
@@ -581,13 +631,17 @@ def __init__(
if not skip_memory:
try:
from hermes_cli.config import load_config as _load_mem_config
+
mem_config = _load_mem_config().get("memory", {})
self._memory_enabled = mem_config.get("memory_enabled", False)
- self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
+ self._user_profile_enabled = mem_config.get(
+ "user_profile_enabled", False
+ )
self._memory_nudge_interval = int(mem_config.get("nudge_interval", 10))
self._memory_flush_min_turns = int(mem_config.get("flush_min_turns", 6))
if self._memory_enabled or self._user_profile_enabled:
from tools.memory_tool import MemoryStore
+
self._memory_store = MemoryStore(
memory_char_limit=mem_config.get("memory_char_limit", 2200),
user_char_limit=mem_config.get("user_char_limit", 1375),
@@ -595,17 +649,22 @@ def __init__(
self._memory_store.load_from_disk()
except Exception:
pass # Memory is optional -- don't break agent init
-
+
# Honcho AI-native memory (cross-session user modeling)
# Reads ~/.honcho/config.json as the single source of truth.
self._honcho = None # HonchoSessionManager | None
self._honcho_session_key = honcho_session_key
if not skip_memory:
try:
- from honcho_integration.client import HonchoClientConfig, get_honcho_client
+ from honcho_integration.client import (
+ HonchoClientConfig,
+ get_honcho_client,
+ )
+
hcfg = HonchoClientConfig.from_global_config()
if hcfg.enabled and hcfg.api_key:
from honcho_integration.session import HonchoSessionManager
+
client = get_honcho_client(hcfg)
self._honcho = HonchoSessionManager(
honcho=client,
@@ -615,17 +674,19 @@ def __init__(
# Resolve session key: explicit arg > global sessions map > fallback
if not self._honcho_session_key:
self._honcho_session_key = (
- hcfg.resolve_session_name()
- or "hermes-default"
+ hcfg.resolve_session_name() or "hermes-default"
)
# Ensure session exists in Honcho
self._honcho.get_or_create(self._honcho_session_key)
# Inject session context into the honcho tool module
from tools.honcho_tools import set_session_context
+
set_session_context(self._honcho, self._honcho_session_key)
logger.info(
"Honcho active (session: %s, user: %s, workspace: %s)",
- self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id,
+ self._honcho_session_key,
+ hcfg.peer_name,
+ hcfg.workspace_id,
)
else:
if not hcfg.enabled:
@@ -640,18 +701,25 @@ def __init__(
self._skill_nudge_interval = 15
try:
from hermes_cli.config import load_config as _load_skills_config
+
skills_config = _load_skills_config().get("skills", {})
- self._skill_nudge_interval = int(skills_config.get("creation_nudge_interval", 15))
+ self._skill_nudge_interval = int(
+ skills_config.get("creation_nudge_interval", 15)
+ )
except Exception:
pass
-
+
# Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit
# Configuration via config.yaml (compression section) or environment variables
- compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
- compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
+ compression_threshold = float(
+ os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85")
+ )
+ compression_enabled = os.getenv(
+ "CONTEXT_COMPRESSION_ENABLED", "true"
+ ).lower() in ("true", "1", "yes")
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
-
+
self.context_compressor = ContextCompressor(
model=self.model,
threshold_percent=compression_threshold,
@@ -670,16 +738,20 @@ def __init__(
self.session_completion_tokens = 0
self.session_total_tokens = 0
self.session_api_calls = 0
-
+
if not self.quiet_mode:
if compression_enabled:
- print(f"๐ Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})")
+ print(
+ f"๐ Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold * 100)}% = {self.context_compressor.threshold_tokens:,})"
+ )
else:
- print(f"๐ Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
-
+ print(
+ f"๐ Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)"
+ )
+
def _max_tokens_param(self, value: int) -> dict:
"""Return the correct max tokens kwarg for the current provider.
-
+
OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
'max_completion_tokens'. OpenRouter, local models, and older
OpenAI models use 'max_tokens'.
@@ -695,30 +767,30 @@ def _max_tokens_param(self, value: int) -> dict:
def _has_content_after_think_block(self, content: str) -> bool:
"""
Check if content has actual text after any blocks.
-
+
This detects cases where the model only outputs reasoning but no actual
response, which indicates an incomplete generation that should be retried.
-
+
Args:
content: The assistant message content to check
-
+
Returns:
True if there's meaningful content after think blocks, False otherwise
"""
if not content:
return False
-
+
# Remove all ... blocks (including nested ones, non-greedy)
- cleaned = re.sub(r'.*?', '', content, flags=re.DOTALL)
-
+        cleaned = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
+
# Check if there's any non-whitespace content remaining
return bool(cleaned.strip())
-
+
def _strip_think_blocks(self, content: str) -> str:
"""Remove ... blocks from content, returning only visible text."""
if not content:
return ""
- return re.sub(r'.*?', '', content, flags=re.DOTALL)
+        return re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
def _looks_like_codex_intermediate_ack(
self,
@@ -730,14 +802,19 @@ def _looks_like_codex_intermediate_ack(
if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages):
return False
- assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower()
+ assistant_text = (
+ self._strip_think_blocks(assistant_content or "").strip().lower()
+ )
if not assistant_text:
return False
if len(assistant_text) > 1200:
return False
has_future_ack = bool(
- re.search(r"\b(i['โ]ll|i will|let me|i can do that|i can help with that)\b", assistant_text)
+ re.search(
+ r"\b(i['โ]ll|i will|let me|i can do that|i can help with that)\b",
+ assistant_text,
+ )
)
if not has_future_ack:
return False
@@ -785,56 +862,69 @@ def _looks_like_codex_intermediate_ack(
or "~/" in user_text
or "/" in user_text
)
- assistant_mentions_action = any(marker in assistant_text for marker in action_markers)
+ assistant_mentions_action = any(
+ marker in assistant_text for marker in action_markers
+ )
assistant_targets_workspace = any(
marker in assistant_text for marker in workspace_markers
)
- return (user_targets_workspace or assistant_targets_workspace) and assistant_mentions_action
-
-
+ return (
+ user_targets_workspace or assistant_targets_workspace
+ ) and assistant_mentions_action
+
def _extract_reasoning(self, assistant_message) -> Optional[str]:
"""
Extract reasoning/thinking content from an assistant message.
-
+
OpenRouter and various providers can return reasoning in multiple formats:
1. message.reasoning - Direct reasoning field (DeepSeek, Qwen, etc.)
2. message.reasoning_content - Alternative field (Moonshot AI, Novita, etc.)
3. message.reasoning_details - Array of {type, summary, ...} objects (OpenRouter unified)
-
+
Args:
assistant_message: The assistant message object from the API response
-
+
Returns:
Combined reasoning text, or None if no reasoning found
"""
reasoning_parts = []
-
+
# Check direct reasoning field
- if hasattr(assistant_message, 'reasoning') and assistant_message.reasoning:
+ if hasattr(assistant_message, "reasoning") and assistant_message.reasoning:
reasoning_parts.append(assistant_message.reasoning)
-
+
# Check reasoning_content field (alternative name used by some providers)
- if hasattr(assistant_message, 'reasoning_content') and assistant_message.reasoning_content:
+ if (
+ hasattr(assistant_message, "reasoning_content")
+ and assistant_message.reasoning_content
+ ):
# Don't duplicate if same as reasoning
if assistant_message.reasoning_content not in reasoning_parts:
reasoning_parts.append(assistant_message.reasoning_content)
-
+
# Check reasoning_details array (OpenRouter unified format)
# Format: [{"type": "reasoning.summary", "summary": "...", ...}, ...]
- if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+ if (
+ hasattr(assistant_message, "reasoning_details")
+ and assistant_message.reasoning_details
+ ):
for detail in assistant_message.reasoning_details:
if isinstance(detail, dict):
# Extract summary from reasoning detail object
- summary = detail.get('summary') or detail.get('content') or detail.get('text')
+ summary = (
+ detail.get("summary")
+ or detail.get("content")
+ or detail.get("text")
+ )
if summary and summary not in reasoning_parts:
reasoning_parts.append(summary)
-
+
# Combine all reasoning parts
if reasoning_parts:
return "\n\n".join(reasoning_parts)
-
+
return None
-
+
def _cleanup_task_resources(self, task_id: str) -> None:
"""Clean up VM and browser resources for a given task."""
try:
@@ -848,7 +938,9 @@ def _cleanup_task_resources(self, task_id: str) -> None:
if self.verbose_logging:
logging.warning(f"Failed to cleanup browser for task {task_id}: {e}")
- def _persist_session(self, messages: List[Dict], conversation_history: List[Dict] = None):
+ def _persist_session(
+ self, messages: List[Dict], conversation_history: List[Dict] = None
+ ):
"""Save session state to both JSON log and SQLite on any exit path.
Ensures conversations are never lost, even on errors or early returns.
@@ -859,7 +951,6 @@ def _persist_session(self, messages: List[Dict], conversation_history: List[Dict
def _flush_messages_to_session_db(self, messages: List[Dict], conversation_history: List[Dict] = None):
"""Persist any un-flushed messages to the SQLite session store.
-
Uses _last_flushed_db_idx to track which messages have already been
written, so repeated calls (from multiple exit paths) only write
truly new messages โ preventing the duplicate-write bug (#860).
@@ -896,44 +987,44 @@ def _flush_messages_to_session_db(self, messages: List[Dict], conversation_histo
def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
"""
Get messages up to (but not including) the last assistant turn.
-
+
This is used when we need to "roll back" to the last successful point
in the conversation, typically when the final assistant message is
incomplete or malformed.
-
+
Args:
messages: Full message list
-
+
Returns:
Messages up to the last complete assistant turn (ending with user/tool message)
"""
if not messages:
return []
-
+
# Find the index of the last assistant message
last_assistant_idx = None
for i in range(len(messages) - 1, -1, -1):
if messages[i].get("role") == "assistant":
last_assistant_idx = i
break
-
+
if last_assistant_idx is None:
# No assistant message found, return all messages
return messages.copy()
-
+
# Return everything up to (not including) the last assistant message
return messages[:last_assistant_idx]
-
+
def _format_tools_for_system_message(self) -> str:
"""
Format tool definitions for the system message in the trajectory format.
-
+
Returns:
str: JSON string representation of tool definitions
"""
if not self.tools:
return "[]"
-
+
# Convert tool definitions to the format expected in trajectories
formatted_tools = []
for tool in self.tools:
@@ -942,26 +1033,28 @@ def _format_tools_for_system_message(self) -> str:
"name": func["name"],
"description": func.get("description", ""),
"parameters": func.get("parameters", {}),
- "required": None # Match the format in the example
+ "required": None, # Match the format in the example
}
formatted_tools.append(formatted_tool)
-
+
return json.dumps(formatted_tools, ensure_ascii=False)
-
- def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_query: str, completed: bool) -> List[Dict[str, Any]]:
+
+ def _convert_to_trajectory_format(
+ self, messages: List[Dict[str, Any]], user_query: str, completed: bool
+ ) -> List[Dict[str, Any]]:
"""
Convert internal message format to trajectory format for saving.
-
+
Args:
messages (List[Dict]): Internal message history
user_query (str): Original user query
completed (bool): Whether the conversation completed successfully
-
+
Returns:
List[Dict]: Messages in trajectory format
"""
trajectory = []
-
+
# Add system message with tool definitions
system_msg = (
"You are a function calling AI model. You are provided with function signatures within XML tags. "
@@ -976,70 +1069,67 @@ def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_que
"Each function call should be enclosed within XML tags.\n"
"Example:\n\n{'name': ,'arguments': }\n"
)
-
- trajectory.append({
- "from": "system",
- "value": system_msg
- })
-
+
+ trajectory.append({"from": "system", "value": system_msg})
+
# Add the actual user prompt (from the dataset) as the first human message
- trajectory.append({
- "from": "human",
- "value": user_query
- })
-
+ trajectory.append({"from": "human", "value": user_query})
+
# Skip the first message (the user query) since we already added it above.
# Prefill messages are injected at API-call time only (not in the messages
# list), so no offset adjustment is needed here.
i = 1
-
+
while i < len(messages):
msg = messages[i]
-
+
if msg["role"] == "assistant":
# Check if this message has tool calls
if "tool_calls" in msg and msg["tool_calls"]:
# Format assistant message with tool calls
# Add tags around reasoning for trajectory storage
content = ""
-
+
# Prepend reasoning in tags if available (native thinking tokens)
if msg.get("reasoning") and msg["reasoning"].strip():
content = f"\n{msg['reasoning']}\n\n"
-
+
if msg.get("content") and msg["content"].strip():
# Convert any tags to tags
# (used when native thinking is disabled and model reasons via XML)
content += convert_scratchpad_to_think(msg["content"]) + "\n"
-
+
# Add tool calls wrapped in XML tags
for tool_call in msg["tool_calls"]:
# Parse arguments - should always succeed since we validate during conversation
# but keep try-except as safety net
try:
- arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
+ arguments = (
+ json.loads(tool_call["function"]["arguments"])
+ if isinstance(tool_call["function"]["arguments"], str)
+ else tool_call["function"]["arguments"]
+ )
except json.JSONDecodeError:
# This shouldn't happen since we validate and retry during conversation,
# but if it does, log warning and use empty dict
- logging.warning(f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}")
+ logging.warning(
+ f"Unexpected invalid JSON in trajectory conversion: {tool_call['function']['arguments'][:100]}"
+ )
arguments = {}
-
+
tool_call_json = {
"name": tool_call["function"]["name"],
- "arguments": arguments
+ "arguments": arguments,
}
content += f"\n{json.dumps(tool_call_json, ensure_ascii=False)}\n\n"
-
+
# Ensure every gpt turn has a block (empty if no reasoning)
# so the format is consistent for training data
if "" not in content:
content = "\n\n" + content
-
- trajectory.append({
- "from": "gpt",
- "value": content.rstrip()
- })
-
+
+ trajectory.append({"from": "gpt", "value": content.rstrip()})
+
# Collect all subsequent tool responses
tool_responses = []
j = i + 1
@@ -1047,7 +1137,7 @@ def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_que
tool_msg = messages[j]
# Format tool response with XML tags
tool_response = f"<tool_response>\n"
-
+
# Try to parse tool content as JSON if it looks like JSON
tool_content = tool_msg["content"]
try:
@@ -1055,61 +1145,63 @@ def _convert_to_trajectory_format(self, messages: List[Dict[str, Any]], user_que
tool_content = json.loads(tool_content)
except (json.JSONDecodeError, AttributeError):
pass # Keep as string if not valid JSON
-
- tool_response += json.dumps({
- "tool_call_id": tool_msg.get("tool_call_id", ""),
- "name": msg["tool_calls"][len(tool_responses)]["function"]["name"] if len(tool_responses) < len(msg["tool_calls"]) else "unknown",
- "content": tool_content
- }, ensure_ascii=False)
+
+ tool_response += json.dumps(
+ {
+ "tool_call_id": tool_msg.get("tool_call_id", ""),
+ "name": msg["tool_calls"][len(tool_responses)][
+ "function"
+ ]["name"]
+ if len(tool_responses) < len(msg["tool_calls"])
+ else "unknown",
+ "content": tool_content,
+ },
+ ensure_ascii=False,
+ )
tool_response += "</tool_response>\n"
tool_responses.append(tool_response)
j += 1
-
+
# Add all tool responses as a single message
if tool_responses:
- trajectory.append({
- "from": "tool",
- "value": "\n".join(tool_responses)
- })
+ trajectory.append(
+ {"from": "tool", "value": "\n".join(tool_responses)}
+ )
i = j - 1 # Skip the tool messages we just processed
-
+
else:
# Regular assistant message without tool calls
# Add <think> tags around reasoning for trajectory storage
content = ""
-
+
# Prepend reasoning in <think> tags if available (native thinking tokens)
if msg.get("reasoning") and msg["reasoning"].strip():
content = f"<think>\n{msg['reasoning']}\n</think>\n\n"
-
+
# Convert any <scratchpad> tags to <think> tags
# (used when native thinking is disabled and model reasons via XML)
raw_content = msg["content"] or ""
content += convert_scratchpad_to_think(raw_content)
-
+
# Ensure every gpt turn has a <think> block (empty if no reasoning)
if "<think>" not in content:
content = "<think>\n</think>\n\n" + content
-
- trajectory.append({
- "from": "gpt",
- "value": content.strip()
- })
-
+
+ trajectory.append({"from": "gpt", "value": content.strip()})
+
elif msg["role"] == "user":
- trajectory.append({
- "from": "human",
- "value": msg["content"]
- })
-
+ trajectory.append({"from": "human", "value": msg["content"]})
+
i += 1
-
+
return trajectory
-
- def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, completed: bool):
+
+ def _save_trajectory(
+ self, messages: List[Dict[str, Any]], user_query: str, completed: bool
+ ):
"""
Save conversation trajectory to JSONL file.
-
+
Args:
messages (List[Dict]): Complete message history
user_query (str): Original user query
@@ -1117,10 +1209,10 @@ def _save_trajectory(self, messages: List[Dict[str, Any]], user_query: str, comp
"""
if not self.save_trajectories:
return
-
+
trajectory = self._convert_to_trajectory_format(messages, user_query, completed)
_save_trajectory_to_file(trajectory, self.model, completed)
-
+
def _mask_api_key_for_logs(self, key: Optional[str]) -> Optional[str]:
if not key:
return None
@@ -1185,7 +1277,9 @@ def _dump_api_request_debug(
response_obj = getattr(error, "response", None)
if response_obj is not None:
try:
- error_info["response_status"] = getattr(response_obj, "status_code", None)
+ error_info["response_status"] = getattr(
+ response_obj, "status_code", None
+ )
error_info["response_text"] = response_obj.text
except Exception as e:
logger.debug("Could not extract error response details: %s", e)
@@ -1193,7 +1287,9 @@ def _dump_api_request_debug(
dump_payload["error"] = error_info
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
- dump_file = self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
+ dump_file = (
+ self.logs_dir / f"request_dump_{self.session_id}_{timestamp}.json"
+ )
dump_file.write_text(
json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str),
encoding="utf-8",
@@ -1201,13 +1297,22 @@ def _dump_api_request_debug(
print(f"{self.log_prefix}🧾 Request debug dump written to: {dump_file}")
- if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {"1", "true", "yes", "on"}:
- print(json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str))
+ if os.getenv("HERMES_DUMP_REQUEST_STDOUT", "").strip().lower() in {
+ "1",
+ "true",
+ "yes",
+ "on",
+ }:
+ print(
+ json.dumps(dump_payload, ensure_ascii=False, indent=2, default=str)
+ )
return dump_file
except Exception as dump_error:
if self.verbose_logging:
- logging.warning(f"Failed to dump API request debug payload: {dump_error}")
+ logging.warning(
+ f"Failed to dump API request debug payload: {dump_error}"
+ )
return None
@staticmethod
@@ -1216,8 +1321,8 @@ def _clean_session_content(content: str) -> str:
if not content:
return content
content = convert_scratchpad_to_think(content)
-        content = re.sub(r'\n+(</think>)', r'\n\1', content)
-        content = re.sub(r'(<think>)\n+', r'\1\n', content)
+        content = re.sub(r"\n+(</think>)", r"\n\1", content)
+        content = re.sub(r"(<think>)\n+", r"\1\n", content)
return content.strip()
def _save_session_log(self, messages: List[Dict[str, Any]] = None):
@@ -1264,26 +1369,26 @@ def _save_session_log(self, messages: List[Dict[str, Any]] = None):
except Exception as e:
if self.verbose_logging:
logging.warning(f"Failed to save session log: {e}")
-
+
def interrupt(self, message: str = None) -> None:
"""
Request the agent to interrupt its current tool-calling loop.
-
+
Call this from another thread (e.g., input handler, message receiver)
to gracefully stop the agent and process a new message.
-
+
Also signals long-running tool executions (e.g. terminal commands)
to terminate early, so the agent can respond immediately.
-
+
Args:
message: Optional new message that triggered the interrupt.
If provided, the agent will include this in its response context.
-
+
Example (CLI):
# In a separate input thread:
if user_typed_something:
agent.interrupt(user_input)
-
+
Example (Messaging):
# When new message arrives for active session:
if session_has_running_agent:
@@ -1300,18 +1405,27 @@ def interrupt(self, message: str = None) -> None:
except Exception as e:
logger.debug("Failed to propagate interrupt to child agent: %s", e)
if not self.quiet_mode:
-            print(f"\n⚡ Interrupt requested" + (f": '{message[:40]}...'" if message and len(message) > 40 else f": '{message}'" if message else ""))
-
+ print(
+                f"\n⚡ Interrupt requested"
+ + (
+ f": '{message[:40]}...'"
+ if message and len(message) > 40
+ else f": '{message}'"
+ if message
+ else ""
+ )
+ )
+
def clear_interrupt(self) -> None:
"""Clear any pending interrupt request and the global tool interrupt signal."""
self._interrupt_requested = False
self._interrupt_message = None
_set_interrupt(False)
-
+
def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
"""
Recover todo state from conversation history.
-
+
The gateway creates a fresh AIAgent per message, so the in-memory
TodoStore is empty. We scan the history for the most recent todo
tool response and replay it to reconstruct the state.
@@ -1332,14 +1446,16 @@ def _hydrate_todo_store(self, history: List[Dict[str, Any]]) -> None:
break
except (json.JSONDecodeError, TypeError):
continue
-
+
if last_todo_response:
# Replay the items into the store (replace mode)
self._todo_store.write(last_todo_response, merge=False)
if not self.quiet_mode:
-                print(f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history")
+ print(
+                    f"{self.log_prefix}📋 Restored {len(last_todo_response)} todo item(s) from history"
+ )
_set_interrupt(False)
-
+
@property
def is_interrupted(self) -> bool:
"""Check if an interrupt has been requested."""
@@ -1355,7 +1471,9 @@ def _honcho_prefetch(self, user_message: str) -> str:
if not self._honcho or not self._honcho_session_key:
return ""
try:
- ctx = self._honcho.get_prefetch_context(self._honcho_session_key, user_message)
+ ctx = self._honcho.get_prefetch_context(
+ self._honcho_session_key, user_message
+ )
if not ctx:
return ""
parts = []
@@ -1384,11 +1502,13 @@ def _honcho_save_user_observation(self, content: str) -> str:
session = self._honcho.get_or_create(self._honcho_session_key)
session.add_message("user", f"[observation] {content.strip()}")
self._honcho.save(session)
- return json.dumps({
- "success": True,
- "target": "user",
- "message": "Saved to Honcho user model.",
- })
+ return json.dumps(
+ {
+ "success": True,
+ "target": "user",
+ "message": "Saved to Honcho user model.",
+ }
+ )
except Exception as e:
logger.debug("Honcho user observation failed: %s", e)
return json.dumps({"success": False, "error": f"Honcho save failed: {e}"})
@@ -1408,7 +1528,7 @@ def _honcho_sync(self, user_content: str, assistant_content: str) -> None:
def _build_system_prompt(self, system_message: str = None) -> str:
"""
Assemble the full system prompt from all layers.
-
+
Called once per session (cached on self._cached_system_prompt) and only
rebuilt after context compression events. This ensures the system prompt
is stable across all turns in a session, maximizing prefix cache hits.
@@ -1468,6 +1588,7 @@ def _build_system_prompt(self, system_message: str = None) -> str:
prompt_parts.append(context_files_prompt)
from hermes_time import now as _hermes_now
+
now = _hermes_now()
prompt_parts.append(
f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
@@ -1478,7 +1599,7 @@ def _build_system_prompt(self, system_message: str = None) -> str:
prompt_parts.append(PLATFORM_HINTS[platform_key])
return "\n\n".join(prompt_parts)
-
+
def _repair_tool_call(self, tool_name: str) -> str | None:
"""Attempt to repair a mismatched tool name before aborting.
@@ -1510,7 +1631,7 @@ def _repair_tool_call(self, tool_name: str) -> str | None:
def _invalidate_system_prompt(self):
"""
Invalidate the cached system prompt, forcing a rebuild on the next turn.
-
+
Called after context compression events. Also reloads memory from disk
so the rebuilt prompt captures any writes from this session.
"""
@@ -1518,7 +1639,9 @@ def _invalidate_system_prompt(self):
if self._memory_store:
self._memory_store.load_from_disk()
- def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
+ def _responses_tools(
+ self, tools: Optional[List[Dict[str, Any]]] = None
+ ) -> Optional[List[Dict[str, Any]]]:
"""Convert chat-completions tool schemas to Responses function-tool schemas."""
source_tools = tools if tools is not None else self.tools
if not source_tools:
@@ -1530,13 +1653,17 @@ def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Opti
name = fn.get("name")
if not isinstance(name, str) or not name.strip():
continue
- converted.append({
- "type": "function",
- "name": name,
- "description": fn.get("description", ""),
- "strict": False,
- "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
- })
+ converted.append(
+ {
+ "type": "function",
+ "name": name,
+ "description": fn.get("description", ""),
+ "strict": False,
+ "parameters": fn.get(
+ "parameters", {"type": "object", "properties": {}}
+ ),
+ }
+ )
return converted or None
@staticmethod
@@ -1571,13 +1698,13 @@ def _derive_responses_function_call_id(
if source.startswith("fc_"):
return source
if source.startswith("call_") and len(source) > len("call_"):
- return f"fc_{source[len('call_'):]}"
+ return f"fc_{source[len('call_') :]}"
sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
if sanitized.startswith("fc_"):
return sanitized
if sanitized.startswith("call_") and len(sanitized) > len("call_"):
- return f"fc_{sanitized[len('call_'):]}"
+ return f"fc_{sanitized[len('call_') :]}"
if sanitized:
return f"fc_{sanitized[:48]}"
@@ -1585,7 +1712,9 @@ def _derive_responses_function_call_id(
digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
return f"fc_{digest}"
- def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ def _chat_messages_to_responses_input(
+ self, messages: List[Dict[str, Any]]
+ ) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
items: List[Dict[str, Any]] = []
@@ -1622,8 +1751,8 @@ def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> L
if not isinstance(fn_name, str) or not fn_name.strip():
continue
- embedded_call_id, embedded_response_item_id = self._split_responses_tool_id(
- tc.get("id")
+ embedded_call_id, embedded_response_item_id = (
+ self._split_responses_tool_id(tc.get("id"))
)
call_id = tc.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
@@ -1634,7 +1763,7 @@ def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> L
and embedded_response_item_id.startswith("fc_")
and len(embedded_response_item_id) > len("fc_")
):
- call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
+ call_id = f"call_{embedded_response_item_id[len('fc_') :]}"
else:
call_id = f"call_{uuid.uuid4().hex[:12]}"
call_id = call_id.strip()
@@ -1646,12 +1775,14 @@ def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> L
arguments = str(arguments)
arguments = arguments.strip() or "{}"
- items.append({
- "type": "function_call",
- "call_id": call_id,
- "name": fn_name,
- "arguments": arguments,
- })
+ items.append(
+ {
+ "type": "function_call",
+ "call_id": call_id,
+ "name": fn_name,
+ "arguments": arguments,
+ }
+ )
continue
items.append({"role": role, "content": content_text})
@@ -1665,11 +1796,13 @@ def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> L
call_id = raw_tool_call_id.strip()
if not isinstance(call_id, str) or not call_id.strip():
continue
- items.append({
- "type": "function_call_output",
- "call_id": call_id,
- "output": str(msg.get("content", "") or ""),
- })
+ items.append(
+ {
+ "type": "function_call_output",
+ "call_id": call_id,
+ "output": str(msg.get("content", "") or ""),
+ }
+ )
return items
@@ -1687,9 +1820,13 @@ def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
call_id = item.get("call_id")
name = item.get("name")
if not isinstance(call_id, str) or not call_id.strip():
- raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
+ raise ValueError(
+ f"Codex Responses input[{idx}] function_call is missing call_id."
+ )
if not isinstance(name, str) or not name.strip():
- raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
+ raise ValueError(
+ f"Codex Responses input[{idx}] function_call is missing name."
+ )
arguments = item.get("arguments", "{}")
if isinstance(arguments, dict):
@@ -1711,7 +1848,9 @@ def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
if item_type == "function_call_output":
call_id = item.get("call_id")
if not isinstance(call_id, str) or not call_id.strip():
- raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
+ raise ValueError(
+ f"Codex Responses input[{idx}] function_call_output is missing call_id."
+ )
output = item.get("output", "")
if output is None:
output = ""
@@ -1730,7 +1869,10 @@ def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]:
if item_type == "reasoning":
encrypted = item.get("encrypted_content")
if isinstance(encrypted, str) and encrypted:
- reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
+ reasoning_item = {
+ "type": "reasoning",
+ "encrypted_content": encrypted,
+ }
item_id = item.get("id")
if isinstance(item_id, str) and item_id:
reasoning_item["id"] = item_id
@@ -1771,11 +1913,15 @@ def _preflight_codex_api_kwargs(
required = {"model", "instructions", "input"}
missing = [key for key in required if key not in api_kwargs]
if missing:
- raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
+ raise ValueError(
+ f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}."
+ )
model = api_kwargs.get("model")
if not isinstance(model, str) or not model.strip():
- raise ValueError("Codex Responses request 'model' must be a non-empty string.")
+ raise ValueError(
+ "Codex Responses request 'model' must be a non-empty string."
+ )
model = model.strip()
instructions = api_kwargs.get("instructions")
@@ -1791,20 +1937,28 @@ def _preflight_codex_api_kwargs(
normalized_tools = None
if tools is not None:
if not isinstance(tools, list):
- raise ValueError("Codex Responses request 'tools' must be a list when provided.")
+ raise ValueError(
+ "Codex Responses request 'tools' must be a list when provided."
+ )
normalized_tools = []
for idx, tool in enumerate(tools):
if not isinstance(tool, dict):
raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
if tool.get("type") != "function":
- raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
+ raise ValueError(
+ f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}."
+ )
name = tool.get("name")
parameters = tool.get("parameters")
if not isinstance(name, str) or not name.strip():
- raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
+ raise ValueError(
+ f"Codex Responses tools[{idx}] is missing a valid name."
+ )
if not isinstance(parameters, dict):
- raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
+ raise ValueError(
+ f"Codex Responses tools[{idx}] is missing valid parameters."
+ )
description = tool.get("description", "")
if description is None:
@@ -1873,7 +2027,9 @@ def _preflight_codex_api_kwargs(
normalized["stream"] = True
allowed_keys.add("stream")
elif "stream" in api_kwargs:
- raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
+ raise ValueError(
+ "Codex Responses stream flag is only allowed in fallback streaming requests."
+ )
unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys)
if unexpected:
@@ -1932,14 +2088,22 @@ def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
if isinstance(error_obj, dict):
error_msg = error_obj.get("message") or str(error_obj)
else:
- error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
+ error_msg = (
+ str(error_obj)
+ if error_obj
+ else f"Responses API returned status '{response_status}'"
+ )
raise RuntimeError(error_msg)
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
- has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+ has_incomplete_items = response_status in {
+ "queued",
+ "in_progress",
+ "incomplete",
+ }
saw_commentary_phase = False
saw_final_answer_phase = False
@@ -1985,7 +2149,9 @@ def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
for part in summary:
text = getattr(part, "text", None)
if isinstance(text, str):
- raw_summary.append({"type": "summary_text", "text": text})
+ raw_summary.append(
+ {"type": "summary_text", "text": text}
+ )
raw_item["summary"] = raw_summary
reasoning_items_raw.append(raw_item)
elif item_type == "function_call":
@@ -1998,19 +2164,27 @@ def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
- call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+ call_id = (
+ raw_call_id
+ if isinstance(raw_call_id, str) and raw_call_id.strip()
+ else embedded_call_id
+ )
if not isinstance(call_id, str) or not call_id.strip():
call_id = f"call_{uuid.uuid4().hex[:12]}"
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
- response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
- tool_calls.append(SimpleNamespace(
- id=call_id,
- call_id=call_id,
- response_item_id=response_item_id,
- type="function",
- function=SimpleNamespace(name=fn_name, arguments=arguments),
- ))
+ response_item_id = self._derive_responses_function_call_id(
+ call_id, response_item_id
+ )
+ tool_calls.append(
+ SimpleNamespace(
+ id=call_id,
+ call_id=call_id,
+ response_item_id=response_item_id,
+ type="function",
+ function=SimpleNamespace(name=fn_name, arguments=arguments),
+ )
+ )
elif item_type == "custom_tool_call":
fn_name = getattr(item, "name", "") or ""
arguments = getattr(item, "input", "{}")
@@ -2019,19 +2193,27 @@ def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
raw_call_id = getattr(item, "call_id", None)
raw_item_id = getattr(item, "id", None)
embedded_call_id, _ = self._split_responses_tool_id(raw_item_id)
- call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+ call_id = (
+ raw_call_id
+ if isinstance(raw_call_id, str) and raw_call_id.strip()
+ else embedded_call_id
+ )
if not isinstance(call_id, str) or not call_id.strip():
call_id = f"call_{uuid.uuid4().hex[:12]}"
call_id = call_id.strip()
response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
- response_item_id = self._derive_responses_function_call_id(call_id, response_item_id)
- tool_calls.append(SimpleNamespace(
- id=call_id,
- call_id=call_id,
- response_item_id=response_item_id,
- type="function",
- function=SimpleNamespace(name=fn_name, arguments=arguments),
- ))
+ response_item_id = self._derive_responses_function_call_id(
+ call_id, response_item_id
+ )
+ tool_calls.append(
+ SimpleNamespace(
+ id=call_id,
+ call_id=call_id,
+ response_item_id=response_item_id,
+ type="function",
+ function=SimpleNamespace(name=fn_name, arguments=arguments),
+ )
+ )
final_text = "\n".join([p for p in content_parts if p]).strip()
if not final_text and hasattr(response, "output_text"):
@@ -2050,7 +2232,9 @@ def _normalize_codex_response(self, response: Any) -> tuple[Any, str]:
if tool_calls:
finish_reason = "tool_calls"
- elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+ elif has_incomplete_items or (
+ saw_commentary_phase and not saw_final_answer_phase
+ ):
finish_reason = "incomplete"
else:
finish_reason = "stop"
@@ -2086,7 +2270,9 @@ def _run_codex_create_stream_fallback(self, api_kwargs: dict):
"""Fallback path for stream completion edge cases on Codex-style Responses backends."""
fallback_kwargs = dict(api_kwargs)
fallback_kwargs["stream"] = True
- fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True)
+ fallback_kwargs = self._preflight_codex_api_kwargs(
+ fallback_kwargs, allow_stream=True
+ )
stream_or_response = self.client.responses.create(**fallback_kwargs)
# Compatibility shim for mocks or providers that still return a concrete response.
@@ -2101,7 +2287,11 @@ def _run_codex_create_stream_fallback(self, api_kwargs: dict):
event_type = getattr(event, "type", None)
if not event_type and isinstance(event, dict):
event_type = event.get("type")
- if event_type not in {"response.completed", "response.incomplete", "response.failed"}:
+ if event_type not in {
+ "response.completed",
+ "response.incomplete",
+ "response.failed",
+ }:
continue
terminal_response = getattr(event, "response", None)
@@ -2119,7 +2309,9 @@ def _run_codex_create_stream_fallback(self, api_kwargs: dict):
if terminal_response is not None:
return terminal_response
- raise RuntimeError("Responses create(stream=True) fallback did not emit a terminal response.")
+ raise RuntimeError(
+ "Responses create(stream=True) fallback did not emit a terminal response."
+ )
def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
if self.api_mode != "codex_responses" or self.provider != "openai-codex":
@@ -2153,7 +2345,9 @@ def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
try:
self.client = OpenAI(**self._client_kwargs)
except Exception as exc:
- logger.warning("Failed to rebuild OpenAI client after Codex refresh: %s", exc)
+ logger.warning(
+ "Failed to rebuild OpenAI client after Codex refresh: %s", exc
+ )
return False
return True
@@ -2166,7 +2360,9 @@ def _try_refresh_nous_client_credentials(self, *, force: bool = True) -> bool:
from hermes_cli.auth import resolve_nous_runtime_credentials
creds = resolve_nous_runtime_credentials(
- min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+ min_key_ttl_seconds=max(
+ 60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))
+ ),
timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
force_mint=force,
)
@@ -2196,7 +2392,9 @@ def _try_refresh_nous_client_credentials(self, *, force: bool = True) -> bool:
try:
self.client = OpenAI(**self._client_kwargs)
except Exception as exc:
- logger.warning("Failed to rebuild OpenAI client after Nous refresh: %s", exc)
+ logger.warning(
+ "Failed to rebuild OpenAI client after Nous refresh: %s", exc
+ )
return False
return True
@@ -2205,7 +2403,7 @@ def _interruptible_api_call(self, api_kwargs: dict):
"""
Run the API call in a background thread so the main conversation loop
can detect interrupts without waiting for the full HTTP round-trip.
-
+
On interrupt, closes the HTTP client to cancel the in-flight request
(stops token generation and avoids wasting money), then rebuilds the
client for future calls.
@@ -2217,7 +2415,9 @@ def _call():
if self.api_mode == "codex_responses":
result["response"] = self._run_codex_stream(api_kwargs)
else:
- result["response"] = self.client.chat.completions.create(**api_kwargs)
+ result["response"] = self.client.chat.completions.create(
+ **api_kwargs
+ )
except Exception as e:
result["error"] = e
@@ -2275,13 +2475,15 @@ def _resolve_fallback_credentials(
resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
try:
import hermes_cli.auth as _auth
+
resolver = getattr(_auth, resolver_name)
creds = resolver()
return creds["api_key"], creds["base_url"], api_mode
except Exception as e:
logging.warning(
"Fallback to %s failed (credential resolution): %s",
- fb_provider, e,
+ fb_provider,
+ e,
)
return None
@@ -2357,8 +2559,7 @@ def _try_activate_fallback(self) -> bool:
# Re-evaluate prompt caching for the new provider/model
self._use_prompt_caching = (
- "openrouter" in fb_base_url.lower()
- and "claude" in fb_model.lower()
+ "openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()
)
print(
@@ -2367,7 +2568,9 @@ def _try_activate_fallback(self) -> bool:
)
logging.info(
"Fallback activated: %s → %s (%s)",
- old_model, fb_model, fb_provider,
+ old_model,
+ fb_model,
+ fb_provider,
)
return True
except Exception as e:
@@ -2455,10 +2658,7 @@ def _build_api_kwargs(self, api_messages: list) -> dict:
if self.reasoning_config is not None:
extra_body["reasoning"] = self.reasoning_config
else:
- extra_body["reasoning"] = {
- "enabled": True,
- "effort": "medium"
- }
+ extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
# Nous Portal product attribution
if _is_nous:
@@ -2478,8 +2678,14 @@ def _build_assistant_message(self, assistant_message, finish_reason: str) -> dic
reasoning_text = self._extract_reasoning(assistant_message)
if reasoning_text and self.verbose_logging:
- preview = reasoning_text[:100] + "..." if len(reasoning_text) > 100 else reasoning_text
- logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {preview}")
+ preview = (
+ reasoning_text[:100] + "..."
+ if len(reasoning_text) > 100
+ else reasoning_text
+ )
+ logging.debug(
+ f"Captured reasoning ({len(reasoning_text)} chars): {preview}"
+ )
if reasoning_text and self.reasoning_callback:
try:
@@ -2494,7 +2700,10 @@ def _build_assistant_message(self, assistant_message, finish_reason: str) -> dic
"finish_reason": finish_reason,
}
- if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
+ if (
+ hasattr(assistant_message, "reasoning_details")
+ and assistant_message.reasoning_details
+ ):
# Pass reasoning_details back unmodified so providers (OpenRouter,
# Anthropic, OpenAI) can maintain reasoning continuity across turns.
# Each provider may include opaque fields (signature, encrypted_content)
@@ -2533,7 +2742,10 @@ def _build_assistant_message(self, assistant_message, finish_reason: str) -> dic
call_id = call_id.strip()
response_item_id = getattr(tool_call, "response_item_id", None)
- if not isinstance(response_item_id, str) or not response_item_id.strip():
+ if (
+ not isinstance(response_item_id, str)
+ or not response_item_id.strip()
+ ):
_, embedded_response_item_id = self._split_responses_tool_id(raw_id)
response_item_id = embedded_response_item_id
@@ -2549,7 +2761,7 @@ def _build_assistant_message(self, assistant_message, finish_reason: str) -> dic
"type": tool_call.type,
"function": {
"name": tool_call.function.name,
- "arguments": tool_call.function.arguments
+ "arguments": tool_call.function.arguments,
},
}
# Preserve extra_content (e.g. Gemini thought_signature) so it
@@ -2583,12 +2795,14 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
return
if "memory" not in self.valid_tool_names or not self._memory_store:
return
- effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns
+ effective_min = (
+ min_turns if min_turns is not None else self._memory_flush_min_turns
+ )
if self._user_turn_count < effective_min:
return
if messages is None:
- messages = getattr(self, '_session_messages', None)
+ messages = getattr(self, "_session_messages", None)
if not messages or len(messages) < 3:
return
@@ -2597,7 +2811,11 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
"Please save anything worth remembering to your memories.]"
)
_sentinel = f"__flush_{id(self)}_{time.monotonic()}"
- flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
+ flush_msg = {
+ "role": "user",
+ "content": flush_content,
+ "_flush_sentinel": _sentinel,
+ }
messages.append(flush_msg)
try:
@@ -2612,14 +2830,23 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
api_msg.pop("reasoning", None)
api_msg.pop("finish_reason", None)
api_msg.pop("_flush_sentinel", None)
+ # Remove call_id and response_item_id from tool_calls - these are
+ # OpenAI Responses API specific and rejected by Mistral with 422
+ if "tool_calls" in api_msg and isinstance(api_msg["tool_calls"], list):
+ for tc in api_msg["tool_calls"]:
+ if isinstance(tc, dict):
+ tc.pop("call_id", None)
+ tc.pop("response_item_id", None)
api_messages.append(api_msg)
if self._cached_system_prompt:
- api_messages = [{"role": "system", "content": self._cached_system_prompt}] + api_messages
+ api_messages = [
+ {"role": "system", "content": self._cached_system_prompt}
+ ] + api_messages
# Make one API call with only the memory tool available
memory_tool_def = None
- for t in (self.tools or []):
+ for t in self.tools or []:
if t.get("function", {}).get("name") == "memory":
memory_tool_def = t
break
@@ -2631,6 +2858,7 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
# Use auxiliary client for the flush call when available --
# it's cheaper and avoids Codex Responses API incompatibility.
from agent.auxiliary_client import get_text_auxiliary_client
+
aux_client, aux_model = get_text_auxiliary_client()
if aux_client:
@@ -2641,7 +2869,9 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
"temperature": 0.3,
"max_tokens": 5120,
}
- response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
+ response = aux_client.chat.completions.create(
+ **api_kwargs, timeout=30.0
+ )
elif self.api_mode == "codex_responses":
# No auxiliary client -- use the Codex Responses path directly
codex_kwargs = self._build_api_kwargs(api_messages)
@@ -2658,7 +2888,9 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
"temperature": 0.3,
**self._max_tokens_param(5120),
}
- response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
+ response = self.client.chat.completions.create(
+ **api_kwargs, timeout=30.0
+ )
# Extract tool calls from the response, handling both API formats
tool_calls = []
@@ -2677,6 +2909,7 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
args = json.loads(tc.function.arguments)
flush_target = args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
+
result = _memory_tool(
action=args.get("action"),
target=flush_target,
@@ -2684,10 +2917,16 @@ def flush_memories(self, messages: list = None, min_turns: int = None):
old_text=args.get("old_text"),
store=self._memory_store,
)
- if self._honcho and flush_target == "user" and args.get("action") == "add":
+ if (
+ self._honcho
+ and flush_target == "user"
+ and args.get("action") == "add"
+ ):
self._honcho_save_user_observation(args.get("content", ""))
if not self.quiet_mode:
- print(f" ๐ง Memory flush: saved to {args.get('target', 'memory')}")
+ print(
+ f" ๐ง Memory flush: saved to {args.get('target', 'memory')}"
+ )
except Exception as e:
logger.debug("Memory flush tool call failed: %s", e)
except Exception as e:
@@ -2711,7 +2950,9 @@ def _compress_context(self, messages: list, system_message: str, *, approx_token
# Pre-compression memory flush: let the model save memories before they're lost
self.flush_memories(messages, min_turns=0)
- compressed = self.context_compressor.compress(messages, current_tokens=approx_tokens)
+ compressed = self.context_compressor.compress(
+ messages, current_tokens=approx_tokens
+ )
todo_snapshot = self._todo_store.format_for_injection()
if todo_snapshot:
@@ -2746,7 +2987,9 @@ def _compress_context(self, messages: list, system_message: str, *, approx_token
old_title = self._session_db.get_session_title(self.session_id)
self._session_db.end_session(self.session_id, "compression")
old_session_id = self.session_id
- self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+ self.session_id = (
+ f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+ )
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or "cli",
@@ -2756,7 +2999,9 @@ def _compress_context(self, messages: list, system_message: str, *, approx_token
# Auto-number the title for the continuation session
if old_title:
try:
- new_title = self._session_db.get_next_title_in_lineage(old_title)
+ new_title = self._session_db.get_next_title_in_lineage(
+ old_title
+ )
self._session_db.set_session_title(self.session_id, new_title)
except (ValueError, Exception) as e:
logger.debug("Could not propagate title on compression: %s", e)
@@ -2775,9 +3020,11 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
# If the user sent "stop" during a previous tool's execution,
# do NOT start any more tools -- skip them all immediately.
if self._interrupt_requested:
- remaining_calls = assistant_message.tool_calls[i-1:]
+ remaining_calls = assistant_message.tool_calls[i - 1 :]
if remaining_calls:
- print(f"{self.log_prefix}โก Interrupt: skipping {len(remaining_calls)} tool call(s)")
+ print(
+ f"{self.log_prefix}โก Interrupt: skipping {len(remaining_calls)} tool call(s)"
+ )
for skipped_tc in remaining_calls:
skipped_name = skipped_tc.function.name
skip_msg = {
@@ -2806,8 +3053,14 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
if not self.quiet_mode:
args_str = json.dumps(function_args, ensure_ascii=False)
- args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
- print(f" ๐ Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
+ args_preview = (
+ args_str[: self.log_prefix_chars] + "..."
+ if len(args_str) > self.log_prefix_chars
+ else args_str
+ )
+ print(
+ f" ๐ Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}"
+ )
if self.tool_progress_callback:
try:
@@ -2817,11 +3070,16 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
logging.debug(f"Tool progress callback error: {cb_err}")
# Checkpoint: snapshot working dir before file-mutating tools
- if function_name in ("write_file", "patch") and self._checkpoint_mgr.enabled:
+ if (
+ function_name in ("write_file", "patch")
+ and self._checkpoint_mgr.enabled
+ ):
try:
file_path = function_args.get("path", "")
if file_path:
- work_dir = self._checkpoint_mgr.get_working_dir_for_path(file_path)
+ work_dir = self._checkpoint_mgr.get_working_dir_for_path(
+ file_path
+ )
self._checkpoint_mgr.ensure_checkpoint(
work_dir, f"before {function_name}"
)
@@ -2832,6 +3090,7 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
if function_name == "todo":
from tools.todo_tool import todo_tool as _todo_tool
+
function_result = _todo_tool(
todos=function_args.get("todos"),
merge=function_args.get("merge", False),
@@ -2839,12 +3098,19 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}")
+ print(
+ f" {_get_cute_tool_message_impl('todo', function_args, tool_duration, result=function_result)}"
+ )
elif function_name == "session_search":
if not self._session_db:
- function_result = json.dumps({"success": False, "error": "Session database not available."})
+ function_result = json.dumps(
+ {"success": False, "error": "Session database not available."}
+ )
else:
- from tools.session_search_tool import session_search as _session_search
+ from tools.session_search_tool import (
+ session_search as _session_search,
+ )
+
function_result = _session_search(
query=function_args.get("query", ""),
role_filter=function_args.get("role_filter"),
@@ -2854,10 +3120,13 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}")
+ print(
+ f" {_get_cute_tool_message_impl('session_search', function_args, tool_duration, result=function_result)}"
+ )
elif function_name == "memory":
target = function_args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
+
function_result = _memory_tool(
action=function_args.get("action"),
target=target,
@@ -2866,13 +3135,20 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
store=self._memory_store,
)
# Also send user observations to Honcho when active
- if self._honcho and target == "user" and function_args.get("action") == "add":
+ if (
+ self._honcho
+ and target == "user"
+ and function_args.get("action") == "add"
+ ):
self._honcho_save_user_observation(function_args.get("content", ""))
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}")
+ print(
+ f" {_get_cute_tool_message_impl('memory', function_args, tool_duration, result=function_result)}"
+ )
elif function_name == "clarify":
from tools.clarify_tool import clarify_tool as _clarify_tool
+
function_result = _clarify_tool(
question=function_args.get("question", ""),
choices=function_args.get("choices"),
@@ -2880,19 +3156,26 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
- print(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
+ print(
+ f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}"
+ )
elif function_name == "delegate_task":
from tools.delegate_tool import delegate_task as _delegate_task
+
tasks_arg = function_args.get("tasks")
if tasks_arg and isinstance(tasks_arg, list):
spinner_label = f"๐ delegating {len(tasks_arg)} tasks"
else:
goal_preview = (function_args.get("goal") or "")[:30]
- spinner_label = f"๐ {goal_preview}" if goal_preview else "๐ delegating"
+ spinner_label = (
+ f"๐ {goal_preview}" if goal_preview else "๐ delegating"
+ )
spinner = None
if self.quiet_mode:
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
- spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
+ spinner = KawaiiSpinner(
+ f"{face} {spinner_label}", spinner_type="dots"
+ )
spinner.start()
self._delegate_spinner = spinner
_delegate_result = None
@@ -2909,7 +3192,12 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
finally:
self._delegate_spinner = None
tool_duration = time.time() - tool_start_time
- cute_msg = _get_cute_tool_message_impl('delegate_task', function_args, tool_duration, result=_delegate_result)
+ cute_msg = _get_cute_tool_message_impl(
+ "delegate_task",
+ function_args,
+ tool_duration,
+ result=_delegate_result,
+ )
if spinner:
spinner.stop(cute_msg)
elif self.quiet_mode:
@@ -2917,59 +3205,118 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
elif self.quiet_mode:
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
tool_emoji_map = {
- 'web_search': '๐', 'web_extract': '๐', 'web_crawl': '๐ธ๏ธ',
- 'terminal': '๐ป', 'process': 'โ๏ธ',
- 'read_file': '๐', 'write_file': 'โ๏ธ', 'patch': '๐ง', 'search_files': '๐',
- 'browser_navigate': '๐', 'browser_snapshot': '๐ธ',
- 'browser_click': '๐', 'browser_type': 'โจ๏ธ',
- 'browser_scroll': '๐', 'browser_back': 'โ๏ธ',
- 'browser_press': 'โจ๏ธ', 'browser_close': '๐ช',
- 'browser_get_images': '๐ผ๏ธ', 'browser_vision': '๐๏ธ',
- 'image_generate': '๐จ', 'text_to_speech': '๐',
- 'vision_analyze': '๐๏ธ', 'mixture_of_agents': '๐ง ',
- 'skills_list': '๐', 'skill_view': '๐',
- 'schedule_cronjob': 'โฐ', 'list_cronjobs': 'โฐ', 'remove_cronjob': 'โฐ',
- 'send_message': '๐จ', 'todo': '๐', 'memory': '๐ง ', 'session_search': '๐',
- 'clarify': 'โ', 'execute_code': '๐', 'delegate_task': '๐',
+ "web_search": "๐",
+ "web_extract": "๐",
+ "web_crawl": "๐ธ๏ธ",
+ "terminal": "๐ป",
+ "process": "โ๏ธ",
+ "read_file": "๐",
+ "write_file": "โ๏ธ",
+ "patch": "๐ง",
+ "search_files": "๐",
+ "browser_navigate": "๐",
+ "browser_snapshot": "๐ธ",
+ "browser_click": "๐",
+ "browser_type": "โจ๏ธ",
+ "browser_scroll": "๐",
+ "browser_back": "โ๏ธ",
+ "browser_press": "โจ๏ธ",
+ "browser_close": "๐ช",
+ "browser_get_images": "๐ผ๏ธ",
+ "browser_vision": "๐๏ธ",
+ "image_generate": "๐จ",
+ "text_to_speech": "๐",
+ "vision_analyze": "๐๏ธ",
+ "mixture_of_agents": "๐ง ",
+ "skills_list": "๐",
+ "skill_view": "๐",
+ "schedule_cronjob": "โฐ",
+ "list_cronjobs": "โฐ",
+ "remove_cronjob": "โฐ",
+ "send_message": "๐จ",
+ "todo": "๐",
+ "memory": "๐ง ",
+ "session_search": "๐",
+ "clarify": "โ",
+ "execute_code": "๐",
+ "delegate_task": "๐",
}
- emoji = tool_emoji_map.get(function_name, 'โก')
- preview = _build_tool_preview(function_name, function_args) or function_name
+ emoji = tool_emoji_map.get(function_name, "โก")
+ preview = (
+ _build_tool_preview(function_name, function_args) or function_name
+ )
if len(preview) > 30:
preview = preview[:27] + "..."
- spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
+ spinner = KawaiiSpinner(
+ f"{face} {emoji} {preview}", spinner_type="dots"
+ )
spinner.start()
_spinner_result = None
try:
function_result = handle_function_call(
- function_name, function_args, effective_task_id,
- enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+ function_name,
+ function_args,
+ effective_task_id,
+ enabled_tools=list(self.valid_tool_names)
+ if self.valid_tool_names
+ else None,
)
_spinner_result = function_result
except Exception as tool_error:
- function_result = f"Error executing tool '{function_name}': {tool_error}"
- logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
+ function_result = (
+ f"Error executing tool '{function_name}': {tool_error}"
+ )
+ logger.error(
+ "handle_function_call raised for %s: %s",
+ function_name,
+ tool_error,
+ exc_info=True,
+ )
finally:
tool_duration = time.time() - tool_start_time
- cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
+ cute_msg = _get_cute_tool_message_impl(
+ function_name,
+ function_args,
+ tool_duration,
+ result=_spinner_result,
+ )
spinner.stop(cute_msg)
else:
try:
function_result = handle_function_call(
- function_name, function_args, effective_task_id,
- enabled_tools=list(self.valid_tool_names) if self.valid_tool_names else None,
+ function_name,
+ function_args,
+ effective_task_id,
+ enabled_tools=list(self.valid_tool_names)
+ if self.valid_tool_names
+ else None,
)
except Exception as tool_error:
- function_result = f"Error executing tool '{function_name}': {tool_error}"
- logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
+ function_result = (
+ f"Error executing tool '{function_name}': {tool_error}"
+ )
+ logger.error(
+ "handle_function_call raised for %s: %s",
+ function_name,
+ tool_error,
+ exc_info=True,
+ )
tool_duration = time.time() - tool_start_time
- result_preview = function_result[:200] if len(function_result) > 200 else function_result
+ result_preview = (
+ function_result[:200] if len(function_result) > 200 else function_result
+ )
# Log tool errors to the persistent error log so [error] tags
# in the UI always have a corresponding detailed entry on disk.
_is_error_result, _ = _detect_tool_failure(function_name, function_result)
if _is_error_result:
- logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
+ logger.warning(
+ "Tool %s returned error (%.2fs): %s",
+ function_name,
+ tool_duration,
+ result_preview,
+ )
if self.verbose_logging:
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
@@ -2991,23 +3338,31 @@ def _execute_tool_calls(self, assistant_message, messages: list, effective_task_
tool_msg = {
"role": "tool",
"content": function_result,
- "tool_call_id": tool_call.id
+ "tool_call_id": tool_call.id,
}
messages.append(tool_msg)
if not self.quiet_mode:
- response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
- print(f" โ
Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
+ response_preview = (
+ function_result[: self.log_prefix_chars] + "..."
+ if len(function_result) > self.log_prefix_chars
+ else function_result
+ )
+ print(
+ f" โ
Tool {i} completed in {tool_duration:.2f}s - {response_preview}"
+ )
if self._interrupt_requested and i < len(assistant_message.tool_calls):
remaining = len(assistant_message.tool_calls) - i
- print(f"{self.log_prefix}โก Interrupt: skipping {remaining} remaining tool call(s)")
+ print(
+ f"{self.log_prefix}โก Interrupt: skipping {remaining} remaining tool call(s)"
+ )
for skipped_tc in assistant_message.tool_calls[i:]:
skipped_name = skipped_tc.function.name
skip_msg = {
"role": "tool",
"content": f"[Tool execution skipped โ {skipped_name} was not started. User sent a new message]",
- "tool_call_id": skipped_tc.id
+ "tool_call_id": skipped_tc.id,
}
messages.append(skip_msg)
break
@@ -3062,7 +3417,9 @@ def _get_budget_warning(self, api_call_count: int) -> Optional[str]:
def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
"""Request a summary when max iterations are reached. Returns the final response text."""
- print(f"โ ๏ธ Reached maximum iterations ({self.max_iterations}). Requesting summary...")
+ print(
+ f"โ ๏ธ Reached maximum iterations ({self.max_iterations}). Requesting summary..."
+ )
summary_request = (
"You've reached the maximum number of tool-calling iterations allowed. "
@@ -3079,13 +3436,24 @@ def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
api_msg = msg.copy()
for internal_field in ("reasoning", "finish_reason"):
api_msg.pop(internal_field, None)
+ # Remove call_id and response_item_id from tool_calls - these are
+ # OpenAI Responses API specific and rejected by Mistral with 422
+ if "tool_calls" in api_msg and isinstance(api_msg["tool_calls"], list):
+ for tc in api_msg["tool_calls"]:
+ if isinstance(tc, dict):
+ tc.pop("call_id", None)
+ tc.pop("response_item_id", None)
api_messages.append(api_msg)
effective_system = self._cached_system_prompt or ""
if self.ephemeral_system_prompt:
- effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+ effective_system = (
+ effective_system + "\n\n" + self.ephemeral_system_prompt
+ ).strip()
if effective_system:
- api_messages = [{"role": "system", "content": effective_system}] + api_messages
+ api_messages = [
+ {"role": "system", "content": effective_system}
+ ] + api_messages
if self.prefill_messages:
sys_offset = 1 if effective_system else 0
for idx, pfm in enumerate(self.prefill_messages):
@@ -3100,7 +3468,7 @@ def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
else:
summary_extra_body["reasoning"] = {
"enabled": True,
- "effort": "medium"
+ "effort": "medium",
}
if _is_nous:
summary_extra_body["tags"] = ["product=hermes-agent"]
@@ -3110,7 +3478,11 @@ def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
codex_kwargs.pop("tools", None)
summary_response = self._run_codex_stream(codex_kwargs)
assistant_message, _ = self._normalize_codex_response(summary_response)
- final_response = (assistant_message.content or "").strip() if assistant_message else ""
+ final_response = (
+ (assistant_message.content or "").strip()
+ if assistant_message
+ else ""
+ )
else:
summary_kwargs = {
"model": self.model,
@@ -3137,18 +3509,25 @@ def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
summary_response = self.client.chat.completions.create(**summary_kwargs)
- if summary_response.choices and summary_response.choices[0].message.content:
+ if (
+ summary_response.choices
+ and summary_response.choices[0].message.content
+ ):
final_response = summary_response.choices[0].message.content
else:
final_response = ""
if final_response:
if "" in final_response:
- final_response = re.sub(r'.*?\s*', '', final_response, flags=re.DOTALL).strip()
+ final_response = re.sub(
+ r".*?\s*", "", final_response, flags=re.DOTALL
+ ).strip()
if final_response:
messages.append({"role": "assistant", "content": final_response})
else:
- final_response = "I reached the iteration limit and couldn't generate a summary."
+ final_response = (
+ "I reached the iteration limit and couldn't generate a summary."
+ )
else:
# Retry summary generation
if self.api_mode == "codex_responses":
@@ -3156,7 +3535,9 @@ def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
codex_kwargs.pop("tools", None)
retry_response = self._run_codex_stream(codex_kwargs)
retry_msg, _ = self._normalize_codex_response(retry_response)
- final_response = (retry_msg.content or "").strip() if retry_msg else ""
+ final_response = (
+ (retry_msg.content or "").strip() if retry_msg else ""
+ )
else:
summary_kwargs = {
"model": self.model,
@@ -3167,22 +3548,36 @@ def _handle_max_iterations(self, messages: list, api_call_count: int) -> str:
if summary_extra_body:
summary_kwargs["extra_body"] = summary_extra_body
- summary_response = self.client.chat.completions.create(**summary_kwargs)
+ summary_response = self.client.chat.completions.create(
+ **summary_kwargs
+ )
- if summary_response.choices and summary_response.choices[0].message.content:
+ if (
+ summary_response.choices
+ and summary_response.choices[0].message.content
+ ):
final_response = summary_response.choices[0].message.content
else:
final_response = ""
if final_response:
if "" in final_response:
- final_response = re.sub(r'.*?\s*', '', final_response, flags=re.DOTALL).strip()
+ final_response = re.sub(
+ r".*?\s*",
+ "",
+ final_response,
+ flags=re.DOTALL,
+ ).strip()
if final_response:
- messages.append({"role": "assistant", "content": final_response})
+ messages.append(
+ {"role": "assistant", "content": final_response}
+ )
else:
final_response = "I reached the iteration limit and couldn't generate a summary."
else:
- final_response = "I reached the iteration limit and couldn't generate a summary."
+ final_response = (
+ "I reached the iteration limit and couldn't generate a summary."
+ )
except Exception as e:
logging.warning(f"Failed to get summary response: {e}")
@@ -3195,7 +3590,7 @@ def run_conversation(
user_message: str,
system_message: str = None,
conversation_history: List[Dict[str, Any]] = None,
- task_id: str = None
+ task_id: str = None,
) -> Dict[str, Any]:
"""
Run a complete conversation with tool calling until completion.
@@ -3216,7 +3611,7 @@ def run_conversation(
# Generate unique task_id if not provided to isolate VMs between concurrent tasks
effective_task_id = task_id or str(uuid.uuid4())
-
+
# Reset retry counters and iteration budget at the start of each turn
# so subagent usage from a previous turn doesn't eat into the next one.
self._invalid_tool_retries = 0
@@ -3228,21 +3623,21 @@ def run_conversation(
self._turns_since_memory = 0
self._iters_since_skill = 0
self.iteration_budget = IterationBudget(self.max_iterations)
-
+
# Initialize conversation (copy to avoid mutating the caller's list)
messages = list(conversation_history) if conversation_history else []
-
+
# Hydrate todo store from conversation history (gateway creates a fresh
# AIAgent per message, so the in-memory store is empty -- we need to
# recover the todo state from the most recent todo tool response in history)
if conversation_history and not self._todo_store.has_items():
self._hydrate_todo_store(conversation_history)
-
+
# Prefill messages (few-shot priming) are injected at API-call time only,
# never stored in the messages list. This keeps them ephemeral: they won't
# be saved to session DB, session logs, or batch trajectories, but they're
# automatically re-applied on every API call (including session continuations).
-
+
# Track user turns for memory flush and periodic nudge logic
self._user_turn_count += 1
@@ -3252,9 +3647,11 @@ def run_conversation(
# Periodic memory nudge: remind the model to consider saving memories.
# Counter resets whenever the memory tool is actually used.
- if (self._memory_nudge_interval > 0
- and "memory" in self.valid_tool_names
- and self._memory_store):
+ if (
+ self._memory_nudge_interval > 0
+ and "memory" in self.valid_tool_names
+ and self._memory_store
+ ):
self._turns_since_memory += 1
if self._turns_since_memory >= self._memory_nudge_interval:
user_message += (
@@ -3265,9 +3662,11 @@ def run_conversation(
# Skill creation nudge: fires on the first user message after a long tool loop.
# The counter increments per API iteration in the tool loop and is checked here.
- if (self._skill_nudge_interval > 0
- and self._iters_since_skill >= self._skill_nudge_interval
- and "skill_manage" in self.valid_tool_names):
+ if (
+ self._skill_nudge_interval > 0
+ and self._iters_since_skill >= self._skill_nudge_interval
+ and "skill_manage" in self.valid_tool_names
+ ):
user_message += (
"\n\n[System: The previous task involved many steps. "
"If you discovered a reusable workflow, consider saving it as a skill.]"
@@ -3290,10 +3689,11 @@ def run_conversation(
# Add user message
user_msg = {"role": "user", "content": user_message}
messages.append(user_msg)
-
if not self.quiet_mode:
- print(f"๐ฌ Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
-
+ print(
+ f"๐ฌ Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'"
+ )
+
# โโ System prompt (cached per session for prefix caching) โโ
# Built once on first call, reused for all subsequent calls.
# Only rebuilt after context compression events (which invalidate
@@ -3331,7 +3731,9 @@ def run_conversation(
# Store the system prompt snapshot in SQLite
if self._session_db:
try:
- self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
+ self._session_db.update_system_prompt(
+ self.session_id, self._cached_system_prompt
+ )
except Exception as e:
logger.debug("Session DB update_system_prompt failed: %s", e)
@@ -3346,8 +3748,10 @@ def run_conversation(
# 4xx and abort the request entirely).
if (
self.compression_enabled
- and len(messages) > self.context_compressor.protect_first_n
- + self.context_compressor.protect_last_n + 1
+ and len(messages)
+ > self.context_compressor.protect_first_n
+ + self.context_compressor.protect_last_n
+ + 1
):
_sys_tok_est = estimate_tokens_rough(active_system_prompt or "")
_msg_tok_est = estimate_messages_tokens_rough(messages)
@@ -3390,11 +3794,13 @@ def run_conversation(
codex_ack_continuations = 0
length_continue_retries = 0
truncated_response_prefix = ""
-
+
# Clear any stale interrupt state at start
self.clear_interrupt()
-
- while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
+
+ while (
+ api_call_count < self.max_iterations and self.iteration_budget.remaining > 0
+ ):
# Reset per-turn checkpoint dedup so each iteration can take one snapshot
self._checkpoint_mgr.new_turn()
@@ -3404,11 +3810,13 @@ def run_conversation(
if not self.quiet_mode:
print(f"\nโก Breaking out of tool loop due to interrupt...")
break
-
+
api_call_count += 1
if not self.iteration_budget.consume():
if not self.quiet_mode:
- print(f"\nโ ๏ธ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
+ print(
+ f"\nโ ๏ธ Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)"
+ )
break
# Fire step_callback for gateway hooks (agent:step event)
@@ -3425,14 +3833,20 @@ def run_conversation(
break
self.step_callback(api_call_count, prev_tools)
except Exception as _step_err:
- logger.debug("step_callback error (iteration %s): %s", api_call_count, _step_err)
+ logger.debug(
+ "step_callback error (iteration %s): %s",
+ api_call_count,
+ _step_err,
+ )
# Track tool-calling iterations for skill nudge.
# Counter resets whenever skill_manage is actually used.
- if (self._skill_nudge_interval > 0
- and "skill_manage" in self.valid_tool_names):
+ if (
+ self._skill_nudge_interval > 0
+ and "skill_manage" in self.valid_tool_names
+ ):
self._iters_since_skill += 1
-
+
# Prepare messages for API call
# If we have an ephemeral system prompt, prepend it to the messages
# Note: Reasoning is embedded in content via tags for trajectory storage.
@@ -3457,6 +3871,13 @@ def run_conversation(
# Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
if "finish_reason" in api_msg:
api_msg.pop("finish_reason")
+ # Remove call_id and response_item_id from tool_calls - these are
+ # OpenAI Responses API specific and rejected by Mistral with 422
+ if "tool_calls" in api_msg and isinstance(api_msg["tool_calls"], list):
+ for tc in api_msg["tool_calls"]:
+ if isinstance(tc, dict):
+ tc.pop("call_id", None)
+ tc.pop("response_item_id", None)
# Keep 'reasoning_details' - OpenRouter uses this for multi-turn reasoning context
# The signature field helps maintain reasoning continuity
api_messages.append(api_msg)
@@ -3470,9 +3891,13 @@ def run_conversation(
# session, maximizing Anthropic prompt cache hits.
effective_system = active_system_prompt or ""
if self.ephemeral_system_prompt:
- effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip()
+ effective_system = (
+ effective_system + "\n\n" + self.ephemeral_system_prompt
+ ).strip()
if effective_system:
- api_messages = [{"role": "system", "content": effective_system}] + api_messages
+ api_messages = [
+ {"role": "system", "content": effective_system}
+ ] + api_messages
# Inject ephemeral prefill messages right after the system prompt
# but before conversation history. Same API-call-time-only pattern.
@@ -3486,26 +3911,36 @@ def run_conversation(
# inject cache_control breakpoints (system + last 3 messages) to reduce
# input token costs by ~75% on multi-turn conversations.
if self._use_prompt_caching:
- api_messages = apply_anthropic_cache_control(api_messages, cache_ttl=self._cache_ttl)
+ api_messages = apply_anthropic_cache_control(
+ api_messages, cache_ttl=self._cache_ttl
+ )
# Safety net: strip orphaned tool results / add stubs for missing
# results before sending to the API. The compressor handles this
# during compression, but orphans can also sneak in from session
# loading or manual message manipulation.
- if hasattr(self, 'context_compressor') and self.context_compressor:
- api_messages = self.context_compressor._sanitize_tool_pairs(api_messages)
+ if hasattr(self, "context_compressor") and self.context_compressor:
+ api_messages = self.context_compressor._sanitize_tool_pairs(
+ api_messages
+ )
# Calculate approximate request size for logging
total_chars = sum(len(str(msg)) for msg in api_messages)
approx_tokens = total_chars // 4 # Rough estimate: 4 chars per token
-
+
# Thinking spinner for quiet mode (animated during API call)
thinking_spinner = None
-
+
if not self.quiet_mode:
- print(f"\n{self.log_prefix}๐ Making API call #{api_call_count}/{self.max_iterations}...")
- print(f"{self.log_prefix} ๐ Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
- print(f"{self.log_prefix} ๐ง Available tools: {len(self.tools) if self.tools else 0}")
+ print(
+ f"\n{self.log_prefix}๐ Making API call #{api_call_count}/{self.max_iterations}..."
+ )
+ print(
+ f"{self.log_prefix} ๐ Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)"
+ )
+ print(
+ f"{self.log_prefix} ๐ง Available tools: {len(self.tools) if self.tools else 0}"
+ )
else:
# Animated thinking spinner in quiet mode
face = random.choice(KawaiiSpinner.KAWAII_THINKING)
@@ -3514,16 +3949,24 @@ def run_conversation(
# CLI TUI mode: use prompt_toolkit widget instead of raw spinner
self.thinking_callback(f"{face} {verb}...")
else:
- spinner_type = random.choice(['brain', 'sparkle', 'pulse', 'moon', 'star'])
- thinking_spinner = KawaiiSpinner(f"{face} {verb}...", spinner_type=spinner_type)
+ spinner_type = random.choice(
+ ["brain", "sparkle", "pulse", "moon", "star"]
+ )
+ thinking_spinner = KawaiiSpinner(
+ f"{face} {verb}...", spinner_type=spinner_type
+ )
thinking_spinner.start()
-
+
# Log request details if verbose
if self.verbose_logging:
- logging.debug(f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}")
- logging.debug(f"Last message role: {messages[-1]['role'] if messages else 'none'}")
+ logging.debug(
+ f"API Request - Model: {self.model}, Messages: {len(messages)}, Tools: {len(self.tools) if self.tools else 0}"
+ )
+ logging.debug(
+ f"Last message role: {messages[-1]['role'] if messages else 'none'}"
+ )
logging.debug(f"Total message size: ~{approx_tokens:,} tokens")
-
+
api_start_time = time.time()
retry_count = 0
max_retries = 3
@@ -3541,15 +3984,22 @@ def run_conversation(
try:
api_kwargs = self._build_api_kwargs(api_messages)
if self.api_mode == "codex_responses":
- api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
+ api_kwargs = self._preflight_codex_api_kwargs(
+ api_kwargs, allow_stream=False
+ )
- if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
+ if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {
+ "1",
+ "true",
+ "yes",
+ "on",
+ }:
self._dump_api_request_debug(api_kwargs, reason="preflight")
response = self._interruptible_api_call(api_kwargs)
-
+
api_duration = time.time() - api_start_time
-
+
# Stop thinking spinner silently -- the response box or tool
# execution messages that follow are more informative.
if thinking_spinner:
@@ -3557,20 +4007,30 @@ def run_conversation(
thinking_spinner = None
if self.thinking_callback:
self.thinking_callback("")
-
+
if not self.quiet_mode:
- print(f"{self.log_prefix}โฑ๏ธ API call completed in {api_duration:.2f}s")
-
+ print(
+ f"{self.log_prefix}โฑ๏ธ API call completed in {api_duration:.2f}s"
+ )
+
if self.verbose_logging:
# Log response with provider info if available
- resp_model = getattr(response, 'model', 'N/A') if response else 'N/A'
- logging.debug(f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
-
+ resp_model = (
+ getattr(response, "model", "N/A") if response else "N/A"
+ )
+ logging.debug(
+ f"API Response received - Model: {resp_model}, Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}"
+ )
+
# Validate response shape before proceeding
response_invalid = False
error_details = []
if self.api_mode == "codex_responses":
- output_items = getattr(response, "output", None) if response is not None else None
+ output_items = (
+ getattr(response, "output", None)
+ if response is not None
+ else None
+ )
if response is None:
response_invalid = True
error_details.append("response is None")
@@ -3581,12 +4041,19 @@ def run_conversation(
response_invalid = True
error_details.append("response.output is empty")
else:
- if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
+ if (
+ response is None
+ or not hasattr(response, "choices")
+ or response.choices is None
+ or len(response.choices) == 0
+ ):
response_invalid = True
if response is None:
error_details.append("response is None")
- elif not hasattr(response, 'choices'):
- error_details.append("response has no 'choices' attribute")
+ elif not hasattr(response, "choices"):
+ error_details.append(
+ "response has no 'choices' attribute"
+ )
elif response.choices is None:
error_details.append("response.choices is None")
else:
@@ -3599,63 +4066,101 @@ def run_conversation(
thinking_spinner = None
if self.thinking_callback:
self.thinking_callback("")
-
+
# This is often rate limiting or provider returning malformed response
retry_count += 1
-
+
# Check for error field in response (some providers include this)
error_msg = "Unknown"
provider_name = "Unknown"
- if response and hasattr(response, 'error') and response.error:
+ if response and hasattr(response, "error") and response.error:
error_msg = str(response.error)
# Try to extract provider from error metadata
- if hasattr(response.error, 'metadata') and response.error.metadata:
- provider_name = response.error.metadata.get('provider_name', 'Unknown')
- elif response and hasattr(response, 'message') and response.message:
+ if (
+ hasattr(response.error, "metadata")
+ and response.error.metadata
+ ):
+ provider_name = response.error.metadata.get(
+ "provider_name", "Unknown"
+ )
+ elif (
+ response
+ and hasattr(response, "message")
+ and response.message
+ ):
error_msg = str(response.message)
-
+
# Try to get provider from model field (OpenRouter often returns actual model used)
- if provider_name == "Unknown" and response and hasattr(response, 'model') and response.model:
+ if (
+ provider_name == "Unknown"
+ and response
+ and hasattr(response, "model")
+ and response.model
+ ):
provider_name = f"model={response.model}"
-
+
# Check for x-openrouter-provider or similar metadata
if provider_name == "Unknown" and response:
# Log all response attributes for debugging
- resp_attrs = {k: str(v)[:100] for k, v in vars(response).items() if not k.startswith('_')}
+ resp_attrs = {
+ k: str(v)[:100]
+ for k, v in vars(response).items()
+ if not k.startswith("_")
+ }
if self.verbose_logging:
- logging.debug(f"Response attributes for invalid response: {resp_attrs}")
-
- print(f"{self.log_prefix}โ ๏ธ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
+ logging.debug(
+ f"Response attributes for invalid response: {resp_attrs}"
+ )
+
+ print(
+ f"{self.log_prefix}โ ๏ธ Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}"
+ )
print(f"{self.log_prefix} ๐ข Provider: {provider_name}")
- print(f"{self.log_prefix} ๐ Provider message: {error_msg[:200]}")
- print(f"{self.log_prefix} โฑ๏ธ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)")
-
+ print(
+ f"{self.log_prefix} ๐ Provider message: {error_msg[:200]}"
+ )
+ print(
+ f"{self.log_prefix} โฑ๏ธ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)"
+ )
+
if retry_count >= max_retries:
# Try fallback before giving up
if self._try_activate_fallback():
retry_count = 0
continue
- print(f"{self.log_prefix}โ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
- logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.")
+ print(
+ f"{self.log_prefix}โ Max retries ({max_retries}) exceeded for invalid responses. Giving up."
+ )
+ logging.error(
+ f"{self.log_prefix}Invalid API response after {max_retries} retries."
+ )
self._persist_session(messages, conversation_history)
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": "Invalid API response shape. Likely rate limited or malformed provider response.",
- "failed": True # Mark as failure for filtering
+ "failed": True, # Mark as failure for filtering
}
-
+
# Longer backoff for rate limiting (likely cause of None choices)
- wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s
- print(f"{self.log_prefix}โณ Retrying in {wait_time}s (extended backoff for possible rate limit)...")
- logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
-
+ wait_time = min(
+ 5 * (2 ** (retry_count - 1)), 120
+ ) # 5s, 10s, 20s, 40s, 80s, 120s
+ print(
+ f"{self.log_prefix}โณ Retrying in {wait_time}s (extended backoff for possible rate limit)..."
+ )
+ logging.warning(
+ f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}"
+ )
+
# Sleep in small increments to stay responsive to interrupts
sleep_end = time.time() + wait_time
while time.time() < sleep_end:
if self._interrupt_requested:
- print(f"{self.log_prefix}โก Interrupt detected during retry wait, aborting.")
+ print(
+ f"{self.log_prefix}โก Interrupt detected during retry wait, aborting."
+ )
self._persist_session(messages, conversation_history)
self.clear_interrupt()
return {
@@ -3671,13 +4176,20 @@ def run_conversation(
# Check finish_reason before proceeding
if self.api_mode == "codex_responses":
status = getattr(response, "status", None)
- incomplete_details = getattr(response, "incomplete_details", None)
+ incomplete_details = getattr(
+ response, "incomplete_details", None
+ )
incomplete_reason = None
if isinstance(incomplete_details, dict):
incomplete_reason = incomplete_details.get("reason")
else:
- incomplete_reason = getattr(incomplete_details, "reason", None)
- if status == "incomplete" and incomplete_reason in {"max_output_tokens", "length"}:
+ incomplete_reason = getattr(
+ incomplete_details, "reason", None
+ )
+ if status == "incomplete" and incomplete_reason in {
+ "max_output_tokens",
+ "length",
+ }:
finish_reason = "length"
else:
finish_reason = "stop"
@@ -3685,16 +4197,22 @@ def run_conversation(
finish_reason = response.choices[0].finish_reason
if finish_reason == "length":
- print(f"{self.log_prefix}โ ๏ธ Response truncated (finish_reason='length') - model hit max output tokens")
+ print(
+ f"{self.log_prefix}โ ๏ธ Response truncated (finish_reason='length') - model hit max output tokens"
+ )
if self.api_mode == "chat_completions":
assistant_message = response.choices[0].message
if not assistant_message.tool_calls:
length_continue_retries += 1
- interim_msg = self._build_assistant_message(assistant_message, finish_reason)
+ interim_msg = self._build_assistant_message(
+ assistant_message, finish_reason
+ )
messages.append(interim_msg)
if assistant_message.content:
- truncated_response_prefix += assistant_message.content
+ truncated_response_prefix += (
+ assistant_message.content
+ )
if length_continue_retries < 3:
print(
@@ -3715,7 +4233,9 @@ def run_conversation(
restart_with_length_continuation = True
break
- partial_response = self._strip_think_blocks(truncated_response_prefix).strip()
+ partial_response = self._strip_think_blocks(
+ truncated_response_prefix
+ ).strip()
self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history)
return {
@@ -3729,8 +4249,12 @@ def run_conversation(
# If we have prior messages, roll back to last complete state
if len(messages) > 1:
- print(f"{self.log_prefix} โช Rolling back to last complete assistant turn")
- rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
+ print(
+ f"{self.log_prefix} โช Rolling back to last complete assistant turn"
+ )
+ rolled_back_messages = (
+ self._get_messages_up_to_last_assistant(messages)
+ )
self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history)
@@ -3741,11 +4265,13 @@ def run_conversation(
"api_calls": api_call_count,
"completed": False,
"partial": True,
- "error": "Response truncated due to output length limit"
+ "error": "Response truncated due to output length limit",
}
else:
# First message was truncated - mark as failed
- print(f"{self.log_prefix}โ First response truncated - cannot recover")
+ print(
+ f"{self.log_prefix}โ First response truncated - cannot recover"
+ )
self._persist_session(messages, conversation_history)
return {
"final_response": None,
@@ -3753,22 +4279,31 @@ def run_conversation(
"api_calls": api_call_count,
"completed": False,
"failed": True,
- "error": "First response truncated due to output length limit"
+ "error": "First response truncated due to output length limit",
}
-
+
# Track actual token usage from response for context management
- if hasattr(response, 'usage') and response.usage:
+ if hasattr(response, "usage") and response.usage:
if self.api_mode == "codex_responses":
- prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
- completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
- total_tokens = (
- getattr(response.usage, 'total_tokens', None)
- or (prompt_tokens + completion_tokens)
+ prompt_tokens = (
+ getattr(response.usage, "input_tokens", 0) or 0
)
+ completion_tokens = (
+ getattr(response.usage, "output_tokens", 0) or 0
+ )
+ total_tokens = getattr(
+ response.usage, "total_tokens", None
+ ) or (prompt_tokens + completion_tokens)
else:
- prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0
- completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0
- total_tokens = getattr(response.usage, 'total_tokens', 0) or 0
+ prompt_tokens = (
+ getattr(response.usage, "prompt_tokens", 0) or 0
+ )
+ completion_tokens = (
+ getattr(response.usage, "completion_tokens", 0) or 0
+ )
+ total_tokens = (
+ getattr(response.usage, "total_tokens", 0) or 0
+ )
usage_dict = {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
@@ -3780,27 +4315,43 @@ def run_conversation(
if self.context_compressor._context_probed:
ctx = self.context_compressor.context_length
save_context_length(self.model, self.base_url, ctx)
- print(f"{self.log_prefix}๐พ Cached context length: {ctx:,} tokens for {self.model}")
+ print(
+ f"{self.log_prefix}๐พ Cached context length: {ctx:,} tokens for {self.model}"
+ )
self.context_compressor._context_probed = False
self.session_prompt_tokens += prompt_tokens
self.session_completion_tokens += completion_tokens
self.session_total_tokens += total_tokens
self.session_api_calls += 1
-
+
if self.verbose_logging:
- logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
-
+ logging.debug(
+ f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}"
+ )
+
# Log cache hit stats when prompt caching is active
if self._use_prompt_caching:
- details = getattr(response.usage, 'prompt_tokens_details', None)
- cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
- written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
+ details = getattr(
+ response.usage, "prompt_tokens_details", None
+ )
+ cached = (
+ getattr(details, "cached_tokens", 0) or 0
+ if details
+ else 0
+ )
+ written = (
+ getattr(details, "cache_write_tokens", 0) or 0
+ if details
+ else 0
+ )
prompt = usage_dict["prompt_tokens"]
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
if not self.quiet_mode:
- print(f"{self.log_prefix} ๐พ Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
-
+ print(
+ f"{self.log_prefix} ๐พ Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)"
+ )
+
break # Success, exit retry loop
except InterruptedError:
@@ -3833,7 +4384,9 @@ def run_conversation(
):
codex_auth_retry_attempted = True
if self._try_refresh_codex_client_credentials(force=True):
- print(f"{self.log_prefix}๐ Codex auth refreshed after 401. Retrying request...")
+ print(
+ f"{self.log_prefix}๐ Codex auth refreshed after 401. Retrying request..."
+ )
continue
if (
self.api_mode == "chat_completions"
@@ -3843,24 +4396,34 @@ def run_conversation(
):
nous_auth_retry_attempted = True
if self._try_refresh_nous_client_credentials(force=True):
- print(f"{self.log_prefix}๐ Nous agent key refreshed after 401. Retrying request...")
+ print(
+ f"{self.log_prefix}๐ Nous agent key refreshed after 401. Retrying request..."
+ )
continue
retry_count += 1
elapsed_time = time.time() - api_start_time
-
+
# Enhanced error logging
error_type = type(api_error).__name__
error_msg = str(api_error).lower()
-
- print(f"{self.log_prefix}โ ๏ธ API call failed (attempt {retry_count}/{max_retries}): {error_type}")
- print(f"{self.log_prefix} โฑ๏ธ Time elapsed before failure: {elapsed_time:.2f}s")
+
+ print(
+ f"{self.log_prefix}โ ๏ธ API call failed (attempt {retry_count}/{max_retries}): {error_type}"
+ )
+ print(
+ f"{self.log_prefix} โฑ๏ธ Time elapsed before failure: {elapsed_time:.2f}s"
+ )
print(f"{self.log_prefix} ๐ Error: {str(api_error)[:200]}")
- print(f"{self.log_prefix} ๐ Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools")
-
+ print(
+ f"{self.log_prefix} ๐ Request context: {len(api_messages)} messages, ~{approx_tokens:,} tokens, {len(self.tools) if self.tools else 0} tools"
+ )
+
# Check for interrupt before deciding to retry
if self._interrupt_requested:
- print(f"{self.log_prefix}โก Interrupt detected during error handling, aborting retries.")
+ print(
+ f"{self.log_prefix}โก Interrupt detected during error handling, aborting retries."
+ )
self._persist_session(messages, conversation_history)
self.clear_interrupt()
return {
@@ -3870,32 +4433,38 @@ def run_conversation(
"completed": False,
"interrupted": True,
}
-
+
# Check for 413 payload-too-large BEFORE generic 4xx handler.
# A 413 is a payload-size error โ the correct response is to
# compress history and retry, not abort immediately.
status_code = getattr(api_error, "status_code", None)
is_payload_too_large = (
status_code == 413
- or 'request entity too large' in error_msg
- or 'payload too large' in error_msg
- or 'error code: 413' in error_msg
+ or "request entity too large" in error_msg
+ or "payload too large" in error_msg
+ or "error code: 413" in error_msg
)
if is_payload_too_large:
compression_attempts += 1
if compression_attempts > max_compression_attempts:
- print(f"{self.log_prefix}โ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.")
- logging.error(f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
+ print(
+ f"{self.log_prefix}โ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error."
+ )
+ logging.error(
+ f"{self.log_prefix}413 compression failed after {max_compression_attempts} attempts."
+ )
self._persist_session(messages, conversation_history)
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": f"Request payload too large: max compression attempts ({max_compression_attempts}) reached.",
- "partial": True
+ "partial": True,
}
- print(f"{self.log_prefix}โ ๏ธ Request payload too large (413) โ compression attempt {compression_attempts}/{max_compression_attempts}...")
+ print(
+ f"{self.log_prefix}โ ๏ธ Request payload too large (413) โ compression attempt {compression_attempts}/{max_compression_attempts}..."
+ )
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
@@ -3904,20 +4473,26 @@ def run_conversation(
)
if len(messages) < original_len:
- print(f"{self.log_prefix} ๐๏ธ Compressed {original_len} โ {len(messages)} messages, retrying...")
+ print(
+ f"{self.log_prefix} ๐๏ธ Compressed {original_len} โ {len(messages)} messages, retrying..."
+ )
time.sleep(2) # Brief pause between compression retries
restart_with_compressed_messages = True
break
else:
- print(f"{self.log_prefix}โ Payload too large and cannot compress further.")
- logging.error(f"{self.log_prefix}413 payload too large. Cannot compress further.")
+ print(
+ f"{self.log_prefix}โ Payload too large and cannot compress further."
+ )
+ logging.error(
+ f"{self.log_prefix}413 payload too large. Cannot compress further."
+ )
self._persist_session(messages, conversation_history)
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": "Request payload too large (413). Cannot compress further.",
- "partial": True
+ "partial": True,
}
# Check for context-length errors BEFORE generic 4xx handler.
@@ -3940,32 +4515,46 @@ def run_conversation(
parsed_limit = parse_context_limit_from_error(error_msg)
if parsed_limit and parsed_limit < old_ctx:
new_ctx = parsed_limit
- print(f"{self.log_prefix}โ ๏ธ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
+ print(
+ f"{self.log_prefix}โ ๏ธ Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})"
+ )
else:
# Step down to the next probe tier
new_ctx = get_next_probe_tier(old_ctx)
if new_ctx and new_ctx < old_ctx:
compressor.context_length = new_ctx
- compressor.threshold_tokens = int(new_ctx * compressor.threshold_percent)
+ compressor.threshold_tokens = int(
+ new_ctx * compressor.threshold_percent
+ )
compressor._context_probed = True
- print(f"{self.log_prefix}โ ๏ธ Context length exceeded โ stepping down: {old_ctx:,} โ {new_ctx:,} tokens")
+ print(
+ f"{self.log_prefix}โ ๏ธ Context length exceeded โ stepping down: {old_ctx:,} โ {new_ctx:,} tokens"
+ )
else:
- print(f"{self.log_prefix}โ ๏ธ Context length exceeded at minimum tier โ attempting compression...")
+ print(
+ f"{self.log_prefix}โ ๏ธ Context length exceeded at minimum tier โ attempting compression..."
+ )
compression_attempts += 1
if compression_attempts > max_compression_attempts:
- print(f"{self.log_prefix}โ Max compression attempts ({max_compression_attempts}) reached.")
- logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
+ print(
+ f"{self.log_prefix}โ Max compression attempts ({max_compression_attempts}) reached."
+ )
+ logging.error(
+ f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts."
+ )
self._persist_session(messages, conversation_history)
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
- "partial": True
+ "partial": True,
}
- print(f"{self.log_prefix} ๐๏ธ Context compression attempt {compression_attempts}/{max_compression_attempts}...")
+ print(
+ f"{self.log_prefix} ๐๏ธ Context compression attempt {compression_attempts}/{max_compression_attempts}..."
+ )
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
@@ -3973,41 +4562,73 @@ def run_conversation(
task_id=effective_task_id,
)
- if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
+ if (
+ len(messages) < original_len
+ or new_ctx
+ and new_ctx < old_ctx
+ ):
if len(messages) < original_len:
- print(f"{self.log_prefix} ๐๏ธ Compressed {original_len} โ {len(messages)} messages, retrying...")
+ print(
+ f"{self.log_prefix} ๐๏ธ Compressed {original_len} โ {len(messages)} messages, retrying..."
+ )
time.sleep(2) # Brief pause between compression retries
restart_with_compressed_messages = True
break
else:
# Can't compress further and already at minimum tier
- print(f"{self.log_prefix}โ Context length exceeded and cannot compress further.")
- print(f"{self.log_prefix} ๐ก The conversation has accumulated too much content.")
- logging.error(f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
+ print(
+ f"{self.log_prefix}โ Context length exceeded and cannot compress further."
+ )
+ print(
+ f"{self.log_prefix} ๐ก The conversation has accumulated too much content."
+ )
+ logging.error(
+ f"{self.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further."
+ )
self._persist_session(messages, conversation_history)
return {
"messages": messages,
"completed": False,
"api_calls": api_call_count,
"error": f"Context length exceeded ({approx_tokens:,} tokens). Cannot compress further.",
- "partial": True
+ "partial": True,
}
# Check for non-retryable client errors (4xx HTTP status codes).
# These indicate a problem with the request itself (bad model ID,
# invalid API key, forbidden, etc.) and will never succeed on retry.
# Note: 413 and context-length errors are excluded โ handled above.
+ # Note: 429 (rate limit) is excluded โ it's retryable with backoff.
# Also catch local validation errors (ValueError, TypeError) โ these
# are programming bugs, not transient failures.
is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
- is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
- is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
- 'error code: 401', 'error code: 403',
- 'error code: 404', 'error code: 422',
- 'is not a valid model', 'invalid model', 'model not found',
- 'invalid api key', 'invalid_api_key', 'authentication',
- 'unauthorized', 'forbidden', 'not found',
- ])) and not is_context_length_error
+ is_client_status_error = (
+ isinstance(status_code, int)
+ and 400 <= status_code < 500
+ and status_code not in (413, 429)
+ )
+ is_client_error = (
+ is_local_validation_error
+ or is_client_status_error
+ or any(
+ phrase in error_msg
+ for phrase in [
+ "error code: 401",
+ "error code: 403",
+ "error code: 404",
+ "error code: 422",
+ "is not a valid model",
+ "invalid model",
+ "model not found",
+ "invalid api key",
+ "invalid_api_key",
+ "authentication",
+ "unauthorized",
+ "forbidden",
+ "not found",
+ ]
+ )
+ ) and not is_context_length_error
if is_client_error:
# Try fallback before aborting โ a different provider
@@ -4016,11 +4637,19 @@ def run_conversation(
retry_count = 0
continue
self._dump_api_request_debug(
- api_kwargs, reason="non_retryable_client_error", error=api_error,
+ api_kwargs,
+ reason="non_retryable_client_error",
+ error=api_error,
+ )
+ print(
+ f"{self.log_prefix}โ Non-retryable client error detected. Aborting immediately."
+ )
+ print(
+ f"{self.log_prefix} ๐ก This type of error won't be fixed by retrying."
+ )
+ logging.error(
+ f"{self.log_prefix}Non-retryable client error: {api_error}"
)
- print(f"{self.log_prefix}โ Non-retryable client error detected. Aborting immediately.")
- print(f"{self.log_prefix} ๐ก This type of error won't be fixed by retrying.")
- logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
self._persist_session(messages, conversation_history)
return {
"final_response": None,
@@ -4036,23 +4665,37 @@ def run_conversation(
if self._try_activate_fallback():
retry_count = 0
continue
- print(f"{self.log_prefix}โ Max retries ({max_retries}) exceeded. Giving up.")
- logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
- logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}")
+ print(
+ f"{self.log_prefix}โ Max retries ({max_retries}) exceeded. Giving up."
+ )
+ logging.error(
+ f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}"
+ )
+ logging.error(
+ f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}"
+ )
raise api_error
- wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
- logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
+ wait_time = min(
+ 2**retry_count, 60
+ ) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
+ logging.warning(
+ f"API retry {retry_count}/{max_retries} after error: {api_error}"
+ )
if retry_count >= max_retries:
- print(f"{self.log_prefix}โ ๏ธ API call failed after {retry_count} attempts: {str(api_error)[:100]}")
+ print(
+ f"{self.log_prefix}โ ๏ธ API call failed after {retry_count} attempts: {str(api_error)[:100]}"
+ )
print(f"{self.log_prefix}โณ Final retry in {wait_time}s...")
-
+
# Sleep in small increments so we can respond to interrupts quickly
# instead of blocking the entire wait_time in one sleep() call
sleep_end = time.time() + wait_time
while time.time() < sleep_end:
if self._interrupt_requested:
- print(f"{self.log_prefix}โก Interrupt detected during retry wait, aborting.")
+ print(
+ f"{self.log_prefix}โก Interrupt detected during retry wait, aborting."
+ )
self._persist_session(messages, conversation_history)
self.clear_interrupt()
return {
@@ -4063,7 +4706,7 @@ def run_conversation(
"interrupted": True,
}
time.sleep(0.2) # Check interrupt every 200ms
-
+
# If the API call was interrupted, skip response processing
if interrupted:
break
@@ -4080,23 +4723,33 @@ def run_conversation(
# (e.g. repeated context-length errors that exhausted retry_count),
# the `response` variable is still None. Break out cleanly.
if response is None:
- print(f"{self.log_prefix}โ All API retries exhausted with no successful response.")
+ print(
+ f"{self.log_prefix}โ All API retries exhausted with no successful response."
+ )
self._persist_session(messages, conversation_history)
break
try:
if self.api_mode == "codex_responses":
- assistant_message, finish_reason = self._normalize_codex_response(response)
+ assistant_message, finish_reason = self._normalize_codex_response(
+ response
+ )
else:
assistant_message = response.choices[0].message
-
+
# Normalize content to string โ some OpenAI-compatible servers
# (llama-server, etc.) return content as a dict or list instead
# of a plain string, which crashes downstream .strip() calls.
- if assistant_message.content is not None and not isinstance(assistant_message.content, str):
+ if assistant_message.content is not None and not isinstance(
+ assistant_message.content, str
+ ):
raw = assistant_message.content
if isinstance(raw, dict):
- assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
+ assistant_message.content = (
+ raw.get("text", "")
+ or raw.get("content", "")
+ or json.dumps(raw)
+ )
elif isinstance(raw, list):
# Multimodal content list โ extract text parts
parts = []
@@ -4113,59 +4766,72 @@ def run_conversation(
# Handle assistant response
if assistant_message.content and not self.quiet_mode:
- print(f"{self.log_prefix}๐ค Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
+ print(
+ f"{self.log_prefix}๐ค Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}"
+ )
# Notify progress callback of model's thinking (used by subagent
# delegation to relay the child's reasoning to the parent display).
# Guard: only fire for subagents (_delegate_depth >= 1) to avoid
# spamming gateway platforms with the main agent's every thought.
- if (assistant_message.content and self.tool_progress_callback
- and getattr(self, '_delegate_depth', 0) > 0):
+ if (
+ assistant_message.content
+ and self.tool_progress_callback
+ and getattr(self, "_delegate_depth", 0) > 0
+ ):
_think_text = assistant_message.content.strip()
# Strip reasoning XML tags that shouldn't leak to parent display
_think_text = re.sub(
- r'?(?:REASONING_SCRATCHPAD|think|reasoning)>', '', _think_text
+ r"?(?:REASONING_SCRATCHPAD|think|reasoning)>", "", _think_text
).strip()
- first_line = _think_text.split('\n')[0][:80] if _think_text else ""
+ first_line = _think_text.split("\n")[0][:80] if _think_text else ""
if first_line:
try:
self.tool_progress_callback("_thinking", first_line)
except Exception:
pass
-
+
# Check for incomplete (opened but never closed)
# This means the model ran out of output tokens mid-reasoning โ retry up to 2 times
if has_incomplete_scratchpad(assistant_message.content or ""):
- if not hasattr(self, '_incomplete_scratchpad_retries'):
+ if not hasattr(self, "_incomplete_scratchpad_retries"):
self._incomplete_scratchpad_retries = 0
self._incomplete_scratchpad_retries += 1
-
- print(f"{self.log_prefix}โ ๏ธ Incomplete detected (opened but never closed)")
-
+
+ print(
+ f"{self.log_prefix}โ ๏ธ Incomplete detected (opened but never closed)"
+ )
+
if self._incomplete_scratchpad_retries <= 2:
- print(f"{self.log_prefix}๐ Retrying API call ({self._incomplete_scratchpad_retries}/2)...")
+ print(
+ f"{self.log_prefix}๐ Retrying API call ({self._incomplete_scratchpad_retries}/2)..."
+ )
# Don't add the broken message, just retry
continue
else:
# Max retries - discard this turn and save as partial
- print(f"{self.log_prefix}โ Max retries (2) for incomplete scratchpad. Saving as partial.")
+ print(
+ f"{self.log_prefix}โ Max retries (2) for incomplete scratchpad. Saving as partial."
+ )
self._incomplete_scratchpad_retries = 0
-
- rolled_back_messages = self._get_messages_up_to_last_assistant(messages)
+
+ rolled_back_messages = self._get_messages_up_to_last_assistant(
+ messages
+ )
self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history)
-
+
return {
"final_response": None,
"messages": rolled_back_messages,
"api_calls": api_call_count,
"completed": False,
"partial": True,
- "error": "Incomplete REASONING_SCRATCHPAD after 2 retries"
+ "error": "Incomplete REASONING_SCRATCHPAD after 2 retries",
}
-
+
# Reset incomplete scratchpad counter on clean response
- if hasattr(self, '_incomplete_scratchpad_retries'):
+ if hasattr(self, "_incomplete_scratchpad_retries"):
self._incomplete_scratchpad_retries = 0
if self.api_mode == "codex_responses" and finish_reason == "incomplete":
@@ -4173,9 +4839,17 @@ def run_conversation(
self._codex_incomplete_retries = 0
self._codex_incomplete_retries += 1
- interim_msg = self._build_assistant_message(assistant_message, finish_reason)
- interim_has_content = bool((interim_msg.get("content") or "").strip())
- interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
+ interim_msg = self._build_assistant_message(
+ assistant_message, finish_reason
+ )
+ interim_has_content = bool(
+ (interim_msg.get("content") or "").strip()
+ )
+ interim_has_reasoning = (
+ bool(interim_msg.get("reasoning", "").strip())
+ if isinstance(interim_msg.get("reasoning"), str)
+ else False
+ )
if interim_has_content or interim_has_reasoning:
last_msg = messages[-1] if messages else None
@@ -4183,15 +4857,19 @@ def run_conversation(
isinstance(last_msg, dict)
and last_msg.get("role") == "assistant"
and last_msg.get("finish_reason") == "incomplete"
- and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
- and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
+ and (last_msg.get("content") or "")
+ == (interim_msg.get("content") or "")
+ and (last_msg.get("reasoning") or "")
+ == (interim_msg.get("reasoning") or "")
)
if not duplicate_interim:
messages.append(interim_msg)
if self._codex_incomplete_retries < 3:
if not self.quiet_mode:
- print(f"{self.log_prefix}โป Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)")
+ print(
+ f"{self.log_prefix}โป Codex response incomplete; continuing turn ({self._codex_incomplete_retries}/3)"
+ )
self._session_messages = messages
self._save_session_log(messages)
continue
@@ -4208,51 +4886,68 @@ def run_conversation(
}
elif hasattr(self, "_codex_incomplete_retries"):
self._codex_incomplete_retries = 0
-
+
# Check for tool calls
if assistant_message.tool_calls:
if not self.quiet_mode:
- print(f"{self.log_prefix}๐ง Processing {len(assistant_message.tool_calls)} tool call(s)...")
-
+ print(
+ f"{self.log_prefix}๐ง Processing {len(assistant_message.tool_calls)} tool call(s)..."
+ )
+
if self.verbose_logging:
for tc in assistant_message.tool_calls:
- logging.debug(f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}...")
-
+ logging.debug(
+ f"Tool call: {tc.function.name} with args: {tc.function.arguments[:200]}..."
+ )
+
# Validate tool call names - detect model hallucinations
# Repair mismatched tool names before validating
for tc in assistant_message.tool_calls:
if tc.function.name not in self.valid_tool_names:
repaired = self._repair_tool_call(tc.function.name)
if repaired:
- print(f"{self.log_prefix}๐ง Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'")
+ print(
+ f"{self.log_prefix}๐ง Auto-repaired tool name: '{tc.function.name}' -> '{repaired}'"
+ )
tc.function.name = repaired
invalid_tool_calls = [
- tc.function.name for tc in assistant_message.tool_calls
+ tc.function.name
+ for tc in assistant_message.tool_calls
if tc.function.name not in self.valid_tool_names
]
if invalid_tool_calls:
# Return helpful error to model โ model can self-correct next turn
available = ", ".join(sorted(self.valid_tool_names))
invalid_name = invalid_tool_calls[0]
- invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
- print(f"{self.log_prefix}โ ๏ธ Unknown tool '{invalid_preview}' โ sending error to model for self-correction")
- assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
+ invalid_preview = (
+ invalid_name[:80] + "..."
+ if len(invalid_name) > 80
+ else invalid_name
+ )
+ print(
+ f"{self.log_prefix}โ ๏ธ Unknown tool '{invalid_preview}' โ sending error to model for self-correction"
+ )
+ assistant_msg = self._build_assistant_message(
+ assistant_message, finish_reason
+ )
messages.append(assistant_msg)
for tc in assistant_message.tool_calls:
if tc.function.name not in self.valid_tool_names:
content = f"Tool '{tc.function.name}' does not exist. Available tools: {available}"
else:
content = f"Skipped: another tool call in this turn used an invalid name. Please retry this tool call."
- messages.append({
- "role": "tool",
- "tool_call_id": tc.id,
- "content": content,
- })
+ messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": tc.id,
+ "content": content,
+ }
+ )
continue
# Reset retry counter on successful tool call validation
- if hasattr(self, '_invalid_tool_retries'):
+ if hasattr(self, "_invalid_tool_retries"):
self._invalid_tool_retries = 0
-
+
# Validate tool call arguments are valid JSON
# Handle empty strings as empty objects (common model quirk)
invalid_json_args = []
@@ -4266,23 +4961,29 @@ def run_conversation(
json.loads(args)
except json.JSONDecodeError as e:
invalid_json_args.append((tc.function.name, str(e)))
-
+
if invalid_json_args:
# Track retries for invalid JSON arguments
self._invalid_json_retries += 1
-
+
tool_name, error_msg = invalid_json_args[0]
- print(f"{self.log_prefix}โ ๏ธ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
-
+ print(
+ f"{self.log_prefix}โ ๏ธ Invalid JSON in tool call arguments for '{tool_name}': {error_msg}"
+ )
+
if self._invalid_json_retries < 3:
- print(f"{self.log_prefix}๐ Retrying API call ({self._invalid_json_retries}/3)...")
+ print(
+ f"{self.log_prefix}๐ Retrying API call ({self._invalid_json_retries}/3)..."
+ )
# Don't add anything to messages, just retry the API call
continue
else:
# Instead of returning partial, inject a helpful message and let model recover
- print(f"{self.log_prefix}โ ๏ธ Injecting recovery message for invalid JSON...")
+ print(
+ f"{self.log_prefix}โ ๏ธ Injecting recovery message for invalid JSON..."
+ )
self._invalid_json_retries = 0 # Reset for next attempt
-
+
# Add a user message explaining the issue
recovery_msg = (
f"Your tool call to '{tool_name}' had invalid JSON arguments. "
@@ -4293,25 +4994,29 @@ def run_conversation(
recovery_dict = {"role": "user", "content": recovery_msg}
messages.append(recovery_dict)
continue
-
+
# Reset retry counter on successful JSON validation
self._invalid_json_retries = 0
-
- assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
-
+
+ assistant_msg = self._build_assistant_message(
+ assistant_message, finish_reason
+ )
+
# If this turn has both content AND tool_calls, capture the content
# as a fallback final response. Common pattern: model delivers its
# answer and calls memory/skill tools as a side-effect in the same
# turn. If the follow-up turn after tools is empty, we use this.
turn_content = assistant_message.content or ""
- if turn_content and self._has_content_after_think_block(turn_content):
+ if turn_content and self._has_content_after_think_block(
+ turn_content
+ ):
self._last_content_with_tools = turn_content
# Show intermediate commentary so the user can follow along
if self.quiet_mode:
clean = self._strip_think_blocks(turn_content).strip()
if clean:
print(f" โ ๐ฌ {clean}")
-
+
messages.append(assistant_msg)
_msg_count_before_tools = len(messages)
@@ -4320,7 +5025,9 @@ def run_conversation(
# Refund the iteration if the ONLY tool(s) called were
# execute_code (programmatic tool calling). These are
# cheap RPC-style calls that shouldn't eat the budget.
- _tc_names = {tc.function.name for tc in assistant_message.tool_calls}
+ _tc_names = {
+ tc.function.name for tc in assistant_message.tool_calls
+ }
if _tc_names == {"execute_code"}:
self.iteration_budget.refund()
@@ -4343,83 +5050,109 @@ def run_conversation(
approx_tokens=self.context_compressor.last_prompt_tokens,
task_id=effective_task_id,
)
-
+
# Save session log incrementally (so progress is visible even if interrupted)
self._session_messages = messages
self._save_session_log(messages)
-
+
# Continue loop for next response
continue
-
+
else:
# No tool calls - this is the final response
final_response = assistant_message.content or ""
-
+
# Check if response only has think block with no actual content after it
if not self._has_content_after_think_block(final_response):
# If the previous turn already delivered real content alongside
# tool calls (e.g. "You're welcome!" + memory save), the model
# has nothing more to say. Use the earlier content immediately
# instead of wasting API calls on retries that won't help.
- fallback = getattr(self, '_last_content_with_tools', None)
+ fallback = getattr(self, "_last_content_with_tools", None)
if fallback:
- logger.debug("Empty follow-up after tool calls โ using prior turn content as final response")
+ logger.debug(
+ "Empty follow-up after tool calls โ using prior turn content as final response"
+ )
self._last_content_with_tools = None
self._empty_content_retries = 0
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
- if msg.get("role") == "assistant" and msg.get("tool_calls"):
+ if msg.get("role") == "assistant" and msg.get(
+ "tool_calls"
+ ):
tool_names = []
for tc in msg["tool_calls"]:
fn = tc.get("function", {})
tool_names.append(fn.get("name", "unknown"))
- msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
+ msg["content"] = (
+ f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
+ )
break
final_response = self._strip_think_blocks(fallback).strip()
break
# No fallback available โ this is a genuine empty response.
# Retry in case the model just had a bad generation.
- if not hasattr(self, '_empty_content_retries'):
+ if not hasattr(self, "_empty_content_retries"):
self._empty_content_retries = 0
self._empty_content_retries += 1
-
+
reasoning_text = self._extract_reasoning(assistant_message)
- print(f"{self.log_prefix}โ ๏ธ Response only contains think block with no content after it")
+ print(
+ f"{self.log_prefix}โ ๏ธ Response only contains think block with no content after it"
+ )
if reasoning_text:
- reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
+ reasoning_preview = (
+ reasoning_text[:500] + "..."
+ if len(reasoning_text) > 500
+ else reasoning_text
+ )
print(f"{self.log_prefix} Reasoning: {reasoning_preview}")
else:
- content_preview = final_response[:80] + "..." if len(final_response) > 80 else final_response
+ content_preview = (
+ final_response[:80] + "..."
+ if len(final_response) > 80
+ else final_response
+ )
print(f"{self.log_prefix} Content: '{content_preview}'")
-
+
if self._empty_content_retries < 3:
- print(f"{self.log_prefix}๐ Retrying API call ({self._empty_content_retries}/3)...")
+ print(
+ f"{self.log_prefix}๐ Retrying API call ({self._empty_content_retries}/3)..."
+ )
continue
else:
- print(f"{self.log_prefix}โ Max retries (3) for empty content exceeded.")
+ print(
+ f"{self.log_prefix}โ Max retries (3) for empty content exceeded."
+ )
self._empty_content_retries = 0
-
+
# If a prior tool_calls turn had real content, salvage it:
# rewrite that turn's content to a brief tool description,
# and use the original content as the final response here.
- fallback = getattr(self, '_last_content_with_tools', None)
+ fallback = getattr(self, "_last_content_with_tools", None)
if fallback:
self._last_content_with_tools = None
# Find the last assistant message with tool_calls and rewrite it
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
- if msg.get("role") == "assistant" and msg.get("tool_calls"):
+ if msg.get("role") == "assistant" and msg.get(
+ "tool_calls"
+ ):
tool_names = []
for tc in msg["tool_calls"]:
fn = tc.get("function", {})
tool_names.append(fn.get("name", "unknown"))
- msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
+ msg["content"] = (
+ f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..."
+ )
break
# Strip blocks from fallback content for user display
- final_response = self._strip_think_blocks(fallback).strip()
+ final_response = self._strip_think_blocks(
+ fallback
+ ).strip()
break
-
+
# No fallback -- append the empty message as-is
empty_msg = {
"role": "assistant",
@@ -4428,21 +5161,20 @@ def run_conversation(
"finish_reason": finish_reason,
}
messages.append(empty_msg)
-
self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history)
-
+
return {
"final_response": final_response or None,
"messages": messages,
"api_calls": api_call_count,
"completed": False,
"partial": True,
- "error": "Model generated only think blocks with no actual response after 3 retries"
+ "error": "Model generated only think blocks with no actual response after 3 retries",
}
-
+
# Reset retry counter on successful content
- if hasattr(self, '_empty_content_retries'):
+ if hasattr(self, "_empty_content_retries"):
self._empty_content_retries = 0
if (
@@ -4456,7 +5188,9 @@ def run_conversation(
)
):
codex_ack_continuations += 1
- interim_msg = self._build_assistant_message(assistant_message, "incomplete")
+ interim_msg = self._build_assistant_message(
+ assistant_message, "incomplete"
+ )
messages.append(interim_msg)
continue_msg = {
@@ -4475,25 +5209,28 @@ def run_conversation(
if truncated_response_prefix:
final_response = truncated_response_prefix + final_response
-
+
# Strip blocks from user-facing response (keep raw in messages for trajectory)
final_response = self._strip_think_blocks(final_response).strip()
-
- final_msg = self._build_assistant_message(assistant_message, finish_reason)
-
+
+ final_msg = self._build_assistant_message(
+ assistant_message, finish_reason
+ )
+
messages.append(final_msg)
-
if not self.quiet_mode:
- print(f"๐ Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
+ print(
+ f"๐ Conversation completed after {api_call_count} OpenAI-compatible API call(s)"
+ )
break
-
+
except Exception as e:
error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
print(f"โ {error_msg}")
-
+
if self.verbose_logging:
logging.exception("Detailed error information:")
-
+
# If an assistant message with tool_calls was already appended,
# the API expects a role="tool" result for every tool_call_id.
# Fill in error results for any that weren't answered yet.
@@ -4507,7 +5244,7 @@ def run_conversation(
if msg.get("role") == "assistant" and msg.get("tool_calls"):
answered_ids = {
m["tool_call_id"]
- for m in messages[idx + 1:]
+ for m in messages[idx + 1 :]
if isinstance(m, dict) and m.get("role") == "tool"
}
for tc in msg["tool_calls"]:
@@ -4520,7 +5257,7 @@ def run_conversation(
messages.append(err_msg)
pending_handled = True
break
-
+
if not pending_handled:
# Error happened before tool processing (e.g. response parsing).
# Use a user-role message so the model can see what went wrong
@@ -4530,20 +5267,23 @@ def run_conversation(
"content": f"[System error during processing: {error_msg}]",
}
messages.append(sys_err_msg)
-
# If we're near the limit, break to avoid infinite loops
if api_call_count >= self.max_iterations - 1:
- final_response = f"I apologize, but I encountered repeated errors: {error_msg}"
+ final_response = (
+ f"I apologize, but I encountered repeated errors: {error_msg}"
+ )
break
-
+
if final_response is None and (
api_call_count >= self.max_iterations
or self.iteration_budget.remaining <= 0
):
if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
- print(f"\nโ ๏ธ Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
+ print(
+ f"\nโ ๏ธ Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)"
+ )
final_response = self._handle_max_iterations(messages, api_call_count)
-
+
# Determine if conversation completed successfully
completed = final_response is not None and api_call_count < self.max_iterations
@@ -4577,23 +5317,23 @@ def run_conversation(
"partial": False, # True only when stopped due to invalid tool calls
"interrupted": interrupted,
}
-
+
# Include interrupt message if one triggered the interrupt
if interrupted and self._interrupt_message:
result["interrupt_message"] = self._interrupt_message
-
+
# Clear interrupt state after handling
self.clear_interrupt()
-
+
return result
-
+
def chat(self, message: str) -> str:
"""
Simple chat interface that returns just the final response.
-
+
Args:
message (str): User message
-
+
Returns:
str: Final assistant response
"""
@@ -4613,7 +5353,7 @@ def main(
save_trajectories: bool = False,
save_sample: bool = False,
verbose: bool = False,
- log_prefix_chars: int = 20
+ log_prefix_chars: int = 20,
):
"""
Main function for running the agent directly.
@@ -4639,58 +5379,69 @@ def main(
"""
print("๐ค AI Agent with Tool Calling")
print("=" * 50)
-
+
# Handle tool listing
if list_tools:
- from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets
+ from model_tools import (
+ get_all_tool_names,
+ get_toolset_for_tool,
+ get_available_toolsets,
+ )
from toolsets import get_all_toolsets, get_toolset_info
-
+
print("๐ Available Tools & Toolsets:")
print("-" * 50)
-
+
# Show new toolsets system
print("\n๐ฏ Predefined Toolsets (New System):")
print("-" * 40)
all_toolsets = get_all_toolsets()
-
+
# Group by category
basic_toolsets = []
composite_toolsets = []
scenario_toolsets = []
-
+
for name, toolset in all_toolsets.items():
info = get_toolset_info(name)
if info:
entry = (name, info)
if name in ["web", "terminal", "vision", "creative", "reasoning"]:
basic_toolsets.append(entry)
- elif name in ["research", "development", "analysis", "content_creation", "full_stack"]:
+ elif name in [
+ "research",
+ "development",
+ "analysis",
+ "content_creation",
+ "full_stack",
+ ]:
composite_toolsets.append(entry)
else:
scenario_toolsets.append(entry)
-
+
# Print basic toolsets
print("\n๐ Basic Toolsets:")
for name, info in basic_toolsets:
- tools_str = ', '.join(info['resolved_tools']) if info['resolved_tools'] else 'none'
+ tools_str = (
+ ", ".join(info["resolved_tools"]) if info["resolved_tools"] else "none"
+ )
print(f" โข {name:15} - {info['description']}")
print(f" Tools: {tools_str}")
-
+
# Print composite toolsets
print("\n๐ Composite Toolsets (built from other toolsets):")
for name, info in composite_toolsets:
- includes_str = ', '.join(info['includes']) if info['includes'] else 'none'
+ includes_str = ", ".join(info["includes"]) if info["includes"] else "none"
print(f" โข {name:15} - {info['description']}")
print(f" Includes: {includes_str}")
print(f" Total tools: {info['tool_count']}")
-
+
# Print scenario-specific toolsets
print("\n๐ญ Scenario-Specific Toolsets:")
for name, info in scenario_toolsets:
print(f" โข {name:20} - {info['description']}")
print(f" Total tools: {info['tool_count']}")
-
-
+
# Show legacy toolset compatibility
print("\n๐ฆ Legacy Toolsets (for backward compatibility):")
legacy_toolsets = get_available_toolsets()
@@ -4699,47 +5450,57 @@ def main(
print(f" {status} {name}: {info['description']}")
if not info["available"]:
print(f" Requirements: {', '.join(info['requirements'])}")
-
+
# Show individual tools
all_tools = get_all_tool_names()
print(f"\n๐ง Individual Tools ({len(all_tools)} available):")
for tool_name in sorted(all_tools):
toolset = get_toolset_for_tool(tool_name)
print(f" ๐ {tool_name} (from {toolset})")
-
+
print(f"\n๐ก Usage Examples:")
print(f" # Use predefined toolsets")
- print(f" python run_agent.py --enabled_toolsets=research --query='search for Python news'")
- print(f" python run_agent.py --enabled_toolsets=development --query='debug this code'")
- print(f" python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'")
+ print(
+ f" python run_agent.py --enabled_toolsets=research --query='search for Python news'"
+ )
+ print(
+ f" python run_agent.py --enabled_toolsets=development --query='debug this code'"
+ )
+ print(
+ f" python run_agent.py --enabled_toolsets=safe --query='analyze without terminal'"
+ )
print(f" ")
print(f" # Combine multiple toolsets")
- print(f" python run_agent.py --enabled_toolsets=web,vision --query='analyze website'")
+ print(
+ f" python run_agent.py --enabled_toolsets=web,vision --query='analyze website'"
+ )
print(f" ")
print(f" # Disable toolsets")
- print(f" python run_agent.py --disabled_toolsets=terminal --query='no command execution'")
+ print(
+ f" python run_agent.py --disabled_toolsets=terminal --query='no command execution'"
+ )
print(f" ")
print(f" # Run with trajectory saving enabled")
print(f" python run_agent.py --save_trajectories --query='your question here'")
return
-
+
# Parse toolset selection arguments
enabled_toolsets_list = None
disabled_toolsets_list = None
-
+
if enabled_toolsets:
enabled_toolsets_list = [t.strip() for t in enabled_toolsets.split(",")]
print(f"๐ฏ Enabled toolsets: {enabled_toolsets_list}")
-
+
if disabled_toolsets:
disabled_toolsets_list = [t.strip() for t in disabled_toolsets.split(",")]
print(f"๐ซ Disabled toolsets: {disabled_toolsets_list}")
-
+
if save_trajectories:
print(f"๐พ Trajectory saving: ENABLED")
print(f" - Successful conversations โ trajectory_samples.jsonl")
print(f" - Failed conversations โ failed_trajectories.jsonl")
-
+
# Initialize agent with provided parameters
try:
agent = AIAgent(
@@ -4751,12 +5512,12 @@ def main(
disabled_toolsets=disabled_toolsets_list,
save_trajectories=save_trajectories,
verbose_logging=verbose,
- log_prefix_chars=log_prefix_chars
+ log_prefix_chars=log_prefix_chars,
)
except RuntimeError as e:
print(f"โ Failed to initialize agent: {e}")
return
-
+
# Use provided query or default to Python 3.13 example
if query is None:
user_query = (
@@ -4765,45 +5526,43 @@ def main(
)
else:
user_query = query
-
+
print(f"\n๐ User Query: {user_query}")
print("\n" + "=" * 50)
-
+
# Run conversation
result = agent.run_conversation(user_query)
-
+
print("\n" + "=" * 50)
print("๐ CONVERSATION SUMMARY")
print("=" * 50)
print(f"โ
Completed: {result['completed']}")
print(f"๐ API Calls: {result['api_calls']}")
print(f"๐ฌ Messages: {len(result['messages'])}")
-
- if result['final_response']:
+
+ if result["final_response"]:
print(f"\n๐ฏ FINAL RESPONSE:")
print("-" * 30)
- print(result['final_response'])
-
+ print(result["final_response"])
+
# Save sample trajectory to UUID-named file if requested
if save_sample:
sample_id = str(uuid.uuid4())[:8]
sample_filename = f"sample_{sample_id}.json"
-
+
# Convert messages to trajectory format (same as batch_runner)
trajectory = agent._convert_to_trajectory_format(
- result['messages'],
- user_query,
- result['completed']
+ result["messages"], user_query, result["completed"]
)
-
+
entry = {
"conversations": trajectory,
"timestamp": datetime.now().isoformat(),
"model": model,
- "completed": result['completed'],
- "query": user_query
+ "completed": result["completed"],
+ "query": user_query,
}
-
+
try:
with open(sample_filename, "w", encoding="utf-8") as f:
# Pretty-print JSON with indent for readability
@@ -4811,7 +5570,7 @@ def main(
print(f"\n๐พ Sample trajectory saved to: {sample_filename}")
except Exception as e:
print(f"\nโ ๏ธ Failed to save sample: {e}")
-
+
print("\n๐ Agent execution completed!")