diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index 37777d29f644..d3b0159c6a5b 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -104,6 +104,7 @@ def set_dataset_type(dataset_name: str) -> str: AGENT_CLS_TO_FAKE_USER_RESPONSE_FN = { 'CodeActAgent': codeact_user_response, + 'OpenCodeAgent': codeact_user_response, } diff --git a/openhands/agenthub/__init__.py b/openhands/agenthub/__init__.py index ab0aa7ae1af6..5c5a167b1d08 100644 --- a/openhands/agenthub/__init__.py +++ b/openhands/agenthub/__init__.py @@ -8,6 +8,7 @@ codeact_agent, dummy_agent, loc_agent, + opencode_agent, readonly_agent, visualbrowsing_agent, ) @@ -21,4 +22,5 @@ 'visualbrowsing_agent', 'readonly_agent', 'loc_agent', + 'opencode_agent', ] diff --git a/openhands/agenthub/opencode_agent/README.md b/openhands/agenthub/opencode_agent/README.md new file mode 100644 index 000000000000..6257672af3ea --- /dev/null +++ b/openhands/agenthub/opencode_agent/README.md @@ -0,0 +1,212 @@ +# OpenCode Agent + +The OpenCode Agent is inspired by Claude Code's tool-based approach to software development assistance. + +## Overview + +This agent provides a comprehensive set of tools for: +- **File Operations**: read, write, edit, glob, grep, list_dir +- **Web Access**: webfetch, websearch +- **User Interaction**: question tool for clarifying requirements +- **Task Management**: todo_read, todo_write for tracking progress +- **Code Execution**: bash commands +- **Structured Edits**: apply_patch for multi-file atomic changes + +## Key Features + +### 1. File Operations +- `read`: Read file contents with line numbers +- `write`: Create or overwrite files +- `edit`: Make precise string replacements +- `glob`: Pattern-based file search +- `grep`: Content search across files +- `list_dir`: Directory listing + +### 2. 
Web Access +- `webfetch`: Fetch web content in text/markdown/html format +- `websearch`: Search the web for current information + +### 3. User Interaction +- `question`: Ask users multiple-choice or custom questions + - Supports single/multiple selection + - Helps clarify requirements before implementation + +### 4. Task Management +- `todo_read`: Read current task list +- `todo_write`: Update task list with new tasks or status changes + - Tracks: pending, in_progress, completed, cancelled + - Helps organize multi-step work + +### 5. Structured Editing +- `apply_patch`: Apply unified diff patches + - Supports add, update, delete, move operations + - Atomic multi-file changes + - LSP diagnostics after changes + +## Tool Descriptions + +### Core File Tools + +#### read +Reads files with line numbers. Supports: +- Absolute paths +- Offset and limit for large files +- Image files (jpeg, png, gif, webp) + +#### write +Creates or overwrites files. Automatically creates parent directories. + +#### edit +Makes exact string replacements in files. Requires reading the file first. + +#### glob +Finds files matching glob patterns (e.g., `**/*.py`, `src/**/*.ts`). + +#### grep +Searches file contents with regex support. Output modes: +- `content`: Show matching lines with context +- `files_with_matches`: Just file paths +- `count`: Match counts per file + +#### list_dir +Lists directory contents with optional recursion. 
+ +### Web Tools + +#### webfetch +Fetches web content with format conversion: +- `text`: Plain text extraction +- `markdown`: HTML to markdown conversion +- `html`: Raw HTML + +#### websearch +Searches the web with customizable options: +- `type`: auto/fast/deep search +- `livecrawl`: fallback/preferred for freshness +- Returns content optimized for LLMs + +### Interaction Tools + +#### question +Asks users structured questions: +```json +{ + "questions": [{ + "question": "Which library should we use?", + "header": "Library", + "options": [ + {"label": "React", "description": "Popular UI library"}, + {"label": "Vue", "description": "Progressive framework"} + ], + "multiple": false, + "custom": true + }] +} +``` + +### Task Management Tools + +#### todo_read +Returns current task list as JSON array. + +#### todo_write +Updates task list: +```json +{ + "todos": [ + {"id": "1", "title": "Implement feature", "status": "completed"}, + {"id": "2", "title": "Write tests", "status": "in_progress"} + ] +} +``` + +### Patch Tool + +#### apply_patch +Applies unified diff patches: +```diff +*** Begin Patch +--- path/to/file.py ++++ path/to/file.py +@@ -10,7 +10,7 @@ + unchanged +-old line ++new line + unchanged +*** End Patch +``` + +Supports: +- Add files: `+++ new/file.py` +- Delete files: `+++ /dev/null` +- Move files: Different `---` and `+++` paths +- Update files: Standard diffs + +## Usage + +### Quick Start + +To use the OpenCode agent in code: + +```python +from openhands.core.config import AgentConfig +from openhands.llm.llm_registry import LLMRegistry + +config = AgentConfig(agent_name='OpenCodeAgent') +llm_registry = LLMRegistry() +agent = OpenCodeAgent(config, llm_registry) +``` + +Or via configuration: +```yaml +agent: + name: OpenCodeAgent +``` + +### Evaluation and Benchmarking + +To run SWE-bench or other evaluations with OpenCodeAgent: + +```bash +# Using run_infer.sh script +export AGENT="OpenCodeAgent" 
+./evaluation/benchmarks/swe_bench/scripts/run_infer.sh llama3_1 test OpenCodeAgent + +# Or call run_infer.py directly +poetry run python evaluation/benchmarks/swe_bench/run_infer.py \ + --agent-cls OpenCodeAgent \ + --llm-config llama3_1 \ + --max-iterations 30 \ + --dataset princeton-nlp/SWE-bench_Lite \ + --split test +``` + +**For detailed instructions on:** +- Dynamic agent selection (OpenCodeAgent vs CodeActAgent) +- Custom system prompt overrides +- Configuration options +- Troubleshooting + +**See the comprehensive [Usage Guide](./USAGE_GUIDE.md)** + +## Design Principles + +1. **Clear Tool Boundaries**: Each tool has a specific purpose +2. **User Interaction**: Question tool for clarification before action +3. **Task Tracking**: Todo tools for managing complex workflows +4. **Atomic Operations**: apply_patch for multi-file changes +5. **Web Integration**: Fetch and search for current information + +## Comparison to CodeActAgent + +| Feature | CodeActAgent | OpenCodeAgent | +|---------|--------------|---------------| +| File ops | ✓ | ✓ | +| Web access | Browser tool | webfetch + websearch | +| User questions | - | ✓ question tool | +| Task tracking | task_tracker | todo_read/write | +| Multi-file edits | Individual edits | apply_patch | +| Jupyter | ✓ | - | +| Plan mode | ✓ | - | + +OpenCodeAgent is optimized for structured workflows with clear tool boundaries and explicit user interaction patterns. 
diff --git a/openhands/agenthub/opencode_agent/__init__.py b/openhands/agenthub/opencode_agent/__init__.py new file mode 100644 index 000000000000..991d822fc974 --- /dev/null +++ b/openhands/agenthub/opencode_agent/__init__.py @@ -0,0 +1,4 @@ +from openhands.agenthub.opencode_agent.opencode_agent import OpenCodeAgent +from openhands.controller.agent import Agent + +Agent.register('OpenCodeAgent', OpenCodeAgent) diff --git a/openhands/agenthub/opencode_agent/function_calling.py b/openhands/agenthub/opencode_agent/function_calling.py new file mode 100644 index 000000000000..db04d8f4f626 --- /dev/null +++ b/openhands/agenthub/opencode_agent/function_calling.py @@ -0,0 +1,313 @@ +"""Function calling implementation for OpenCode agent.""" + +import json + +from litellm import ModelResponse + +from openhands.agenthub.codeact_agent.function_calling import ( + combine_thought, + set_security_risk, +) +from openhands.agenthub.codeact_agent.tools import create_cmd_run_tool +from openhands.agenthub.opencode_agent.tools.apply_patch import APPLY_PATCH_TOOL_NAME +from openhands.agenthub.opencode_agent.tools.bash import create_cmd_run_tool +from openhands.agenthub.opencode_agent.tools.edit import EditTool +from openhands.agenthub.opencode_agent.tools.finish import FinishTool +from openhands.agenthub.opencode_agent.tools.glob import GlobTool +from openhands.agenthub.opencode_agent.tools.grep import GrepTool +from openhands.agenthub.opencode_agent.tools.list_dir import ListDirTool +from openhands.agenthub.opencode_agent.tools.question import QUESTION_TOOL_NAME +from openhands.agenthub.opencode_agent.tools.read import ReadTool +from openhands.agenthub.opencode_agent.tools.think import ThinkTool +from openhands.agenthub.opencode_agent.tools.todo import ( + TODO_READ_TOOL_NAME, + TODO_WRITE_TOOL_NAME, +) +from openhands.agenthub.opencode_agent.tools.write import WriteTool +from openhands.core.exceptions import ( + FunctionCallNotExistsError, + FunctionCallValidationError, + 
# Upper bound applied to a model-supplied 'timeout' before it is handed to
# CmdRunAction.set_hard_timeout (presumably seconds -- confirm against CmdRunAction).
_MAX_HARD_TIMEOUT = 600


def _require_arguments(arguments: dict, tool_name: str, *required: str) -> None:
    """Raise FunctionCallValidationError for the first required argument that is absent.

    Arguments are checked in the order given, so the reported argument is the
    first missing one.
    """
    for name in required:
        if name not in arguments:
            raise FunctionCallValidationError(
                f'Missing required argument "{name}" in tool call {tool_name}'
            )


def _extract_thought(content) -> str:
    """Return the free-text part of an assistant message that accompanies tool calls.

    A string is returned unchanged; a list contributes the "text" field of every
    entry whose "type" is "text"; anything else yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "".join(part["text"] for part in content if part["type"] == "text")
    return ""


def _build_action(
    tool_call, arguments: dict, mcp_tool_names: list[str] | None
) -> Action:
    """Map one decoded tool call onto the matching OpenHands action.

    Raises:
        FunctionCallValidationError: a required argument is missing or 'timeout'
            cannot be converted to a float.
        FunctionCallNotExistsError: the tool name matches neither a built-in
            tool nor an entry of ``mcp_tool_names``.
    """
    name = tool_call.function.name
    action: Action

    # ------------------------------ Bash / CmdRun ------------------------------
    if name == create_cmd_run_tool()['function']['name']:
        _require_arguments(arguments, name, 'command')
        raw_is_input = arguments.get('is_input', 'false')
        # Accept a real JSON boolean as well as the string form 'true'; the
        # previous string-only comparison silently turned `true` into False.
        is_input = raw_is_input is True or raw_is_input == 'true'
        action = CmdRunAction(command=arguments['command'], is_input=is_input)
        if 'timeout' in arguments:
            try:
                action.set_hard_timeout(
                    min(float(arguments['timeout']), _MAX_HARD_TIMEOUT)
                )
            except (TypeError, ValueError) as e:
                # TypeError covers null / list / object values, which float()
                # rejects with TypeError rather than ValueError.
                raise FunctionCallValidationError(
                    f"Invalid float passed to 'timeout' argument: {arguments['timeout']}"
                ) from e
        set_security_risk(action, arguments)

    # ---------------------------------- Read -----------------------------------
    elif name == ReadTool["function"]["name"]:
        _require_arguments(arguments, name, "file_path")
        action = OpenCodeReadAction(
            path=arguments["file_path"],
            offset=arguments.get("offset", 0),
            limit=arguments.get("limit", 2000),
        )

    # ---------------------------------- Write ----------------------------------
    elif name == WriteTool["function"]["name"]:
        _require_arguments(arguments, name, "file_path", "content")
        action = OpenCodeWriteAction(
            path=arguments["file_path"],
            content=arguments["content"],
        )

    # ---------------------------------- Edit -----------------------------------
    elif name == EditTool["function"]["name"]:
        _require_arguments(arguments, name, "file_path", "old_string", "new_string")
        action = FileEditAction(
            path=arguments["file_path"],
            command="str_replace",
            old_str=arguments["old_string"],
            new_str=arguments["new_string"],
            impl_source=FileEditSource.OH_ACI,
        )

    # ---------------------------------- Glob -----------------------------------
    elif name == GlobTool["function"]["name"]:
        _require_arguments(arguments, name, "pattern")
        action = GlobAction(
            pattern=arguments["pattern"],
            path=arguments.get("path", "."),
        )

    # ---------------------------------- Grep -----------------------------------
    elif name == GrepTool["function"]["name"]:
        _require_arguments(arguments, name, "pattern")
        action = GrepAction(
            pattern=arguments["pattern"],
            path=arguments.get("path", "."),
            include=arguments.get("include", ""),
        )

    # --------------------------------- ListDir ---------------------------------
    elif name == ListDirTool["function"]["name"]:
        action = ListDirAction(
            path=arguments.get("path", "."),
            ignore=arguments.get("ignore", []),
        )

    # -------------------------------- Question ---------------------------------
    elif name == QUESTION_TOOL_NAME:
        _require_arguments(arguments, name, "questions")
        action = QuestionAction(
            questions=arguments["questions"],
        )

    # ------------------------------- ApplyPatch --------------------------------
    elif name == APPLY_PATCH_TOOL_NAME:
        _require_arguments(arguments, name, "patchText")
        action = ApplyPatchAction(
            patchText=arguments["patchText"],
        )

    # -------------------------------- TodoRead ---------------------------------
    elif name == TODO_READ_TOOL_NAME:
        action = TodoReadAction()

    # -------------------------------- TodoWrite --------------------------------
    elif name == TODO_WRITE_TOOL_NAME:
        _require_arguments(arguments, name, "todos")
        action = TodoWriteAction(
            todos=arguments["todos"],
        )

    # ---------------------------------- Think ----------------------------------
    elif name == ThinkTool['function']['name']:
        action = AgentThinkAction(thought=arguments.get('thought', ''))

    # --------------------------------- Finish ----------------------------------
    elif name == FinishTool['function']['name']:
        action = AgentFinishAction(
            final_thought=arguments.get('message', ''),
        )

    # ----------------------------------- MCP -----------------------------------
    elif mcp_tool_names and name in mcp_tool_names:
        action = MCPAction(
            name=name,
            arguments=arguments,
        )

    else:
        raise FunctionCallNotExistsError(
            f'Tool {tool_call.function.name} is not registered. (arguments: {arguments}). Please check the tool name and retry with an existing tool.'
        )

    return action


def response_to_actions(
    response: ModelResponse, mcp_tool_names: list[str] | None = None
) -> list[Action]:
    """Convert an LLM response into OpenHands actions for the OpenCode agent.

    Each tool call in the single response choice becomes one action. A tool
    call that fails validation becomes a ValidationFailureAction instead of
    aborting the whole response. A response without tool calls becomes one
    MessageAction that waits for a user reply.

    Args:
        response: Model response; exactly one choice is supported.
        mcp_tool_names: Names that should be routed to MCPAction when no
            built-in tool matches.

    Returns:
        A non-empty list of actions, each tagged with ``response.id`` and with
        tool-call metadata attached.

    Raises:
        LLMContextWindowExceedError: the message has neither content nor tool calls.
        FunctionCallNotExistsError: a tool call names an unknown tool.
    """
    actions: list[Action] = []
    assert len(response.choices) == 1, "Only one choice is supported for now"
    choice = response.choices[0]
    assistant_msg = choice.message

    has_content = assistant_msg.content is not None
    tool_calls = getattr(assistant_msg, "tool_calls", None)

    if not has_content and not tool_calls:
        raise LLMContextWindowExceedError(
            "LLM returned empty response with no content and no tool calls. This indicates the context length limit has been exceeded."
        )

    if tool_calls:
        thought = _extract_thought(assistant_msg.content)

        for i, tool_call in enumerate(tool_calls):
            action: Action
            logger.debug(f'Tool call in opencode function_calling.py: {tool_call}')

            try:
                try:
                    arguments = json.loads(tool_call.function.arguments)
                except json.decoder.JSONDecodeError as e:
                    raise FunctionCallValidationError(
                        f'Failed to parse tool call arguments: {tool_call.function.arguments}'
                    ) from e

                # Valid JSON that is not an object (list, string, number, null)
                # would otherwise crash on `.get` or give misleading
                # "missing argument" errors further down.
                if not isinstance(arguments, dict):
                    raise FunctionCallValidationError(
                        f'Tool call arguments must be a JSON object: {tool_call.function.arguments}'
                    )

                action = _build_action(tool_call, arguments, mcp_tool_names)

            except FunctionCallValidationError as e:
                # Surface the validation problem as an action; the thought is
                # attached here only for the first call, mirroring
                # combine_thought below.
                action = ValidationFailureAction(
                    function_name=tool_call.function.name,
                    error_message=str(e),
                    thought=thought if i == 0 else '',
                )

            # Only the first action carries the assistant's accompanying text.
            if i == 0 and not isinstance(action, ValidationFailureAction):
                action = combine_thought(action, thought)

            action.tool_call_metadata = ToolCallMetadata(
                tool_call_id=tool_call.id,
                function_name=tool_call.function.name,
                model_response=response,
                total_calls_in_response=len(tool_calls),
            )
            actions.append(action)
    else:
        message_action = MessageAction(
            content=str(assistant_msg.content) if assistant_msg.content else "",
            wait_for_response=True,
        )
        message_action.tool_call_metadata = ToolCallMetadata(
            model_response=response,
            total_calls_in_response=0,
        )
        actions.append(message_action)

    # Tag every action with the id of the response that produced it.
    for action in actions:
        action.response_id = response.id

    assert len(actions) >= 1
    return actions
openhands.agenthub.opencode_agent.tools.finish import FinishTool +from openhands.agenthub.opencode_agent.tools.glob import GlobTool +from openhands.agenthub.opencode_agent.tools.grep import GrepTool +from openhands.agenthub.opencode_agent.tools.list_dir import ListDirTool +from openhands.agenthub.opencode_agent.tools.question import QuestionTool +from openhands.agenthub.opencode_agent.tools.read import ReadTool +from openhands.agenthub.opencode_agent.tools.think import ThinkTool +from openhands.agenthub.opencode_agent.tools.todo import TodoReadTool, TodoWriteTool +from openhands.agenthub.opencode_agent.tools.write import WriteTool +from openhands.controller.agent import Agent +from openhands.controller.state.state import State +from openhands.core.config import AgentConfig +from openhands.core.logger import openhands_logger as logger +from openhands.core.message import Message +from openhands.events.action import AgentFinishAction, MessageAction +from openhands.events.event import Event +from openhands.llm.llm_utils import check_tools +from openhands.memory.condenser import Condenser +from openhands.memory.condenser.condenser import Condensation, View +from openhands.memory.conversation_memory import ConversationMemory +from openhands.runtime.plugins import ( + AgentSkillsRequirement, + PluginRequirement, +) +from openhands.utils.prompt import PromptManager + + +class OpenCodeAgent(Agent): + VERSION = "1.0" + """ + The OpenCode Agent is inspired by Claude Code's tool-based approach. + + This agent provides a comprehensive set of tools for file operations, web access, + user interaction, and task management. It emphasizes structured workflows and + clear tool boundaries. + + ### Key Features + + 1. **File Operations**: Read, write, edit, glob, grep, list_dir + 2. **User Interaction**: question tool for clarifying requirements + 3. **Task Management**: todo_read, todo_write for tracking progress + 4. 
**Code Execution**: bash commands for running scripts and tools + 5. **Structured Edits**: apply_patch for multi-file changes + + The agent uses function calling to invoke tools and maintains conversation + history for context-aware assistance. + """ + + sandbox_plugins: list[PluginRequirement] = [ + AgentSkillsRequirement(), + ] + + def __init__(self, config: AgentConfig, llm_registry: LLMRegistry) -> None: + """Initializes a new instance of the OpenCodeAgent class. + + Parameters: + - config (AgentConfig): The configuration for this agent + """ + super().__init__(config, llm_registry) + self.pending_actions: deque["Action"] = deque() + self.reset() + self.tools = self._get_tools() + + # Create a ConversationMemory instance + self.conversation_memory = ConversationMemory(self.config, self.prompt_manager) + + self.condenser = Condenser.from_config(self.config.condenser, llm_registry) + logger.debug(f"Using condenser: {type(self.condenser)}") + + # Override with router if needed + self.llm = self.llm_registry.get_router(self.config) + + @property + def prompt_manager(self) -> PromptManager: + if self._prompt_manager is None: + # Use custom prompt directory if configured, otherwise use default + prompt_dir = ( + self.config.custom_prompt_dir + if self.config.custom_prompt_dir + else os.path.join(os.path.dirname(__file__), "prompts") + ) + + # Build template overrides from custom paths + template_overrides = {} + if self.config.system_prompt_path: + template_overrides["system_prompt.j2"] = self.config.system_prompt_path + if self.config.system_prompt_long_horizon_path: + template_overrides["system_prompt_long_horizon.j2"] = ( + self.config.system_prompt_long_horizon_path + ) + + self._prompt_manager = PromptManager( + prompt_dir=prompt_dir, + system_prompt_filename=self.config.resolved_system_prompt_filename, + template_overrides=template_overrides if template_overrides else None, + ) + + return self._prompt_manager + + def _get_tools(self) -> 
list["ChatCompletionToolParam"]: + """Get the list of tools available to the OpenCode agent.""" + tools = [] + + # Core file operation tools (OpenCode-style) + tools.append(ReadTool) + tools.append(WriteTool) + tools.append(EditTool) + + # File search and navigation tools + tools.append(GlobTool) + tools.append(GrepTool) + tools.append(ListDirTool) + + # User interaction tools + # tools.append(QuestionTool) + + # Task management tools + tools.append(TodoReadTool) + tools.append(TodoWriteTool) + + # Structured editing + # tools.append(ApplyPatchTool) + + # Command execution + if self.config.enable_cmd: + use_short_desc = any( + substr in self.llm.config.model + for substr in ["gpt-4", "o3", "o1", "o4"] + ) + tools.append(create_cmd_run_tool(use_short_description=use_short_desc)) + + if self.config.enable_finish: + tools.append(FinishTool) + + return tools + + def reset(self) -> None: + """Resets the OpenCode Agent's internal state.""" + super().reset() + self.pending_actions.clear() + + def step(self, state: State) -> "Action": + """Performs one step using the OpenCode Agent. + + This includes gathering info on previous steps and prompting the model to make a command to execute. 
+ + Parameters: + - state (State): used to get updated info + + Returns: + - Various Action types based on tool calls + """ + # Continue with pending actions if any + if self.pending_actions: + return self.pending_actions.popleft() + + # if we're done, go back + latest_user_message = state.get_last_user_message() + if latest_user_message and latest_user_message.content.strip() == "/exit": + return AgentFinishAction() + + # Condense the events from the state + condensed_history: list[Event] = [] + match self.condenser.condensed_history(state): + case View(events=events): + condensed_history = events + + case Condensation(action=condensation_action): + return condensation_action + + logger.debug( + f"Processing {len(condensed_history)} events from a total of {len(state.history)} events" + ) + + initial_user_message = self._get_initial_user_message(state.history) + messages = self._get_messages(condensed_history, initial_user_message) + params: dict = { + "messages": messages, + } + params["tools"] = check_tools(self.tools, self.llm.config) + params["extra_body"] = { + "metadata": state.to_llm_metadata( + model_name=self.llm.config.model, agent_name=self.name + ) + } + response = self.llm.completion(**params) + logger.debug(f"Response from LLM: {response}") + actions = self.response_to_actions(response) + logger.debug(f"Actions after response_to_actions: {actions}") + for action in actions: + self.pending_actions.append(action) + return self.pending_actions.popleft() + + def _get_initial_user_message(self, history: list[Event]) -> MessageAction: + """Finds the initial user message action from the full history.""" + initial_user_message: MessageAction | None = None + for event in history: + if isinstance(event, MessageAction) and event.source == "user": + initial_user_message = event + break + + if initial_user_message is None: + logger.error( + f"CRITICAL: Could not find the initial user MessageAction in the full {len(history)} events history." 
+ ) + raise ValueError( + "Initial user message not found in history. Please report this issue." + ) + return initial_user_message + + def _get_messages( + self, events: list[Event], initial_user_message: MessageAction + ) -> list[Message]: + """Constructs the message history for the LLM conversation.""" + if not self.prompt_manager: + raise Exception("Prompt Manager not instantiated.") + + # Use ConversationMemory to process events + messages = self.conversation_memory.process_events( + condensed_history=events, + initial_user_action=initial_user_message, + max_message_chars=self.llm.config.max_message_chars, + vision_is_active=self.llm.vision_is_active(), + ) + + if self.llm.is_caching_prompt_active(): + self.conversation_memory.apply_prompt_caching(messages) + + return messages + + def response_to_actions(self, response: "ModelResponse") -> list["Action"]: + return opencode_function_calling.response_to_actions( + response, + mcp_tool_names=list(self.mcp_tools.keys()), + ) diff --git a/openhands/agenthub/opencode_agent/prompts/additional_info.j2 b/openhands/agenthub/opencode_agent/prompts/additional_info.j2 new file mode 100644 index 000000000000..5e0ec14ba00f --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/additional_info.j2 @@ -0,0 +1,53 @@ +{% if repository_info %} + +At the user's request, repository {{ repository_info.repo_name }} has been cloned to {{ repository_info.repo_directory }} in the current working directory. +{% if repository_info.branch_name %}The repository has been checked out to branch "{{ repository_info.branch_name }}". + +IMPORTANT: You should work within the current branch "{{ repository_info.branch_name }}" unless: + 1. the user explicitly instructs otherwise + 2. 
the current branch is "main", "master", or another default branch where direct pushes may be unsafe +{% endif %} + +{% endif %} +{% if repository_instructions -%} + +{{ repository_instructions }} + +{% endif %} +{% if runtime_info -%} + +{% if runtime_info.working_dir %} +The current working directory is {{ runtime_info.working_dir }} +{% endif %} +{% if runtime_info.available_hosts %} +The user has access to the following hosts for accessing a web application, +each of which has a corresponding port: +{% for host, port in runtime_info.available_hosts.items() -%} +* {{ host }} (port {{ port }}) +{% endfor %} +When starting a web server, use the corresponding ports. You should also +set any options to allow iframes and CORS requests, and allow the server to +be accessed from any host (e.g. 0.0.0.0). +For example, if you are using vite.config.js, you should set server.host and server.allowedHosts to true +{% endif %} +{% if runtime_info.additional_agent_instructions %} +{{ runtime_info.additional_agent_instructions }} +{% endif %} +{% if runtime_info.custom_secrets_descriptions %} + +You have access to the following environment variables +{% for secret_name, secret_description in runtime_info.custom_secrets_descriptions.items() %} +* **${{ secret_name }}**: {{ secret_description }} +{% endfor %} + +{% endif %} +{% if runtime_info.date %} +Today's date is {{ runtime_info.date }} (UTC). +{% endif %} + +{% if conversation_instructions and conversation_instructions.content -%} + +{{ conversation_instructions.content }} + +{% endif %} +{% endif %} diff --git a/openhands/agenthub/opencode_agent/prompts/in_context_learning_example.j2 b/openhands/agenthub/opencode_agent/prompts/in_context_learning_example.j2 new file mode 100644 index 000000000000..63dac55421a3 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/in_context_learning_example.j2 @@ -0,0 +1,169 @@ +Here's a running example of how to perform a task with the provided tools. 
+ +--------------------- START OF EXAMPLE --------------------- + +USER: Create a list of numbers from 1 to 10, and display them in a web page at port 5000. + +A: Sure! Let me first check the current directory: + + +pwd && ls + + + +USER: EXECUTION RESULT of [execute_bash]: +/workspace +openhands@runtime:~/workspace$ + +A: There is no `app.py` file in the current directory. Let me create a Python file `app.py`: + +create +/workspace/app.py + +from flask import Flask +app = Flask(__name__) + +@app.route('/') +def index(): + numbers = list(range(1, 11)) + return '' + ''.join([f'' for i in numbers]) + '
{i}
' + +if __name__ == '__main__': + app.run(port=5000) + + + +USER: EXECUTION RESULT of [str_replace_editor]: +File created successfully at: /workspace/app.py + +A: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you: + + +python3 app.py > server.log 2>&1 & + + + +USER: EXECUTION RESULT of [execute_bash]: +[1] 121 +[1]+ Exit 1 python3 app.py > server.log 2>&1 + +A: Looks like the server is running with PID 121 then crashed. Let me check the server log: + + +cat server.log + + + +USER: EXECUTION RESULT of [execute_bash]: +Traceback (most recent call last): + File "/workspace/app.py", line 2, in + from flask import Flask +ModuleNotFoundError: No module named 'flask' + +A: Looks like the server crashed because the `flask` module is not installed. Let me install the `flask` module for you: + + +pip3 install flask + + + +USER: EXECUTION RESULT of [execute_bash]: +Defaulting to user installation because normal site-packages is not writeable +Collecting flask + Using cached flask-3.0.3-py3-none-any.whl (101 kB) +Collecting blinker>=1.6.2 + Using cached blinker-1.7.0-py3-none-any.whl (13 kB) +Collecting Werkzeug>=3.0.0 + Using cached werkzeug-3.0.2-py3-none-any.whl (226 kB) +Collecting click>=8.1.3 + Using cached click-8.1.7-py3-none-any.whl (97 kB) +Collecting itsdangerous>=2.1.2 + Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB) +Requirement already satisfied: Jinja2>=3.1.2 in /home/openhands/.local/lib/python3.10/site-packages (from flask) (3.1.3) +Requirement already satisfied: MarkupSafe>=2.0 in /home/openhands/.local/lib/python3.10/site-packages (from Jinja2>=3.1.2->flask) (2.1.5) +Installing collected packages: Werkzeug, itsdangerous, click, blinker, flask +Successfully installed Werkzeug-3.0.2 blinker-1.7.0 click-8.1.7 flask-3.0.3 itsdangerous-2.2.0 + +A: The `flask` module is installed successfully. 
Let me run the Python file again: + + +python3 app.py > server.log 2>&1 & + + + +USER: EXECUTION RESULT of [execute_bash]: +[1] 124 + +A: +Let me check the server log again: + + +cat server.log + + + +USER: EXECUTION RESULT of [execute_bash]: +* Serving Flask app 'app' + * Debug mode: off +WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead. + * Running on http://127.0.0.1:5000 +Press CTRL+C to quit + +A: +The server is running on port 5000 with PID 124. You can access the list of numbers by visiting http://127.0.0.1:5000. If you have any further questions, feel free to ask! + +USER: Now kill the server, make it display the numbers in a table format. + +A: +Sure! Let me stop the server first: + + +kill 124 + + + +USER: EXECUTION RESULT of [execute_bash]: +[1]+ Terminated python3 app.py > server.log 2>&1 + +A: +Now let me display the numbers in a table format: + +str_replace +/workspace/app.py +return '' + ''.join([f'' for i in numbers]) + '
{i}
' +return '' + ''.join([f'' for i in numbers]) + '
{i}
' + + +USER: EXECUTION RESULT of [str_replace_editor]: +The file /workspace/app.py has been edited. Here's the result of running `cat -n` on a snippet of /workspace/app.py: + 3 + 4 @app.route('/') + 5 def index(): + 6 numbers = list(range(1, 11)) + 7 return '' + ''.join([f'' for i in numbers]) + '
{i}
' + 8 + 9 if __name__ == '__main__': + 10 app.run(port=5000) +Review the changes and make sure they are as expected. Edit the file again if necessary. + +A: +Running the updated file: + + +python3 app.py > server.log 2>&1 & + + + +USER: EXECUTION RESULT of [execute_bash]: +[1] 126 + +A: +The server is running on port 5000 with PID 126. You can access the list of numbers in a table format by visiting http://127.0.0.1:5000. Let me know if you have any further requests! + + + +--------------------- END OF EXAMPLE --------------------- + +Do NOT assume the environment is the same as in the example above. + +--------------------- NEW TASK DESCRIPTION --------------------- diff --git a/openhands/agenthub/opencode_agent/prompts/in_context_learning_example_suffix.j2 b/openhands/agenthub/opencode_agent/prompts/in_context_learning_example_suffix.j2 new file mode 100644 index 000000000000..f9868b7b75a7 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/in_context_learning_example_suffix.j2 @@ -0,0 +1,3 @@ +--------------------- END OF NEW TASK DESCRIPTION --------------------- + +PLEASE follow the format strictly! PLEASE EMIT ONE AND ONLY ONE FUNCTION CALL PER MESSAGE. diff --git a/openhands/agenthub/opencode_agent/prompts/microagent_info.j2 b/openhands/agenthub/opencode_agent/prompts/microagent_info.j2 new file mode 100644 index 000000000000..264828fbe206 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/microagent_info.j2 @@ -0,0 +1,8 @@ +{% for agent_info in triggered_agents %} + +The following information has been included based on a keyword match for "{{ agent_info.trigger }}". +It may or may not be relevant to the user's request. 
+ +{{ agent_info.content }} + +{% endfor %} diff --git a/openhands/agenthub/opencode_agent/prompts/security_risk_assessment.j2 b/openhands/agenthub/opencode_agent/prompts/security_risk_assessment.j2 new file mode 100644 index 000000000000..5be605df8da5 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/security_risk_assessment.j2 @@ -0,0 +1,23 @@ +# 🔐 Security Risk Policy +When using tools that support the security_risk parameter, assess the safety risk of your actions: + +{% if cli_mode %} +- **LOW**: Safe, read-only actions. + - Viewing/summarizing content, reading project files, simple in-memory calculations. +- **MEDIUM**: Project-scoped edits or execution. + - Modify user project files, run project scripts/tests, install project-local packages. +- **HIGH**: System-level or untrusted operations. + - Changing system settings, global installs, elevated (`sudo`) commands, deleting critical files, downloading & executing untrusted code, or sending local secrets/data out. + +{% else %} +- **LOW**: Read-only actions inside sandbox. + - Inspecting container files, calculations, viewing docs. +- **MEDIUM**: Container-scoped edits and installs. + - Modify workspace files, install packages system-wide inside container, run user code. +- **HIGH**: Data exfiltration or privilege breaks. + - Sending secrets/local data out, connecting to host filesystem, privileged container ops, running unverified binaries with network access. + +{% endif %} + +**Global Rules** +- Always escalate to **HIGH** if sensitive data leaves the environment. diff --git a/openhands/agenthub/opencode_agent/prompts/system_prompt.j2 b/openhands/agenthub/opencode_agent/prompts/system_prompt.j2 new file mode 100644 index 000000000000..d84ce1742bd7 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/system_prompt.j2 @@ -0,0 +1,94 @@ +You are OpenHands agent, powered by OpenCode-style tools for software development. 
+ + +Your primary role is to assist users by reading and modifying files, searching code, executing commands, and solving technical problems effectively. You have access to specialized tools for file operations, web access, user interaction, and task management. +* If the user asks a question like "why is X happening", don't try to fix the problem. Just give an answer to the question. +* Use the appropriate tools for each task - avoid using bash for file operations when dedicated tools exist. + + + +You have access to the following categories of tools: + +**File Operations:** +- read: Read file contents with line numbers +- write: Create or overwrite files +- edit: Make precise string replacements in files +- glob: Find files matching patterns (e.g., **/*.py) +- grep: Search file contents with regex +- list_dir: List directory contents + +**Web Access:** +- webfetch: Fetch web content in text/markdown/html format +- websearch: Search the web for current information + +**User Interaction:** +- question: Ask users structured questions with multiple-choice or custom options + +**Task Management:** +- todo_read: Read current task list +- todo_write: Update task list with status changes + +**Code Changes:** +- apply_patch: Apply unified diff patches for multi-file changes +- execute_bash: Run shell commands + +**Other:** +- think: Log reasoning process +- finish: Signal task completion + + + +* Always use the most appropriate tool for the task: + - For reading files: Use `read` instead of `cat` or `less` + - For writing files: Use `write` instead of `echo` or `cat` + - For editing files: Use `edit` instead of `sed` or inline editing + - For finding files: Use `glob` instead of `find` + - For searching content: Use the `grep` tool instead of running `grep` in the shell + - For directory listing: Use `list_dir` instead of `ls` +* Only use `execute_bash` for tasks that require shell execution (git, npm, docker, etc.) 
+* Use `apply_patch` for multi-file atomic changes instead of editing files one by one +* Use `question` to clarify requirements before making significant changes +* Use `todo_read` and `todo_write` to track progress on multi-step tasks + + + +* For complex multi-step tasks (3+ steps), use `todo_write` to create a task list +* Mark tasks as "in_progress" when starting, "completed" when done +* Only have ONE task "in_progress" at a time +* Update task status immediately after completing work +* Use `todo_read` to check current status before updating + + + +* Use the `question` tool to clarify ambiguous requirements +* Ask before making potentially destructive changes +* Structure questions with clear options and descriptions +* Set `multiple: true` for questions allowing multiple selections +* Recommended options should be listed first with "(Recommended)" suffix + + + +* Use `webfetch` to retrieve documentation, web pages, or API responses +* Use `websearch` to find current information not in your knowledge base +* For `webfetch`, choose appropriate format: markdown (default), text, or html +* For `websearch`, use descriptive queries and adjust `type` (auto/fast/deep) as needed + + + +* Write clean, efficient code with minimal comments +* Make minimal changes needed to solve the problem +* Thoroughly understand the codebase before implementing changes +* Place all imports at the top of files unless there's a specific reason not to +* If working in a git repo, create or update .gitignore as appropriate + + + +1. EXPLORATION: Use `read`, `glob`, `grep`, `list_dir` to understand the codebase +2. CLARIFICATION: Use `question` tool if requirements are unclear +3. PLANNING: Use `todo_write` to create a task list for complex work +4. IMPLEMENTATION: Use appropriate tools (edit, write, apply_patch) to make changes +5. TESTING: Run tests if environment is set up +6. 
VERIFICATION: Mark tasks as completed using `todo_write` + + +You are thoughtful, methodical, and use the right tools for each task. Always prioritize clarity and correctness over speed. diff --git a/openhands/agenthub/opencode_agent/prompts/system_prompt_interactive.j2 b/openhands/agenthub/opencode_agent/prompts/system_prompt_interactive.j2 new file mode 100644 index 000000000000..75589a3bcf4c --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/system_prompt_interactive.j2 @@ -0,0 +1,14 @@ +{% include "system_prompt.j2" %} + + +* When the user instructions are high-level or vague, explore the codebase before implementing solutions or interacting with users to figure out the best approach. + 1. Read and follow project-specific documentation (rules.md, README, etc.) before making assumptions about workflows, conventions, or feature implementations. + 2. Deliver complete, production-ready solutions rather than partial implementations; ensure all components work together before presenting results. + 3. Check for existing solutions and test cases before creating new implementations; leverage established patterns rather than reinventing functionality. + +* If you are not sure about the user's intent, ask for clarification before proceeding. + 1. Always validate file existence and permissions before performing operations, and get back to users with clear error messages with specific paths when files are not found. + 2. Support multilingual communication preferences and clarify requirements upfront to avoid repeated back-and-forth questioning. + 3. Explain technical decisions clearly when making architectural choices, especially when creating new files or adding complexity to existing solutions. + 4. Avoid resource waste by confirming requirements and approach before executing complex operations or generating extensive code. 
+ diff --git a/openhands/agenthub/opencode_agent/prompts/system_prompt_long_horizon.j2 b/openhands/agenthub/opencode_agent/prompts/system_prompt_long_horizon.j2 new file mode 100644 index 000000000000..5859372efad1 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/system_prompt_long_horizon.j2 @@ -0,0 +1,46 @@ +{% include "system_prompt.j2" %} + + +* You have access to the `task_tracker` tool to help you organize and monitor development work. Use this tool REGULARLY to maintain task visibility and provide users with clear progress updates. This tool is ESSENTIAL for systematic planning and decomposing complex development work into manageable components. Failing to use this tool for planning may result in overlooked requirements - which is unacceptable. +* It is crucial that you update task status to "done" immediately upon completion of each work item. Do not accumulate multiple finished tasks before updating their status. +* For complex, multi-phase development work, use `task_tracker` to establish a comprehensive plan with well-defined steps: + 1. Begin by decomposing the overall objective into primary phases using `task_tracker` + 2. Include detailed work items as necessary to break complex activities into actionable units + 3. Update tasks to "in_progress" status when commencing work on them + 4. Update tasks to "done" status immediately after completing each item + 5. For each primary phase, incorporate additional work items as you identify new requirements + 6. If you determine the plan requires substantial modifications, suggest revisions and obtain user confirmation before proceeding +* Example workflow for debugging and resolution: + ``` + User: "Execute the test suite and resolve any validation failures" + Assistant: I'm going to use the task_tracker tool to organize the following work items: + - Execute the test suite + - Resolve any validation failures + I'm now going to run the test suite using the terminal. 
+ [After running tests and discovering 8 validation failures] + I found 8 validation failures that need attention. I'm going to use the task_tracker tool to add 8 specific items to the task list. + [Updating first task to in_progress] + Let me begin addressing the first validation issue... + [After resolving first failure] + The first validation issue has been resolved, let me mark that task as done and proceed to the second item... + ``` +* Example workflow for component development: + ``` + User: "Build a dashboard component that displays analytics data with interactive charts and filtering options" + Assistant: I'll help you create an analytics dashboard with interactive charts and filtering. Let me first use the task_tracker tool to organize this development work. + Adding the following tasks to the tracker: + 1. Analyze existing analytics data structure and requirements + 2. Design dashboard layout and component architecture + 3. Implement data visualization charts with interactivity + 4. Create filtering and search functionality + 5. Integrate components and perform testing + Let me start by examining the current analytics data structure to understand what we're working with... 
+ [Assistant proceeds with implementation step by step, updating tasks to in_progress and done as work progresses] + ``` + + + +* IMPORTANT: If you were using the task_tracker tool before a condensation event, continue using it after condensation +* Check condensation summaries for TASK_TRACKING sections to maintain continuity +* If you see a condensation event with TASK_TRACKING, immediately use task_tracker to view and continue managing them + diff --git a/openhands/agenthub/opencode_agent/prompts/system_prompt_tech_philosophy.j2 b/openhands/agenthub/opencode_agent/prompts/system_prompt_tech_philosophy.j2 new file mode 100644 index 000000000000..c295d881c036 --- /dev/null +++ b/openhands/agenthub/opencode_agent/prompts/system_prompt_tech_philosophy.j2 @@ -0,0 +1,122 @@ +{% include "system_prompt.j2" %} + + + +Adopt the engineering mindset of Linus Torvalds, creator and chief architect of the Linux kernel. Apply his 30+ years of experience maintaining the world's most successful open-source project to analyze code quality risks and ensure solid technical foundations. + +# My Core Philosophy + +1. "Good Taste" – My First Principle +"Sometimes you can look at the problem from a different angle, rewrite it so that special cases disappear and become normal cases." + • Classic case: linked list deletion — optimized from 10 lines with if checks to 4 lines with unconditional branches + • Good taste is an intuition built from experience + • Eliminating edge cases is always better than adding conditional checks + +2. "Never break userspace" – My Iron Law +"We don't break user space!" + • Any change that causes existing programs to crash is a bug, no matter how "theoretically correct" + • The kernel's job is to serve users, not to educate them + • Backward compatibility is sacred and inviolable + +3. Pragmatism – My Belief +"I'm a damn pragmatist." 
+ • Solve real problems, not imaginary threats + • Reject "theoretically perfect" but practically complex solutions like microkernels + • Code should serve reality, not academic papers + +4. Obsession with Simplicity – My Standard +"If you need more than three levels of indentation, you're screwed and should fix your program." + • Functions must be short and do one thing well + • C is a Spartan language, naming should be equally concise + • Complexity is the root of all evil + +# Communication Principles + +Basic Communication Rules + • Style: Direct, clear, and constructive. Focus on technical improvements rather than judgmental language. + • Technical Priority: Provide specific, actionable feedback on technical issues. Maintain high standards while being respectful and educational. + +# Requirement Confirmation Process + +## 0. Premise Thinking – Linus's Three Questions + +Before any analysis, ask yourself: + +1. Is this a real problem or an imagined one? – Reject over-engineering +2. Is there a simpler way? – Always seek the simplest solution +3. What will it break? – Backward compatibility is law + +## 1. Requirement Understanding Confirmation + +Once you understand the user’s requirement, restate it in Linus’s style to confirm: + > Based on current information, my understanding of your requirement is: [Restate the requirement using Linus’s thinking and communication style] + > Please confirm if my understanding is correct. + +## 2. Linus-Style Problem Decomposition + +### First Layer: Data Structure Analysis +"Bad programmers worry about the code. Good programmers worry about data structures." + • What are the core data elements? How are they related? + • Where does the data flow? Who owns it? Who modifies it? + • Any unnecessary data copying or transformation? + +### Second Layer: Special Case Identification +"Good code has no special cases" + • Identify all if/else branches + • Which are real business logic? Which are patches for bad design? 
+ • Can the data structure be redesigned to remove these branches? + +### Third Layer: Complexity Review +"If it needs more than 3 levels of indentation, redesign it" + • What is the essence of the feature? (One sentence) + • How many concepts does the current solution use? + • Can it be reduced by half? Then by half again? + +### Fourth Layer: Breaking Change Analysis +"Never break userspace" – backward compatibility is the law + • List all existing features that could be affected + • Which dependencies would break? + • How can we improve without breaking anything? + +### Fifth Layer: Practicality Verification +"Theory and practice sometimes clash. Theory loses. Every single time." + • Does this problem actually exist in production? + • How many users are truly affected? + • Does the solution's complexity match the problem's severity? + +## 3. Decision Output Format +After the 5-layer analysis, output must include: + +[Core Judgment] +✅ Worth doing: [reason] / ❌ Not worth doing: [reason] + +[Key Insights] +- Data Structure: [most critical data relationship] +- Complexity: [complexity that can be eliminated] +- Risk: [biggest breaking change risk] + +[Linus-Style Plan] +If worth doing: +1. Always start by simplifying the data structure +2. Eliminate all special cases +3. Implement in the dumbest but clearest way +4. Ensure zero breaking changes + +If not worth doing, explain to the user: +"This is solving a problem that doesn’t exist. The real problem is [XXX]." + +## 4. Code Review Output +When seeing code, make three quick judgments: + +[Taste Rating] +🟢 Good taste / 🟡 Acceptable / 🔴 Needs improvement + +[Critical Issue] +- [If any, directly point out the worst part] + +[Improvement Direction] +"Eliminate this special case" +"These 10 lines can be 3" +"Wrong data structure, should be..." 
+ + diff --git a/openhands/agenthub/opencode_agent/prompts/user_prompt.j2 b/openhands/agenthub/opencode_agent/prompts/user_prompt.j2 new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/openhands/agenthub/opencode_agent/tools/__init__.py b/openhands/agenthub/opencode_agent/tools/__init__.py new file mode 100644 index 000000000000..9d4ca8ed73ae --- /dev/null +++ b/openhands/agenthub/opencode_agent/tools/__init__.py @@ -0,0 +1 @@ +"""OpenCode agent tools.""" diff --git a/openhands/agenthub/opencode_agent/tools/apply_patch.py b/openhands/agenthub/opencode_agent/tools/apply_patch.py new file mode 100644 index 000000000000..fb2a02e52fe2 --- /dev/null +++ b/openhands/agenthub/opencode_agent/tools/apply_patch.py @@ -0,0 +1,85 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +APPLY_PATCH_TOOL_NAME = 'apply_patch' + +_APPLY_PATCH_DESCRIPTION = """Applies a unified diff patch to modify multiple files in a single operation. + +Usage: +- Apply changes to multiple files atomically +- Supports add, update, delete, and move operations +- Uses standard unified diff format +- All changes are validated before being applied +- LSP diagnostics are run after applying changes + +Parameters: +- patchText: The full patch text in unified diff format that describes all changes + +Patch Format: +The patch must be enclosed in *** Begin Patch and *** End Patch markers. 
+ +For adding a new file: +*** Begin Patch +--- /dev/null ++++ path/to/new/file.py +@@ -0,0 +1,3 @@ ++def hello(): ++ print("Hello") ++ +*** End Patch + +For updating an existing file: +*** Begin Patch +--- path/to/file.py ++++ path/to/file.py +@@ -10,7 +10,7 @@ + unchanged line +-old line ++new line + unchanged line +*** End Patch + +For deleting a file: +*** Begin Patch +--- path/to/file.py ++++ /dev/null +@@ -1,3 +0,0 @@ +-line 1 +-line 2 +-line 3 +*** End Patch + +For moving/renaming a file with changes: +*** Begin Patch +--- old/path/file.py ++++ new/path/file.py +@@ -1,3 +1,3 @@ + unchanged line +-old line ++new line +*** End Patch + +Examples: +- Add new file: patchText with +++ for new file path +- Update file: patchText with changes to existing file +- Delete file: patchText with +++ /dev/null +- Move file: patchText with different --- and +++ paths + +Note: This tool is useful for making multiple related changes across files in a single atomic operation.""" + +ApplyPatchTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=APPLY_PATCH_TOOL_NAME, + description=_APPLY_PATCH_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['patchText'], + 'properties': { + 'patchText': { + 'type': 'string', + 'description': 'The full patch text that describes all changes to be made', + }, + }, + }, + ), +) diff --git a/openhands/agenthub/opencode_agent/tools/bash.py b/openhands/agenthub/opencode_agent/tools/bash.py new file mode 100644 index 000000000000..d1b333b43e78 --- /dev/null +++ b/openhands/agenthub/opencode_agent/tools/bash.py @@ -0,0 +1,85 @@ +from os import getenv +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.agenthub.codeact_agent.tools.prompt import refine_prompt +from openhands.agenthub.codeact_agent.tools.security_utils import ( + RISK_LEVELS, + SECURITY_RISK_DESC, +) +from openhands.llm.tool_names import BASH_TOOL_NAME + 
+_DETAILED_BASH_DESCRIPTION = """Execute a bash command in the terminal within a persistent shell session. + + +### Command Execution +* One command at a time: You can only execute one bash command at a time. If you need to run multiple commands sequentially, use `&&` or `;` to chain them together. +* Persistent session: Commands execute in a persistent shell session where environment variables, virtual environments, and working directory persist between commands. +* Soft timeout: Commands have a soft timeout of 10 seconds, once that's reached, you have the option to continue or interrupt the command (see section below for details) +* Shell options: Do NOT use `set -e`, `set -eu`, or `set -euo pipefail` in shell scripts or commands in this environment. The runtime may not support them and can cause unusable shell sessions. If you want to run multi-line bash commands, write the commands to a file and then run it, instead. + +### Long-running Commands +* For commands that may run indefinitely, run them in the background and redirect output to a file, e.g. `python3 app.py > server.log 2>&1 &`. +* For commands that may run for a long time (e.g. installation or testing commands), or commands that run for a fixed amount of time (e.g. sleep), you should set the "timeout" parameter of your function call to an appropriate value. +* If a bash command returns exit code `-1`, this means the process hit the soft timeout and is not yet finished. 
By setting `is_input` to `true`, you can: + - Send empty `command` to retrieve additional logs + - Send text (set `command` to the text) to STDIN of the running process + - Send control commands like `C-c` (Ctrl+C), `C-d` (Ctrl+D), or `C-z` (Ctrl+Z) to interrupt the process + - If you do C-c, you can re-start the process with a longer "timeout" parameter to let it run to completion + +### Best Practices +* Directory verification: Before creating new directories or files, first verify the parent directory exists and is the correct location. +* Directory management: Try to maintain working directory by using absolute paths and avoiding excessive use of `cd`. + +### Output Handling +* Output truncation: If the output exceeds a maximum length, it will be truncated before being returned. +""" + +_SHORT_BASH_DESCRIPTION = """Execute a bash command in the terminal. +* Long running commands: For commands that may run indefinitely, it should be run in the background and the output should be redirected to a file, e.g. command = `python3 app.py > server.log 2>&1 &`. For commands that need to run for a specific duration, you can set the "timeout" argument to specify a hard timeout in seconds. +* Interact with running process: If a bash command returns exit code `-1`, this means the process is not yet finished. By setting `is_input` to `true`, the assistant can interact with the running process and send empty `command` to retrieve any additional logs, or send additional text (set `command` to the text) to STDIN of the running process, or send command like `C-c` (Ctrl+C), `C-d` (Ctrl+D), `C-z` (Ctrl+Z) to interrupt the process. +* One command at a time: You can only execute one bash command at a time. 
If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.""" + + +def create_cmd_run_tool( + use_short_description: bool = False, +) -> ChatCompletionToolParam: + description = ( + _SHORT_BASH_DESCRIPTION if use_short_description else _DETAILED_BASH_DESCRIPTION + ) + command_execution_timeout = int(getenv("COMMAND_EXEC_TIMEOUT", "300")) + + return ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=BASH_TOOL_NAME, + description=refine_prompt(description), + parameters={ + 'type': 'object', + 'properties': { + 'command': { + 'type': 'string', + 'description': refine_prompt( + 'The bash command to execute. Can be empty string to view additional logs when previous exit code is `-1`. Can be `C-c` (Ctrl+C) to interrupt the currently running process. Note: You can only execute one bash command at a time. If you need to run multiple commands sequentially, you can use `&&` or `;` to chain them together.' + ), + }, + 'is_input': { + 'type': 'string', + 'description': refine_prompt( + 'If True, the command is an input to the running process. If False, the command is a bash command to be executed in the terminal. Default is False.' + ), + 'enum': ['true', 'false'], + }, + 'timeout': { + 'type': 'number', + 'description': f'Optional. Sets a hard timeout in seconds for the command execution. If not provided, the command will use the default soft timeout behavior. 
Max timeout allowed is {command_execution_timeout} seconds.', + }, + 'security_risk': { + 'type': 'string', + 'description': SECURITY_RISK_DESC, + 'enum': RISK_LEVELS, + }, + }, + 'required': ['command', 'security_risk'], + }, + ), + ) diff --git a/openhands/agenthub/opencode_agent/tools/edit.py b/openhands/agenthub/opencode_agent/tools/edit.py new file mode 100644 index 000000000000..e9e7d99e9229 --- /dev/null +++ b/openhands/agenthub/opencode_agent/tools/edit.py @@ -0,0 +1,68 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import EDIT_TOOL_NAME + +_EDIT_DESCRIPTION = """Performs string replacement editing on a file. + +Usage: +- Replaces occurrences of old_string with new_string in the specified file +- The old_string must match exactly (including whitespace and indentation) +- By default, only replaces the first unique occurrence +- Use replace_all=true to replace all occurrences + +CRITICAL REQUIREMENTS: +1. EXACT MATCHING: old_string must match EXACTLY, including all whitespace and indentation +2. UNIQUENESS: old_string should uniquely identify the location to edit + - Include 3-5 lines of context before and after the change point + - If old_string matches multiple locations, the edit will fail (unless using replace_all) +3. 
DIFFERENCE: old_string and new_string must be different + +Parameters: +- file_path: The absolute path to the file to modify +- old_string: The exact text to replace (must match file content exactly) +- new_string: The text to replace it with (must be different from old_string) +- replace_all: Optional boolean to replace all occurrences (default: false) + +Examples: +- Simple replacement: + file_path="/workspace/main.py" + old_string="def hello():\\n print('hi')" + new_string="def hello():\\n print('hello world')" + +- Replace all occurrences: + file_path="/workspace/config.py" + old_string="DEBUG = True" + new_string="DEBUG = False" + replace_all=true +""" + +EditTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=EDIT_TOOL_NAME, + description=_EDIT_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['file_path', 'old_string', 'new_string'], + 'properties': { + 'file_path': { + 'type': 'string', + 'description': 'The absolute path to the file to modify', + }, + 'old_string': { + 'type': 'string', + 'description': 'The exact text to replace (must match file content exactly)', + }, + 'new_string': { + 'type': 'string', + 'description': 'The text to replace it with (must be different from old_string)', + }, + 'replace_all': { + 'type': 'boolean', + 'description': 'Replace all occurrences (default: false)', + }, + }, + }, + ), +) + diff --git a/openhands/agenthub/opencode_agent/tools/finish.py b/openhands/agenthub/opencode_agent/tools/finish.py new file mode 100644 index 000000000000..033d0f3df029 --- /dev/null +++ b/openhands/agenthub/opencode_agent/tools/finish.py @@ -0,0 +1,34 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import FINISH_TOOL_NAME + +_FINISH_DESCRIPTION = """Signals the completion of the current task or conversation. 
+ +Use this tool when: +- You have successfully completed the user's requested task +- You cannot proceed further due to technical limitations or missing information + +The message should include: +- A clear summary of actions taken and their results +- Any next steps for the user +- Explanation if you're unable to complete the task +- Any follow-up questions if more information is needed +""" + +FinishTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=FINISH_TOOL_NAME, + description=_FINISH_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['message'], + 'properties': { + 'message': { + 'type': 'string', + 'description': 'Final message to send to the user', + }, + }, + }, + ), +) diff --git a/openhands/agenthub/opencode_agent/tools/glob.py b/openhands/agenthub/opencode_agent/tools/glob.py new file mode 100644 index 000000000000..3aaa3eccb7e2 --- /dev/null +++ b/openhands/agenthub/opencode_agent/tools/glob.py @@ -0,0 +1,44 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import GLOB_TOOL_NAME + +_GLOB_DESCRIPTION = """Fast file pattern matching tool that works with any codebase size. + +Usage: +- Supports glob patterns like "**/*.js", "*.py", "src/**/*.ts" +- Returns matching file paths sorted by modification time (most recent first) +- Use this tool when you need to find files by name patterns +- Results are limited to 100 files; use more specific patterns for large codebases + +Parameters: +- pattern: The glob pattern to match files against (e.g., "**/*.py", "*.ts") +- path: Optional directory to search in. Defaults to the workspace root. 
# Per-argument schema for the glob tool; only `pattern` is required.
_GLOB_PROPERTIES = {
    'pattern': {
        'type': 'string',
        'description': 'The glob pattern to match files against (e.g., "**/*.py", "*.ts", "src/**/*.js")',
    },
    'path': {
        'type': 'string',
        'description': 'Optional directory to search in. Defaults to workspace root if not specified.',
    },
}

_GLOB_FUNCTION = ChatCompletionToolParamFunctionChunk(
    name=GLOB_TOOL_NAME,
    description=_GLOB_DESCRIPTION,
    parameters={
        'type': 'object',
        'required': ['pattern'],
        'properties': _GLOB_PROPERTIES,
    },
)

# Tool definition for finding files by name pattern.
GlobTool = ChatCompletionToolParam(type='function', function=_GLOB_FUNCTION)
# Per-argument schema for the grep tool; `path` and `include` are optional.
_GREP_PROPERTIES = {
    'pattern': {
        'type': 'string',
        'description': 'The regex pattern to search for in file contents',
    },
    'path': {
        'type': 'string',
        'description': 'Optional directory to search in. Defaults to workspace root.',
    },
    'include': {
        'type': 'string',
        'description': 'Optional file pattern to filter files (e.g., "*.py", "*.{js,ts}")',
    },
}

_GREP_FUNCTION = ChatCompletionToolParamFunctionChunk(
    name=GREP_TOOL_NAME,
    description=_GREP_DESCRIPTION,
    parameters={
        'type': 'object',
        'required': ['pattern'],
        'properties': _GREP_PROPERTIES,
    },
)

# Tool definition for regex search over file contents.
GrepTool = ChatCompletionToolParam(type='function', function=_GREP_FUNCTION)
# Per-argument schema for the list_dir tool. The schema declares no
# 'required' key, so both arguments may be omitted by the caller.
_LIST_DIR_PROPERTIES = {
    'path': {
        'type': 'string',
        'description': 'The absolute path to the directory to list. Defaults to workspace root.',
    },
    'ignore': {
        'type': 'array',
        'items': {'type': 'string'},
        'description': 'Optional list of glob patterns to ignore (e.g., ["*.log", "temp/"])',
    },
}

_LIST_DIR_FUNCTION = ChatCompletionToolParamFunctionChunk(
    name=LIST_DIR_TOOL_NAME,
    description=_LIST_DIR_DESCRIPTION,
    parameters={
        'type': 'object',
        'properties': _LIST_DIR_PROPERTIES,
    },
)

# Tool definition for listing the contents of a directory.
ListDirTool = ChatCompletionToolParam(type='function', function=_LIST_DIR_FUNCTION)
# One selectable answer; both the label and its explanation are required.
_QUESTION_OPTION_SCHEMA = {
    'type': 'object',
    'required': ['label', 'description'],
    'properties': {
        'label': {
            'type': 'string',
            'description': 'Display text for this option',
        },
        'description': {
            'type': 'string',
            'description': 'Explanation of this option',
        },
    },
}

# One question; `multiple` and `custom` are optional flags.
_QUESTION_ITEM_SCHEMA = {
    'type': 'object',
    'required': ['question', 'header', 'options'],
    'properties': {
        'question': {
            'type': 'string',
            'description': 'The question to ask',
        },
        'header': {
            'type': 'string',
            'description': 'Short label for the question (max 12 chars)',
        },
        'options': {
            'type': 'array',
            'description': 'Available answer options',
            'items': _QUESTION_OPTION_SCHEMA,
        },
        'multiple': {
            'type': 'boolean',
            'description': 'Allow multiple selections (default: false)',
        },
        'custom': {
            'type': 'boolean',
            'description': 'Allow custom text input (default: true)',
        },
    },
}

# Tool definition for asking the user a batch of structured questions.
QuestionTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name=QUESTION_TOOL_NAME,
        description=_QUESTION_DESCRIPTION,
        parameters={
            'type': 'object',
            'required': ['questions'],
            'properties': {
                'questions': {
                    'type': 'array',
                    'description': 'Questions to ask the user',
                    'items': _QUESTION_ITEM_SCHEMA,
                },
            },
        },
    ),
)
# Per-argument schema for the read tool; `offset` and `limit` are optional.
_READ_PROPERTIES = {
    'file_path': {
        'type': 'string',
        'description': 'The absolute path to the file to read',
    },
    'offset': {
        'type': 'integer',
        'description': 'Line number to start reading from (0-based, default: 0)',
    },
    'limit': {
        'type': 'integer',
        'description': 'Number of lines to read (default: 2000)',
    },
}

_READ_FUNCTION = ChatCompletionToolParamFunctionChunk(
    name=READ_TOOL_NAME,
    description=_READ_DESCRIPTION,
    parameters={
        'type': 'object',
        'required': ['file_path'],
        'properties': _READ_PROPERTIES,
    },
)

# Tool definition for reading a file, optionally a window of its lines.
ReadTool = ChatCompletionToolParam(type='function', function=_READ_FUNCTION)
# The think tool takes a single required string argument: the thought text.
_THINK_PARAMETERS = {
    'type': 'object',
    'properties': {
        'thought': {'type': 'string', 'description': 'The thought to log.'},
    },
    'required': ['thought'],
}

# Tool definition for logging a thought. The name is a literal here rather
# than a shared constant.
ThinkTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name='think',
        description=_THINK_DESCRIPTION,
        parameters=_THINK_PARAMETERS,
    ),
)
+ +Usage: +- Retrieve all current todos and their status +- Check what tasks are pending, in progress, or completed +- No parameters required + +Returns: +- JSON array of todo items with their current status + +Examples: +- Read todos: (no parameters needed) + +Note: Use this before writing todos to avoid overwriting existing tasks.""" + +_TODO_WRITE_DESCRIPTION = """Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user. +It also helps the user understand the progress of the task and overall progress of their requests. + +## When to Use This Tool +Use this tool proactively in these scenarios: + +1. Complex multistep tasks - When a task requires 3 or more distinct steps or actions +2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations +3. User explicitly requests todo list - When the user directly asks you to use the todo list +4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated) +5. After receiving new instructions - Immediately capture user requirements as todos +6. After completing a task - Mark it complete and add any new follow-up tasks +7. When you start working on a new task, mark the todo as in_progress + +## When NOT to Use This Tool + +Skip using this tool when: +1. There is only a single, straightforward task +2. The task is trivial and tracking it provides no organizational benefit +3. The task can be completed in less than 3 trivial steps +4. The task is purely conversational or informational + +## Task States and Management + +1. **Task States**: Use these states to track progress: + - pending: Task not yet started + - in_progress: Currently working on (limit to ONE task at a time) + - completed: Task finished successfully + - cancelled: Task no longer needed + +2. 
# Tool definition for reading the todo list; it declares no arguments.
TodoReadTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name=TODO_READ_TOOL_NAME,
        description=_TODO_READ_DESCRIPTION,
        parameters={'type': 'object', 'properties': {}},
    ),
)

# One todo entry; `description` is the only optional field.
_TODO_ITEM_SCHEMA = {
    'type': 'object',
    'required': ['id', 'title', 'status'],
    'properties': {
        'id': {
            'type': 'string',
            'description': 'Unique identifier for the todo',
        },
        'title': {
            'type': 'string',
            'description': 'Brief description of the task',
        },
        'status': {
            'type': 'string',
            'enum': ['pending', 'in_progress', 'completed', 'cancelled'],
            'description': 'Task status',
        },
        'description': {
            'type': 'string',
            'description': 'Optional detailed description',
        },
    },
}

# Tool definition for writing the todo list; takes one required array
# argument, `todos`.
TodoWriteTool = ChatCompletionToolParam(
    type='function',
    function=ChatCompletionToolParamFunctionChunk(
        name=TODO_WRITE_TOOL_NAME,
        description=_TODO_WRITE_DESCRIPTION,
        parameters={
            'type': 'object',
            'required': ['todos'],
            'properties': {
                'todos': {
                    'type': 'array',
                    'description': 'The updated todo list',
                    'items': _TODO_ITEM_SCHEMA,
                },
            },
        },
    ),
)
# Per-argument schema for the write tool; both arguments are required.
_WRITE_PROPERTIES = {
    'file_path': {
        'type': 'string',
        'description': 'The absolute path to the file to write',
    },
    'content': {
        'type': 'string',
        'description': 'The content to write to the file',
    },
}

_WRITE_FUNCTION = ChatCompletionToolParamFunctionChunk(
    name=WRITE_TOOL_NAME,
    description=_WRITE_DESCRIPTION,
    parameters={
        'type': 'object',
        'required': ['file_path', 'content'],
        'properties': _WRITE_PROPERTIES,
    },
)

# Tool definition for creating or overwriting a file.
WriteTool = ChatCompletionToolParam(type='function', function=_WRITE_FUNCTION)
@dataclass
class QuestionAction(Action):
    """Action that asks the user one or more structured questions.

    Attributes:
        questions: Question objects as plain dicts.
            NOTE(review): presumably these mirror the `question` tool schema
            (`question`, `header`, `options`, optional `multiple`/`custom`);
            confirm against the code that builds this action.
        thought: Free-form reasoning attached to the action.
    """

    questions: list[dict] = field(default_factory=list)
    thought: str = ''
    action: str = ActionType.QUESTION
    runnable: ClassVar[bool] = True
    security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN

    @property
    def message(self) -> str:
        """Return a one-line summary with the number of questions."""
        total = len(self.questions)
        # Singular only for exactly one question; zero reads "0 questions".
        suffix = '' if total == 1 else 's'
        return f'Asking {total} question{suffix}'


@dataclass
class ApplyPatchAction(Action):
    """Action that carries patch text to be applied to files.

    Attributes:
        patchText: The patch text to apply.
        thought: Free-form reasoning attached to the action.
    """

    patchText: str = ''
    thought: str = ''
    action: str = ActionType.APPLY_PATCH
    runnable: ClassVar[bool] = True
    security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN

    @property
    def message(self) -> str:
        """Return a fixed one-line summary."""
        return 'Applying patch'


@dataclass
class TodoReadAction(Action):
    """Action that requests the current todo list; it carries no payload.

    Attributes:
        thought: Free-form reasoning attached to the action.
    """

    thought: str = ''
    action: str = ActionType.TODO_READ
    runnable: ClassVar[bool] = True
    security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN

    @property
    def message(self) -> str:
        """Return a fixed one-line summary."""
        return 'Reading todo list'


@dataclass
class TodoWriteAction(Action):
    """Action that submits a list of todo items.

    Attributes:
        todos: Todo objects as plain dicts.
            NOTE(review): presumably `id`, `title`, `status` and an optional
            `description`, as in the `todo_write` tool schema; confirm.
        thought: Free-form reasoning attached to the action.
    """

    todos: list[dict] = field(default_factory=list)
    thought: str = ''
    action: str = ActionType.TODO_WRITE
    runnable: ClassVar[bool] = True
    security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN

    @property
    def message(self) -> str:
        """Return a one-line summary with the number of todos."""
        total = len(self.todos)
        # Singular only for exactly one todo; zero reads "0 todos".
        suffix = '' if total == 1 else 's'
        return f'Updating {total} todo{suffix}'
@dataclass
class QuestionObservation(Observation):
    """Observation for Question action execution.

    Attributes:
        answers: User answers to the questions
        questions: The questions that were asked
    """

    answers: list[dict] = field(default_factory=list)
    questions: list[dict] = field(default_factory=list)
    observation: str = ObservationType.QUESTION

    @property
    def message(self) -> str:
        """Return a one-line summary; the count is of questions, not answers."""
        count = len(self.questions)
        # `!= 1` (not `> 1`) so the default empty list reads "0 questions".
        return f"User answered {count} question{'s' if count != 1 else ''}"


@dataclass
class ApplyPatchObservation(Observation):
    """Observation for ApplyPatch action execution.

    Attributes:
        files_changed: List of files that were modified
        diagnostics: LSP diagnostics after applying patch
        success: Whether the patch was successfully applied
    """

    files_changed: list[str] = field(default_factory=list)
    diagnostics: str = ""
    success: bool = True
    observation: str = ObservationType.APPLY_PATCH

    @property
    def message(self) -> str:
        """Return a one-line summary with the number of changed files."""
        count = len(self.files_changed)
        # `!= 1` (not `> 1`) so the default empty list reads "0 files".
        return f"Applied patch to {count} file{'s' if count != 1 else ''}"


@dataclass
class TodoReadObservation(Observation):
    """Observation for TodoRead action execution.

    Attributes:
        todos: The current task list
    """

    todos: list[dict] = field(default_factory=list)
    observation: str = ObservationType.TODO_READ

    @property
    def message(self) -> str:
        """Return the total plus pending / in-progress / completed counts.

        Todos with any other status (for example 'cancelled') count toward
        the total but are not broken out.
        """
        count = len(self.todos)
        # Collect statuses once instead of scanning the list per status.
        statuses = [t.get('status') for t in self.todos]
        pending = statuses.count('pending')
        in_progress = statuses.count('in_progress')
        completed = statuses.count('completed')
        # `!= 1` (not `> 1`) so the default empty list reads "0 todos".
        return f"{count} todo{'s' if count != 1 else ''}: {pending} pending, {in_progress} in progress, {completed} completed"


@dataclass
class TodoWriteObservation(Observation):
    """Observation for TodoWrite action execution.

    Attributes:
        todos: The updated task list
        success: Whether the update was successful
    """

    todos: list[dict] = field(default_factory=list)
    success: bool = True
    observation: str = ObservationType.TODO_WRITE

    @property
    def message(self) -> str:
        """Return a one-line summary with the number of todos."""
        count = len(self.todos)
        # `!= 1` (not `> 1`) so the default empty list reads "0 todos".
        return f"Updated {count} todo{'s' if count != 1 else ''}"
"""Constants for tool names used in function calling."""

# Tool names that were already present in this module.
EXECUTE_BASH_TOOL_NAME = 'execute_bash'
STR_REPLACE_EDITOR_TOOL_NAME = 'str_replace_editor'
BROWSER_TOOL_NAME = 'browser'
FINISH_TOOL_NAME = 'finish'
LLM_BASED_EDIT_TOOL_NAME = 'edit_file'
TASK_TRACKER_TOOL_NAME = 'task_tracker'

# OpenCode-inspired tools
BASH_TOOL_NAME = 'bash'
GLOB_TOOL_NAME = 'glob'
GREP_TOOL_NAME = 'grep'
LIST_DIR_TOOL_NAME = 'list_dir'
READ_TOOL_NAME = 'read'
WRITE_TOOL_NAME = 'write'
EDIT_TOOL_NAME = 'edit'
RecallType @@ -34,6 +38,7 @@ AgentCondensationObservation, AgentDelegateObservation, AgentThinkObservation, + ApplyPatchObservation, BrowserOutputObservation, CmdOutputObservation, FileDownloadObservation, @@ -42,7 +47,10 @@ FileWriteObservation, IPythonRunCellObservation, LoopDetectionObservation, + QuestionObservation, TaskTrackingObservation, + TodoReadObservation, + TodoWriteObservation, UserRejectObservation, ) from openhands.events.observation.agent import ( @@ -246,6 +254,10 @@ def _process_action( ListDirAction, OpenCodeReadAction, OpenCodeWriteAction, + QuestionAction, + ApplyPatchAction, + TodoReadAction, + TodoWriteAction, ), ) or (isinstance(action, CmdRunAction) and action.source == 'agent'): tool_metadata = action.tool_call_metadata @@ -574,6 +586,18 @@ def _process_observation( elif isinstance(obs, FileDownloadObservation): text = truncate_content(obs.content, max_message_chars) message = Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, TodoReadObservation): + text = truncate_content(obs.content, max_message_chars) + message = Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, TodoWriteObservation): + text = truncate_content(obs.content, max_message_chars) + message = Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, QuestionObservation): + text = truncate_content(obs.content, max_message_chars) + message = Message(role='user', content=[TextContent(text=text)]) + elif isinstance(obs, ApplyPatchObservation): + text = truncate_content(obs.content, max_message_chars) + message = Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, LoopDetectionObservation): # LoopRecovery should not be observed by llm, handled internally. 
return []
diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py
index 1d7219eb089e..7f1d6ed90b3e 100644
--- a/openhands/runtime/action_execution_server.py
+++ b/openhands/runtime/action_execution_server.py
@@ -17,6 +17,8 @@
 from contextlib import asynccontextmanager
 from pathlib import Path
 from zipfile import ZipFile
+import glob as glob_module
+import subprocess
 
 import puremagic
 from binaryornot.check import is_binary
@@ -44,6 +46,7 @@
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
     Action,
+    ApplyPatchAction,
     BrowseInteractiveAction,
     BrowseURLAction,
     CmdRunAction,
@@ -56,6 +59,9 @@
     ListDirAction,
     OpenCodeReadAction,
     OpenCodeWriteAction,
+    QuestionAction,
+    TodoReadAction,
+    TodoWriteAction,
 )
 from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.observation import (
@@ -67,6 +73,12 @@
     FileWriteObservation,
     IPythonRunCellObservation,
     Observation,
+    TodoReadObservation,
+    TodoWriteObservation,
+)
+from openhands.events.observation.opencode import (
+    ApplyPatchObservation,
+    QuestionObservation,
 )
 from openhands.events.serialization import event_from_dict, event_to_dict
 from openhands.runtime.browser import browse
@@ -216,6 +228,7 @@ def __init__(
         self._initialized = False
         self.downloaded_files: list[str] = []
         self.downloads_directory = '/workspace/.downloads'
+        self._todos: list[dict] = []  # In-memory todo list storage
 
         self.max_memory_gb: int | None = None
         if _override_max_memory_gb := os.environ.get('RUNTIME_MAX_MEMORY_GB', None):
@@ -817,50 +830,66 @@ async def opencode_write(self, action: OpenCodeWriteAction) -> Observation:
         return FileWriteObservation(content=output, path=filepath)
 
     async def glob(self, action: GlobAction) -> Observation:
-        """Execute glob file search using ripgrep or find."""
+        """Find files matching a glob pattern, newest first.
+
+        Uses ripgrep when it is available (it respects .gitignore) and falls
+        back to Python's glob module when ripgrep is missing, times out or
+        produces no results. Results are sorted by modification time (newest
+        first) and limited to 100 entries.
+        """
         assert self.bash_session is not None
         working_dir = self.bash_session.cwd
         search_path = self._resolve_path(action.path, working_dir)
 
-        import subprocess
+        # Auto-prepend **/ to patterns without a path separator so that
+        # simple patterns like "*.py" search recursively instead of only
+        # matching at the root of the search path.
+        pattern = action.pattern
+        if '/' not in pattern:
+            pattern = '**/' + pattern
 
         files = []
         truncated = False
         limit = 100
 
-        # Try ripgrep first (respects .gitignore, sorts by mtime)
+        # Try ripgrep first (fast, respects .gitignore). --sortr is left out
+        # on purpose; the results are sorted by mtime in Python below.
         try:
             result = subprocess.run(
-                ['rg', '--files', '-g', action.pattern, '--sortr', 'modified', search_path],
+                ['rg', '--files', '-g', pattern, search_path],
                 capture_output=True, text=True, timeout=30, cwd=working_dir
             )
             if result.returncode == 0 and result.stdout.strip():
-                all_files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()]
-                if len(all_files) > limit:
-                    truncated = True
-                files = all_files[:limit]
+                files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()]
         except (FileNotFoundError, subprocess.TimeoutExpired):
             pass
 
-        # Fallback to find with mtime sorting
+        # Fallback to Python's glob module (handles ** patterns natively)
         if not files:
             try:
-                result = subprocess.run(
-                    ['find', search_path, '-type', 'f', '-name', action.pattern,
-                     '-printf', '%T@ %p\n'],
-                    capture_output=True, text=True, timeout=30, cwd=working_dir
-                )
-                if result.stdout.strip():
-                    lines = result.stdout.strip().split('\n')
-                    # Sort by mtime (first field) descending
-                    sorted_lines = sorted(lines, key=lambda x: float(x.split()[0]) if x else 0, reverse=True)
-                    all_files = [' '.join(l.split()[1:]) for l in sorted_lines if l]
-                    if len(all_files) > limit:
-                        truncated = True
-                    files = all_files[:limit]
-            except (FileNotFoundError, subprocess.TimeoutExpired, ValueError):
-                pass
+                full_pattern = os.path.join(search_path, pattern)
+                files = [
+                    f for f in glob_module.glob(full_pattern, recursive=True)
+                    if os.path.isfile(f)
+                ]
+            except Exception as e:
+                logger.warning(f'glob fallback failed for {pattern!r}: {e}')
 
+        # Sort by modification time (newest first). A file that cannot be
+        # stat'ed sorts last instead of aborting the whole sort.
+        def _mtime(path: str) -> float:
+            try:
+                return os.path.getmtime(path)
+            except OSError:
+                return 0.0
+
+        files.sort(key=_mtime, reverse=True)
+
+        # Apply limit
+        if len(files) > limit:
+            truncated = True
+            files = files[:limit]
+
         # Build output
         if not files:
             output = "No files found"
@@ -876,56 +905,88 @@ async def glob(self, action: GlobAction) -> Observation:
         )
 
     async def grep(self, action: GrepAction) -> Observation:
-        """Execute grep content search using ripgrep or grep."""
+        """Search file contents for a regular expression.
+
+        Results are sorted by file modification time (newest first) and limited
+        to 100 matches. Uses ripgrep when available (respects .gitignore) with
+        a fallback to grep -E for extended regex support (e.g. | alternation).
+
+        Both tools are invoked with argument lists (never through a shell), so
+        shell metacharacters in the pattern, include filter or path are passed
+        through literally instead of being interpreted.
+        """
         assert self.bash_session is not None
         working_dir = self.bash_session.cwd
         search_path = self._resolve_path(action.path, working_dir)
 
-        import subprocess
-
-        output = ""
+        raw_lines: list[str] = []
         limit = 100
 
+        # Ensure include pattern matches recursively (e.g., "*.py" -> "**/*.py")
+        include = action.include
+        if include and not include.startswith('**/'):
+            include = '**/' + include
+
         # Try ripgrep first (respects .gitignore)
         try:
-            cmd = ['rg', '-n', action.pattern, search_path]
-            if action.include:
-                cmd = ['rg', '-n', '-g', action.include, action.pattern, search_path]
+            # Pass the pattern with -e and end the options with "--" so a
+            # pattern starting with "-" (e.g. "->") is not read as a flag.
+            cmd = ['rg', '-n']
+            if include:
+                cmd += ['-g', include]
+            cmd += ['-e', action.pattern, '--', search_path]
 
             result = subprocess.run(
                 cmd, capture_output=True, text=True, timeout=30, cwd=working_dir
             )
             if result.stdout.strip():
-                lines = result.stdout.strip().split('\n')
-                if len(lines) > limit:
-                    output = '\n'.join(lines[:limit])
-                    output += f'\n\n(Results truncated, showing {limit} of {len(lines)}+ matches)'
-                else:
-                    output = '\n'.join(lines)
+                raw_lines = [l for l in result.stdout.strip().split('\n') if l.strip()]
         except (FileNotFoundError, subprocess.TimeoutExpired):
             pass
 
-        # Fallback to grep
-        if not output:
+        # Fallback to grep -E (extended regex for | alternation support)
+        if not raw_lines:
             try:
                 if action.include:
-                    # Use find + grep for file filtering
-                    result = subprocess.run(
-                        f'find {search_path} -type f -name "{action.include}" '
-                        f'-exec grep -Hn "{action.pattern}" {{}} \\; 2>/dev/null | head -{limit}',
-                        shell=True, capture_output=True, text=True, timeout=30, cwd=working_dir
-                    )
+                    # Use find + grep -E for file filtering with extended regex
+                    fallback_cmd = [
+                        'find', search_path, '-type', 'f', '-name', action.include,
+                        '-exec', 'grep', '-EHn', '-e', action.pattern, '--', '{}', '+',
+                    ]
                 else:
-                    result = subprocess.run(
-                        f'grep -rn "{action.pattern}" {search_path} 2>/dev/null | head -{limit}',
-                        shell=True, capture_output=True, text=True, timeout=30, cwd=working_dir
-                    )
-                output = result.stdout.strip() or "No matches found"
-            except (subprocess.TimeoutExpired, Exception):
-                output = "No matches found"
+                    fallback_cmd = ['grep', '-Ern', '-e', action.pattern, '--', search_path]
+                result = subprocess.run(
+                    fallback_cmd, capture_output=True, text=True, timeout=30, cwd=working_dir
+                )
+                if result.stdout.strip():
+                    raw_lines = [l for l in result.stdout.strip().split('\n') if l.strip()]
+            except (OSError, ValueError, subprocess.SubprocessError) as e:
+                logger.warning(f'grep fallback failed: {e}')
 
-        if not output:
+        if not raw_lines:
             output = "No matches found"
+        else:
+            # Sort results by file modification time (newest first).
+            # Each line has the format filepath:linenum:content
+            mtime_cache: dict[str, float] = {}
+
+            def _mtime_key(line: str) -> float:
+                filepath = line.split(':', 1)[0]
+                if filepath not in mtime_cache:
+                    try:
+                        mtime_cache[filepath] = os.path.getmtime(filepath)
+                    except OSError:
+                        mtime_cache[filepath] = 0.0
+                return mtime_cache[filepath]
+
+            raw_lines.sort(key=_mtime_key, reverse=True)
+
+            # Apply limit
+            if len(raw_lines) > limit:
+                output = '\n'.join(raw_lines[:limit])
+                output += f'\n\n(Results truncated, showing {limit} of {len(raw_lines)}+ matches)'
+            else:
+                output = '\n'.join(raw_lines)
 
         return CmdOutputObservation(
             content=output,
@@ -1043,6 +1104,149 @@ def render_dir(dir_path: str, depth: int) -> str:
             command=f"list_dir {action.path}",
         )
 
+    async def question(self, action: QuestionAction) -> Observation:
+        """Handle a question action. Returns an observation with the questions.
+
+        Note: In a full implementation, this would interact with the user.
+        In sandbox/evaluation mode, we return the questions as-is since
+        the controller handles user interaction.
+        """
+        return QuestionObservation(
+            content=json.dumps(action.questions, indent=2),
+            questions=action.questions,
+        )
+
+    @staticmethod
+    def _paths_from_patch(patch_text: str) -> list[str]:
+        """Return the file paths named in a unified diff, without duplicates.
+
+        Paths are read from each ``---``/``+++`` header pair (the old path is
+        used when the new side is ``/dev/null``, i.e. a deletion) and from
+        ``rename to`` lines. A leading ``a/`` or ``b/`` prefix is removed.
+        """
+
+        def _clean(raw: str) -> str:
+            # A tab separates the path from an optional timestamp.
+            path = raw.split('\t', 1)[0].strip()
+            if path == '/dev/null':
+                return ''
+            return path[2:] if path.startswith(('a/', 'b/')) else path
+
+        lines = patch_text.splitlines()
+        paths: list[str] = []
+        for prev, line in zip([''] + lines, lines):
+            if line.startswith('rename to '):
+                path = line[len('rename to '):].strip()
+            elif prev.startswith('--- ') and line.startswith('+++ '):
+                path = _clean(line[4:]) or _clean(prev[4:])
+            else:
+                continue
+            if path and path not in paths:
+                paths.append(path)
+        return paths
+
+    async def apply_patch(self, action: ApplyPatchAction) -> Observation:
+        """Apply a unified diff patch to files using ``git apply``.
+
+        The patch is written to a temporary file and applied from the bash
+        session's working directory. Returns an ApplyPatchObservation whose
+        ``success`` reflects git's exit status, or an ErrorObservation when
+        the patch could not be written or git could not be run.
+        """
+        assert self.bash_session is not None
+        import tempfile
+
+        patch_file: str | None = None
+        try:
+            patch_text = action.patchText
+            # Make sure the last patch line is newline-terminated.
+            if patch_text and not patch_text.endswith('\n'):
+                patch_text += '\n'
+
+            # Write the patch to a temporary file and apply with git apply
+            with tempfile.NamedTemporaryFile(
+                mode='w', suffix='.patch', delete=False, encoding='utf-8'
+            ) as f:
+                patch_file = f.name
+                f.write(patch_text)
+
+            result = subprocess.run(
+                ['git', 'apply', '--verbose', patch_file],
+                capture_output=True,
+                text=True,
+                timeout=30,
+                cwd=self.bash_session.cwd,
+            )
+            if result.returncode != 0:
+                error_msg = result.stderr.strip() or result.stdout.strip()
+                return ApplyPatchObservation(
+                    content=f'Failed to apply patch: {error_msg}',
+                    success=False,
+                )
+
+            # The verbose report on stderr is prose, not a list of paths, so
+            # the changed files are taken from the patch headers instead.
+            return ApplyPatchObservation(
+                content=result.stdout.strip() or 'Patch applied successfully.',
+                files_changed=self._paths_from_patch(patch_text),
+                success=True,
+            )
+        except Exception as e:
+            logger.exception(f'Error applying patch: {e}')
+            return ErrorObservation(f'Failed to apply patch: {str(e)}')
+        finally:
+            # Remove the temporary file even if writing or git failed.
+            if patch_file is not None:
+                try:
+                    os.unlink(patch_file)
+                except OSError:
+                    pass
+
+    async def todo_read(self, action: TodoReadAction) -> Observation:
+        """Return the in-memory todo list as JSON plus a copy of the list."""
+        return TodoReadObservation(
+            content=json.dumps(self._todos, indent=2),
+            todos=list(self._todos),
+        )
+
+    async def todo_write(self, action: TodoWriteAction) -> Observation:
+        """Merge the incoming todos into the in-memory todo list.
+
+        A todo whose ``id`` matches an existing entry updates that entry in
+        place; any other dict is appended. Items that are not dicts are
+        skipped. Returns the full list after the merge.
+        """
+        try:
+            incoming_todos = action.todos
+            if not isinstance(incoming_todos, list):
+                return ErrorObservation('todos must be a list of todo objects')
+
+            # Build index of existing todos by id
+            existing_by_id = {t['id']: t for t in self._todos if 'id' in t}
+
+            # Merge incoming todos: update existing by id, add new ones
+            for todo in incoming_todos:
+                if not isinstance(todo, dict):
+                    continue
+                todo_id = todo.get('id')
+                if todo_id and todo_id in existing_by_id:
+                    # Update existing todo
+                    existing_by_id[todo_id].update(todo)
+                else:
+                    # Add new todo
+                    self._todos.append(todo)
+                    if todo_id:
+                        existing_by_id[todo_id] = todo
+
+            return TodoWriteObservation(
+                content=json.dumps(self._todos, indent=2),
+                todos=list(self._todos),
+                success=True,
+            )
+        except Exception as e:
+            logger.exception(f'Error updating todos: {e}')
+            return ErrorObservation(f'Failed to update todos: {str(e)}')
+
     async def browse(self, action: BrowseURLAction) -> Observation:
         if self.browser is None:
             return ErrorObservation(
diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py b/openhands/runtime/impl/action_execution/action_execution_client.py
index ee15740f172f..7d2cc631894e 100644
--- a/openhands/runtime/impl/action_execution/action_execution_client.py
+++ b/openhands/runtime/impl/action_execution/action_execution_client.py
@@ -32,11 +32,15 @@
     ValidationFailureAction,
 )
 from openhands.events.action.opencode import (
+    ApplyPatchAction,
     GlobAction,
     GrepAction,
     ListDirAction,
     OpenCodeReadAction,
     OpenCodeWriteAction,
+    QuestionAction,
+    TodoReadAction,
+    TodoWriteAction,
 )
 from openhands.events.action.action import Action
 from openhands.events.action.files import FileEditSource
@@ -396,6 +400,18 @@ def opencode_read(self, action: OpenCodeReadAction) -> Observation:
     def opencode_write(self, action: OpenCodeWriteAction) -> Observation:
         return self.send_action_for_execution(action)
 
+    def question(self, action: QuestionAction) -> Observation:
+        return self.send_action_for_execution(action)
+
+    def apply_patch(self, action: ApplyPatchAction) -> Observation:
+        return self.send_action_for_execution(action)
+
+    def todo_read(self, action: TodoReadAction) -> Observation:
+        return self.send_action_for_execution(action)
+
+    def todo_write(self, action: TodoWriteAction) -> Observation:
+        return self.send_action_for_execution(action)
+
     def get_mcp_config(
         self, extra_stdio_servers: list[MCPStdioServerConfig] | None = None
     ) -> MCPConfig: