-
Notifications
You must be signed in to change notification settings - Fork 1.5k
feat: add Harbor Terminal-Bench integration for Sisyphus agent #574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
0badc86
e2bfa71
08c9e80
d5e4f13
1d0dabb
042a9b1
8eee7ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
#!/bin/bash
# Install script (Jinja2 template) for the Sisyphus agent: installs bun,
# OpenCode, and the oh-my-opencode plugin.
#
# Template variables:
#   version      - opencode-ai version to install (defaults to "latest")
#   omo_version  - oh-my-opencode version to install (defaults to "latest")

# -e aborts on the first failing command; pipefail makes a pipeline fail when
# ANY stage fails, so a failed download in `curl ... | bash` is not masked by
# bash exiting 0 on empty input.
set -eo pipefail

apt-get update
apt-get install -y curl unzip

# Install bun
curl -fsSL https://bun.sh/install | bash
export BUN_INSTALL="$HOME/.bun"
export PATH="$BUN_INSTALL/bin:$PATH"
bun --version

# Install OpenCode
{% if version %}
bun install -g opencode-ai@{{ version }}
{% else %}
bun install -g opencode-ai@latest
{% endif %}

# Pre-create oh-my-opencode config BEFORE install to disable problematic hooks
# This prevents hooks from initializing during plugin load
# - comment-checker: Downloads Go binary from GitHub (rate limiting with multiple containers)
# - auto-update-checker: Checks for updates (unnecessary in benchmarks)
# - session-notification: OS notifications (no display in containers)
# - background-notification: OS notifications (no display in containers)
# NOTE(review): comment-checker is described above but is NOT present in
# disabled_hooks below - confirm whether it should be added or the note removed.
mkdir -p ~/.config/opencode
cat > ~/.config/opencode/oh-my-opencode.json << 'EOF'
{
  "disabled_hooks": [
    "auto-update-checker",
    "session-notification",
    "background-notification"
  ]
}
EOF

# Install oh-my-opencode plugin (provides Sisyphus agent)
# --claude=no uses opencode/zen provider (free) instead of Anthropic
{% if omo_version %}
bunx oh-my-opencode@{{ omo_version }} install --no-tui --claude=no --chatgpt=no --gemini=no
{% else %}
bunx oh-my-opencode@latest install --no-tui --claude=no --chatgpt=no --gemini=no
{% endif %}

# Sanity check: fails the install (via set -e) if opencode is not on PATH.
opencode --version
echo "Sisyphus agent ready"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| import os | ||
| import shlex | ||
| from pathlib import Path | ||
|
|
||
| from harbor.agents.installed.base import BaseInstalledAgent, ExecInput | ||
| from harbor.models.agent.context import AgentContext | ||
|
|
||
|
|
||
class SisyphusAgent(BaseInstalledAgent):
    """Harbor installed agent that runs OpenCode with the oh-my-opencode plugin.

    The agent is installed from the ``install-sisyphus.sh.j2`` template that
    sits next to this module and is run through the ``opencode`` CLI with
    ``--agent Sisyphus``.
    """

    @staticmethod
    def name() -> str:
        """Return the identifier under which this agent is registered."""
        return "sisyphus"

    @property
    def _install_agent_template_path(self) -> Path:
        """Path of the Jinja2 install-script template for this agent."""
        return Path(__file__).parent / "install-sisyphus.sh.j2"

    def populate_context_post_run(self, context: AgentContext) -> None:
        """Intentionally a no-op: nothing is written to ``context`` after a run."""
        pass

    def create_run_agent_commands(self, instruction: str) -> list[ExecInput]:
        """Build the shell command that runs the Sisyphus agent on ``instruction``.

        Args:
            instruction: Task text handed to ``opencode run``; it is
                shell-quoted before being embedded in the command line.

        Returns:
            A single-element list holding the ``ExecInput`` to execute.

        Raises:
            ValueError: If ``self.model_name`` is unset or is not of the form
                ``provider/model_name``.
        """
        if not self.model_name or "/" not in self.model_name:
            raise ValueError("Model name must be in the format provider/model_name")

        provider, _ = self.model_name.split("/", 1)

        env = self._get_provider_env(provider)
        # NOTE(review): presumably makes OpenCode act as if it were inside a
        # git checkout - confirm against the OpenCode documentation.
        env["OPENCODE_FAKE_VCS"] = "git"

        escaped_model = shlex.quote(self.model_name)
        escaped_instruction = shlex.quote(instruction)

        return [
            ExecInput(
                command=(
                    # Without pipefail the pipeline's exit status is tee's, so
                    # a failing opencode (bad API key, CLI crash) would be
                    # reported as success. Assumes the command runs under
                    # bash - TODO confirm for every Harbor environment.
                    "set -o pipefail; "
                    f"opencode --model {escaped_model} run "
                    f"--agent Sisyphus --format=json {escaped_instruction} "
                    "2>&1 | tee /logs/agent/sisyphus.txt"
                ),
                env=env,
            )
        ]

    def _get_provider_env(self, provider: str) -> dict[str, str]:
        """Collect the environment variables to forward for ``provider``.

        Args:
            provider: Provider prefix of the model name (the part before the
                first ``/``).

        Returns:
            A new dict containing every variable known for ``provider`` that
            is currently set in ``os.environ``. Unknown providers and unset
            variables yield no entries.
        """
        provider_keys = {
            "amazon-bedrock": [
                "AWS_ACCESS_KEY_ID",
                "AWS_SECRET_ACCESS_KEY",
                "AWS_REGION",
            ],
            "anthropic": ["ANTHROPIC_API_KEY"],
            "azure": ["AZURE_RESOURCE_NAME", "AZURE_API_KEY"],
            "deepseek": ["DEEPSEEK_API_KEY"],
            "github-copilot": ["GITHUB_TOKEN"],
            "google": [
                "GEMINI_API_KEY",
                "GOOGLE_GENERATIVE_AI_API_KEY",
                "GOOGLE_APPLICATION_CREDENTIALS",
                "GOOGLE_CLOUD_PROJECT",
                "GOOGLE_CLOUD_LOCATION",
                "GOOGLE_GENAI_USE_VERTEXAI",
                "GOOGLE_API_KEY",
            ],
            "groq": ["GROQ_API_KEY"],
            "huggingface": ["HF_TOKEN"],
            "llama": ["LLAMA_API_KEY"],
            "mistral": ["MISTRAL_API_KEY"],
            "openai": ["OPENAI_API_KEY"],
            "opencode": [],  # opencode/zen - no API key required
            "xai": ["XAI_API_KEY"],
        }

        return {
            key: os.environ[key]
            for key in provider_keys.get(provider, [])
            if key in os.environ
        }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The command pipes `opencode` output through `tee`, which makes the shell exit status come from `tee` rather than `opencode` unless `pipefail` (or `PIPESTATUS`) is used. In runs where `opencode` fails (e.g., bad API key, CLI crash), the pipeline can still exit 0 because `tee` succeeds, so Harbor may treat the trial as successful and skip failure handling while producing a partial/empty trajectory. Consider enabling `set -o pipefail` or explicitly propagating `opencode`’s exit code before `tee`.

Useful? React with 👍 / 👎.