Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Dependencies
.sisyphus/
node_modules/
__pycache__

# Build output
dist/
Expand All @@ -18,6 +19,7 @@ Thumbs.db
# Logs
*.log
npm-debug.log*
jobs

# Lock files (use bun.lockb instead)
package-lock.json
Expand Down
Empty file added benchmark/__init__.py
Empty file.
46 changes: 46 additions & 0 deletions benchmark/install-sisyphus.sh.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Jinja2 template for the script that provisions the Sisyphus agent:
# installs bun, OpenCode, and the oh-my-opencode plugin.
#
# Template variables (each falls back to "latest" when unset):
#   version      - opencode-ai package version
#   omo_version  - oh-my-opencode package version

# -e aborts on the first failing command. pipefail is needed as well: without
# it a failed curl in the "curl | bash" pipeline below would be masked by the
# exit status of bash.
set -eo pipefail

apt-get update
apt-get install -y curl unzip

# Install bun
curl -fsSL https://bun.sh/install | bash
export BUN_INSTALL="$HOME/.bun"
export PATH="$BUN_INSTALL/bin:$PATH"
bun --version

# Install OpenCode
{% if version %}
bun install -g opencode-ai@{{ version }}
{% else %}
bun install -g opencode-ai@latest
{% endif %}

# Pre-create the oh-my-opencode config BEFORE installing the plugin so that
# problematic hooks are disabled and never initialize during plugin load:
# - comment-checker: Downloads Go binary from GitHub (rate limiting with multiple containers)
# - auto-update-checker: Checks for updates (unnecessary in benchmarks)
# - session-notification: OS notifications (no display in containers)
# - background-notification: OS notifications (no display in containers)
# NOTE(review): comment-checker is described above but is NOT present in
# disabled_hooks below. Confirm whether it should be added to the list or
# removed from this comment.
mkdir -p ~/.config/opencode
cat > ~/.config/opencode/oh-my-opencode.json << 'EOF'
{
  "disabled_hooks": [
    "auto-update-checker",
    "session-notification",
    "background-notification"
  ]
}
EOF

# Install oh-my-opencode plugin (provides Sisyphus agent)
# --claude=no uses opencode/zen provider (free) instead of Anthropic
{% if omo_version %}
bunx oh-my-opencode@{{ omo_version }} install --no-tui --claude=no --chatgpt=no --gemini=no
{% else %}
bunx oh-my-opencode@latest install --no-tui --claude=no --chatgpt=no --gemini=no
{% endif %}

# Final sanity check that the opencode CLI is on PATH.
opencode --version
echo "Sisyphus agent ready"
83 changes: 83 additions & 0 deletions benchmark/sisyphus_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os
import shlex
from pathlib import Path

from harbor.agents.installed.base import BaseInstalledAgent, ExecInput
from harbor.models.agent.context import AgentContext


class SisyphusAgent(BaseInstalledAgent):
    """Harbor agent that runs OpenCode with the oh-my-opencode plugin.

    The run command selects the ``Sisyphus`` agent via ``opencode run
    --agent Sisyphus``.
    """

    @staticmethod
    def name() -> str:
        """Return the agent identifier, ``"sisyphus"``."""
        return "sisyphus"

    @property
    def _install_agent_template_path(self) -> Path:
        """Return the path of ``install-sisyphus.sh.j2`` next to this module."""
        return Path(__file__).parent / "install-sisyphus.sh.j2"

    def populate_context_post_run(self, context: AgentContext) -> None:
        """Do nothing; this agent writes no post-run data into ``context``."""

    def create_run_agent_commands(self, instruction: str) -> list[ExecInput]:
        """Build the shell command that runs OpenCode on ``instruction``.

        Args:
            instruction: Task text passed to ``opencode run`` as a single
                shell-quoted argument.

        Returns:
            A one-element list holding the ``ExecInput`` for the run. Combined
            stdout/stderr is copied to ``/logs/agent/sisyphus.txt``.

        Raises:
            ValueError: If ``self.model_name`` is unset or does not contain a
                ``/`` separating provider from model.
        """
        if not self.model_name or "/" not in self.model_name:
            raise ValueError("Model name must be in the format provider/model_name")

        provider, _ = self.model_name.split("/", 1)

        env = self._get_provider_env(provider)
        # NOTE(review): presumably makes OpenCode treat the workspace as a git
        # checkout even when it is not one; confirm against OpenCode docs.
        env["OPENCODE_FAKE_VCS"] = "git"

        # shlex.quote leaves ordinary provider/model names unchanged and only
        # adds quoting when the value contains shell metacharacters.
        pipeline = (
            f"opencode --model {shlex.quote(self.model_name)} run "
            f"--agent Sisyphus --format=json {shlex.quote(instruction)} "
            "2>&1 | tee /logs/agent/sisyphus.txt"
        )

        # A pipeline's exit status is that of its last command (tee), so a
        # failing opencode (bad API key, CLI crash) would otherwise exit 0.
        # Running the pipeline under "bash -o pipefail" propagates opencode's
        # failure regardless of which shell executes the outer command.
        command = f"bash -o pipefail -c {shlex.quote(pipeline)}"

        return [ExecInput(command=command, env=env)]

    def _get_provider_env(self, provider: str) -> dict[str, str]:
        """Collect the credential environment variables for ``provider``.

        Only variables that are currently set in ``os.environ`` are copied.
        An unknown provider yields an empty dict.

        Args:
            provider: Provider prefix of the model name (text before ``/``).

        Returns:
            A new dict mapping variable names to their current values.
        """
        provider_keys: dict[str, list[str]] = {
            "amazon-bedrock": [
                "AWS_ACCESS_KEY_ID",
                "AWS_SECRET_ACCESS_KEY",
                "AWS_REGION",
            ],
            "anthropic": ["ANTHROPIC_API_KEY"],
            "azure": ["AZURE_RESOURCE_NAME", "AZURE_API_KEY"],
            "deepseek": ["DEEPSEEK_API_KEY"],
            "github-copilot": ["GITHUB_TOKEN"],
            "google": [
                "GEMINI_API_KEY",
                "GOOGLE_GENERATIVE_AI_API_KEY",
                "GOOGLE_APPLICATION_CREDENTIALS",
                "GOOGLE_CLOUD_PROJECT",
                "GOOGLE_CLOUD_LOCATION",
                "GOOGLE_GENAI_USE_VERTEXAI",
                "GOOGLE_API_KEY",
            ],
            "groq": ["GROQ_API_KEY"],
            "huggingface": ["HF_TOKEN"],
            "llama": ["LLAMA_API_KEY"],
            "mistral": ["MISTRAL_API_KEY"],
            "openai": ["OPENAI_API_KEY"],
            "opencode": [],  # opencode/zen - no API key required
            "xai": ["XAI_API_KEY"],
        }

        wanted = provider_keys.get(provider, [])
        return {key: os.environ[key] for key in wanted if key in os.environ}