diff --git a/.gitignore b/.gitignore index 4c18cdf..d4e5bb9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,19 +1,36 @@ +# ────────────────────────────────────────────── +# Logs +# ────────────────────────────────────────────── logs *.log npm-debug.log* yarn-debug.log* yarn-error.log* lerna-debug.log* -report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# ────────────────────────────────────────────── +# Runtime / process +# ────────────────────────────────────────────── pids *.pid *.seed *.pid.lock -lib-cov + +# ────────────────────────────────────────────── +# Coverage & test reports +# ────────────────────────────────────────────── coverage *.lcov .nyc_output -build/Release +lib-cov +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json +/test-reports/ +junit.xml +/coverage/ + +# ────────────────────────────────────────────── +# Node / JS +# ────────────────────────────────────────────── node_modules/ jspm_packages/ *.tsbuildinfo @@ -21,36 +38,76 @@ jspm_packages/ *.tgz .yarn-integrity .cache -cdk.out/ -*.DS_STORE -!.node-version +.parcel-cache/ + +# ────────────────────────────────────────────── +# Python +# ────────────────────────────────────────────── *.pyc __pycache__/ -!.ort.yml +agent/.venv/ + +# ────────────────────────────────────────────── +# CDK / CloudFormation outputs +# ────────────────────────────────────────────── +cdk.out/ +/cdk/cdk.out*/ +.cdk.staging/ +cdk.context.json +/assets/ + +# ────────────────────────────────────────────── +# Build outputs (compiled TS → JS) +# ────────────────────────────────────────────── +/cdk/lib/ +/cli/lib/ +/dist/ +build/Release + +# ────────────────────────────────────────────── +# Docs build +# ────────────────────────────────────────────── +/docs/dist/ +local-docs/ + +# ────────────────────────────────────────────── +# IDE / editor +# ────────────────────────────────────────────── .idea .vscode -cdk.context.json -*.bkp +*.DS_STORE + +# ────────────────────────────────────────────── +# Security scan outputs +# 
────────────────────────────────────────────── gitleaks-*.json agent/gitleaks-report.json + +# ────────────────────────────────────────────── +# Environment / secrets +# ────────────────────────────────────────────── +.env +.env.* .claude/settings.local.json -/test-reports/ -junit.xml -/coverage/ + +# ────────────────────────────────────────────── +# Misc +# ────────────────────────────────────────────── +*.bkp +Plans/ + +# ────────────────────────────────────────────── +# Explicit keeps (override ignores above) +# ────────────────────────────────────────────── +!.node-version +!.ort.yml !/.github/workflows/build.yml -!/.mergify.yml +!/.github/workflows/docs.yml !/.github/pull_request_template.md +!/.mergify.yml !/cdk/test/ !/cdk/tsconfig.json !/cdk/tsconfig.dev.json !/cdk/src/ -/cdk/lib -/dist/ !/cdk/.eslintrc.json -/assets/ !/cdk/cdk.json -/cdk/cdk.out/ -.cdk.staging/ -.parcel-cache/ -!/.github/workflows/docs.yml -local-docs/ diff --git a/README.md b/README.md index ecfaa43..6d917b8 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,78 @@ See the full [ROADMAP](./docs/guides/ROADMAP.md) for details on each iteration. ## Getting started -### Installation and deployment +### Claude Code plugin (recommended) + +This repository ships a [Claude Code plugin](https://docs.anthropic.com/en/docs/claude-code/plugins) that provides guided workflows for setup, deployment, task submission, and troubleshooting. + +#### Installing the plugin + +```bash +git clone https://github.com/aws-samples/sample-autonomous-cloud-coding-agents.git +cd sample-autonomous-cloud-coding-agents +claude --plugin-dir abca-plugin +``` + +The `--plugin-dir` flag tells Claude Code to load the local plugin from the `abca-plugin/` directory. The plugin's skills, commands, agents, and hooks will be available immediately. + +> **Tip:** If you use Claude Code via VS Code or JetBrains, you can add `--plugin-dir abca-plugin` to the extension's CLI arguments setting. 
+ +#### What the plugin provides + +**Skills** (guided multi-step workflows — Claude activates these automatically based on your request): + +| Skill | Triggers on | What it does | +|-------|------------|--------------| +| `setup` | "get started", "install", "first time setup" | Full guided setup: prerequisites, toolchain, deploy, smoke test | +| `deploy` | "deploy", "cdk diff", "destroy" | Deploy, diff, or destroy the CDK stack with pre-checks | +| `onboard-repo` | "add a repo", "onboard", 422 errors | Add a new GitHub repository via Blueprint construct | +| `submit-task` | "submit task", "run agent", "review PR" | Submit a coding task with prompt quality coaching | +| `troubleshoot` | "debug", "error", "not working", "failed" | Diagnose deployment, auth, or task execution issues | +| `abca-status` | "status", "health check", "is ABCA running" | Quick platform health check: stack status, running tasks, build health | +| `abca-submit` | "submit task", "quick submit" | Shortcut for task submission with auto-detected task type | + +**Agents** (specialized subagents, spawned automatically or via the Agent tool): + +| Agent | When it's used | +|-------|---------------| +| `cdk-expert` | CDK architecture, construct design, handler implementation, stack modifications | +| `agent-debugger` | Task failure investigation, CloudWatch log analysis, agent runtime debugging | + +**Hook** (runs automatically): + +A `SessionStart` hook injects ABCA project context (key directories, commands, conventions) into every Claude Code session. 
+ +#### Local plugin development + +If you're modifying the plugin itself, here's the file layout: + +``` +abca-plugin/ + plugin.json # Plugin manifest (name, version, description) + skills/ + setup/SKILL.md # First-time setup workflow + deploy/SKILL.md # CDK deployment workflow + onboard-repo/SKILL.md # Repository onboarding workflow + submit-task/SKILL.md # Task submission with prompt coaching + troubleshoot/SKILL.md # Diagnostic workflow + abca-status/SKILL.md # Quick platform health check + abca-submit/SKILL.md # Quick task submission shortcut + agents/ + cdk-expert.md # CDK infrastructure specialist + agent-debugger.md # Task failure debugger + hooks/ + hooks.json # SessionStart hook configuration +``` + +**Key conventions:** +- All plugin components live inside `abca-plugin/` alongside the manifest +- Skills live in subdirectories with a `SKILL.md` file (not flat `.md` files) +- Agents are flat `.md` files with YAML frontmatter +- Hooks use JSON configuration in `hooks/hooks.json` + +**After editing plugin files**, restart Claude Code with `claude --plugin-dir abca-plugin` to pick up changes. + +### Manual installation and deployment Install [mise](https://mise.jdx.dev/getting-started.html) if you want to use repo tasks (`mise run install`, `mise run build`). For monorepo-prefixed tasks (`mise //cdk:build`, etc.), set **`MISE_EXPERIMENTAL=1`** — see [CONTRIBUTING.md](./CONTRIBUTING.md). diff --git a/abca-plugin/README.md b/abca-plugin/README.md new file mode 100644 index 0000000..d858ab7 --- /dev/null +++ b/abca-plugin/README.md @@ -0,0 +1,111 @@ +# ABCA Plugin for Claude Code + +A Claude Code plugin that provides guided workflows for setting up, deploying, operating, and troubleshooting the ABCA (Autonomous Background Coding Agents on AWS) platform. 
+ +## Installation + +```bash +claude --plugin-dir abca-plugin +``` + +Or add to your project's `.claude/settings.json`: + +```json +{ + "plugins": ["./abca-plugin"] +} +``` + +## What's Included + +### Skills (slash commands) + +| Skill | Trigger | Description | +|-------|---------|-------------| +| `/setup` | First-time setup, prerequisites | Walk through prerequisites, toolchain, and first deployment | +| `/deploy` | Deploy, diff, destroy | Deploy, diff, or destroy the CDK stack | +| `/onboard-repo` | Add a repository | Onboard a GitHub repo via Blueprint CDK construct | +| `/submit-task` | Submit a coding task | Submit tasks with prompt quality guidance and cost controls | +| `/troubleshoot` | Debug, errors, failures | Diagnose build, deployment, auth, and task execution issues | +| `/abca-status` | Status, health check | Check stack health, running tasks, and recent history | +| `/abca-submit` | Quick submit | Shortcut for rapid task submission | + +### Agents + +| Agent | Model | Description | +|-------|-------|-------------| +| `cdk-expert` | Sonnet | AWS CDK infrastructure expert for construct design, handler implementation, and stack modifications | +| `agent-debugger` | Sonnet | Read-only debugging specialist for task failures, preflight errors, and CloudWatch log analysis | + +### Hooks + +- **SessionStart** — Injects ABCA project context (directory structure, key commands, task types, available skills/agents) into every Claude Code session. 
+ +## Plugin Structure + +``` +abca-plugin/ + plugin.json # Plugin manifest + agents/ + cdk-expert.md # CDK infrastructure agent + agent-debugger.md # Runtime debugging agent + hooks/ + hooks.json # SessionStart context injection + skills/ + setup/SKILL.md # First-time setup workflow + deploy/SKILL.md # CDK deployment management + onboard-repo/SKILL.md # Repository onboarding + submit-task/SKILL.md # Task submission workflow + troubleshoot/SKILL.md # Troubleshooting guide + abca-status/SKILL.md # Platform status checks + abca-submit/SKILL.md # Quick task submission +``` + +## Testing + +This plugin is markdown and configuration only (no executable code), so traditional unit tests don't apply. Instead, a **4-layer validation strategy** verifies correctness: + +| Layer | What it checks | +|-------|---------------| +| **1. Structural** | `plugin.json` fields, file discovery, JSON/YAML validity, no orphaned files | +| **2. Agent Config** | Frontmatter fields (`model`, `tools`, `description`), valid tool names, file path accuracy, capability alignment with examples | +| **3. Content Integrity** | All repo paths exist, all `mise run` commands are valid tasks, all `bgagent` CLI flags match actual help output, skill cross-references resolve, AWS CLI syntax is correct | +| **4. Hooks** | `hooks.json` structure, supported event names, skills/agents listed in hook content all exist, no sensitive data | + +### Running the tests + +From the repo root with Claude Code: + +``` +claude --plugin-dir abca-plugin +``` + +Then ask Claude to validate the plugin: + +``` +Validate the abca-plugin using the plugin-validator agent, then verify +all command references and file paths in the skills are accurate. 
+``` + +Or run the checks manually: + +```bash +# Layer 1: Structural — valid JSON +python3 -c "import json; json.load(open('abca-plugin/plugin.json')); print('plugin.json OK')" +python3 -c "import json; json.load(open('abca-plugin/hooks/hooks.json')); print('hooks.json OK')" + +# Layer 3: Content — mise tasks exist +MISE_EXPERIMENTAL=1 mise tasks --all 2>/dev/null | grep -E '(build|install|compile|test|deploy|destroy|diff|synth|bootstrap)' + +# Layer 3: Content — CLI flags match +node cli/lib/bin/bgagent.js submit --help && node cli/lib/bin/bgagent.js list --help +``` + +## Development + +To modify the plugin: + +1. Edit the relevant `.md` file under `skills/`, `agents/`, or `hooks/` +2. Re-validate using the testing strategy above +3. Ensure any new file paths or commands you reference actually exist in the repo +4. Keep the `SessionStart` hook prompt in sync if you add/remove/rename skills or agents diff --git a/abca-plugin/agents/agent-debugger.md b/abca-plugin/agents/agent-debugger.md new file mode 100644 index 0000000..3c7ca07 --- /dev/null +++ b/abca-plugin/agents/agent-debugger.md @@ -0,0 +1,80 @@ +--- +name: agent-debugger +description: | + Debugs ABCA agent runtime issues — task failures, preflight errors, agent + execution problems, and log analysis. Use when investigating why a task failed, + analyzing CloudWatch logs, or debugging the Python agent code. + + + Context: User has a failed task and wants to know why + user: "My task failed, can you check what happened?" + assistant: "I'll use the agent-debugger to investigate the failure." + Task failure investigation triggers agent-debugger. + + + + Context: User sees a preflight error + user: "I'm getting GITHUB_UNREACHABLE errors on my tasks" + assistant: "I'll use the agent-debugger to diagnose the preflight failure." + Preflight error diagnosis triggers agent-debugger. + + + + Context: User wants to check CloudWatch logs for a task + user: "Can you check the logs for task abc-123?" 
+ assistant: "I'll use the agent-debugger to analyze the CloudWatch logs." + Log analysis triggers agent-debugger. + +model: sonnet +color: red +tools: ["Read", "Grep", "Glob", "Bash"] +--- + +You are a debugging specialist for the ABCA agent runtime. You diagnose task execution failures by analyzing logs, events, and agent code. + +## Your Expertise + +- Task lifecycle: SUBMITTED -> HYDRATING -> RUNNING -> COMPLETED/FAILED +- Agent runtime: Python code in `agent/src/` (pipeline.py, runner.py, config.py, hooks.py, policy.py) +- System prompts: `agent/src/prompts/` with base template and per-task-type variants +- Orchestrator: Lambda durable functions managing task state +- Preflight checks: GitHub API validation before compute allocation + +## Debugging Workflow + +1. **Get task events** to understand what happened: + ```bash + node cli/lib/bin/bgagent.js events "$TASK_ID" --output json + ``` + +2. **Check event types** for clues: + - `preflight_failed` — GitHub access issue (PAT permissions, repo existence) + - `guardrail_blocked` — Content screening rejected the prompt + - `admission_rejected` — Concurrency limit or validation failure + - `task_failed` — Agent runtime error + - `task_timed_out` — 9-hour limit exceeded + +3. **For runtime failures**, check CloudWatch logs: + ```bash + aws logs filter-log-events --log-group-name /aws/lambda/backgroundagent-dev-orchestrator \ + --filter-pattern "\"$TASK_ID\"" --limit 50 + ``` + +4. **For agent code issues**, read the relevant Python source: + - `agent/src/pipeline.py` — Main execution pipeline + - `agent/src/runner.py` — Task runner loop + - `agent/src/config.py` — Configuration loading + - `agent/src/hooks.py` — Lifecycle hooks + - `agent/src/prompts/` — System prompt templates + +5. 
**For local reproduction**, use: + ```bash + DRY_RUN=1 ./agent/run.sh "owner/repo" "task description" + ``` + +## Common Failure Patterns + +- **PAT scope too narrow**: Preflight fails because fine-grained token doesn't include the target repo +- **Missing CLAUDE.md**: Agent can't find build/test commands, wastes turns exploring +- **Stuck in test loop**: Agent repeatedly fails tests without making progress — reduce max_turns +- **Memory/compute limits**: 2 vCPU, 8 GB RAM — large monorepos may need optimization diff --git a/abca-plugin/agents/cdk-expert.md b/abca-plugin/agents/cdk-expert.md new file mode 100644 index 0000000..e420f4b --- /dev/null +++ b/abca-plugin/agents/cdk-expert.md @@ -0,0 +1,65 @@ +--- +name: cdk-expert +description: | + AWS CDK and ABCA infrastructure expert. Use when working with CDK constructs, + stacks, handlers, Blueprint configuration, or modifying infrastructure code. + Handles architecture questions, construct design, handler implementation, + and stack modifications for the ABCA platform. + + + Context: User wants to add a new CDK construct + user: "I need to add a new construct for the notification system" + assistant: "I'll use the cdk-expert to design and implement the construct." + CDK construct work triggers cdk-expert. + + + + Context: User wants to modify a Lambda handler + user: "The create-task handler needs a new validation check" + assistant: "I'll use the cdk-expert to implement the handler change." + Handler modification triggers cdk-expert. + + + + Context: User asks about ABCA infrastructure architecture + user: "How does the orchestrator interact with the compute environment?" + assistant: "I'll use the cdk-expert to explain the architecture." + Architecture questions trigger cdk-expert. + +model: sonnet +color: blue +tools: ["Read", "Grep", "Glob", "Bash", "Edit", "Write"] +--- + +You are an expert AWS CDK developer specializing in the ABCA (Autonomous Background Coding Agents) platform. 
You have deep knowledge of CDK v2, TypeScript, and the ABCA architecture. + +## Your Expertise + +- **CDK Constructs**: Blueprint, TaskApi, TaskOrchestrator, database constructs +- **Lambda Handlers**: Task CRUD, orchestration, webhooks, shared utilities +- **AWS Services**: API Gateway, Lambda, DynamoDB, Secrets Manager, Cognito, Bedrock, AgentCore, Step Functions +- **Testing**: Jest tests mirroring the source structure under `cdk/test/` + +## Project Layout + +- `cdk/src/stacks/agent.ts` — Main stack definition +- `cdk/src/constructs/` — Reusable CDK constructs +- `cdk/src/handlers/` — Lambda handler implementations +- `cdk/src/handlers/shared/` — Shared logic (types, validation, context hydration, etc.) +- `cdk/test/` — Jest tests mirroring source structure + +## Key Conventions + +- Shared API types live in `cdk/src/handlers/shared/types.ts` — if you change these, `cli/src/types.ts` MUST stay in sync +- Use `mise //cdk:compile` to verify TypeScript, `mise //cdk:test` for tests, `mise //cdk:synth` to synthesize +- Blueprint constructs are the mechanism for repository onboarding — they write RepoConfig records to DynamoDB +- cdk-nag is enabled for security/compliance checks +- Follow existing patterns: look at how current constructs and handlers are structured before adding new ones + +## When Helping Users + +1. Always read relevant source files before suggesting changes +2. Run `mise //cdk:compile` after TypeScript changes to verify +3. Run `mise //cdk:test` after logic changes +4. Show `mise //cdk:diff` output before recommending deployment +5. 
Flag any security implications of infrastructure changes diff --git a/abca-plugin/hooks/hooks.json b/abca-plugin/hooks/hooks.json new file mode 100644 index 0000000..977c231 --- /dev/null +++ b/abca-plugin/hooks/hooks.json @@ -0,0 +1,10 @@ +{ + "hooks": { + "SessionStart": [ + { + "type": "prompt", + "prompt": "You are working in the ABCA (Autonomous Background Coding Agents on AWS) repository — a self-hosted platform for background coding agents.\n\nKey directories: cdk/src/ (CDK infra), agent/src/ (Python agent), cli/src/ (bgagent CLI), docs/ (documentation).\n\nEssential commands (require MISE_EXPERIMENTAL=1): mise run install, mise run build, mise //cdk:compile, mise //cdk:test, mise //cdk:deploy.\n\nCritical: types in cdk/src/handlers/shared/types.ts must stay in sync with cli/src/types.ts. Don't edit docs/src/content/docs/ directly — it's generated.\n\nTask types: new_task (create PR), pr_iteration (update PR), pr_review (review PR).\n\nAvailable plugin skills: /setup, /deploy, /onboard-repo, /submit-task, /troubleshoot, /abca-status, /abca-submit\nAvailable agents: cdk-expert, agent-debugger\n\nDocs: https://aws-samples.github.io/sample-autonomous-cloud-coding-agents/" + } + ] + } +} diff --git a/abca-plugin/plugin.json b/abca-plugin/plugin.json new file mode 100644 index 0000000..31e8756 --- /dev/null +++ b/abca-plugin/plugin.json @@ -0,0 +1,7 @@ +{ + "name": "abca-getting-started", + "version": "1.0.0", + "description": "Getting started guide and developer toolkit for ABCA (Autonomous Background Coding Agents on AWS). 
Provides guided setup, deployment, task submission, repository onboarding, and troubleshooting workflows.", + "author": "aws-samples", + "homepage": "https://github.com/aws-samples/sample-autonomous-cloud-coding-agents" +} diff --git a/abca-plugin/skills/abca-status/SKILL.md b/abca-plugin/skills/abca-status/SKILL.md new file mode 100644 index 0000000..b215839 --- /dev/null +++ b/abca-plugin/skills/abca-status/SKILL.md @@ -0,0 +1,52 @@ +--- +name: abca-status +description: Check ABCA platform status — stack health, running tasks, and recent task history. Use when the user says "status", "health check", "is ABCA running", "check platform", or "what's the state". +allowed-tools: + - Bash + - Read +--- + +# ABCA Platform Status + +Check the current state of the ABCA platform and report a concise status summary. + +## Checks to Run + +Run these in parallel where possible: + +1. **Stack status:** + ```bash + aws cloudformation describe-stacks --stack-name backgroundagent-dev \ + --query 'Stacks[0].{Status:StackStatus,Updated:LastUpdatedTime}' --output json 2>/dev/null || echo "Stack not found" + ``` + +2. **Running tasks:** + ```bash + node cli/lib/bin/bgagent.js list --status RUNNING,SUBMITTED,HYDRATING --output json 2>/dev/null || echo "CLI not configured" + ``` + +3. **Recent completed tasks:** + ```bash + node cli/lib/bin/bgagent.js list --limit 5 --output json 2>/dev/null || echo "CLI not configured" + ``` + +4. **Local build health:** + ```bash + export MISE_EXPERIMENTAL=1 && mise run //cdk:compile 2>&1 | tail -5 + ``` + +## Output Format + +Present a concise status report: + +``` +ABCA Platform Status +==================== +Stack: [UPDATE_COMPLETE | CREATE_COMPLETE | ...] +Updated: [timestamp] +Active: [N] tasks running +Recent: [N] tasks in last batch (show status breakdown) +Build: [PASS | FAIL with error summary] +``` + +If the CLI isn't configured, note this and suggest running the `setup` skill. 
diff --git a/abca-plugin/skills/abca-submit/SKILL.md b/abca-plugin/skills/abca-submit/SKILL.md new file mode 100644 index 0000000..723825d --- /dev/null +++ b/abca-plugin/skills/abca-submit/SKILL.md @@ -0,0 +1,37 @@ +--- +name: abca-submit +description: Quick task submission shortcut — submit a coding task with guided prompts. Use when the user says "submit task", "run agent on", "submit to ABCA", or "quick submit". +argument-hint: [description] +allowed-tools: + - Bash + - Read +--- + +# Quick Task Submission + +Submit a task to ABCA quickly. If arguments are missing, ask the user. + +## Collect Required Info + +If the repository is not provided, ask for the repository (owner/repo format). +If the description is not provided, ask what the agent should do. + +## Determine Task Type + +Parse the description to infer the type: +- If it looks like a PR number or mentions "review PR": use `--review-pr` +- If it mentions "iterate on PR" or "fix PR feedback": use `--pr` +- If it's an issue number (just a number): use `--issue` +- Otherwise: use `--task` with the text description + +## Submit + +```bash +node cli/lib/bin/bgagent.js submit \ + --repo $REPO \ + $FLAGS \ + --max-turns 100 \ + --wait +``` + +Show the task ID and status. If `--wait` is used, show the final outcome including PR URL if created. diff --git a/abca-plugin/skills/deploy/SKILL.md b/abca-plugin/skills/deploy/SKILL.md new file mode 100644 index 0000000..ca0471c --- /dev/null +++ b/abca-plugin/skills/deploy/SKILL.md @@ -0,0 +1,83 @@ +--- +name: deploy +description: >- + Deploy, diff, or destroy the ABCA CDK stack. Handles pre-deployment validation, + synthesis, and post-deployment verification. Use when the user says "deploy", + "cdk deploy", "deploy the stack", "destroy", "cdk diff", "what changed", + "redeploy", or "update the stack". +--- + +# ABCA Deployment + +You are managing CDK deployment for the ABCA platform. Determine the user's intent and execute the appropriate workflow. 
+ +## Determine Action + +Ask the user (or infer from context) which action they want: +- **deploy** — Build and deploy the CDK stack +- **diff** — Show what would change without deploying +- **destroy** — Tear down the stack (requires explicit confirmation) +- **synth** — Synthesize CloudFormation without deploying + +## Pre-Deployment Checks + +Before any deployment action, verify: + +1. **Build is clean:** + ```bash + export MISE_EXPERIMENTAL=1 + mise run build + ``` + This runs agent quality checks, CDK compilation + tests, CLI build, and docs build. Do NOT deploy if the build fails. + +2. **Docker is running** — Required for CDK asset bundling +3. **AWS credentials are configured** — `aws sts get-caller-identity` + +## Deploy Workflow + +```bash +export MISE_EXPERIMENTAL=1 +mise run //cdk:deploy +``` + +After successful deployment, retrieve and display stack outputs: +```bash +aws cloudformation describe-stacks --stack-name backgroundagent-dev \ + --query 'Stacks[0].Outputs' --output table +``` + +Key outputs to highlight: `ApiUrl`, `RuntimeArn`, `UserPoolId`, `AppClientId`, `GitHubTokenSecretArn`. + +## Diff Workflow + +```bash +export MISE_EXPERIMENTAL=1 +mise run //cdk:diff +``` + +Summarize the changes: new resources, modified resources, removed resources. Flag any potentially destructive changes (resource replacements, security group changes). + +## Destroy Workflow + +**CRITICAL: Ask for explicit confirmation before destroying.** Use AskUserQuestion to confirm, explaining consequences. + +```bash +export MISE_EXPERIMENTAL=1 +mise run //cdk:destroy +``` + +## Synth Workflow + +```bash +export MISE_EXPERIMENTAL=1 +mise run //cdk:synth +``` + +Output goes to `cdk/cdk.out/`. Useful for reviewing generated CloudFormation templates. 
+ +## Post-Deployment + +After a successful deploy, remind the user to: +- Store/update the GitHub PAT in Secrets Manager if this is a fresh deployment +- Onboard repositories via Blueprint constructs if needed +- Run a smoke test: `curl -s -H "Authorization: $TOKEN" $API_URL/tasks` diff --git a/abca-plugin/skills/onboard-repo/SKILL.md b/abca-plugin/skills/onboard-repo/SKILL.md new file mode 100644 index 0000000..8c863f3 --- /dev/null +++ b/abca-plugin/skills/onboard-repo/SKILL.md @@ -0,0 +1,129 @@ +--- +name: onboard-repo +description: >- + Onboard a new GitHub repository to the ABCA platform by adding a Blueprint CDK + construct. Use when the user says "onboard a repo", "add a repository", + "register a repo", "new repo", "Blueprint construct", "REPO_NOT_ONBOARDED error", + or gets a 422 error about an unregistered repository. +--- + +# Repository Onboarding + +You are guiding the user through onboarding a new GitHub repository to ABCA. Repositories must be registered as `Blueprint` constructs in the CDK stack before tasks can target them. + +## Step 1: Gather Repository Details + +Use AskUserQuestion to collect: +- **Repository**: GitHub `owner/repo` format +- **Compute type**: `agentcore` (default) or `ecs` +- **Model preference**: Claude Sonnet 4 (default), Claude Opus 4 (complex repos), or Claude Haiku (lightweight). **Important:** Models must be specified using their cross-region inference profile ID (e.g. `us.anthropic.claude-opus-4-20250514-v1:0`), not the raw foundation model ID. On-demand invocation of raw model IDs is not supported for most models. 
+- **Max turns**: Default 100 (range: 1-500) +- **Max budget**: USD cost ceiling per task (optional) +- **Custom GitHub PAT**: If this repo needs a different token than the platform default + +## Step 2: Read the Current Stack + +Read the CDK stack file to understand existing Blueprint definitions: + +``` +Read cdk/src/stacks/agent.ts +``` + +Identify: +- Where existing Blueprint constructs are defined +- The `repoTable` reference used +- Any patterns for compute/model overrides + +## Step 3: Add the Blueprint Construct + +Add a new `Blueprint` construct instance to the stack. Follow the existing pattern. Example: + +```typescript +new Blueprint(this, 'MyRepoBlueprint', { + repo: 'owner/repo', + repoTable: repoTable, + // Optional overrides: + // computeType: 'agentcore', + // modelId: 'us.anthropic.claude-sonnet-4-20250514-v1:0', + // maxTurns: 100, + // maxBudgetUsd: 50, + // runtimeArn: runtime.runtimeArn, + // githubTokenSecretArn: 'arn:aws:secretsmanager:...', +}); +``` + +Use a descriptive construct ID derived from the repo name. + +### Model ID and IAM Permissions + +When specifying a non-default model via `agent.modelId`, two things are required: + +1. **Use the inference profile ID, not the raw model ID.** Bedrock does not support on-demand invocation of raw foundation model IDs for most models. Use the cross-region inference profile ID instead: + - Sonnet 4: `us.anthropic.claude-sonnet-4-20250514-v1:0` + - Opus 4: `us.anthropic.claude-opus-4-20250514-v1:0` + - Haiku 4.5: `us.anthropic.claude-haiku-4-5-20251001-v1:0` (on-demand works for Haiku) + +2. **Grant the runtime IAM permissions for the model.** The Blueprint construct does not automatically grant `bedrock:InvokeModel*` — this is by design (least privilege). 
You must add a `grantInvoke` block in the stack for each model used: + ```typescript + const opusModel = new bedrock.BedrockFoundationModel('anthropic.claude-opus-4-20250514-v1:0', { + supportsAgents: true, + supportsCrossRegion: true, + }); + opusModel.grantInvoke(runtime); + + const opusProfile = bedrock.CrossRegionInferenceProfile.fromConfig({ + geoRegion: bedrock.CrossRegionInferenceProfileRegion.US, + model: opusModel, + }); + opusProfile.grantInvoke(runtime); + ``` + +## Step 4: Deploy + +After adding the Blueprint, the stack must be redeployed: + +```bash +export MISE_EXPERIMENTAL=1 +mise run //cdk:compile # Verify TypeScript compiles +mise run //cdk:test # Run tests +mise run //cdk:diff # Preview changes +``` + +Show the diff to the user. If it looks correct, ask if they want to deploy now. + +```bash +mise run //cdk:deploy +``` + +## Step 5: Verify + +After deployment, verify the repo config was written to DynamoDB: + +```bash +aws dynamodb scan --table-name "$REPO_TABLE_NAME" \ + --filter-expression "repo = :r" \ + --expression-attribute-values '{":r":{"S":"owner/repo"}}' \ + --output json +``` + +## Per-Repository Configuration Reference + +| Setting | Purpose | Default | +|---------|---------|---------| +| `compute_type` | Execution strategy | `agentcore` | +| `runtime_arn` | AgentCore runtime override | Platform default | +| `model_id` | AI model for tasks | Platform default (Sonnet 4) | +| `max_turns` | Turn limit per task | 100 | +| `max_budget_usd` | Cost ceiling per task | Unlimited | +| `system_prompt_overrides` | Custom system instructions | None | +| `github_token_secret_arn` | Repo-specific GitHub token | Platform default | +| `poll_interval_ms` | Completion polling frequency | 30000ms | + +Task-level parameters override Blueprint defaults. If neither specifies a value, platform defaults apply. + +## Common Issues + +- **422 "Repository not onboarded"** — Blueprint hasn't been deployed yet. Add the construct and redeploy. 
+- **Preflight failures after onboarding** — GitHub PAT may lack permissions for the new repo. Check the PAT's fine-grained access includes the target repository with Contents (read/write) and Pull requests (read/write) permissions. +- **400 "Invocation with on-demand throughput isn't supported"** — The Blueprint `modelId` is using a raw foundation model ID instead of an inference profile ID. Change e.g. `anthropic.claude-opus-4-20250514-v1:0` to `us.anthropic.claude-opus-4-20250514-v1:0`. +- **403 "not authorized to perform bedrock:InvokeModelWithResponseStream"** — The runtime IAM role lacks permissions for the model specified in the Blueprint. Add `grantInvoke` for both the model and its cross-region inference profile in `agent.ts`. diff --git a/abca-plugin/skills/setup/SKILL.md b/abca-plugin/skills/setup/SKILL.md new file mode 100644 index 0000000..5bcbfc2 --- /dev/null +++ b/abca-plugin/skills/setup/SKILL.md @@ -0,0 +1,99 @@ +--- +name: setup +description: >- + Guided installation and first-time setup for ABCA. Walks through prerequisites, + toolchain installation, dependency setup, and initial deployment. Use when the user + says "set up the project", "get started", "install", "first time setup", + "how do I start", "prerequisites", or is new to the project. +--- + +# ABCA First-Time Setup + +You are guiding a developer through the complete ABCA setup process. Work through each phase sequentially, verifying success before moving on. Use AskUserQuestion when you need input. + +## Phase 1: Verify Prerequisites + +Check each prerequisite and report status. 
Run these checks: + +```bash +# Check each tool +aws --version 2>/dev/null +docker --version 2>/dev/null +mise --version 2>/dev/null +node --version 2>/dev/null +cdk --version 2>/dev/null +yarn --version 2>/dev/null +``` + +**Required tools:** +- AWS CLI (configured with credentials for a dedicated AWS account) +- Docker (running — needed for local agent runs and CDK asset builds) +- mise (task runner and version manager — https://mise.jdx.dev/) +- Node.js 22.x (managed by mise) +- Yarn Classic 1.22.x (via Corepack) +- AWS CDK CLI >= 2.233.0 +- GitHub fine-grained PAT with repository access + +For any missing tool, provide the specific installation command for the user's platform. Do NOT proceed until all prerequisites are met. + +## Phase 2: Toolchain Setup + +Run these steps in order, verifying each: + +1. `mise trust` — Trust the project config +2. `mise install` — Install tool versions +3. `corepack enable && corepack prepare yarn@1.22.22 --activate` — Enable Yarn +4. Verify: `node --version` (should be v22.x), `yarn --version` (should be 1.22.x) +5. `export MISE_EXPERIMENTAL=1` — Required for namespaced tasks +6. `mise run install` — Install all workspace dependencies +7. `mise run build` — Full monorepo build (agent quality + CDK + CLI + docs) + +If `mise run install` fails with "yarn: command not found", Corepack wasn't activated. If `prek install` fails about `core.hooksPath`, another hook manager owns hooks — suggest `git config --unset-all core.hooksPath`. + +## Phase 3: One-Time AWS Setup + +```bash +aws xray update-trace-segment-destination --destination CloudWatchLogs +``` + +This must be run once per AWS account before first deployment. + +## Phase 4: First Deployment + +Guide through: + +1. `mise run //cdk:bootstrap` — Bootstrap CDK (if not already done for this account/region) +2. `mise run //cdk:deploy` — Deploy the stack (~9.5 minutes) +3. 
Retrieve stack outputs: + ```bash + aws cloudformation describe-stacks --stack-name backgroundagent-dev \ + --query 'Stacks[0].Outputs' --output table + ``` +4. Store the GitHub PAT in Secrets Manager using the `GitHubTokenSecretArn` output +5. Create a Cognito user (self-signup is disabled): + ```bash + aws cognito-idp admin-create-user --user-pool-id $USER_POOL_ID \ + --username user@example.com --temporary-password 'TempPass123!@#' + aws cognito-idp admin-set-user-password --user-pool-id $USER_POOL_ID \ + --username user@example.com --password 'YourPermanentPass123!@#' --permanent + ``` + +## Phase 5: Smoke Test + +1. Authenticate and get a JWT token +2. Test the API: `curl -s -H "Authorization: $TOKEN" $API_URL/tasks` +3. Configure the CLI: + ```bash + mise run //cli:build + node cli/lib/bin/bgagent.js configure \ + --api-url $API_URL --region $REGION \ + --user-pool-id $USER_POOL_ID --client-id $APP_CLIENT_ID + node cli/lib/bin/bgagent.js login --username user@example.com + ``` + +## Completion + +After all phases pass, summarize: +- Stack outputs (API URL, User Pool ID, etc.) +- Next steps: onboard a repository (use the `onboard-repo` skill) +- Point to the User Guide: https://aws-samples.github.io/sample-autonomous-cloud-coding-agents/user-guide/introduction/ diff --git a/abca-plugin/skills/submit-task/SKILL.md b/abca-plugin/skills/submit-task/SKILL.md new file mode 100644 index 0000000..1bd0363 --- /dev/null +++ b/abca-plugin/skills/submit-task/SKILL.md @@ -0,0 +1,136 @@ +--- +name: submit-task +description: >- + Submit a coding task to the ABCA platform via CLI or REST API. Guides prompt + quality, task type selection, and cost controls. Use when the user says "submit a task", + "create a task", "run the agent", "send task to agent", "bgagent submit", + "new_task", "pr_iteration", "pr_review", "review a PR", or wants to automate coding work. +--- + +# Submit a Coding Task + +You are helping the user submit a well-crafted coding task to the ABCA platform. 
Good prompts are critical — the agent works autonomously without asking clarifying questions. + +## Step 1: Determine Task Type + +Use AskUserQuestion to understand what the user wants: + +| Type | When to use | Outcome | +|------|------------|---------| +| `new_task` | Implement a feature, fix a bug, refactor code | Creates a branch + opens a PR | +| `pr_iteration` | Address review feedback on an existing PR | Updates the existing PR | +| `pr_review` | Get a structured code review (read-only) | Posts review comments | + +## Step 2: Collect Task Details + +Based on the task type, gather: + +**For `new_task`:** +- Repository (`owner/repo`) +- GitHub issue number (preferred — agent fetches full context) OR text description +- If using text: the desired end state, scope constraints, acceptance criteria + +**For `pr_iteration`:** +- Repository (`owner/repo`) +- PR number +- Optional: Additional guidance ("Focus on security feedback only") + +**For `pr_review`:** +- Repository (`owner/repo`) +- PR number +- Optional: Review focus ("Check for SQL injection", "Review test coverage") + +## Step 3: Prompt Quality Check + +Before submitting, validate the prompt against these principles: + +**Good prompts:** +- Describe the desired END STATE, not step-by-step instructions +- Are specific about scope (one logical change per task) +- State preconditions and constraints ("Use React 18, not 19") +- Define verifiable goals ("npm test must pass") +- Provide examples when behavior is ambiguous +- Reference relevant files/modules (not specific line numbers) + +**Bad prompts (fix these before submitting):** +- Too vague: "Fix the bug" -> "Fix 500 on POST /users when email has plus signs" +- Too prescriptive: "Change line 42" -> "Increase auth timeout to 10s for slow connections" +- Kitchen sink: "Fix login, add dark mode, update README" -> Split into separate tasks +- Missing context: "Make it work like the other service" -> Describe explicitly + +If the prompt needs improvement, 
suggest a rewritten version before submitting. + +## Step 4: Set Cost Controls + +Recommend appropriate limits based on task complexity: + +| Task complexity | Suggested max_turns | Suggested max_budget | +|----------------|--------------------|--------------------| +| Typo/config change | 10-30 | $1-5 | +| Bug fix (clear scope) | 50-100 | $5-20 | +| New feature | 100-200 | $20-50 | +| Large refactor | 200-500 | $50-100 | +| PR iteration | 30-100 | $5-20 | +| PR review | 30-80 | $5-10 | + +## Step 5: Submit + +**Via CLI (preferred):** +```bash +# From the repo root +node cli/lib/bin/bgagent.js submit \ + --repo owner/repo \ + --issue 42 \ + --max-turns 100 \ + --max-budget 20 \ + --wait # Optional: poll until completion +``` + +**CLI flag reference:** +- `--repo owner/repo` (required) +- `--issue N` — GitHub issue number +- `--task "description"` — Text description +- `--pr N` — PR number (sets type to pr_iteration) +- `--review-pr N` — PR number (sets type to pr_review) +- `--max-turns N` — Turn limit (1-500) +- `--max-budget N` — USD cost limit ($0.01-$100) +- `--idempotency-key KEY` — Deduplication key for safe retries +- `--wait` — Poll until terminal status +- `--output json|text` — Output format + +At least one of `--issue`, `--task`, `--pr`, or `--review-pr` is required. 
+ +**Via REST API:** +```bash +curl -X POST "$API_URL/tasks" \ + -H "Authorization: $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "owner/repo", + "issue_number": 42, + "task_type": "new_task", + "max_turns": 100, + "max_budget_usd": 20 + }' +``` + +## Step 6: Monitor + +After submission, show how to check status (substitute the task ID returned by the submit call): +```bash +node cli/lib/bin/bgagent.js status "$TASK_ID" +node cli/lib/bin/bgagent.js events "$TASK_ID" +node cli/lib/bin/bgagent.js list --status RUNNING +``` + +Task states: SUBMITTED -> HYDRATING -> RUNNING -> COMPLETED/FAILED/CANCELLED/TIMED_OUT + +## Input Mode Quick Reference + +- `--issue` only: Issue is well-written with clear requirements +- `--task` only: Ad-hoc work or pre-issue tasks +- `--issue + --task`: Issue context + scope narrowing or extra guidance +- `--pr` only: Address all PR review feedback +- `--pr + --task`: PR feedback + focused scope +- `--review-pr` only: Full structured code review +- `--review-pr + --task`: Focused review on specific concerns diff --git a/abca-plugin/skills/troubleshoot/SKILL.md b/abca-plugin/skills/troubleshoot/SKILL.md new file mode 100644 index 0000000..27b18dd --- /dev/null +++ b/abca-plugin/skills/troubleshoot/SKILL.md @@ -0,0 +1,188 @@ +--- +name: troubleshoot +description: >- + Diagnose and fix common ABCA issues: deployment failures, preflight errors, + authentication problems, agent failures, and build issues. Use when the user says + "troubleshoot", "debug", "not working", "error", "failed", "help me fix", + "preflight_failed", "task failed", "deploy failed", "auth error", "401", "422", "503", + or describes something not working as expected. +--- + +# ABCA Troubleshooting + +You are diagnosing an issue with the ABCA platform. Follow a systematic approach: gather symptoms, check the most common causes, and apply targeted fixes. + +## Step 1: Identify the Problem Category + +Determine which area the issue falls into: + +1. **Build/Compilation** — TypeScript errors, test failures, lint issues +2. 
**Deployment** — CDK deploy/synth failures, CloudFormation errors +3. **Authentication** — Cognito errors, token issues, 401 responses +4. **Task Submission** — 422 errors, validation failures, guardrail blocks +5. **Task Execution** — Preflight failures, agent failures, timeouts +6. **Local Agent Testing** — Docker issues, run.sh problems + +## Build/Compilation Issues + +```bash +export MISE_EXPERIMENTAL=1 +mise run //cdk:compile 2>&1 | tail -50 # TypeScript errors +mise run //cdk:test 2>&1 | tail -50 # Test failures +``` + +**Common causes:** +- Missing `mise run install` after pulling changes +- `yarn: command not found` — Run `corepack enable && corepack prepare yarn@1.22.22 --activate` +- Type mismatches after editing `cdk/src/handlers/shared/types.ts` without updating `cli/src/types.ts` + +## Deployment Issues + +```bash +# Check CloudFormation events for the failed stack +aws cloudformation describe-stack-events --stack-name backgroundagent-dev \ + --query 'StackEvents[?ResourceStatus==`CREATE_FAILED` || ResourceStatus==`UPDATE_FAILED`].[LogicalResourceId,ResourceStatusReason]' \ + --output table +``` + +**Common causes:** +- Docker not running — Required for CDK asset bundling +- Missing CDK bootstrap — Run `mise run //cdk:bootstrap` +- IAM permission issues — Check `aws sts get-caller-identity` +- Region mismatch — Ensure consistent region across all commands + +## Authentication Issues + +```bash +# Verify credentials +aws sts get-caller-identity + +# Check Cognito user exists +aws cognito-idp admin-get-user \ + --user-pool-id $USER_POOL_ID \ + --username user@example.com +``` + +**Common causes:** +- "App client does not exist" — Region mismatch between CLI config and stack deployment +- Token expired — Re-authenticate with `bgagent login` +- 401 on API calls — Token not included or malformed in Authorization header +- User not created — Self-signup is disabled; admin must create users + +## Task Submission Issues (422 / 400) + +**"Repository not 
onboarded" (422):** +- The repo needs a Blueprint construct. Use the `onboard-repo` skill. + +**"GUARDRAIL_BLOCKED" (400):** +- Task description triggered Bedrock Guardrails content screening +- Review and rephrase the task description to remove potentially flagged content + +**Validation errors:** +- Check required fields: `repo` is required, plus at least one of `issue_number`, `task_description`, `pr_number` +- `max_turns` range: 1-500 +- `max_budget_usd` range: $0.01-$100 + +## Task Execution Issues + +```bash +# Check task events for details +node cli/lib/bin/bgagent.js events "$TASK_ID" --output json +``` + +**`preflight_failed`:** +- GitHub PAT lacks permissions for the repo +- Repository doesn't exist or is private without proper token scope +- Check event `reason` and `detail` fields for specifics +- Verify PAT: fine-grained token must include the target repository with Contents (read/write), Pull Requests (read/write), Issues (read) + +**`task_failed` / task completes with 0 tokens and no PR:** +- Agent encountered an error during execution +- Check CloudWatch logs for the session: + ```bash + aws logs filter-log-events \ + --log-group-name "/aws/vendedlogs/bedrock-agentcore/runtime/APPLICATION_LOGS/jean_cloude" \ + --filter-pattern "$TASK_ID" \ + --region us-west-2 --query 'events[*].message' --output text + ``` +- Common: repo build/test commands not documented in CLAUDE.md + +**403 "not authorized to perform bedrock:InvokeModelWithResponseStream":** +- The Blueprint specifies a model that the runtime IAM role doesn't have permissions for +- Fix: add `grantInvoke` for the model and its cross-region inference profile in `cdk/src/stacks/agent.ts`, then redeploy + +**400 "Invocation with on-demand throughput isn't supported":** +- The Blueprint `modelId` uses a raw foundation model ID (e.g. `anthropic.claude-opus-4-20250514-v1:0`) +- Fix: change to the inference profile ID (e.g. 
`us.anthropic.claude-opus-4-20250514-v1:0`), then redeploy so the DynamoDB record is updated + +**503 "Too many connections" / task completes with 0 tokens after long duration:** +- Bedrock is throttling model invocations. The agent retries for minutes then gives up. +- Symptoms: task runs for 10-15 minutes, completes with `COMPLETED` status but 0 tokens, 0 cost, no PR, `disk_delta: 0 B` +- Diagnosis: + 1. Check application logs for `"text": "API Error: 503 Too many connections"` + 2. **Check what model_id is actually being passed** — the DynamoDB record may have a stale model override: + ```bash + aws dynamodb get-item \ + --table-name "$REPO_TABLE_NAME" \ + --key '{"repo": {"S": "owner/repo"}}' \ + --query 'Item.model_id' --output text + ``` +- Causes: + - **Stale model_id in DynamoDB** (most common) — the Blueprint `onUpdate` only sets fields present in props; removing a `modelId` prop does NOT remove the field from DynamoDB. The task keeps using the old model. + - Bedrock service-level throttling for the specific model (especially Opus 4 which has limited availability) + - Account quota limits reached +- Fix: + 1. **Check and fix the DynamoDB record first** — remove stale `model_id` if present: + ```bash + aws dynamodb update-item \ + --table-name "$REPO_TABLE_NAME" \ + --key '{"repo": {"S": "owner/repo"}}' \ + --update-expression "REMOVE model_id" + ``` + 2. If model_id is correct, wait and retry — throttling is often transient + 3. Switch to a model with higher availability (Sonnet 4.6 > Opus 4 > Haiku) + 4. 
Request a Bedrock quota increase for `InvokeModel` RPM on your model + +**`task_timed_out`:** +- 9-hour maximum exceeded +- Consider reducing scope or increasing `max_turns` for complex tasks +- Check if the agent is stuck in a loop (review logs) + +**Concurrency limit:** +- Default: 3 concurrent tasks per user +- Wait for running tasks to complete or cancel them + +## Local Agent Testing Issues + +```bash +# Verify Docker is running +docker info + +# Test locally with dry run +DRY_RUN=1 ./agent/run.sh "owner/repo" "Test task" +``` + +**Common causes:** +- Missing environment variables: `GITHUB_TOKEN`, `AWS_REGION` +- Docker not running or insufficient resources (needs 2 vCPU, 8 GB RAM) +- Missing AWS credentials for Bedrock access + +## Diagnostic Commands Quick Reference + +```bash +# Stack status +aws cloudformation describe-stacks --stack-name backgroundagent-dev --query 'Stacks[0].StackStatus' + +# Stack outputs +aws cloudformation describe-stacks --stack-name backgroundagent-dev --query 'Stacks[0].Outputs' --output table + +# Task status +node cli/lib/bin/bgagent.js status "$TASK_ID" +node cli/lib/bin/bgagent.js events "$TASK_ID" --output json + +# List running tasks +node cli/lib/bin/bgagent.js list --status RUNNING + +# Build health +mise run build +``` diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 7d16690..da99401 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -198,6 +198,20 @@ export class AgentStack extends Stack { // Grant the runtime permissions to invoke the inference profile inferenceProfile.grantInvoke(runtime); + const model3 = new bedrock.BedrockFoundationModel('anthropic.claude-opus-4-20250514-v1:0', { + supportsAgents: true, + supportsCrossRegion: true, + }); + + model3.grantInvoke(runtime); + + const inferenceProfile3 = bedrock.CrossRegionInferenceProfile.fromConfig({ + geoRegion: bedrock.CrossRegionInferenceProfileRegion.US, + model: model3, + }); + + inferenceProfile3.grantInvoke(runtime); + const model2 = new 
bedrock.BedrockFoundationModel('anthropic.claude-haiku-4-5-20251001-v1:0', { supportsAgents: true, supportsCrossRegion: true,