diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index e85de66..d3165a2 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -1,7 +1,7 @@
 {
   "$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
   "name": "braintrust-claude-plugin",
-  "version": "1.2.0",
+  "version": "1.3.0",
   "description": "Braintrust plugins for LLM evaluation, logging, and observability",
   "owner": {
     "name": "Braintrust",
@@ -12,14 +12,14 @@
       "name": "braintrust",
       "description": "Enables AI agents to use Braintrust for LLM evaluation, logging, and observability. Provides correct API usage, working examples, and helper scripts.",
-      "version": "1.1.0",
-      "source": "./",
+      "version": "1.3.0",
+      "source": "./plugins/braintrust",
       "category": "development"
     },
     {
       "name": "trace-claude-code",
       "description": "Automatically trace Claude Code conversations to Braintrust. Captures user messages, assistant responses, and tool calls for observability.",
-      "version": "1.0.0",
-      "source": "./skills/trace-claude-code",
+      "version": "1.1.0",
+      "source": "./plugins/trace-claude-code",
       "category": "observability"
     }
   ]
diff --git a/AGENTS.md b/AGENTS.md
index d05c756..9b0c185 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,5 +1,34 @@
 # Agent guidelines
 
+## About this repository
+
+This is the **Braintrust Claude Code plugin marketplace** - a repository that distributes Claude Code plugins for Braintrust integration.
+
+### Structure
+
+```
+braintrust-claude-plugin/
+├── .claude-plugin/
+│   └── marketplace.json   # Marketplace catalog (lists available plugins)
+├── plugins/
+│   ├── braintrust/        # Plugin: Braintrust evaluation & logging
+│   └── trace-claude-code/ # Plugin: Session tracing to Braintrust
+└── evals/                 # Evaluation suite for testing the plugins
+```
+
+### Plugins
+
+| Plugin | Description |
+|--------|-------------|
+| `braintrust` | Enables AI agents to use Braintrust for LLM evaluation, logging, and observability. Includes MCP server config and the `troubleshoot-braintrust-mcp` skill. |
+| `trace-claude-code` | Automatically traces Claude Code conversations to Braintrust. Uses hooks to capture sessions, turns, and tool calls. |
+
+### Terminology
+
+- **Marketplace**: A repository with a `marketplace.json` that catalogs multiple plugins for distribution
+- **Plugin**: An installable unit with its own `.claude-plugin/plugin.json` manifest
+- **Skill**: A capability within a plugin (e.g., `troubleshoot-braintrust-mcp` is a skill in the `braintrust` plugin)
+
 ## Style conventions
 
 - Use sentence case for all text (capitalize first word only, except for proper nouns and code references)
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e1323c9
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,51 @@
+# Developing the plugins
+
+## Prerequisites
+
+- Python 3.12+
+- [uv](https://docs.astral.sh/uv/) package manager
+
+## Local testing
+
+Test a plugin locally without installing it from the marketplace:
+
+```bash
+claude --plugin-dir /path/to/this-repo/plugins/<plugin-name>
+# example
+claude --plugin-dir /path/to/this-repo/plugins/braintrust
+```
+
+## Running evals
+
+The `evals/` directory contains tests that verify the plugins work correctly (e.g., Claude generates valid SQL queries, logs data properly).
+
+```bash
+cd evals
+export BRAINTRUST_API_KEY="your-key"
+
+# Run all evals
+uv run braintrust eval .
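+# Note: the e2e evals call the live Braintrust API and create
+# temporary skill-eval-* test projects, so the key must be valid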
+
+# Run specific eval
+uv run braintrust eval eval_e2e_log_fetch.py
+```
+
+## Pre-commit hooks
+
+```bash
+# Install hooks
+uv run pre-commit install
+
+# Run all hooks
+uv run pre-commit run --all-files
+```
+
+## Updating the plugins
+
+After making changes:
+
+1. Bump the version in the plugin's `plugins/<name>/.claude-plugin/plugin.json` and in `.claude-plugin/marketplace.json`
+2. Commit and push
+3. Users update with: `claude plugin marketplace update braintrust-claude-plugin`
diff --git a/README.md b/README.md
index 2a0520c..bd4611c 100644
--- a/README.md
+++ b/README.md
@@ -1,171 +1,66 @@
-# Braintrust Claude plugins
+# Braintrust Claude Code marketplace
 
-Claude Code plugins for Braintrust - LLM evaluation, logging, observability, and tracing.
+A Claude Code plugin marketplace for [Braintrust](https://braintrust.dev) integration - LLM evaluation, logging, observability, and session tracing.
 
-## Plugins
-
-### 1. Braintrust (evaluation & logging)
-
-Enables AI agents to use Braintrust for LLM evaluation, logging, and observability.
-
-```bash
-claude plugin marketplace add braintrustdata/braintrust-claude-plugin
-claude plugin install braintrust@braintrust-claude-plugin
-```
-
-### 2. Trace Claude Code (observability)
-
-Automatically trace Claude Code conversations to Braintrust.
-
-```bash
-claude plugin install trace-claude-code@braintrust-claude-plugin
-```
+## Prerequisites
 
-See [trace-claude-code/SKILL.md](skills/trace-claude-code/SKILL.md) for setup instructions.
+- A [Braintrust account](https://braintrust.dev)
+- `BRAINTRUST_API_KEY` exported in your environment
 
-## Agent skills
+## Installation
 
-This repo includes skills built on the open [Agent Skills](https://agentskills.io/home) format, compatible with Claude Code, Cursor, Amp, and other agents.
+Add the marketplace:
 
-**Install all skills:**
 ```bash
-curl -sL https://github.com/braintrustdata/braintrust-claude-plugin/archive/main.tar.gz | tar -xz -C ~/.claude/skills --strip-components=2 braintrust-claude-plugin-main/skills
-```
-
-Available skills:
-- [using-braintrust](skills/using-braintrust/SKILL.md) - Evaluation, logging, and SQL queries
-- [trace-claude-code](skills/trace-claude-code/SKILL.md) - Automatic conversation tracing
-
-## Setup
-
-Create a `.env` file in your project directory:
-
-```
-BRAINTRUST_API_KEY=your-api-key-here
-```
-
-The plugin scripts automatically load `.env` files from the current directory or parent directories.
-
-## What the plugin provides
-
-### Scripts
-
-The plugin includes ready-to-use scripts for common operations:
-
-**Query logs with SQL:**
-```bash
-uv run query_logs.py --project "My Project" --query "SELECT count(*) as count FROM logs WHERE created > now() - interval 1 day"
-```
-
-**Log data:**
-```bash
-uv run log_data.py --project "My Project" --input "hello" --output "world"
-```
-
-**Run evaluations:**
-```bash
-uv run run_eval.py --project "My Project" --data '[{"input": "test", "expected": "test"}]'
-```
-
-### SDK patterns
-
-The skill teaches Claude how to use the Braintrust SDK correctly:
-
-```python
-# Correct Eval() usage - project name is FIRST POSITIONAL arg
-braintrust.Eval(
-    "My Project",  # NOT project_name="My Project"
-    data=lambda: [...],
-    task=lambda input: ...,
-    scores=[Factuality],
-)
-
-# Logging with flush
-logger = braintrust.init_logger(project="My Project")
-logger.log(input="hello", output="world")
-logger.flush()  # Important!
-```
-
-### SQL query syntax
-
-The skill teaches Claude to write SQL queries for Braintrust logs:
-
-```sql
-SELECT input, output, created FROM logs WHERE created > now() - interval 1 day LIMIT 10
-```
-
-**SQL quirks in Braintrust:**
-- Use `hour()`, `day()`, `month()`, `year()` instead of `date_trunc()`
-- Intervals use format `interval 1 day` (no quotes, singular unit)
-
-## Project structure
-
-```
-braintrust-claude-plugin/
-├── .claude-plugin/
-│   ├── plugin.json          # Plugin manifest
-│   └── marketplace.json     # Marketplace index
-├── skills/
-│   ├── using-braintrust/
-│   │   ├── SKILL.md         # Evaluation & logging skill
-│   │   └── scripts/         # Helper scripts
-│   │       ├── query_logs.py
-│   │       ├── log_data.py
-│   │       └── run_eval.py
-│   └── trace-claude-code/
-│       ├── SKILL.md         # Claude Code tracing skill
-│       └── hooks/
-│           └── stop_hook.sh # Hook script
-├── evals/                   # Evaluation suite
-│   ├── eval_e2e_*.py        # End-to-end tests
-│   └── eval_*.py            # Baseline tests
-└── README.md
+claude plugin marketplace add braintrustdata/braintrust-claude-plugin
 ```
 
-## Development
+Then install the plugins you need:
 
-### Prerequisites
+## Plugins
 
-- Python 3.12+
-- [uv](https://docs.astral.sh/uv/) package manager
+### braintrust
 
-### Local testing
+Enables AI agents to use Braintrust for LLM evaluation, logging, and observability.
 
-Test the plugin without installing from marketplace:
+- Query Braintrust projects, experiments, datasets, and logs
+- Instrument your code with the Braintrust SDK and write evals
 
 ```bash
-claude --plugin-dir /path/to/braintrust-claude-plugin
+claude plugin install braintrust@braintrust-claude-plugin
 ```
 
-### Running evals
+### trace-claude-code
 
-The `evals/` directory contains tests that verify the skill works correctly (e.g., Claude generates valid SQL queries, logs data properly).
+Automatically traces Claude Code conversations to Braintrust. Captures sessions, conversation turns, and tool calls as hierarchical traces.
 
 ```bash
-cd evals
-export BRAINTRUST_API_KEY="your-key"
-
-# Run all evals
-uv run braintrust eval .
-
-# Run specific eval
-uv run braintrust eval eval_e2e_log_fetch.py
+claude plugin install trace-claude-code@braintrust-claude-plugin
 ```
 
-### Pre-commit hooks
+To enable tracing, add the following to your `~/.claude/settings.json` or your project's `.claude/settings.local.json`:
 
-```bash
-# Install hooks
-uv run pre-commit install
-
-# Run all hooks
-uv run pre-commit run --all-files
+```json
+{
+  "env": {
+    "TRACE_TO_BRAINTRUST": "true",
+    "BRAINTRUST_CC_PROJECT": "project-name-to-send-cc-traces-to"
+  }
+}
 ```
 
-## Updating the plugin
-
-After making changes:
-
-1. Bump version in `.claude-plugin/plugin.json` and `.claude-plugin/marketplace.json`
-2. Commit and push
-3. Users update with: `claude plugin marketplace update braintrust-claude-plugin`
+If `BRAINTRUST_CC_PROJECT` is not set, traces are sent to a project named `claude-code` by default.
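+
+## Verifying your setup
+
+To confirm that `BRAINTRUST_API_KEY` is picked up, log a single test record with the Braintrust SDK. A minimal sketch - the project name here is just an example:
+
+```python
+import braintrust
+
+logger = braintrust.init_logger(project="My Project")
+logger.log(input="hello", output="world")
+logger.flush()  # flush before exiting so the record is actually sent
+```
+
+The record should then appear in that project's logs at braintrust.dev.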
diff --git a/evals/eval_datasets.py b/evals/eval_datasets.py index fa4ac90..1034f36 100644 --- a/evals/eval_datasets.py +++ b/evals/eval_datasets.py @@ -142,7 +142,6 @@ def baseline_task(input_str): scores=[criteria_scorer], metadata={ "description": "Tests agent's ability to create and manage Braintrust datasets", - "skill": "using-braintrust", "category": "datasets", }, ) diff --git a/evals/eval_docs_search.py b/evals/eval_docs_search.py index 9b6d96b..6856b11 100644 --- a/evals/eval_docs_search.py +++ b/evals/eval_docs_search.py @@ -159,7 +159,6 @@ def baseline_task(input: str) -> str: scores=[criteria_scorer], metadata={ "description": "Tests agent's ability to answer Braintrust documentation questions", - "skill": "using-braintrust", "category": "docs_search", }, ) diff --git a/evals/eval_e2e_eval_improve.py b/evals/eval_e2e_eval_improve.py index 14890b6..8c4eaf5 100644 --- a/evals/eval_e2e_eval_improve.py +++ b/evals/eval_e2e_eval_improve.py @@ -14,6 +14,7 @@ """ import asyncio +import os import sys import uuid from pathlib import Path @@ -30,41 +31,43 @@ TEST_RUN_ID = str(uuid.uuid4())[:8] TEST_PROJECT_NAME = f"skill-eval-experiment-{TEST_RUN_ID}" -# Load skill content -SKILL_PATH = Path(__file__).parent.parent / "skill" / "SKILL.md" -SKILL_CONTENT = SKILL_PATH.read_text() if SKILL_PATH.exists() else "" - # Setup Claude Agent SDK patching setup_claude_agent_sdk() +# MCP server configuration for Braintrust +MCP_SERVERS = { + "braintrust": { + "type": "http", + "url": "https://api.braintrust.dev/mcp", + "headers": {"Authorization": f"Bearer {os.environ.get('BRAINTRUST_API_KEY', '')}"}, + } +} + -async def run_claude_agent(prompt: str, max_turns: int = 15, use_skill: bool = True) -> dict: +async def run_claude_agent(prompt: str, max_turns: int = 15, use_mcp: bool = True) -> dict: """ Run Claude Agent with code execution enabled. + + Args: + prompt: The prompt to send to Claude + max_turns: Maximum number of conversation turns + use_mcp: If True, connect the Braintrust MCP server for enhanced capabilities """ - base_prompt = """You are an expert at Braintrust, an LLM evaluation platform. + system_prompt = """You are an expert at Braintrust, an LLM evaluation platform. You have access to code execution. Use Python to complete the tasks. -Be concise and execute code directly - don't just explain.""" +Be concise and execute code directly - don't just explain. - if use_skill and SKILL_CONTENT: - system_prompt = f"""{base_prompt} - -Here is the reference documentation for using Braintrust: - -{SKILL_CONTENT} - -Follow the examples in the documentation exactly. Pay special attention to: +When running evals, use braintrust.Eval() with proper task and scorer functions. 
+Pay special attention to: - Eval() takes the project name as the FIRST POSITIONAL argument, not a keyword argument - Always call logger.flush() after logging""" - else: - system_prompt = f"""{base_prompt} -When running evals, use braintrust.Eval() with proper task and scorer functions.""" options = ClaudeAgentOptions( model="claude-sonnet-4-5-20250929", system_prompt=system_prompt, max_turns=max_turns, permission_mode="bypassPermissions", + mcp_servers=MCP_SERVERS if use_mcp else {}, ) success = False @@ -319,7 +322,6 @@ def eval_ran_scorer(output: dict, expected: dict, **kwargs) -> Score: scores=[experiments_created_scorer, task_completed_scorer, eval_ran_scorer], metadata={ "description": "Tests Claude's ability to create and run experiments, verified via Braintrust API", - "skill": "using-braintrust", "category": "e2e", "test_run_id": TEST_RUN_ID, "test_project": TEST_PROJECT_NAME, diff --git a/evals/eval_e2e_log_fetch.py b/evals/eval_e2e_log_fetch.py index 095b236..8b2d28a 100644 --- a/evals/eval_e2e_log_fetch.py +++ b/evals/eval_e2e_log_fetch.py @@ -12,6 +12,7 @@ """ import asyncio +import os import sys import uuid from pathlib import Path @@ -28,42 +29,44 @@ TEST_RUN_ID = str(uuid.uuid4())[:8] TEST_PROJECT_NAME = f"skill-eval-e2e-{TEST_RUN_ID}" -# Load skill content -SKILL_PATH = Path(__file__).parent.parent / "skill" / "SKILL.md" -SKILL_CONTENT = SKILL_PATH.read_text() if SKILL_PATH.exists() else "" - # Setup Claude Agent SDK patching (will trace within parent span context) setup_claude_agent_sdk() +# MCP server configuration for Braintrust +MCP_SERVERS = { + "braintrust": { + "type": "http", + "url": "https://api.braintrust.dev/mcp", + "headers": {"Authorization": f"Bearer {os.environ.get('BRAINTRUST_API_KEY', '')}"}, + } +} + -async def run_claude_agent(prompt: str, max_turns: int = 10, use_skill: bool = True) -> dict: +async def run_claude_agent(prompt: str, max_turns: int = 10, use_mcp: bool = True) -> dict: """ Run Claude Agent with code execution enabled and collect results. Returns dict with 'success', 'output', 'error' fields. + + Args: + prompt: The prompt to send to Claude + max_turns: Maximum number of conversation turns + use_mcp: If True, connect the Braintrust MCP server for enhanced capabilities """ - base_prompt = """You are an expert at Braintrust, an LLM evaluation platform. + system_prompt = """You are an expert at Braintrust, an LLM evaluation platform. You have access to code execution. Use Python to complete the tasks. -Be concise and execute code directly - don't just explain.""" - - if use_skill and SKILL_CONTENT: - system_prompt = f"""{base_prompt} - -Here is the reference documentation for using Braintrust: - -{SKILL_CONTENT} +Be concise and execute code directly - don't just explain. -Follow the examples in the documentation exactly. Pay special attention to: +Always use the braintrust SDK for logging and querying. 
+Pay special attention to: - Always call logger.flush() after logging to ensure data is sent - Use init_logger(project="name") to create a logger""" - else: - system_prompt = f"""{base_prompt} -Always use the braintrust SDK for logging and querying.""" options = ClaudeAgentOptions( model="claude-sonnet-4-5-20250929", system_prompt=system_prompt, max_turns=max_turns, permission_mode="bypassPermissions", + mcp_servers=MCP_SERVERS if use_mcp else {}, ) success = False @@ -354,7 +357,6 @@ def sql_query_scorer(output: dict, expected: dict, **kwargs) -> Score: scores=[logs_created_scorer, correct_count_scorer, task_completed_scorer], metadata={ "description": "Tests Claude's ability to log data, verified by querying Braintrust directly", - "skill": "using-braintrust", "category": "e2e", "test_run_id": TEST_RUN_ID, "test_project": TEST_PROJECT_NAME, @@ -369,7 +371,6 @@ def sql_query_scorer(output: dict, expected: dict, **kwargs) -> Score: scores=[sql_query_scorer, task_completed_scorer], metadata={ "description": "Tests Claude's ability to query logs using SQL syntax", - "skill": "using-braintrust", "category": "e2e", "test_run_id": TEST_RUN_ID, }, diff --git a/evals/eval_experiments.py b/evals/eval_experiments.py index 4e7993a..e77d49b 100644 --- a/evals/eval_experiments.py +++ b/evals/eval_experiments.py @@ -142,7 +142,6 @@ def baseline_task(input_str): scores=[criteria_scorer], metadata={ "description": "Tests agent's ability to create, run, and analyze Braintrust experiments", - "skill": "using-braintrust", "category": "experiments", }, ) diff --git a/evals/eval_log_querying.py b/evals/eval_log_querying.py index cd2dea2..0ed0a3c 100644 --- a/evals/eval_log_querying.py +++ b/evals/eval_log_querying.py @@ -126,7 +126,6 @@ def baseline_task(input_data): scores=[criteria_scorer], metadata={ "description": "Tests agent's ability to write correct SQL queries and log operations", - "skill": "using-braintrust", "category": "log_querying", }, ) diff --git a/.claude-plugin/plugin.json b/plugins/braintrust/.claude-plugin/plugin.json similarity index 92% rename from .claude-plugin/plugin.json rename to plugins/braintrust/.claude-plugin/plugin.json index 5d859ea..a14383a 100644 --- a/.claude-plugin/plugin.json +++ b/plugins/braintrust/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "braintrust", "description": "Enables AI agents to use Braintrust for LLM evaluation, logging, and observability. Provides correct API usage, working examples, and helper scripts for common operations.", - "version": "1.1.0", + "version": "1.3.0", "author": { "name": "Braintrust" } diff --git a/plugins/braintrust/.mcp.json b/plugins/braintrust/.mcp.json new file mode 100644 index 0000000..16048c4 --- /dev/null +++ b/plugins/braintrust/.mcp.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "braintrust": { + "type": "http", + "url": "https://api.braintrust.dev/mcp", + "headers": { + "Authorization": "Bearer ${BRAINTRUST_API_KEY}" + } + } + } +} diff --git a/plugins/braintrust/skills/troubleshoot-braintrust-mcp/SKILL.md b/plugins/braintrust/skills/troubleshoot-braintrust-mcp/SKILL.md new file mode 100644 index 0000000..24f566e --- /dev/null +++ b/plugins/braintrust/skills/troubleshoot-braintrust-mcp/SKILL.md @@ -0,0 +1,46 @@ +--- +name: troubleshoot-braintrust-mcp +description: | + This plugin auto-configures a "braintrust" MCP server. If you can't see it or reach it, activate this skill +version: 1.0.0 +--- + +This Claude plugin automatically sets up a Braintrust MCP connection. 
It authenticates using the `BRAINTRUST_API_KEY` environment variable.
+
+## Troubleshooting steps
+
+### 1. Verify the environment variable is set
+
+Run `echo $BRAINTRUST_API_KEY` to check if the variable is exported.
+
+API keys can be created at https://www.braintrust.dev/app/settings?subroute=api-keys
+
+### 2. Verify the API key is valid
+
+Test the key by calling the Braintrust API:
+
+```bash
+curl -s https://api.braintrust.dev/api/self/me -H "Authorization: Bearer $BRAINTRUST_API_KEY"
+```
+
+- If valid: returns JSON with user info (id, email, organizations, etc.)
+- If invalid: returns an authentication error
+
+NOTE: Even if you can reach the API with curl, continue to attempt MCP setup. HTTP requests are a troubleshooting tool, not a replacement for MCP.
+
+### 3. Check if the MCP server is reachable
+
+If the key is valid but the connection still fails, check if the MCP server is up:
+
+```bash
+curl -s -o /dev/null -w "%{http_code}" https://api.braintrust.dev/mcp
+```
+
+- Any HTTP response (even 401 or 405) means the server is reachable
+- Connection timeout or "connection refused" means the server may be down
+
+### 4. Contact support
+
+If nothing else works, encourage the user to reach out:
+- Discord: https://discord.com/invite/6G8s47F44X
+- Email: support@braintrust.dev
diff --git a/plugins/trace-claude-code/.claude-plugin/plugin.json b/plugins/trace-claude-code/.claude-plugin/plugin.json
new file mode 100644
index 0000000..ccf7d47
--- /dev/null
+++ b/plugins/trace-claude-code/.claude-plugin/plugin.json
@@ -0,0 +1,8 @@
+{
+  "name": "trace-claude-code",
+  "description": "Automatically trace Claude Code conversations to Braintrust for observability. Captures sessions, conversation turns, and tool calls as hierarchical traces.",
+  "version": "1.1.0",
+  "author": {
+    "name": "Braintrust"
+  }
+}
diff --git a/skills/trace-claude-code/hooks/common.sh b/plugins/trace-claude-code/hooks/common.sh
similarity index 96%
rename from skills/trace-claude-code/hooks/common.sh
rename to plugins/trace-claude-code/hooks/common.sh
index 237b6e7..452d613 100755
--- a/skills/trace-claude-code/hooks/common.sh
+++ b/plugins/trace-claude-code/hooks/common.sh
@@ -88,13 +88,14 @@ check_requirements() {
     return 0
 }
 
-# Get or create project ID (cached)
+# Get or create project ID (cached per project name)
 get_project_id() {
     local name="$1"
+    local cache_key="project_id_$name"
 
     # Check cache first
     local cached_id
-    cached_id=$(get_state_value "project_id")
+    cached_id=$(get_state_value "$cache_key")
     if [ -n "$cached_id" ]; then
         echo "$cached_id"
         return 0
@@ -112,7 +113,7 @@ get_project_id() {
     pid=$(echo "$resp" | jq -r '.id // empty' 2>/dev/null)
 
     if [ -n "$pid" ]; then
-        set_state_value "project_id" "$pid"
+        set_state_value "$cache_key" "$pid"
         echo "$pid"
         return 0
     fi
@@ -124,7 +125,7 @@ get_project_id() {
     pid=$(echo "$resp" | jq -r '.id // empty' 2>/dev/null)
 
     if [ -n "$pid" ]; then
-        set_state_value "project_id" "$pid"
+        set_state_value "$cache_key" "$pid"
        echo "$pid"
         return 0
     fi
diff --git a/plugins/trace-claude-code/hooks/hooks.json b/plugins/trace-claude-code/hooks/hooks.json
new file mode 100644
index 0000000..0b5db46
--- /dev/null
+++ b/plugins/trace-claude-code/hooks/hooks.json
@@ -0,0 +1,55 @@
+{
+  "hooks": {
+    "SessionStart": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/session_start.sh"
+          }
+        ]
+      }
+    ],
+    "UserPromptSubmit": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash 
${CLAUDE_PLUGIN_ROOT}/hooks/user_prompt_submit.sh" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/post_tool_use.sh" + } + ] + } + ], + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/stop_hook.sh" + } + ] + } + ], + "SessionEnd": [ + { + "hooks": [ + { + "type": "command", + "command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/session_end.sh" + } + ] + } + ] + } +} diff --git a/skills/trace-claude-code/hooks/post_tool_use.sh b/plugins/trace-claude-code/hooks/post_tool_use.sh similarity index 100% rename from skills/trace-claude-code/hooks/post_tool_use.sh rename to plugins/trace-claude-code/hooks/post_tool_use.sh diff --git a/skills/trace-claude-code/hooks/session_end.sh b/plugins/trace-claude-code/hooks/session_end.sh similarity index 100% rename from skills/trace-claude-code/hooks/session_end.sh rename to plugins/trace-claude-code/hooks/session_end.sh diff --git a/skills/trace-claude-code/hooks/session_start.sh b/plugins/trace-claude-code/hooks/session_start.sh similarity index 98% rename from skills/trace-claude-code/hooks/session_start.sh rename to plugins/trace-claude-code/hooks/session_start.sh index 7a584bf..620110a 100755 --- a/skills/trace-claude-code/hooks/session_start.sh +++ b/plugins/trace-claude-code/hooks/session_start.sh @@ -9,6 +9,7 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" source "$SCRIPT_DIR/common.sh" debug "SessionStart hook triggered" +debug "TRACE_TO_BRAINTRUST=$TRACE_TO_BRAINTRUST" tracing_enabled || { debug "Tracing disabled"; exit 0; } check_requirements || exit 0 diff --git a/skills/trace-claude-code/hooks/stop_hook.sh b/plugins/trace-claude-code/hooks/stop_hook.sh similarity index 100% rename from skills/trace-claude-code/hooks/stop_hook.sh rename to plugins/trace-claude-code/hooks/stop_hook.sh diff --git a/skills/trace-claude-code/hooks/user_prompt_submit.sh b/plugins/trace-claude-code/hooks/user_prompt_submit.sh similarity index 100% rename from skills/trace-claude-code/hooks/user_prompt_submit.sh rename to plugins/trace-claude-code/hooks/user_prompt_submit.sh diff --git a/skills/trace-claude-code/setup.sh b/plugins/trace-claude-code/setup.sh similarity index 100% rename from skills/trace-claude-code/setup.sh rename to plugins/trace-claude-code/setup.sh diff --git a/skills/trace-claude-code/SKILL.md b/skills/trace-claude-code/SKILL.md deleted file mode 100644 index 0c1bda3..0000000 --- a/skills/trace-claude-code/SKILL.md +++ /dev/null @@ -1,247 +0,0 @@ ---- -name: trace-claude-code -description: | - Automatically trace Claude Code conversations to Braintrust for observability. - Captures sessions, conversation turns, and tool calls as hierarchical traces. -version: 1.1.0 ---- - -# Trace Claude Code to Braintrust - -Automatically send Claude Code conversations to Braintrust for tracing and observability. Get full visibility into your AI coding sessions with hierarchical traces showing sessions, turns, and every tool call. - -## What you get - -``` -Claude Code Session (root trace) -├── Turn 1: "Add error handling" -│ ├── Read: src/app.ts -│ ├── Edit: src/app.ts -│ └── Response: "I've added try-catch..." -├── Turn 2: "Now run the tests" -│ ├── Terminal: npm test -│ └── Response: "All tests pass..." -└── Turn 3: "Great, commit this" - ├── Terminal: git add . - ├── Terminal: git commit -m "..." - └── Response: "Changes committed..." 
-``` - -## How it works - -Four hooks capture the complete workflow: - -| Hook | What it captures | -|------|------------------| -| **SessionStart** | Creates root trace when you start Claude Code | -| **PostToolUse** | Captures every tool call (file reads, edits, terminal commands) | -| **Stop** | Captures conversation turns (your message + Claude's response) | -| **SessionEnd** | Logs session summary when you exit | - -## Quick setup - -Run the setup script in any project directory where you want tracing: - -```bash -bash /path/to/skills/trace-claude-code/setup.sh -``` - -The script prompts for your API key and project name, then configures all hooks automatically. - -## Manual setup - -### Prerequisites - -- [Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code) installed -- [Braintrust API key](https://www.braintrust.dev/app/settings/api-keys) -- `jq` command-line tool (`brew install jq` on macOS) - -### Configuration - -Create `.claude/settings.local.json` in your project directory: - -```json -{ - "hooks": { - "SessionStart": [ - { - "hooks": [ - { - "type": "command", - "command": "bash /path/to/hooks/session_start.sh" - } - ] - } - ], - "PostToolUse": [ - { - "matcher": "*", - "hooks": [ - { - "type": "command", - "command": "bash /path/to/hooks/post_tool_use.sh" - } - ] - } - ], - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "bash /path/to/hooks/stop_hook.sh" - } - ] - } - ], - "SessionEnd": [ - { - "hooks": [ - { - "type": "command", - "command": "bash /path/to/hooks/session_end.sh" - } - ] - } - ] - }, - "env": { - "TRACE_TO_BRAINTRUST": "true", - "BRAINTRUST_API_KEY": "sk-...", - "BRAINTRUST_CC_PROJECT": "my-project" - } -} -``` - -Replace `/path/to/hooks/` with the actual path to this skill's hooks directory. - -### Environment variables - -| Variable | Required | Description | -|----------|----------|-------------| -| `TRACE_TO_BRAINTRUST` | Yes | Set to `"true"` to enable tracing | -| `BRAINTRUST_API_KEY` | Yes | Your Braintrust API key | -| `BRAINTRUST_CC_PROJECT` | No | Project name (default: `claude-code`) | -| `BRAINTRUST_CC_DEBUG` | No | Set to `"true"` for verbose logging | - -## Viewing traces - -After running Claude Code with tracing enabled: - -1. Go to [braintrust.dev](https://www.braintrust.dev) -2. Navigate to your project (e.g., `claude-code`) -3. Click **Logs** to see all traced sessions - -Each trace shows: -- **Session root**: The overall Claude Code session -- **Turns**: Each conversation exchange (user input → assistant response) -- **Tool calls**: Individual operations (file reads, edits, terminal commands) - -## Trace structure - -Traces are hierarchical: - -- **Session** (root span) - - `span_attributes.type`: `"task"` - - `metadata.session_id`: Unique session identifier - - `metadata.workspace`: Project directory - -- **Turn** (child of session) - - `span_attributes.type`: `"llm"` - - `input`: User message - - `output`: Assistant response - - `metadata.turn_number`: Sequential turn number - -- **Tool call** (child of turn or session) - - `span_attributes.type`: `"tool"` - - `input`: Tool input (file path, command, etc.) - - `output`: Tool result - - `metadata.tool_name`: Name of the tool used - -## Troubleshooting - -### No traces appearing - -1. **Check hooks are running:** - ```bash - tail -f ~/.claude/state/braintrust_hook.log - ``` - -2. **Verify environment variables** in `.claude/settings.local.json`: - - `TRACE_TO_BRAINTRUST` must be `"true"` - - `BRAINTRUST_API_KEY` must be valid - -3. 
**Enable debug mode:** - ```json - { - "env": { - "BRAINTRUST_CC_DEBUG": "true" - } - } - ``` - -### Permission errors - -Make hook scripts executable: - -```bash -chmod +x /path/to/hooks/*.sh -``` - -### Missing jq command - -Install jq: -- **macOS**: `brew install jq` -- **Ubuntu/Debian**: `sudo apt-get install jq` - -### State issues - -Reset the tracing state: - -```bash -rm ~/.claude/state/braintrust_state.json -``` - -### Hook logs - -View detailed hook execution logs: - -```bash -# Follow logs in real-time -tail -f ~/.claude/state/braintrust_hook.log - -# View last 50 lines -tail -50 ~/.claude/state/braintrust_hook.log - -# Clear logs -> ~/.claude/state/braintrust_hook.log -``` - -## File structure - -``` -hooks/ -├── common.sh # Shared utilities (logging, API, state) -├── session_start.sh # Creates root trace span -├── post_tool_use.sh # Captures tool calls -├── stop_hook.sh # Captures conversation turns -└── session_end.sh # Finalizes trace -``` - -## Alternative: SDK integration - -For programmatic use with the Claude Agent SDK, use the native Braintrust integration: - -```typescript -import { initLogger, wrapClaudeAgentSDK } from "braintrust"; -import * as claudeSDK from "@anthropic-ai/claude-agent-sdk"; - -initLogger({ - projectName: "my-project", - apiKey: process.env.BRAINTRUST_API_KEY, -}); - -const { query, tool } = wrapClaudeAgentSDK(claudeSDK); -``` - -See [Braintrust Claude Agent SDK docs](https://www.braintrust.dev/docs/integrations/sdk-integrations/claude-agent-sdk) for details. diff --git a/skills/using-braintrust/SKILL.md b/skills/using-braintrust/SKILL.md deleted file mode 100644 index 0cc455b..0000000 --- a/skills/using-braintrust/SKILL.md +++ /dev/null @@ -1,170 +0,0 @@ ---- -name: using-braintrust -description: | - Enables AI agents to use Braintrust for LLM evaluation, logging, and observability. - Includes scripts for querying logs with SQL, running evals, and logging data. -version: 1.0.0 ---- - -# Using Braintrust - -Braintrust is a platform for evaluating, logging, and monitoring LLM applications. - -## Listing projects - -Use `scripts/list_projects.py` to see all available projects: - -```bash -uv run /path/to/scripts/list_projects.py -``` - -## Querying logs with SQL - -Use the `query_logs.py` script to run SQL queries against Braintrust logs. - -**Always share the SQL query you used** when reporting results, so the user understands what was executed. 
- -**Script location:** `scripts/query_logs.py` (relative to this file) - -**Run from the user's project directory** (where `.env` with `BRAINTRUST_API_KEY` exists): - -```bash -uv run /path/to/scripts/query_logs.py --project "Project Name" --query "SQL_QUERY" -``` - -### Common queries - -**Count logs from last 24 hours:** -```sql -SELECT count(*) as count FROM logs WHERE created > now() - interval 1 day -``` - -**Get recent logs:** -```sql -SELECT input, output, created FROM logs ORDER BY created DESC LIMIT 10 -``` - -**Filter by metadata:** -```sql -SELECT input, output FROM logs WHERE metadata.user_id = 'user123' LIMIT 20 -``` - -**Filter by time range:** -```sql -SELECT * FROM logs WHERE created > now() - interval 7 day LIMIT 50 -``` - -**Aggregate by field:** -```sql -SELECT metadata.model, count(*) as count FROM logs GROUP BY metadata.model -``` - -**Group by hour:** -```sql -SELECT hour(created) as hr, count(*) as count FROM logs GROUP BY hour(created) -``` - -### SQL quirks in Braintrust - -- **Time functions**: Use `hour()`, `day()`, `month()`, `year()` instead of `date_trunc()` - - ✅ `hour(created)` - - ❌ `date_trunc('hour', created)` -- **Intervals**: Use `interval 1 day`, `interval 7 day`, `interval 1 hour` (no quotes, singular unit) -- **Nested fields**: Use dot notation: `metadata.user_id`, `scores.Factuality`, `metrics.duration` -- **Table name**: Always use `FROM logs` (the script handles project scoping) - -### SQL reference - -**Operators:** -- `=`, `!=`, `>`, `<`, `>=`, `<=` -- `IS NULL`, `IS NOT NULL` -- `LIKE 'pattern%'` -- `AND`, `OR`, `NOT` - -**Aggregations:** -- `count(*)`, `count(field)` -- `avg(field)`, `sum(field)` -- `min(field)`, `max(field)` - -**Time filters:** -- `created > now() - interval 1 day` -- `created > now() - interval 7 day` -- `created > now() - interval 1 hour` - -## Logging data - -Use `scripts/log_data.py` to log data to a project: - -```bash -uv run /path/to/scripts/log_data.py --project "Project Name" --input "query" --output "response" -``` - -With metadata: -```bash ---input "query" --output "response" --metadata '{"user_id": "123"}' -``` - -Batch from JSON: -```bash ---data '[{"input": "a", "output": "b"}, {"input": "c", "output": "d"}]' -``` - -## Running evaluations - -Use `scripts/run_eval.py` to run evaluations: - -```bash -uv run /path/to/scripts/run_eval.py --project "Project Name" --data '[{"input": "test", "expected": "test"}]' -``` - -From file: -```bash ---data-file test_cases.json --scorer factuality -``` - -## Setup - -Create a `.env` file in your project directory: - -``` -BRAINTRUST_API_KEY=your-api-key-here -``` - -## Writing evaluation code (SDK) - -For custom evaluation logic, use the SDK directly. - -**IMPORTANT**: First argument to `Eval()` is the project name (positional). - -```python -import braintrust -from autoevals import Factuality - -braintrust.Eval( - "My Project", # Project name (required, positional) - data=lambda: [{"input": "What is 2+2?", "expected": "4"}], - task=lambda input: my_llm_call(input), - scores=[Factuality], -) -``` - -**Common mistakes:** -- ❌ `Eval(project_name="My Project", ...)` - Wrong! -- ❌ `Eval(name="My Project", ...)` - Wrong! -- ✅ `Eval("My Project", data=..., task=..., scores=...)` - Correct! - -## Writing logging code (SDK) - -```python -import braintrust - -logger = braintrust.init_logger(project="My Project") -logger.log(input="query", output="response", metadata={"user_id": "123"}) -logger.flush() # Always flush! 
-``` - -## Common issues - -- **"Eval() got an unexpected keyword argument 'project_name'"**: Use positional argument -- **Logs not appearing**: Call `logger.flush()` after logging -- **Authentication errors**: Create `.env` file with `BRAINTRUST_API_KEY=your-key` diff --git a/skills/using-braintrust/scripts/_common.py b/skills/using-braintrust/scripts/_common.py deleted file mode 100644 index e1b4226..0000000 --- a/skills/using-braintrust/scripts/_common.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Common utilities for Braintrust scripts. - -This module provides shared functionality for loading environment variables, -checking API keys, and initializing the Braintrust SDK. -""" - -import os -import sys -from pathlib import Path - -from dotenv import load_dotenv - - -def load_env(): - """Load environment from .env file in current directory or parents.""" - for path in [Path.cwd(), *Path.cwd().parents]: - env_file = path / ".env" - if env_file.exists(): - load_dotenv(env_file) - return True - return False - - -def require_api_key(): - """Ensure BRAINTRUST_API_KEY is set, exit with error if not.""" - if not os.environ.get("BRAINTRUST_API_KEY"): - print("Error: BRAINTRUST_API_KEY not found.", file=sys.stderr) - print("Set it via environment variable or create a .env file with:", file=sys.stderr) - print(' BRAINTRUST_API_KEY="your-api-key"', file=sys.stderr) - sys.exit(1) - - -def init_braintrust(): - """ - Initialize Braintrust SDK: load env, check API key, and login. - - This handles API URL discovery automatically via the login endpoint. - Supports BRAINTRUST_APP_URL env var for alternate deployments. - """ - import braintrust - - load_env() - require_api_key() - braintrust.login() - - -def get_api_conn(): - """Get the Braintrust API connection.""" - import braintrust - - braintrust.login() # No-op if already logged in - return braintrust.api_conn() diff --git a/skills/using-braintrust/scripts/list_projects.py b/skills/using-braintrust/scripts/list_projects.py deleted file mode 100644 index f53fff0..0000000 --- a/skills/using-braintrust/scripts/list_projects.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.9" -# dependencies = ["braintrust", "python-dotenv"] -# /// -""" -List Braintrust projects. 
- -Usage: - uv run list_projects.py - uv run list_projects.py --limit 20 - -Environment variables: - BRAINTRUST_API_KEY: Your Braintrust API key (required) - BRAINTRUST_APP_URL: Braintrust app URL (default: https://www.braintrust.dev) -""" - -import argparse - -from _common import get_api_conn, init_braintrust - - -def main(): - parser = argparse.ArgumentParser(description="List Braintrust projects") - parser.add_argument("--limit", type=int, default=50, help="Maximum number of projects to list") - args = parser.parse_args() - - init_braintrust() - conn = get_api_conn() - - resp = conn.get("v1/project", params={"limit": args.limit}) - - if resp.status_code != 200: - print(f"Error: {resp.status_code} - {resp.text}") - return - - projects = resp.json().get("objects", []) - - if not projects: - print("No projects found.") - return - - print(f"Found {len(projects)} projects:\n") - for p in projects: - name = p.get("name", "unnamed") - project_id = p.get("id", "") - created = p.get("created", "")[:10] if p.get("created") else "" - print(f" - {name}") - print(f" ID: {project_id}") - if created: - print(f" Created: {created}") - print() - - -if __name__ == "__main__": - main() diff --git a/skills/using-braintrust/scripts/log_data.py b/skills/using-braintrust/scripts/log_data.py deleted file mode 100644 index ff40d1d..0000000 --- a/skills/using-braintrust/scripts/log_data.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.9" -# dependencies = ["braintrust", "python-dotenv"] -# /// -""" -Log data to a Braintrust project. - -Usage: - uv run log_data.py --project "My Project" --input "hello" --output "world" - uv run log_data.py --project "My Project" --data '[{"input": "a", "output": "b"}]' - -Environment variables: - BRAINTRUST_API_KEY: Your Braintrust API key (required) - BRAINTRUST_APP_URL: Braintrust app URL (default: https://www.braintrust.dev) -""" - -import argparse -import json -import sys - -import braintrust -from _common import load_env, require_api_key - - -def main(): - parser = argparse.ArgumentParser(description="Log data to Braintrust") - parser.add_argument("--project", required=True, help="Project name") - parser.add_argument("--input", help="Input value") - parser.add_argument("--output", help="Output value") - parser.add_argument("--expected", help="Expected value (optional)") - parser.add_argument("--metadata", help="JSON metadata (optional)") - parser.add_argument("--scores", help="JSON scores (optional)") - parser.add_argument("--data", help="JSON array of log entries") - parser.add_argument("--data-file", help="Path to JSON file with log entries") - args = parser.parse_args() - - load_env() - require_api_key() - - logger = braintrust.init_logger(project=args.project) - - # Batch logging - if args.data or args.data_file: - if args.data: - entries = json.loads(args.data) - else: - with open(args.data_file) as f: - entries = json.load(f) - - if not isinstance(entries, list): - entries = [entries] - - for entry in entries: - logger.log(**entry) - - logger.flush() - print(f"Logged {len(entries)} entries to project: {args.project}") - return - - # Single entry logging - if not args.input: - print("Error: Provide --input or --data/--data-file", file=sys.stderr) - sys.exit(1) - - log_kwargs = {"input": args.input} - - if args.output: - log_kwargs["output"] = args.output - if args.expected: - log_kwargs["expected"] = args.expected - if args.metadata: - log_kwargs["metadata"] = json.loads(args.metadata) - if args.scores: - 
log_kwargs["scores"] = json.loads(args.scores) - - logger.log(**log_kwargs) - logger.flush() - - print(f"Logged entry to project: {args.project}") - print(f" Input: {args.input}") - if args.output: - print(f" Output: {args.output}") - - -if __name__ == "__main__": - main() diff --git a/skills/using-braintrust/scripts/query_logs.py b/skills/using-braintrust/scripts/query_logs.py deleted file mode 100644 index 86ad46c..0000000 --- a/skills/using-braintrust/scripts/query_logs.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.9" -# dependencies = ["braintrust", "python-dotenv"] -# /// -""" -Execute a SQL query against Braintrust project logs. - -Usage: - uv run query_logs.py --project "My Project" --query "SELECT input, output FROM logs LIMIT 10" - uv run query_logs.py --project "My Project" --query "SELECT count(*) as count FROM logs WHERE created > now() - interval '1 day'" - -Environment variables: - BRAINTRUST_API_KEY: Your Braintrust API key (required) - BRAINTRUST_APP_URL: Braintrust app URL (default: https://www.braintrust.dev) -""" - -import argparse -import json -import re -import sys - -from _common import get_api_conn, init_braintrust - - -def get_project_id(project_name: str) -> str: - """Get project ID from name using the SDK's API connection.""" - conn = get_api_conn() - - # Try to get by name - resp = conn.get("v1/project", params={"project_name": project_name}) - if resp.status_code == 200: - projects = resp.json().get("objects", []) - if projects: - return projects[0]["id"] - - # Try listing all projects and matching by name - resp = conn.get("v1/project") - if resp.status_code == 200: - projects = resp.json().get("objects", []) - for p in projects: - if p.get("name", "").lower() == project_name.lower(): - return p["id"] - - print(f"Error: Project '{project_name}' not found", file=sys.stderr) - print("Available projects:", file=sys.stderr) - if resp.status_code == 200: - for p in resp.json().get("objects", [])[:10]: - print(f" - {p.get('name')}", file=sys.stderr) - sys.exit(1) - - -def run_sql(project_id: str, query: str) -> list[dict]: - """Execute SQL query against Braintrust logs using the SDK's API connection.""" - conn = get_api_conn() - - # Replace "FROM logs" with the project-scoped source - full_query = re.sub( - r"\bFROM\s+logs\b", f"FROM project_logs('{project_id}')", query, flags=re.IGNORECASE - ) - - resp = conn.post("btql", json={"query": full_query, "fmt": "json"}) - - if resp.status_code == 200: - return resp.json().get("data", []) - else: - print(f"Error: {resp.status_code} - {resp.text}", file=sys.stderr) - sys.exit(1) - - -def main(): - parser = argparse.ArgumentParser(description="Execute SQL query against Braintrust logs") - parser.add_argument("--project", required=True, help="Project name") - parser.add_argument( - "--query", required=True, help="SQL query (use 'FROM logs' for the project)" - ) - parser.add_argument( - "--format", choices=["json", "table"], default="table", help="Output format" - ) - args = parser.parse_args() - - init_braintrust() - - project_id = get_project_id(args.project) - - # Show the SQL query being executed - executed_query = re.sub( - r"\bFROM\s+logs\b", f"FROM project_logs('{project_id}')", args.query, flags=re.IGNORECASE - ) - print(f"Executing SQL: {executed_query}\n", file=sys.stderr) - - results = run_sql(project_id, args.query) - - if args.format == "json": - print(json.dumps(results, indent=2, default=str)) - else: - if not results: - print("No results") - elif len(results) 
== 1 and len(results[0]) == 1: - # Single value result (like count) - key, value = list(results[0].items())[0] - print(f"{key}: {value}") - else: - print(f"Found {len(results)} results:\n") - for i, row in enumerate(results): - print(f"--- Result {i+1} ---") - for key, value in row.items(): - val_str = str(value)[:200] if value else "null" - print(f" {key}: {val_str}") - print() - - -if __name__ == "__main__": - main() diff --git a/skills/using-braintrust/scripts/run_eval.py b/skills/using-braintrust/scripts/run_eval.py deleted file mode 100644 index 6327f44..0000000 --- a/skills/using-braintrust/scripts/run_eval.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.9" -# dependencies = ["braintrust", "autoevals", "python-dotenv"] -# /// -""" -Run a Braintrust evaluation with custom data. - -Usage: - uv run run_eval.py --project "My Project" --data '[{"input": "test", "expected": "test"}]' - uv run run_eval.py --project "My Project" --data-file data.json - -Environment variables: - BRAINTRUST_API_KEY: Your Braintrust API key (required) - BRAINTRUST_APP_URL: Braintrust app URL (default: https://www.braintrust.dev) -""" - -import argparse -import json -import sys - -import braintrust -from _common import load_env, require_api_key -from autoevals import Factuality, Score - - -def simple_task(input_data): - """Default task that just echoes input. Replace with your LLM call.""" - if isinstance(input_data, dict): - return str(input_data.get("input", input_data)) - return str(input_data) - - -def exact_match_scorer(input, output, expected=None, **kwargs): - """Scorer that checks for exact match with expected.""" - if expected is None: - return Score(name="Exact Match", score=1.0, metadata={"reason": "no expected"}) - - match = str(output).strip().lower() == str(expected).strip().lower() - return Score( - name="Exact Match", - score=1.0 if match else 0.0, - metadata={"output": str(output)[:100], "expected": str(expected)[:100]}, - ) - - -def main(): - parser = argparse.ArgumentParser(description="Run a Braintrust evaluation") - parser.add_argument("--project", required=True, help="Project name") - parser.add_argument("--data", help="JSON string of data") - parser.add_argument("--data-file", help="Path to JSON file with data") - parser.add_argument("--experiment", help="Experiment name (optional)") - parser.add_argument( - "--scorer", default="exact", choices=["exact", "factuality"], help="Scorer to use" - ) - args = parser.parse_args() - - load_env() - require_api_key() - - # Load data - if args.data: - data = json.loads(args.data) - elif args.data_file: - with open(args.data_file) as f: - data = json.load(f) - else: - print("Error: Provide --data or --data-file", file=sys.stderr) - sys.exit(1) - - # Ensure data is a list - if not isinstance(data, list): - data = [data] - - # Select scorer - scorers = [Factuality] if args.scorer == "factuality" else [exact_match_scorer] - - # Run eval - print(f"Running evaluation on project: {args.project}") - print(f"Data: {len(data)} items") - print(f"Scorer: {args.scorer}") - - braintrust.Eval( - args.project, - data=lambda: data, - task=simple_task, - scores=scorers, - experiment_name=args.experiment, - ) - - -if __name__ == "__main__": - main()