From a7e1beb2e8486a0c46d145a8a41a0d76214570a9 Mon Sep 17 00:00:00 2001
From: sheeki003 <36009418+sheeki03@users.noreply.github.com>
Date: Fri, 20 Mar 2026 18:15:55 +0530
Subject: [PATCH 1/7] fix: address 22 user-facing frictions across CLI, REST,
MCP, and browser tools
Node version enforcement (bin/schrute.cjs), test DB isolation via
SCHRUTE_DATA_DIR, serve auth token hint, skills delete --yes, empty
body parser, CLI rate-limit auto-retry, MCP HTTP admin opt-in, import
preview with transactional writes + policy persistence, OpenAPI probe
paths, progress indicators, doctor audit chain downgraded to warning,
browser_fill_form schema, v0 deprecation header, --url/--token on
subcommands, search ranking lexical filter, config get --reveal,
skills list --status, batch execute rate-limit retry, README MCP note.
---
README.md | 77 ++++--
bin/schrute.cjs | 8 +
package.json | 13 +-
src/app/import-service.ts | 257 ++++++++++++++++++
src/browser/base-browser-adapter.ts | 6 +-
src/core/config.ts | 8 +
src/discovery/openapi-scanner.ts | 5 +
src/doctor.ts | 2 +-
src/index.ts | 389 +++++++++++++++-------------
src/server/mcp-http.ts | 4 +-
src/server/rest-server.ts | 22 ++
src/server/skill-helpers.ts | 2 +-
src/server/tool-dispatch.ts | 8 +
src/server/tool-registry.ts | 61 +++--
src/shared/admin-auth.ts | 4 +-
src/skill/types.ts | 4 +
tests/e2e/v02-mcp-http.test.ts | 3 +-
tests/global-setup.ts | 18 ++
tests/unit/admin-auth.test.ts | 41 ++-
tests/unit/config-env.test.ts | 49 ++--
tests/unit/config.test.ts | 17 ++
tests/unit/doctor.test.ts | 23 ++
tests/unit/import-service.test.ts | 366 ++++++++++++++++++++++++++
tests/unit/rest-server.test.ts | 57 ++++
tests/unit/tool-dispatch.test.ts | 44 ++++
tests/unit/tool-registry.test.ts | 19 +-
vitest.config.ts | 1 +
27 files changed, 1241 insertions(+), 267 deletions(-)
create mode 100755 bin/schrute.cjs
create mode 100644 src/app/import-service.ts
create mode 100644 tests/global-setup.ts
create mode 100644 tests/unit/import-service.test.ts
diff --git a/README.md b/README.md
index 572819c..04ee03d 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,12 @@
Teach your AI a website once. After that, it replays the same backend requests directly — no browser needed.
-Schrute watches real browser traffic, turns repeatable actions into MCP tools, and reuses browser auth when needed. No hand-written API integration, and often no API keys, because Schrute learns from the requests your browser already knows how to make.
+Schrute watches real browser traffic, learns the underlying API patterns, and turns them into replayable skills you can call via **MCP**, **REST API**, or **CLI**. No hand-written API integration, and often no API keys, because Schrute learns from the requests your browser already knows how to make.
-- Faster repeated tasks
+- Faster repeated tasks — browse once, direct HTTP after
- Less brittle than selector-only browser automation
-- No hand-written API integration for every site
+- Works through Cloudflare-protected sites via real-Chrome fallback
+- **MCP** · **REST API** · **CLI** — use from AI agents, any language, or your terminal
Measured on repeated runs of tested workflows:
@@ -44,7 +45,9 @@ npm install -g schrute
schrute setup
```
-Add to your MCP client config (Claude Code, Cursor, Windsurf, Cline, or any MCP client):
+### MCP (AI agents)
+
+Add to your MCP client config (Claude Code, Cursor, Windsurf, Cline):
```json
{
@@ -57,7 +60,23 @@ Add to your MCP client config (Claude Code, Cursor, Windsurf, Cline, or any MCP
}
```
-Your AI agent now has `schrute_explore`, `schrute_record`, and 40+ other tools.
+> **Note:** MCP HTTP requires `Accept: application/json, text/event-stream` header.
+
+### CLI
+
+```bash
+schrute explore https://example.com # Open browser, record traffic
+schrute record --name my-action # Mark an action
+schrute stop # Generate skills
+schrute execute my_skill.v1 # Replay the learned API call
+```
+
+### REST API
+
+```bash
+schrute serve --http --port 3000 # Start HTTP server
+curl http://127.0.0.1:3000/api/sites # Use from any language
+```
## See it work in 60 seconds
@@ -121,7 +140,7 @@ Every example below was recorded on 2026-03-17, on macOS (Apple Silicon), over W
**Returned:** `{"origin": "49.43.xxx.x"}`
-**What changed after learning:** Four browser navigations became four replayable MCP tools. Each call returns JSON directly — no page load, no DOM parsing, no selectors.
+**What changed after learning:** Four browser navigations became four replayable skills — callable via MCP tools, REST API, or CLI. Each call returns JSON directly — no page load, no DOM parsing, no selectors.
---
@@ -150,7 +169,7 @@ Every example below was recorded on 2026-03-17, on macOS (Apple Silicon), over W
- Time: **1,033ms**
- Returned: Full Wikipedia search results (10 articles with titles, snippets, page IDs)
-**What changed after learning:** A single MCP tool that takes a search query and returns structured Wikipedia results. The agent calls `schrute_execute({ skillId: "en_wikipedia_org.get_api_php.v1", params: { "query.srsearch": "quantum computing" } })` instead of navigating Wikipedia's UI.
+**What changed after learning:** A single skill that takes a search query and returns structured Wikipedia results. Call it via MCP (`schrute_execute`), REST API (`POST /api/sites/en.wikipedia.org/skills/get_api_php`), or CLI (`schrute execute en_wikipedia_org.get_api_php.v1 "query.srsearch=quantum computing"`) — instead of navigating Wikipedia's UI.
---
@@ -190,25 +209,39 @@ Every example below was recorded on 2026-03-17, on macOS (Apple Silicon), over W
**Site:** www.coingecko.com (finance/crypto)
-**Why this workflow matters:** Shows how Schrute handles sites behind Cloudflare. Direct HTTP fails — the skill stays at Tier 3 (browser-proxied) and uses the browser's Cloudflare clearance cookies.
+**Why this workflow matters:** CoinGecko is protected by Cloudflare Turnstile, which blocks headless browsers. When Schrute detects a Cloudflare challenge, it launches a real Chrome browser to attempt clearance, then retries the API call. If the challenge auto-clears (common for Turnstile), execution succeeds automatically. If manual intervention is needed, Schrute returns a 202 with a hint to solve the challenge in the opened browser.
**First run:**
- Path: `schrute explore` → agent navigates CoinGecko, clicks on Bitcoin, views price charts → `schrute stop`
-- Pipeline result: **5 skills generated** from the captured API calls
+- Pipeline result: **5+ skills generated** from captured API traffic (exact count depends on page traffic)
**Learned skills:**
- `www_coingecko_com.get_24_hours_json.v1` — `GET /price_charts/bitcoin/usd/24_hours.json`
-- `www_coingecko_com.get_max_longer_cache_json.v1` — `GET /price_charts/bitcoin/usd/max_longer_cache.json`
-- `www_coingecko_com.get_insight_annotations.v1` — `GET /price_charts/bitcoin/insight_annotations`
-- Plus 2 more (user info, OTP center)
+- `www_coingecko_com.get_coins.v1` — `GET /price_charts/.../coins`
+- `www_coingecko_com.get_csrf_meta_json.v1` — `GET /accounts/csrf_meta.json`
+- Plus user info, OTP center, assets endpoints
- Auth used: Cloudflare cookies (browser session)
- Safety class: read-only
-**Direct HTTP attempt:** Failed after 9,129ms — Cloudflare returns a challenge page, not JSON.
+**Execution with live-Chrome recovery:**
-**Why it still works:** At Tier 3, Schrute executes the `fetch()` inside the browser context, which already has Cloudflare clearance cookies. The request succeeds where direct HTTP cannot. This skill will not promote to Tier 1 because the endpoint requires Cloudflare cookies — Schrute detects this and keeps it at the browser-proxied tier.
+```
+schrute execute www_coingecko_com.get_24_hours_json.v1 --yes
+```
-**What this shows:** Not every skill promotes to direct HTTP. Schrute adapts to the site's security model instead of breaking against it.
+| Step | What happens | Time |
+|------|-------------|------|
+| 1 | Browser-proxied fetch (Playwright) | Blocked by Cloudflare |
+| 2 | Full-browser fetch (Playwright) | Blocked by Cloudflare |
+| 3 | Schrute detects Cloudflare challenge page | ~0ms |
+| 4 | Launches real Chrome, navigates to CoinGecko | Challenge clears (in tested run) |
+| 5 | Retries API call through live Chrome session | **Success** |
+
+- Total latency: **~7,400ms** (includes Chrome launch + challenge clearance)
+- Returned: 288 BTC/USD price data points (24h) with timestamps and volumes
+- Recovery status: `live_chrome_opened`
+
+**What this shows:** Schrute handles Cloudflare-protected sites by escalating to real Chrome when headless Playwright is blocked. In the tested run, Turnstile auto-cleared and execution succeeded on the first call. Sites with interactive CAPTCHAs may require manual clearance in the opened browser window.
---
@@ -235,17 +268,9 @@ Every example below was recorded on 2026-03-17, on macOS (Apple Silicon), over W
| dog.ceo | `get_all` | 551ms | — | — | None | 3/6 |
| dog.ceo | `get_random` | 558ms | 472ms | — | None | 3/6 |
| en.wikipedia.org | `get_api_php` | 1,033ms | — | — | None | 0/4 |
-| www.coingecko.com | `get_24_hours_json` | 9,129ms (fail) | — | — | Cloudflare cookies | — |
-
-All runs at Tier 3 (browser-proxied). Skills promote to Tier 1 (direct HTTP, ~5-50ms) after 5+ consecutive successful validations. Cloudflare-protected skills remain at Tier 3.
+| www.coingecko.com | `get_24_hours_json` | ~7,400ms | — | — | CF cookies | — |
-**Methodology:**
-- Machine: MacBook (Apple Silicon)
-- Network: WiFi, India
-- Browser engine: Playwright Chromium
-- Cache state: warm (browser session open)
-- Timing: `latencyMs` field from Schrute execution result
-- Date tested: 2026-03-17
+All runs at Tier 3 (browser-proxied) except CoinGecko which uses live-Chrome recovery. Skills promote to Tier 1 (direct HTTP, ~5-50ms) after 5+ consecutive successful validations. Cloudflare-protected skills remain at Tier 3 with real-Chrome fallback.
## Where Schrute works best
@@ -287,7 +312,7 @@ Before a learned skill executes, Schrute enforces:
Dangerous browser tools (`browser_evaluate`, `browser_run_code`) are blocked entirely.
-For the full 9-gate security model, see [SECURITY.md](SECURITY.md).
+For the full security model, see [SECURITY.md](SECURITY.md).
## Auth, cookies, and storage
diff --git a/bin/schrute.cjs b/bin/schrute.cjs
new file mode 100755
index 0000000..6cb9002
--- /dev/null
+++ b/bin/schrute.cjs
@@ -0,0 +1,8 @@
+#!/usr/bin/env node
+'use strict';
+const major = parseInt(process.version.slice(1), 10);
+if (major < 22) {
+ console.error(`Error: Node >= 22 required (found ${process.version}). Run: nvm use 22`);
+ process.exit(1);
+}
+import('../dist/index.js');
diff --git a/package.json b/package.json
index a707dfd..525b840 100644
--- a/package.json
+++ b/package.json
@@ -15,10 +15,10 @@
}
},
"bin": {
- "schrute": "dist/index.js"
+ "schrute": "bin/schrute.cjs"
},
"scripts": {
- "prebuild": "node scripts/sync-version.js",
+ "prebuild": "node -e \"if(parseInt(process.version.slice(1))<22){console.error('Node>=22 required');process.exit(1)}\" && node scripts/sync-version.js",
"build": "tsc -p tsconfig.json",
"prepublishOnly": "node scripts/sync-version.js && npm run build",
"build:native": "cd native && cargo build --release && cp target/release/libschrute_native.dylib index.node 2>/dev/null; cp target/release/schrute_native.dll index.node 2>/dev/null; cp target/release/libschrute_native.so index.node 2>/dev/null; echo 'Native build complete'",
@@ -27,10 +27,10 @@
"test:watch": "vitest",
"test:coverage": "vitest run --coverage",
"lint": "tsc --noEmit",
- "start": "node dist/index.js",
- "serve": "node dist/index.js serve",
- "setup": "node dist/index.js setup",
- "doctor": "node dist/index.js doctor",
+ "start": "node bin/schrute.cjs",
+ "serve": "node bin/schrute.cjs serve",
+ "setup": "node bin/schrute.cjs setup",
+ "doctor": "node bin/schrute.cjs doctor",
"rebuild:native": "bash scripts/rebuild-native.sh",
"build:binary": "npm run build && pkg dist/index.js --config pkg.config.json",
"build:binary:macos": "npm run build:binary -- --target node22-macos-arm64",
@@ -91,6 +91,7 @@
"schrute"
],
"files": [
+ "bin/",
"dist/",
".claude-plugin/",
"commands/",
diff --git a/src/app/import-service.ts b/src/app/import-service.ts
new file mode 100644
index 0000000..bbea79a
--- /dev/null
+++ b/src/app/import-service.ts
@@ -0,0 +1,257 @@
+import * as fs from 'node:fs';
+import * as readline from 'node:readline';
+import type { SkillSpec, SiteManifest, SitePolicy } from '../skill/types.js';
+import { validateImportableSkill, validateImportableSite } from '../storage/import-validator.js';
+import { getSitePolicy, setSitePolicy } from '../core/policy.js';
+import type { SkillRepository } from '../storage/skill-repository.js';
+import type { SiteRepository } from '../storage/site-repository.js';
+import type { AgentDatabase } from '../storage/database.js';
+import type { SchruteConfig } from '../core/config.js';
+
+export interface ImportDeps {
+ db: AgentDatabase;
+ skillRepo: SkillRepository;
+ siteRepo: SiteRepository;
+ config: SchruteConfig;
+}
+
+export interface ImportOptions {
+ yes?: boolean;
+}
+
+export interface ImportResult {
+ created: number;
+ updated: number;
+ skipped: number;
+ siteAction?: 'created' | 'updated';
+ hasAuthSkills: boolean;
+ policyWarnings: string[];
+ cancelled?: boolean;
+}
+
+export async function performImport(
+ file: string,
+ deps: ImportDeps,
+ options: ImportOptions = {},
+): Promise<ImportResult> {
+ if (!fs.existsSync(file)) {
+ throw new Error(`File '${file}' not found.`);
+ }
+
+ let bundle: {
+ version: string;
+ site: SiteManifest;
+ skills: SkillSpec[];
+ policy?: SitePolicy;
+ };
+
+ try {
+ const raw = fs.readFileSync(file, 'utf-8');
+ bundle = JSON.parse(raw);
+ } catch (err) {
+ throw new Error(`Failed to parse bundle: ${err instanceof Error ? err.message : String(err)}`);
+ }
+
+ if (!bundle.site || !bundle.skills || !Array.isArray(bundle.skills)) {
+ throw new Error('Invalid bundle format: missing site or skills.');
+ }
+
+ // Validate site
+ const siteResult = validateImportableSite(bundle.site);
+ if (!siteResult.valid) {
+ throw new Error(`Site validation failed:\n ${siteResult.errors.join('\n ')}`);
+ }
+
+ // Validate each skill; warn + skip invalid ones
+ const validSkills: SkillSpec[] = [];
+ const skipped: string[] = [];
+ const expectedSiteId = bundle.site.id;
+
+ for (const skill of bundle.skills) {
+ const skillResult = validateImportableSkill(skill);
+ if (!skillResult.valid) {
+      const label = (skill as unknown as Record<string, unknown>).id ?? '(unknown)';
+ console.warn(
+ `Warning: skill '${label}' failed validation -- skipping.\n ${skillResult.errors.join('\n ')}`,
+ );
+ skipped.push(String(label));
+ continue;
+ }
+
+ if (Array.isArray(skill.allowedDomains) && skill.allowedDomains.length === 0) {
+ console.warn(
+ `Warning: skill '${skill.id}' has no allowedDomains -- may not execute without a domain policy.`,
+ );
+ }
+
+ if (skill.siteId !== expectedSiteId) {
+ console.warn(
+ `Warning: skill '${skill.id}' has siteId '${skill.siteId}', expected '${expectedSiteId}'. Skipping.`,
+ );
+ skipped.push(skill.id);
+ continue;
+ }
+
+ validSkills.push(skill);
+ }
+
+ // Check for overwrites — track corrupt rows separately
+ const { db, skillRepo, siteRepo } = deps;
+ let existingSite: SiteManifest | undefined;
+ let siteCorrupt = false;
+ try {
+ existingSite = siteRepo.getById(bundle.site.id);
+ } catch {
+ siteCorrupt = true;
+ console.warn(`Warning: existing site '${bundle.site.id}' has corrupt data — will overwrite.`);
+ }
+
+ const overwriteIds: string[] = [];
+ const corruptIds: string[] = [];
+  const existingCreatedAt = new Map<string, number>();
+ let newCount = 0;
+ for (const skill of validSkills) {
+ try {
+ const existing = skillRepo.getById(skill.id);
+ if (existing) {
+ overwriteIds.push(skill.id);
+ if (existing.createdAt) existingCreatedAt.set(skill.id, existing.createdAt);
+ } else {
+ newCount++;
+ }
+ } catch {
+ corruptIds.push(skill.id);
+ console.warn(`Warning: existing skill '${skill.id}' has corrupt data — will overwrite.`);
+ }
+ }
+ const existingCount = overwriteIds.length + corruptIds.length;
+
+ // Preview
+ console.log(`Import preview for '${file}':`);
+ console.log(` Site: ${bundle.site.id} (${existingSite ? 'will update' : 'will create'})`);
+ console.log(` Valid skills: ${validSkills.length}`);
+ if (skipped.length > 0) {
+ console.log(` Skipped (invalid): ${skipped.length}`);
+ }
+ if (existingCount > 0) {
+ console.log(` Will overwrite: ${existingCount} existing skill(s)`);
+ for (const id of overwriteIds) console.log(` overwrite: ${id}`);
+ for (const id of corruptIds) console.log(` overwrite (corrupt): ${id}`);
+ }
+
+ // Policy preview
+ const policyWarnings: string[] = [];
+ if (bundle.policy) {
+ console.log(` Policy: will ${existingSite ? 'replace' : 'set'}`);
+ const currentPolicy = getSitePolicy(bundle.site.id, deps.config);
+ if (bundle.policy.maxConcurrent !== currentPolicy.maxConcurrent) {
+ policyWarnings.push(`maxConcurrent: current=${currentPolicy.maxConcurrent}, import=${bundle.policy.maxConcurrent}`);
+ }
+ }
+ if (policyWarnings.length > 0) {
+ console.log(` Policy changes: ${policyWarnings.join('; ')}`);
+ }
+
+ // Confirmation — require when anything will be overwritten
+ if ((existingCount > 0 || existingSite || siteCorrupt) && !options.yes) {
+ if (!process.stdin.isTTY) {
+ throw new Error('Non-interactive terminal: use --yes to confirm import.');
+ }
+ const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+    const answer = await new Promise<string>(resolve => rl.question('Proceed with import? [y/N] ', resolve));
+ rl.close();
+ if (answer.toLowerCase() !== 'y') {
+ return { created: 0, updated: 0, skipped: skipped.length, hasAuthSkills: false, policyWarnings, cancelled: true };
+ }
+ }
+
+ // Fill defaults for NOT NULL DB fields
+ const now = Date.now();
+ for (const skill of validSkills) {
+ if (!skill.name) {
+ const parts = skill.id.split('.');
+ skill.name = parts.length >= 2 ? parts[parts.length - 2] : skill.id;
+ }
+ if (skill.inputSchema === undefined) skill.inputSchema = {};
+ if (skill.sideEffectClass === undefined) skill.sideEffectClass = 'read-only';
+ if (skill.currentTier === undefined) skill.currentTier = 'tier_3';
+ if (skill.status === undefined) skill.status = 'draft';
+ if (skill.confidence === undefined) skill.confidence = 0;
+ if (skill.consecutiveValidations === undefined) skill.consecutiveValidations = 0;
+ if (skill.sampleCount === undefined) skill.sampleCount = 0;
+ if (skill.successRate === undefined) skill.successRate = 0;
+ if (skill.version === undefined) skill.version = 1;
+ if (skill.allowedDomains === undefined) skill.allowedDomains = [];
+ if (skill.isComposite === undefined) skill.isComposite = false;
+ if (skill.directCanaryEligible === undefined) skill.directCanaryEligible = false;
+ if (skill.directCanaryAttempts === undefined) skill.directCanaryAttempts = 0;
+ if (skill.validationsSinceLastCanary === undefined) skill.validationsSinceLastCanary = 0;
+ if (skill.createdAt === undefined) {
+ skill.createdAt = existingCreatedAt.get(skill.id) ?? now;
+ }
+ if (skill.updatedAt === undefined) skill.updatedAt = now;
+ }
+
+ // Phase 1: Site + skills in a single synchronous transaction
+ const corruptSet = new Set(corruptIds);
+ const overwriteSet = new Set(overwriteIds);
+ let created = 0;
+ let updated = 0;
+ let siteAction: 'created' | 'updated';
+
+ db.transaction(() => {
+ if (existingSite && !siteCorrupt) {
+ siteRepo.update(bundle.site.id, bundle.site);
+ siteAction = 'updated';
+ } else {
+ // Delete corrupt/stale row (cascade may delete skills too)
+ try { siteRepo.delete(bundle.site.id); } catch { /* row may not exist */ }
+ siteRepo.create(bundle.site);
+ siteAction = 'created';
+ }
+
+ if (siteCorrupt) {
+ // Site was deleted+recreated → cascade killed all skills → all are creates
+ for (const skill of validSkills) {
+ skillRepo.create(skill);
+ created++;
+ }
+ } else {
+ for (const skill of validSkills) {
+ if (corruptSet.has(skill.id)) {
+ try { skillRepo.delete(skill.id); } catch { /* may already be gone */ }
+ skillRepo.create(skill);
+ updated++;
+ } else if (overwriteSet.has(skill.id)) {
+ skillRepo.update(skill.id, skill);
+ updated++;
+ } else {
+ skillRepo.create(skill);
+ created++;
+ }
+ }
+ }
+ });
+
+ // Phase 2: Policy (separate write — setSitePolicy does its own DB call)
+ if (bundle.policy) {
+ const p = bundle.policy;
+ if (p.siteId && p.siteId !== bundle.site.id) {
+ console.error(`Warning: policy siteId '${p.siteId}' does not match site '${bundle.site.id}'. Skipping policy.`);
+ } else {
+ p.siteId = bundle.site.id;
+ try {
+ const result = setSitePolicy(p, deps.config);
+ if (!result.persisted) {
+ console.error('Warning: policy imported to cache but failed to persist to DB.');
+ }
+ } catch (err) {
+ console.error(`Warning: policy import failed: ${err instanceof Error ? err.message : String(err)}`);
+ }
+ }
+ }
+
+ const hasAuthSkills = validSkills.some((s: SkillSpec) => s.authType != null);
+
+ return { created, updated, skipped: skipped.length, siteAction: siteAction!, hasAuthSkills, policyWarnings };
+}
diff --git a/src/browser/base-browser-adapter.ts b/src/browser/base-browser-adapter.ts
index bb650fe..525e67b 100644
--- a/src/browser/base-browser-adapter.ts
+++ b/src/browser/base-browser-adapter.ts
@@ -924,7 +924,11 @@ export abstract class BaseBrowserAdapter implements BrowserProvider {
case 'browser_fill_form': {
const values = args.values;
if (typeof values !== 'object' || values === null || Array.isArray(values)) {
- throw new Error('Expected values to be a Record');
+ throw new Error(
+ 'browser_fill_form expects { values: { "