diff --git a/package.json b/package.json index d976a8e7f..77875243b 100644 --- a/package.json +++ b/package.json @@ -37,6 +37,7 @@ "clean": "rimraf dist", "prepare": "npm run build", "lint:changed": "node scripts/lint-changed-src.js", + "lint:categories": "node scripts/lint-tool-categories.mjs", "harness:parallel-smoke": "ts-node tests/harness/parallel-smoke.ts" }, "keywords": [ diff --git a/scripts/lint-tool-categories.mjs b/scripts/lint-tool-categories.mjs new file mode 100644 index 000000000..5ff2c902c --- /dev/null +++ b/scripts/lint-tool-categories.mjs @@ -0,0 +1,136 @@ +#!/usr/bin/env node +/** + * lint-tool-categories.mjs (#847) + * + * CI guard: every tool name registered via REGISTRATION_ENTRIES in + * src/tools/index.ts must be present in TOOL_TO_CATEGORY in + * src/tools/_shared/category.ts. A missing entry would either fail loud at + * runtime (registerAllTools throws) or — worse — silently default into the + * full surface even when the operator passed --slim. This script catches + * both classes of regression at PR time. + * + * Strategy: + * 1. Read src/tools/index.ts and extract every entry of the form + * `tools: ['name', ...]` from REGISTRATION_ENTRIES. + * 2. Read src/tools/_shared/category.ts and extract every key of + * TOOL_TO_CATEGORY. + * 3. Diff. Exit non-zero on any mismatch (missing assignment OR stale + * entry no longer used by any registrar). + * + * Why a regex parser instead of importing the modules: + * - Keeps the script dependency-free and runnable in pre-build CI stages + * (no need to compile TypeScript first). + * - The `tools:` and `TOOL_TO_CATEGORY` shapes are deliberately simple + * literal arrays/objects with no interpolation — see the comments in + * src/tools/_shared/category.ts and src/tools/index.ts. 
+ */ + +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const repoRoot = join(__dirname, '..'); + +const indexPath = join(repoRoot, 'src', 'tools', 'index.ts'); +const categoryPath = join(repoRoot, 'src', 'tools', '_shared', 'category.ts'); + +function readSource(filePath) { + try { + return readFileSync(filePath, 'utf8'); + } catch (err) { + console.error( + `[lint-tool-categories] Could not read ${filePath}: ${err.message}`, + ); + process.exit(2); + } +} + +/** + * Extract every `tools: ['a', 'b', ...]` array from REGISTRATION_ENTRIES. + * Multi-line arrays are supported — the regex spans newlines. + */ +function extractRegisteredNames(source) { + const names = new Set(); + // Match `tools: [ ... ]` — the array body may span multiple lines. + const re = /tools:\s*\[([\s\S]*?)\]/g; + let m; + while ((m = re.exec(source)) !== null) { + const body = m[1]; + const stringRe = /['"]([A-Za-z0-9_]+)['"]/g; + let s; + while ((s = stringRe.exec(body)) !== null) { + names.add(s[1]); + } + } + return names; +} + +/** + * Extract every key from `TOOL_TO_CATEGORY = { ... }` — both bare-identifier + * keys (`navigate: 'navigation',`) and quoted-string keys. + */ +function extractCategorizedNames(source) { + const names = new Set(); + const objMatch = source.match( + /TOOL_TO_CATEGORY[^=]*=\s*{([\s\S]*?)\n};?/, + ); + if (!objMatch) { + console.error( + '[lint-tool-categories] Could not locate TOOL_TO_CATEGORY object literal in category.ts', + ); + process.exit(2); + } + const body = objMatch[1]; + // Match a property line: leading whitespace, an identifier or quoted name, + // a colon, then a quoted category. Comments are skipped because they don't + // contain `:` followed by a quoted token at the start of a line. 
+ const propRe = /(?:^|\n)\s*(?:['"]([A-Za-z0-9_]+)['"]|([A-Za-z_][A-Za-z0-9_]*))\s*:\s*['"][a-z]+['"]/g; + let m; + while ((m = propRe.exec(body)) !== null) { + names.add(m[1] ?? m[2]); + } + return names; +} + +const indexSource = readSource(indexPath); +const categorySource = readSource(categoryPath); + +const registered = extractRegisteredNames(indexSource); +const categorized = extractCategorizedNames(categorySource); + +const missing = [...registered].filter((n) => !categorized.has(n)).sort(); +const stale = [...categorized].filter((n) => !registered.has(n)).sort(); + +if (missing.length === 0 && stale.length === 0) { + console.error( + `[lint-tool-categories] OK — ${registered.size} registered tools all have a category assignment.`, + ); + process.exit(0); +} + +if (missing.length > 0) { + console.error( + `[lint-tool-categories] FAIL — ${missing.length} tool(s) registered in src/tools/index.ts have no entry in TOOL_TO_CATEGORY:`, + ); + for (const name of missing) { + console.error(` - ${name}`); + } + console.error( + ' Fix: add each name to src/tools/_shared/category.ts under the appropriate category.', + ); +} + +if (stale.length > 0) { + console.error( + `[lint-tool-categories] FAIL — ${stale.length} stale entry/entries in TOOL_TO_CATEGORY no longer correspond to any registered tool:`, + ); + for (const name of stale) { + console.error(` - ${name}`); + } + console.error( + ' Fix: remove from src/tools/_shared/category.ts (or re-register the tool in src/tools/index.ts).', + ); +} + +process.exit(1); diff --git a/src/index.ts b/src/index.ts index f9aa608f9..351ee40b9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -9,8 +9,12 @@ import { Command } from 'commander'; import { getMCPServer, setMCPServerOptions } from './mcp-server'; -import { TOOL_CAPABILITIES, type ToolCapability } from './types/mcp'; import { registerAllTools } from './tools'; +import { + CategorySelection, + parseCategoryCsv, + categoryHelpText, +} from './tools/_shared/category'; import { 
createTransport } from './transports/index'; import { getGlobalConfig, setGlobalConfig } from './config/global'; import { resolveHeadlessMode } from './config/headless-resolver'; @@ -99,13 +103,14 @@ program .option('--transport ', 'Transport mode: stdio, http, or both (default: stdio)') .option('--idle-timeout ', 'Self-exit (code 0) after idle window with zero sessions. Format: (ms|s|m|h), e.g. 30m, 90s, 500ms. Bare numbers are rejected. Also: OPENCHROME_IDLE_TIMEOUT_MS env var (integer ms). Default: disabled.') .option('--pilot', 'Enable experimental pilot tier (see docs/roadmap/portability-harness-contract.md). Off by default; lazy-loads src/pilot/ modules when set. Also: OPENCHROME_PILOT=1 env var.') - .option('--tools-only ', 'Expose only tools belonging to the specified capability groups (comma-separated). Valid values: core,crawl,recording,workflow,storage,profile,totp,pilot. Default: all groups exposed.') - .option('--disable-tools ', 'Remove tools belonging to the specified capability groups (comma-separated). Valid values: core,crawl,recording,workflow,storage,profile,totp,pilot.') .option('--introspect-tools-list', 'Print tools/list as compact JSON to stdout and exit (no Chrome/CDP startup). Used by lint-tool-schemas.mjs.') .option('--auto-connect [userDataDir]', 'Attach to a Chrome you started yourself by reading /DevToolsActivePort (#849). When omitted, uses the platform-default Chrome user-data dir. Also: OPENCHROME_AUTO_CONNECT= env var. Implies --launch-mode=attach.') .option('--launch-mode ', 'Chrome launch mode: auto | attach | isolated (#659). Also: OPENCHROME_LAUNCH_MODE env var.') .option('--secrets ', 'Load a dotenv-format secrets file (KEY=value per line). Tokens "${SECRET:NAME}" in tool arguments are substituted to the real value at MCP request deserialization; the same values are redacted from every LLM-visible artifact (responses, trace, skill records, journal). Default: no secrets loaded. 
P3: no OS keychain integration.') - .action(async (options: { port: string; autoLaunch?: boolean; userDataDir?: string; profileDirectory?: string; chromeBinary?: string; headlessShell?: boolean; headless?: boolean; visible?: boolean; windowSize?: string; windowPosition?: string; windowBounds?: string; startMaximized?: boolean; restartChrome?: boolean; hybrid?: boolean; lpPort?: string; blockedDomains?: string; auditLog?: boolean; sanitizeContent?: boolean; allTools?: boolean; serverMode?: boolean; http?: string | boolean; authToken?: string; transport?: string; idleTimeout?: string; allowUnauthenticatedHttp?: boolean; pilot?: boolean; toolsOnly?: string; disableTools?: string; introspectToolsList?: boolean; autoConnect?: string | boolean; launchMode?: string; secrets?: string }) => { + .option('--slim', `Register only the slim-mode tool categories (chrome-devtools-mcp parity). Always-on categories (reliability, observe) are kept. Also: OPENCHROME_SLIM=1 env var.\n${categoryHelpText()}`) + .option('--enable-categories ', 'Comma-separated allow-list of tool categories to register. Mutually exclusive with --slim (slim wins). Also: OPENCHROME_ENABLE_CATEGORIES env var.') + .option('--disable-categories ', 'Comma-separated deny-list of tool categories to skip. Always-on categories cannot be disabled. 
Also: OPENCHROME_DISABLE_CATEGORIES env var.') + .action(async (options: { port: string; autoLaunch?: boolean; userDataDir?: string; profileDirectory?: string; chromeBinary?: string; headlessShell?: boolean; headless?: boolean; visible?: boolean; windowSize?: string; windowPosition?: string; windowBounds?: string; startMaximized?: boolean; restartChrome?: boolean; hybrid?: boolean; lpPort?: string; blockedDomains?: string; auditLog?: boolean; sanitizeContent?: boolean; allTools?: boolean; serverMode?: boolean; http?: string | boolean; authToken?: string; transport?: string; idleTimeout?: string; allowUnauthenticatedHttp?: boolean; pilot?: boolean; introspectToolsList?: boolean; autoConnect?: string | boolean; launchMode?: string; secrets?: string; slim?: boolean; enableCategories?: string; disableCategories?: string }) => { // --introspect-tools-list: print tools/list JSON and exit, NO Chrome/CDP/transport startup. if (options.introspectToolsList) { const { MCPServer } = await import('./mcp-server'); @@ -124,9 +129,7 @@ program return; } - let port = parseInt(options.port, 10); - let autoLaunch = options.autoLaunch || false; // ─── --auto-connect (#849) ────────────────────────────────────────── @@ -378,47 +381,16 @@ program console.error('[openchrome] Content sanitization: disabled'); } - const mcpOptions: Parameters[0] = {}; - // Tool tier configuration const envTier = parseInt(process.env.OPENCHROME_TOOL_TIER || '', 10); if (options.allTools || envTier >= 3) { - mcpOptions.initialToolTier = 3 as ToolTier; + setMCPServerOptions({ initialToolTier: 3 as ToolTier }); console.error('[openchrome] All tools exposed from startup'); } else if (envTier === 2) { - mcpOptions.initialToolTier = 2 as ToolTier; + setMCPServerOptions({ initialToolTier: 2 as ToolTier }); console.error('[openchrome] Tier 2 tools exposed from startup'); } - // Capability filter configuration (#829) - const allCapabilities: readonly ToolCapability[] = TOOL_CAPABILITIES; - if (options.toolsOnly && 
options.disableTools) { - console.error('[openchrome] Error: --tools-only and --disable-tools are mutually exclusive'); - process.exit(2); - } - if (options.toolsOnly) { - const requested = options.toolsOnly.split(',').map(s => s.trim()).filter(Boolean) as ToolCapability[]; - const invalid = requested.filter(c => !allCapabilities.includes(c)); - if (invalid.length > 0) { - console.error(`[openchrome] Error: unknown capability group(s): ${invalid.join(', ')}. Valid: ${allCapabilities.join(', ')}`); - process.exit(2); - } - mcpOptions.capabilityFilter = new Set(requested); - console.error(`[openchrome] Capability filter (tools-only): ${requested.join(', ')}`); - } else if (options.disableTools) { - const disabled = options.disableTools.split(',').map(s => s.trim()).filter(Boolean) as ToolCapability[]; - const invalid = disabled.filter(c => !allCapabilities.includes(c)); - if (invalid.length > 0) { - console.error(`[openchrome] Error: unknown capability group(s): ${invalid.join(', ')}. Valid: ${allCapabilities.join(', ')}`); - process.exit(2); - } - const allowed = allCapabilities.filter(c => !disabled.includes(c)); - mcpOptions.capabilityFilter = new Set(allowed); - console.error(`[openchrome] Capability filter (disable-tools): disabled=${disabled.join(', ')}`); - } - - setMCPServerOptions(mcpOptions); - // Set infinite reconnection for HTTP daemon mode BEFORE creating CDPClient singleton. // getMCPServer() → SessionManager → getCDPClient() reads this env var at construction. // Resolve transport mode: --transport flag takes precedence over --http flag @@ -437,7 +409,54 @@ program resetReadinessMachine(); const server = getMCPServer(); - await registerAllTools(server); + + // Tool category selection (#847). Flags win over env vars; env vars exist + // so MCP host configs that cannot pass argv (Claude Desktop config blocks) + // can still trim the surface. CSV parse errors are fatal — a typo in + // --enable-categories should not silently degrade to "all tools". 
+ let categorySelection: CategorySelection; + try { + const cliCategoryOverride = + options.enableCategories !== undefined || options.disableCategories !== undefined; + const slim = + options.slim === true || + (!cliCategoryOverride && process.env.OPENCHROME_SLIM === '1'); + const enabledCsv = slim + ? '' + : options.enableCategories ?? + process.env.OPENCHROME_ENABLE_CATEGORIES ?? + ''; + const disabledCsv = + options.disableCategories ?? + process.env.OPENCHROME_DISABLE_CATEGORIES ?? + ''; + categorySelection = { + slim, + enabled: + enabledCsv.length > 0 + ? parseCategoryCsv( + enabledCsv, + options.enableCategories + ? '--enable-categories' + : 'OPENCHROME_ENABLE_CATEGORIES', + ) + : undefined, + disabled: + disabledCsv.length > 0 + ? parseCategoryCsv( + disabledCsv, + options.disableCategories + ? '--disable-categories' + : 'OPENCHROME_DISABLE_CATEGORIES', + ) + : undefined, + }; + } catch (err) { + console.error(`[openchrome] ${(err as Error).message}`); + process.exit(2); + } + registerAllTools(server, categorySelection); + // Pilot dynamic-skills (#889): lazy attach only when explicitly enabled. { diff --git a/src/mcp-server.ts b/src/mcp-server.ts index b4647397d..8d7e08f9d 100644 --- a/src/mcp-server.ts +++ b/src/mcp-server.ts @@ -27,6 +27,11 @@ import { parseDomainFromUri, readSkillGraphResource, } from './resources/skill-graph'; +import { + disabledToolsResource, + DISABLED_TOOLS_RESOURCE_URI, + getDisabledToolsContent, +} from './resources/tools-disabled'; import { HintEngine } from './hints'; import { buildAutomationInsight, formatAutomationFallback, shouldInjectAutomationFallback } from './hints/result-guidance'; import { validateToolSchema } from './utils/schema-validator'; @@ -466,6 +471,9 @@ export class MCPServer { // Register built-in resources this.registerResource(usageGuideResource); this.registerResource(skillGraphResourceTemplate); + // Sidecar discovery surface for tools filtered out by category selection + // (#847). 
The snapshot is populated by registerAllTools() at startup. + this.registerResource(disabledToolsResource); // Initialize dashboard if enabled if (options.dashboard) { @@ -1363,6 +1371,8 @@ export class MCPServer { let content: string; if (uri === 'openchrome://usage-guide') { content = getUsageGuideContent(); + } else if (uri === DISABLED_TOOLS_RESOURCE_URI) { + content = getDisabledToolsContent(); } else { throw new Error(`No content handler for resource: ${uri}`); } diff --git a/src/resources/tools-disabled.ts b/src/resources/tools-disabled.ts new file mode 100644 index 000000000..f5397892f --- /dev/null +++ b/src/resources/tools-disabled.ts @@ -0,0 +1,80 @@ +/** + * Disabled-Tools MCP Resource (#847) + * + * URI: openchrome://tools/disabled + * + * Sidecar resource that lists tools filtered out by the category selection + * resolved at process start. Calls to a disabled name still return a normal + * MCP "unknown tool" error (because the name was never registered), but + * agents can read this resource to discover what's missing and learn the + * exact restart flag that would re-enable the tool. + * + * The resource content is a snapshot taken at registration time; it does + * not update at runtime (selection is fixed for the process lifetime). + */ + +import type { ToolCategory } from '../tools/_shared/category'; +import type { MCPResourceDefinition } from './usage-guide'; + +export const DISABLED_TOOLS_RESOURCE_URI = 'openchrome://tools/disabled'; + +export const disabledToolsResource: MCPResourceDefinition = { + uri: DISABLED_TOOLS_RESOURCE_URI, + name: 'tools-disabled', + description: + 'Tools excluded at startup by --slim / --enable-categories / --disable-categories. 
Includes the restart hint to re-enable each one.', + mimeType: 'application/json', +}; + +export interface DisabledToolEntry { + name: string; + category: ToolCategory; + hint: string; +} + +let snapshot: { tools: DisabledToolEntry[]; capturedAt: string } = { + tools: [], + capturedAt: new Date(0).toISOString(), +}; + +/** + * Replace the cached snapshot. Called once by the registration filter in + * `src/tools/index.ts` after it knows which tools were skipped. Idempotent + * — calling it again with a fresh list is the supported way to reset for + * tests. + */ +export function setDisabledToolsSnapshot(entries: DisabledToolEntry[]): void { + // Defensive copy + canonical sort by name so the JSON serialization is + // stable regardless of registration order. + const copy = entries.map((e) => ({ ...e })); + copy.sort((a, b) => a.name.localeCompare(b.name)); + snapshot = { + tools: copy, + capturedAt: new Date().toISOString(), + }; +} + +export function getDisabledToolsSnapshot(): { + tools: DisabledToolEntry[]; + capturedAt: string; +} { + return snapshot; +} + +/** + * Build the JSON payload returned by `resources/read` for this URI. + * Pretty-printed for human readability — the resource is meant to be + * eyeballed by an agent operator, not parsed by hot code. + */ +export function getDisabledToolsContent(): string { + return JSON.stringify(snapshot, null, 2); +} + +/** + * Build the per-tool restart hint. Centralized so the wording stays + * consistent across the snapshot and any future error messages that may + * want to surface the same recovery action. 
+ */ +export function buildDisabledHint(category: ToolCategory): string { + return `Restart openchrome with --enable-categories=${category}`; +} diff --git a/src/tools/_shared/category.ts b/src/tools/_shared/category.ts new file mode 100644 index 000000000..1137861bc --- /dev/null +++ b/src/tools/_shared/category.ts @@ -0,0 +1,319 @@ +/** + * Tool Category Taxonomy (#847) + * + * Each MCP tool registered by openchrome belongs to exactly one category. + * Operators can opt out of categories at process start via: + * --slim + * --enable-categories= + * --disable-categories= + * (or the equivalent OPENCHROME_* env vars). + * + * Why categories at all: + * The full surface ships ~65 tools. For small-context model deployments + * (Sonnet 4.6 / Haiku 4.5) and multi-MCP-server setups, the JSON-schema + * registration alone consumes a measurable fraction of the system prompt. + * chrome-devtools-mcp solves this with a `--slim` switch; we generalize + * that to a per-category opt-in/out at registration time. + * + * Boundary: + * `src/tools/index.ts` filters registration through `resolveEnabledCategories()` + * before invoking each register*Tool function. Skipped tools never appear in + * `tools/list` — discoverability lives in the sidecar MCP resource + * `openchrome://tools/disabled`. + */ + +export type ToolCategory = + | 'navigation' + | 'interact' + | 'inspect' + | 'tabs' + | 'workflow' + | 'session' + | 'capture' + | 'emulation' + | 'storage' + | 'observe' + | 'memory' + | 'contracts' + | 'vision' + | 'crawl' + | 'security' + | 'host' + | 'reliability' + | 'pilot'; + +/** + * The complete set of categories. Ordered for deterministic listing/serialization. 
+ */ +export const ALL_CATEGORIES: readonly ToolCategory[] = [ + 'navigation', + 'interact', + 'inspect', + 'tabs', + 'workflow', + 'session', + 'capture', + 'emulation', + 'storage', + 'observe', + 'memory', + 'contracts', + 'vision', + 'crawl', + 'security', + 'host', + 'reliability', + 'pilot', +] as const; + +/** + * Categories that are ALWAYS included regardless of operator selection. + * An openchrome instance must remain diagnosable (`observe`) and its + * lifecycle controllable (`reliability`); excluding these would produce a + * server you cannot stop or inspect. + */ +export const ALWAYS_ON_CATEGORIES: readonly ToolCategory[] = [ + 'reliability', + 'observe', +] as const; + +/** + * Slim allow-list — chrome-devtools-mcp parity, plus the two always-on + * categories per resolution rule 4. Picked to cover the "navigate + read + + * click + type" minimal coding-agent loop without pulling in capture/storage/ + * tabs management. + */ +export const SLIM_CATEGORIES: readonly ToolCategory[] = [ + 'navigation', + 'interact', + 'inspect', +] as const; + +/** + * Canonical tool-name → category map. + * + * Every tool registered in `src/tools/index.ts` MUST have an entry here. + * `scripts/lint-tool-categories.mjs` enforces this at CI time, and + * `src/tools/_shared/category-map.test.ts` snapshots the full mapping so any + * future tool addition forces the author to pick a category (test-fail). + * + * Names are taken verbatim from the `name:` field of each tool's + * `MCPToolDefinition` (i.e. the MCP-visible name). 
+ */ +export const TOOL_TO_CATEGORY: Readonly> = { + // navigation + navigate: 'navigation', + page_reload: 'navigation', + wait_for: 'navigation', + + // interact + interact: 'interact', + computer: 'interact', + find: 'interact', + form_input: 'interact', + fill_form: 'interact', + act: 'interact', + drag_drop: 'interact', + file_upload: 'interact', + + // inspect + read_page: 'inspect', + page_content: 'inspect', + query_dom: 'inspect', + inspect: 'inspect', + javascript_tool: 'inspect', + extract_data: 'inspect', + + // tabs + tabs_create: 'tabs', + tabs_close: 'tabs', + tabs_context: 'tabs', + worker: 'tabs', + worker_update: 'tabs', + + // workflow + workflow_init: 'workflow', + workflow_status: 'workflow', + workflow_collect: 'workflow', + workflow_collect_partial: 'workflow', + workflow_cleanup: 'workflow', + worker_complete: 'workflow', + execute_plan: 'workflow', + batch_execute: 'workflow', + batch_paginate: 'workflow', + lightweight_scroll: 'workflow', + + // session + oc_session_snapshot: 'session', + oc_session_resume: 'session', + oc_context_export: 'session', + oc_context_import: 'session', + oc_checkpoint: 'session', + oc_profile_status: 'session', + list_profiles: 'session', + + // capture + page_screenshot: 'capture', + page_pdf: 'capture', + oc_recording_start: 'capture', + oc_recording_stop: 'capture', + oc_recording_status: 'capture', + oc_recording_list: 'capture', + oc_recording_export: 'capture', + + // emulation + emulate_device: 'emulation', + user_agent: 'emulation', + geolocation: 'emulation', + network: 'emulation', + network_capture_lite: 'emulation', + network_capture_full: 'emulation', + request_intercept: 'emulation', + + // storage + cookies: 'storage', + storage: 'storage', + http_auth: 'storage', + + // observe (always-on) + console_capture: 'observe', + performance_metrics: 'observe', + oc_journal: 'observe', + oc_connection_health: 'observe', + oc_doctor_report: 'observe', + oc_performance_insights: 'observe', + 
oc_performance_analyze: 'observe', + oc_observe: 'observe', + + // memory + memory: 'memory', + oc_skill_record: 'memory', + oc_skill_recall: 'memory', + + // contracts + oc_assert: 'contracts', + oc_evidence_bundle: 'contracts', + + // vision + vision_find: 'vision', + + // crawl + crawl: 'crawl', + crawl_sitemap: 'crawl', + crawl_start: 'crawl', + crawl_status: 'crawl', + crawl_cancel: 'crawl', + + // security + oc_totp_generate: 'security', + + // host + oc_get_connection_info: 'host', + oc_devtools_url: 'host', + oc_copy_to_clipboard: 'host', + oc_open_host_settings: 'host', + + // reliability (always-on) + validate_page: 'reliability', + oc_stop: 'reliability', + oc_reap_orphans: 'reliability', + oc_proxy_hook: 'pilot', + oc_skill_replay: 'pilot', +}; + +/** + * Operator-supplied selection. All fields optional; if all three are + * undefined the resolver returns the full set (default behavior — byte- + * identical to v1.11.0). + */ +export interface CategorySelection { + /** Allow-list. If set, only these categories are enabled (before always-on union). */ + enabled?: readonly ToolCategory[]; + /** Deny-list. Applied AFTER `enabled`/`slim`. Always-on categories cannot be removed. */ + disabled?: readonly ToolCategory[]; + /** Shortcut for `enabled = SLIM_CATEGORIES`. Wins over `enabled` if both set. */ + slim?: boolean; +} + +/** + * Resolve the final set of enabled categories per the rules in #847: + * 1. If `slim`, ignore `enabled` and start from SLIM_CATEGORIES. + * 2. Else apply `enabled` as the working set (default = ALL_CATEGORIES). + * 3. Subtract `disabled`. + * 4. Force-include ALWAYS_ON_CATEGORIES regardless of selection. + * + * The returned set is deterministic (Set with insertion order matching + * ALL_CATEGORIES) so callers can snapshot it. 
+ */ +export function resolveEnabledCategories( + selection: CategorySelection = {}, +): Set { + let working: Set; + + if (selection.slim) { + working = new Set(SLIM_CATEGORIES); + } else if (selection.enabled && selection.enabled.length > 0) { + working = new Set(selection.enabled); + } else { + working = new Set(ALL_CATEGORIES); + } + + if (selection.disabled && selection.disabled.length > 0) { + for (const cat of selection.disabled) { + working.delete(cat); + } + } + + // Always-on union — applied last so neither --disable-categories nor an + // overly narrow --enable-categories can take down the diagnostic surface. + for (const cat of ALWAYS_ON_CATEGORIES) { + working.add(cat); + } + + // Re-emit in canonical order for deterministic snapshots. + const ordered = new Set(); + for (const cat of ALL_CATEGORIES) { + if (working.has(cat)) ordered.add(cat); + } + return ordered; +} + +/** + * Validate that a CSV string contains only known categories. Returns the + * parsed array on success; throws on unknown category. Trims whitespace and + * filters empty segments. Used by both CLI flag parsing and the env-var + * fallback so error messages are consistent. + */ +export function parseCategoryCsv(raw: string, source: string): ToolCategory[] { + const known = new Set(ALL_CATEGORIES); + const out: ToolCategory[] = []; + const seen = new Set(); + for (const rawSegment of raw.split(',')) { + const segment = rawSegment.trim(); + if (segment.length === 0) continue; + if (!known.has(segment as ToolCategory)) { + throw new Error( + `[${source}] Unknown tool category "${segment}". Known categories: ${ALL_CATEGORIES.join(', ')}`, + ); + } + if (!seen.has(segment)) { + seen.add(segment); + out.push(segment as ToolCategory); + } + } + return out; +} + +/** + * Operator-facing description of how categories interact with selection. + * Wired into `--help` so the `--slim` / `--enable-categories` / + * `--disable-categories` flags are self-documenting. 
+ */ +export function categoryHelpText(): string { + return [ + 'Tool categories (use with --slim / --enable-categories / --disable-categories):', + ` ${ALL_CATEGORIES.join(', ')}`, + ` Always-on (cannot be disabled): ${ALWAYS_ON_CATEGORIES.join(', ')}`, + ` --slim shortcut → enabled = ${SLIM_CATEGORIES.join(', ')} (plus always-on)`, + ].join('\n'); +} diff --git a/src/tools/index.ts b/src/tools/index.ts index 55a808975..7f63a7123 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -1,14 +1,29 @@ /** - * Tool Registry - Registers all MCP tools + * Tool Registry — Registers all MCP tools, gated by category selection (#847). * - * Capability tagging (#829): every tool is assigned a capability group via - * TOOL_CAPABILITY_MAP below. The CapabilityInjectingServer wrapper injects the - * capability into each MCPToolDefinition at registerTool() time, so callers - * never need to know about capability grouping — it is authoritative here. + * Each register*Tool entry below is paired with the MCP-visible name(s) it + * adds and the category that gates it. The pair is consulted before invoking + * the registrar, so disabled categories never touch `server.registerTool()` — + * the disabled name does not appear in `tools/list` or in any cached tool + * manifest, exactly matching chrome-devtools-mcp `--slim` semantics. + * + * Default behavior (no category flags) is byte-identical to v1.11.0; pinned + * by `tests/tools/registration-default.snapshot.test.ts`. 
*/ import { MCPServer } from '../mcp-server'; -import type { ToolCapability, MCPToolDefinition, ToolHandler } from '../types/mcp'; +import type { ToolCapability } from '../types/mcp'; +import { + CategorySelection, + resolveEnabledCategories, + TOOL_TO_CATEGORY, + ToolCategory, +} from './_shared/category'; +import { + buildDisabledHint, + setDisabledToolsSnapshot, + DisabledToolEntry, +} from '../resources/tools-disabled'; import { registerNavigateTool } from './navigate'; import { registerComputerTool } from './computer'; import { registerReadPageTool } from './read-page'; @@ -76,7 +91,6 @@ import { registerListProfilesTool } from './list-profiles'; import { registerSessionSnapshotTool } from './session-snapshot'; import { registerSessionResumeTool } from './session-resume'; import { registerJournalTool } from './journal'; -import { registerOcReflectTool } from './oc-reflect'; // Self-healing tools (#347) import { registerConnectionHealthTool } from './connection-health'; @@ -121,14 +135,6 @@ import { registerOcEvidenceBundleTool } from './oc-evidence-bundle'; import { registerOcSkillRecordTool } from './oc-skill-record'; import { registerOcSkillRecallTool } from './oc-skill-recall'; -// Skill memory tools (#875) — deterministic replay -import { registerOcSkillReplayTool } from './oc-skill-replay'; -// Async task ledger (#855) — start/list/get/cancel/wait for long-running tools -import { registerOcTaskStartTool, getTaskStore, setTaskStartupReapPromise } from './oc-task-start'; -import { registerOcTaskListTool } from './oc-task-list'; -import { registerOcTaskGetTool } from './oc-task-get'; -import { registerOcTaskCancelTool } from './oc-task-cancel'; -import { registerOcTaskWaitTool } from './oc-task-wait'; // Doctor report tool (#898) — read cached `openchrome doctor` output import { registerOcDoctorReportTool } from './oc-doctor-report'; // Performance insights two-step API (#846) @@ -150,32 +156,29 @@ import { registerOcObserveTool } from './oc-observe'; import 
{ registerOcDevToolsUrlTool } from './oc-devtools-url'; // Portable context envelope (#873) — export/import surface import { registerOcContextTools } from './oc-context'; -// Action schema normalizer (#1062) — side-effect-free diagnostics. -import { registerOcNormalizeActionTool } from './oc-normalize-action'; -import { isRunHarnessEnabled } from '../run-harness/flags'; -import { registerRunHarnessTools } from '../run-harness/tools'; -// Goal-level TaskRun lifecycle (#1039) -import { registerTaskRunTools } from './task-run'; -// Read-only progress diagnostics (#1060). -import { registerOcProgressStatusTool } from './oc-progress-status'; - /** - * Authoritative capability map for every registered tool (#829). - * - * Groups: - * core — fundamental browser control & session management - * storage — cookie and web-storage access - * profile — Chrome profile management - * crawl — multi-page crawling, batch pagination, worker coordination - * recording — session recording (start/stop/list/export) - * workflow — Chrome-Sisyphus orchestration workflow - * totp — 2FA / TOTP generation - * pilot — experimental pilot-tier tools + * One entry per registrar invocation. `tools` is the list of MCP-visible + * names that calling `register()` will produce. * - * Absent entry → defaults to 'core' (P1 backward-compat). - * lint:tools-capabilities enforces that every registered tool appears here. + * Selection is per-TOOL, not per-registrar: a registrar that emits tools + * across multiple categories (e.g. orchestration emits `worker_update` in + * `tabs` and `workflow_*` in `workflow`) is still invoked, and the proxy + * server passed to it silently drops the individual `registerTool()` calls + * whose category is disabled. The `tools` list is preserved here both for + * the category lint (`scripts/lint-tool-categories.mjs` parses it) and so + * the disabled-tools sidecar resource can report the exact names that were + * filtered out, with category-specific restart hints. 
*/ +interface RegistrationEntry { + /** MCP names produced by `register`. Must all be present in TOOL_TO_CATEGORY. */ + tools: readonly string[]; + register: (server: MCPServer) => void; +} + +// Legacy capability filter (#829) — kept alongside the new category filter so +// that existing --tools-only / --disable-tools / OPENCHROME_TOOL_TIER callers +// keep working unchanged while operators migrate to --slim / --enable-categories. export const TOOL_CAPABILITY_MAP: Record = { // core — fundamental browser control act: 'core', @@ -306,247 +309,332 @@ export const TOOL_CAPABILITY_MAP: Record = { }; /** - * Build a proxy around MCPServer that injects the capability field from - * TOOL_CAPABILITY_MAP into every MCPToolDefinition at registerTool() time. + * Build a `MCPServer` proxy that delegates `registerTool` only when the + * tool's category is in the enabled set. Every other property/method is + * forwarded untouched. Disabled tools are appended to `disabledOut` so the + * caller can publish them on the sidecar resource. * - * Uses a real ES Proxy so every other method/property on the underlying - * MCPServer is forwarded automatically. The previous implementation listed - * methods explicitly and required `as unknown as MCPServer` casts at every - * call site, which would TypeError at runtime if a register* function ever - * reached for an un-listed method. - * - * Keeping capability metadata in one authoritative location (this file) - * means individual tool files do not need to know about capability groups. + * Why a Proxy instead of editing each `registerXxx(server)` callsite: + * - A registrar may emit tools in DIFFERENT categories (orchestration ⇒ + * `tabs` + `workflow`). The pre-#944 "all-or-nothing per registrar" + * branch silently dropped workflow_* tools when only `tabs` was + * disabled. The proxy makes the filter run per individual tool, which + * restores the contract advertised by --disable-categories. 
+ * - Keeps every existing registrar untouched: they keep calling + * `server.registerTool(...)` exactly as before. */ -function makeCapabilityInjectingProxy(server: MCPServer): MCPServer { +function makeFilteredServer( + server: MCPServer, + enabled: Set, + disabledOut: DisabledToolEntry[], +): MCPServer { return new Proxy(server, { get(target, prop, receiver) { if (prop === 'registerTool') { - return ( + return function filteredRegisterTool( + this: unknown, name: string, - handler: ToolHandler, - definition: MCPToolDefinition, - options?: { timeoutRecoverable?: boolean }, - ): void => { - const capability: ToolCapability = TOOL_CAPABILITY_MAP[name] ?? 'core'; - target.registerTool(name, handler, { ...definition, capability }, options); + ...rest: unknown[] + ): void { + const category = TOOL_TO_CATEGORY[name]; + if (category === undefined) { + // Misconfiguration — fail loud at startup so a missing category + // assignment never silently slips into production. Mirrors the + // CI lint check (scripts/lint-tool-categories.mjs). + throw new Error( + `[Tools] Tool "${name}" has no category in TOOL_TO_CATEGORY. ` + + `Add it to src/tools/_shared/category.ts.`, + ); + } + if (!enabled.has(category)) { + disabledOut.push({ + name, + category, + hint: buildDisabledHint(category), + }); + return; + } + // Forward to the real registerTool. We invoke via the underlying + // method bound to `target` so internal `this` references resolve + // against the real server, not the proxy. + const original = Reflect.get(target, prop, target) as ( + ...args: unknown[] + ) => void; + original.call(target, name, ...rest); }; } - const value = Reflect.get(target, prop, receiver); - return typeof value === 'function' ? value.bind(target) : value; + return Reflect.get(target, prop, receiver); }, }); } - -export function registerAllTools(server: MCPServer): void { - // Wrap the real server so every registerTool() call gets a capability tag. 
- const proxy = makeCapabilityInjectingProxy(server); - +const REGISTRATION_ENTRIES: readonly RegistrationEntry[] = [ // Core browser tools - registerNavigateTool(proxy); - registerComputerTool(proxy); - registerReadPageTool(proxy); - registerFindTool(proxy); - registerFormInputTool(proxy); - registerJavascriptTool(proxy); - registerNetworkTool(proxy); + { tools: ['navigate'], register: registerNavigateTool }, + { tools: ['computer'], register: registerComputerTool }, + { tools: ['read_page'], register: registerReadPageTool }, + { tools: ['find'], register: registerFindTool }, + { tools: ['form_input'], register: registerFormInputTool }, + { tools: ['javascript_tool'], register: registerJavascriptTool }, + { tools: ['network'], register: registerNetworkTool }, // Phase 1: Page and content tools - registerPageReloadTool(proxy); - registerCookiesTool(proxy); - registerQueryDomTool(proxy); - registerPageContentTool(proxy); - registerWaitForTool(proxy); - registerStorageTool(proxy); + { tools: ['page_reload'], register: registerPageReloadTool }, + { tools: ['cookies'], register: registerCookiesTool }, + { tools: ['query_dom'], register: registerQueryDomTool }, + { tools: ['page_content'], register: registerPageContentTool }, + { tools: ['wait_for'], register: registerWaitForTool }, + { tools: ['storage'], register: registerStorageTool }, // Phase 2: Device emulation and settings - registerUserAgentTool(proxy); - registerGeolocationTool(proxy); - registerEmulateDeviceTool(proxy); - registerPagePdfTool(proxy); - registerPageScreenshotTool(proxy); - registerConsoleCaptureTool(proxy); - registerPerformanceMetricsTool(proxy); - registerRequestInterceptTool(proxy); + { tools: ['user_agent'], register: registerUserAgentTool }, + { tools: ['geolocation'], register: registerGeolocationTool }, + { tools: ['emulate_device'], register: registerEmulateDeviceTool }, + { tools: ['page_pdf'], register: registerPagePdfTool }, + { tools: ['page_screenshot'], register: 
registerPageScreenshotTool }, + { tools: ['console_capture'], register: registerConsoleCaptureTool }, + { tools: ['performance_metrics'], register: registerPerformanceMetricsTool }, + { tools: ['request_intercept'], register: registerRequestInterceptTool }, // Passive network capture (#896) — lite=headers-only, full=bodies-with-cap. // Coexists with request_intercept (which owns setRequestInterception(true)). - registerNetworkCaptureLiteTool(proxy); - registerNetworkCaptureFullTool(proxy); + { tools: ['network_capture_lite'], register: registerNetworkCaptureLiteTool }, + { tools: ['network_capture_full'], register: registerNetworkCaptureFullTool }, // Phase 3: Advanced tools - registerFileUploadTool(proxy); - registerHttpAuthTool(proxy); - registerDragDropTool(proxy); + { tools: ['file_upload'], register: registerFileUploadTool }, + { tools: ['http_auth'], register: registerHttpAuthTool }, + { tools: ['drag_drop'], register: registerDragDropTool }, - // UX improvement composite tools (reduce tool call count) - registerFillFormTool(proxy); + // UX improvement composite tools + { tools: ['fill_form'], register: registerFillFormTool }, // Tab management - registerTabsContextTool(proxy); - registerTabsCreateTool(proxy); - registerTabsCloseTool(proxy); - - // Worker management (parallel browser operations) - registerWorkerTool(proxy); - - // Orchestration tools (Chrome-Sisyphus workflow management) - registerOrchestrationTools(proxy); - - // Performance tools (P0 - eliminate agent spawn overhead & screenshot bottleneck) - registerBatchExecuteTool(proxy); - registerLightweightScrollTool(proxy); - registerBatchPaginateTool(proxy); - - // Smart Tools (reduce LLM wandering — response enrichment + composite tools) - registerInteractTool(proxy); - registerInspectTool(proxy); - - // Vision tools (vision-based element discovery #577) - registerVisionFindTool(proxy); - - // Memory tools (domain knowledge persistence) - registerMemoryTools(proxy); + { tools: ['tabs_context'], 
register: registerTabsContextTool }, + { tools: ['tabs_create'], register: registerTabsCreateTool }, + { tools: ['tabs_close'], register: registerTabsCloseTool }, + + // Worker management + { tools: ['worker'], register: registerWorkerTool }, + + // Orchestration tools (multi-tool registrar) + { + tools: [ + 'workflow_init', + 'workflow_status', + 'workflow_collect', + 'workflow_collect_partial', + 'workflow_cleanup', + 'worker_update', + 'worker_complete', + 'execute_plan', + ], + register: registerOrchestrationTools, + }, + + // Performance tools + { tools: ['batch_execute'], register: registerBatchExecuteTool }, + { tools: ['lightweight_scroll'], register: registerLightweightScrollTool }, + { tools: ['batch_paginate'], register: registerBatchPaginateTool }, + + // Smart Tools + { tools: ['interact'], register: registerInteractTool }, + { tools: ['inspect'], register: registerInspectTool }, + + // Vision tools + { tools: ['vision_find'], register: registerVisionFindTool }, + + // Memory tools + { tools: ['memory'], register: registerMemoryTools }, // Lifecycle tools - registerShutdownTool(proxy); - registerReapOrphansTool(proxy); - registerProfileStatusTool(proxy); - registerListProfilesTool(proxy); - - // AI Agent Continuity tools (#355, #356) - registerSessionSnapshotTool(proxy); - registerSessionResumeTool(proxy); - registerJournalTool(proxy); - registerOcReflectTool(proxy); - - // Self-healing tools (#347) - registerConnectionHealthTool(proxy); - - // AI Agent Continuity tools (#347 Phase 4) - registerCheckpointTool(proxy); - - // Web AI host connection tools (#523) - registerConnectTools(proxy); - - // Session recording tools (#572) - registerRecordingTools(proxy); - - // Crawl tools (#576) - registerCrawlTool(proxy); - registerCrawlSitemapTool(proxy); + { tools: ['oc_stop'], register: registerShutdownTool }, + { tools: ['oc_reap_orphans'], register: registerReapOrphansTool }, + { tools: ['oc_profile_status'], register: registerProfileStatusTool }, + { 
tools: ['list_profiles'], register: registerListProfilesTool }, + + // AI Agent Continuity tools + { tools: ['oc_session_snapshot'], register: registerSessionSnapshotTool }, + { tools: ['oc_session_resume'], register: registerSessionResumeTool }, + { tools: ['oc_journal'], register: registerJournalTool }, + + // Self-healing tools + { tools: ['oc_connection_health'], register: registerConnectionHealthTool }, + + // AI Agent Continuity (#347 Phase 4) + { tools: ['oc_checkpoint'], register: registerCheckpointTool }, + + // Web AI host connection tools (multi-tool registrar) + { + tools: [ + 'oc_get_connection_info', + 'oc_copy_to_clipboard', + 'oc_open_host_settings', + ], + register: registerConnectTools, + }, + + // Session recording tools (multi-tool registrar) + { + tools: [ + 'oc_recording_start', + 'oc_recording_stop', + 'oc_recording_list', + 'oc_recording_export', + ], + register: registerRecordingTools, + }, + + // Crawl tools + { tools: ['crawl'], register: registerCrawlTool }, + { tools: ['crawl_sitemap'], register: registerCrawlSitemapTool }, // Resumable host-driven crawl jobs (#886) - registerCrawlStartTool(proxy); - registerCrawlStatusTool(proxy); - registerCrawlCancelTool(proxy); - - // Natural language action API (#578) - registerActTool(proxy); + { tools: ['crawl_start'], register: registerCrawlStartTool }, + { tools: ['crawl_status'], register: registerCrawlStatusTool }, + { tools: ['crawl_cancel'], register: registerCrawlCancelTool }, - // Composite page-health check (#token-efficiency) - registerValidatePageTool(proxy); + // Natural language action API + { tools: ['act'], register: registerActTool }, - // Structured extraction (#571) - registerExtractDataTool(proxy); + // Composite page-health check + { tools: ['validate_page'], register: registerValidatePageTool }, - // 2FA tools (#575) - registerTotpGenerateTool(proxy); + // Structured extraction + { tools: ['extract_data'], register: registerExtractDataTool }, - // Outcome Contracts (#784) — 
single-call assertion verifier - registerOcAssertTool(proxy); + // 2FA tools + { tools: ['oc_totp_generate'], register: registerTotpGenerateTool }, - // Action schema normalizer (#1062) — no browser side effects. - registerOcNormalizeActionTool(server); + // Outcome Contracts + { tools: ['oc_assert'], register: registerOcAssertTool }, + { tools: ['oc_evidence_bundle'], register: registerOcEvidenceBundleTool }, - // Read-only anti-wandering diagnostics (#1060). - registerOcProgressStatusTool(server); + // Skill memory tools + { tools: ['oc_skill_record'], register: registerOcSkillRecordTool }, + { tools: ['oc_skill_recall'], register: registerOcSkillRecallTool }, - // Outcome Contracts (#792) — evidence bundle capture - registerOcEvidenceBundleTool(proxy); - - // Skill memory tools (#785) — record + recall - registerOcSkillRecordTool(proxy); - registerOcSkillRecallTool(proxy); // Skill replay (#856) — pilot-tier. Dynamically imported so no // `src/pilot/**` dependency is loaded unless --pilot and // OPENCHROME_SKILL_REPLAY=1 are both active. 
- if (isSkillReplayEnabled()) { - // eslint-disable-next-line @typescript-eslint/no-var-requires - const { registerOcSkillReplayTool } = require('./oc-skill-replay') as typeof import('./oc-skill-replay'); - registerOcSkillReplayTool(proxy); - } + { + tools: ['oc_skill_replay'], + register: (server) => { + if (!isSkillReplayEnabled()) return; + // eslint-disable-next-line @typescript-eslint/no-var-requires + const { registerOcSkillReplayTool } = require('./oc-skill-replay') as typeof import('./oc-skill-replay'); + registerOcSkillReplayTool(server); + }, + }, + + // Doctor report tool (#898) + { tools: ['oc_doctor_report'], register: registerOcDoctorReportTool }, + + // Performance insights two-step API (#846) + { + tools: ['oc_performance_insights', 'oc_performance_analyze'], + register: (server) => { + if (process.env.OPENCHROME_PERF_INSIGHTS !== '0') { + registerOcPerformanceInsightsTool(server); + registerOcPerformanceAnalyzeTool(server); + const sm = getSessionManager(); + const store = getPerfTraceStore(); + sm.addEventListener((event) => { + if (event.type === 'session:deleted' && event.sessionId) { + const removed = store.evictSession(event.sessionId); + if (removed > 0) { + console.error( + `[PerfInsights] Evicted ${removed} trace handle(s) for session ${event.sessionId}`, + ); + } + } + }); + } + }, + }, + + // Pilot-tier: user-supplied proxy hook (#874). 
+ { + tools: ['oc_proxy_hook'], + register: (server) => { + if (!isProxyHookEnabled()) return; + // eslint-disable-next-line @typescript-eslint/no-var-requires + const { registerOcProxyHookTool } = require('../pilot/proxy/hook') as typeof import('../pilot/proxy/hook'); + registerOcProxyHookTool(server); + }, + }, - // Async task ledger (#855) — persistent background task table - registerOcTaskStartTool(server); - registerOcTaskListTool(server); - registerOcTaskGetTool(server); - registerOcTaskCancelTool(server); - registerOcTaskWaitTool(server); - - // Reap any RUNNING task whose owner pid is no longer alive. Runs - // once at server start (issue #855 invariant #2) so a crash on a - // previous boot transitions orphaned rows to FAILED before new - // tasks are accepted. Best-effort: log and continue on failure. - setTaskStartupReapPromise( - getTaskStore() - .reapOrphans() - .then((reaped) => { - if (reaped.length > 0) { - console.error(`[task-ledger] Reaped ${reaped.length} orphaned task(s) at startup`); - } - }), - ); + // Deterministic observation / DevTools / portable context + { tools: ['oc_observe'], register: registerOcObserveTool }, + { tools: ['oc_devtools_url'], register: registerOcDevToolsUrlTool }, + { tools: ['oc_context_export', 'oc_context_import'], register: registerOcContextTools }, +]; - // Doctor report tool (#898) — read cached `openchrome doctor` output - registerOcDoctorReportTool(proxy); - // Performance insights two-step API (#846). - // TODO(#844): use isCoreFeatureEnabled() helper once #844 lands. - // Off-switch: when OPENCHROME_PERF_INSIGHTS=0 the two tools are NOT - // registered, preserving v1.10.4 tools/list parity (P2). Default on. - if (process.env.OPENCHROME_PERF_INSIGHTS !== '0') { - registerOcPerformanceInsightsTool(proxy); - registerOcPerformanceAnalyzeTool(proxy); - // Wire session-scoped trace eviction once. 
The store keeps an - // in-memory map of session_id -> trace_ids; on session deletion we - // delete every trace file owned by that session. - const sm = getSessionManager(); - const store = getPerfTraceStore(); - sm.addEventListener((event) => { - if (event.type === 'session:deleted' && event.sessionId) { - const removed = store.evictSession(event.sessionId); - if (removed > 0) { - console.error( - `[PerfInsights] Evicted ${removed} trace handle(s) for session ${event.sessionId}`, - ); - } +/** + * Register all tools, gated by the supplied category selection. + * + * Selection sources are resolved by the CLI layer (src/cli.ts) and + * collapsed into a single `CategorySelection` here — this function knows + * nothing about flags or env vars, so it is trivially testable. + * + * Default behavior (`selection` undefined or all fields unset) is the full + * surface, byte-identical to v1.11.0 — pinned by snapshot tests. + */ +export function registerAllTools( + server: MCPServer, + selection: CategorySelection = {}, +): void { + const enabled = resolveEnabledCategories(selection); + const disabledEntries: DisabledToolEntry[] = []; + + // Pre-validate every advertised tool name has a category assignment. We + // do this up front (rather than only during the proxy's registerTool + // dispatch) so a missing TOOL_TO_CATEGORY entry on a registrar that + // happens to be category-disabled still fails loud at startup, exactly + // as the v1.11.0 unconditional registration path did. + for (const entry of REGISTRATION_ENTRIES) { + for (const name of entry.tools) { + if (TOOL_TO_CATEGORY[name] === undefined) { + throw new Error( + `[Tools] Tool "${name}" has no category in TOOL_TO_CATEGORY. ` + + `Add it to src/tools/_shared/category.ts.`, + ); } - }); + } } - // Pilot-tier: user-supplied proxy hook (#874). 
Loaded lazily so v1.11 - // behaviour is byte-identical when the family is off — no code from - // `src/pilot/**` is reached unless both `--pilot` and - // `OPENCHROME_PROXY_HOOK=1` are set. - if (isProxyHookEnabled()) { - // eslint-disable-next-line @typescript-eslint/no-var-requires - const { registerOcProxyHookTool } = require('../pilot/proxy/hook') as typeof import('../pilot/proxy/hook'); - registerOcProxyHookTool(proxy); - } - // oc_observe (#866) — deterministic actionable-element enumeration - registerOcObserveTool(proxy); - // DevTools URL tool (#860) — gated by OPENCHROME_EXPOSE_DEVTOOLS_URL !== '0' - registerOcDevToolsUrlTool(proxy); - // Portable context envelope (#873) — oc_context_export / oc_context_import - registerOcContextTools(proxy); - - // Run harness (#1021) — opt-in tool-call event ledger. - if (isRunHarnessEnabled()) { - registerRunHarnessTools(server); + + // Per-tool filtering: mixed-category registrars still run behind a proxy, + // but registrars whose entire advertised surface is disabled are skipped + // before their module-level side effects can fire (for example optional + // pilot hooks when the pilot category is disabled). + const filteredServer = makeFilteredServer(server, enabled, disabledEntries); + for (const entry of REGISTRATION_ENTRIES) { + const enabledToolNames = entry.tools.filter((name) => enabled.has(TOOL_TO_CATEGORY[name])); + if (enabledToolNames.length === 0) { + for (const name of entry.tools) { + const category = TOOL_TO_CATEGORY[name]; + disabledEntries.push({ + name, + category, + hint: buildDisabledHint(category), + }); + } + continue; + } + entry.register(filteredServer); } - // Goal-level TaskRun lifecycle (#1039) — opt-in, no effect on existing tools. - registerTaskRunTools(server); + // Publish the disabled-tools snapshot to the sidecar resource so agents + // can introspect what was dropped. 
Empty list when no flags are set — + // that is the load-bearing default and forms part of the v1.11.0 parity + // contract. + setDisabledToolsSnapshot(disabledEntries); + + const enabledCats = Array.from(enabled).join(', '); + const skipped = disabledEntries.length; + console.error( + `[Tools] Registered ${server.getToolNames().length} tools ` + + `(categories: ${enabledCats}; skipped: ${skipped})`, + ); - console.error(`[Tools] Registered ${server.getToolNames().length} tools`); } diff --git a/tests/tools/category-resolution.test.ts b/tests/tools/category-resolution.test.ts new file mode 100644 index 000000000..8023170f8 --- /dev/null +++ b/tests/tools/category-resolution.test.ts @@ -0,0 +1,326 @@ +/// +/** + * Resolution-rules tests for tool category selection (#847). + * + * Pins the four rules from src/tools/_shared/category.ts::resolveEnabledCategories: + * 1. slim → SLIM_CATEGORIES + always-on + * 2. enabled subset → only those + always-on + * 3. disabled → those subtracted (but always-on still wins) + * 4. ALWAYS_ON_CATEGORIES (reliability + observe) is unconditional + * + * The order check matters: the resolver re-emits in canonical order so + * snapshot consumers (the disabled-tools resource, the registration snapshot + * test) get a stable serialization regardless of input order. + * + * Also covers per-tool registration filtering for registrars that emit + * tools across multiple categories (regression for PR #944 / Codex P1). + */ + +// ─── Mocks (mirrors tests/tools/registration-default.snapshot.test.ts) ────── +// Required because the per-tool filtering regression suite below constructs +// a real MCPServer and invokes registerAllTools, which transitively touches +// the session manager and chrome launcher singletons. 
+ +jest.mock('../../src/session-manager', () => ({ + getSessionManager: jest.fn(() => ({ + getAllSessionInfos: jest.fn().mockReturnValue([]), + getOrCreateSession: jest.fn().mockResolvedValue({}), + cleanupAllSessions: jest.fn().mockResolvedValue(undefined), + deleteSession: jest.fn().mockResolvedValue(undefined), + addEventListener: jest.fn(), + })), +})); + +jest.mock('../../src/chrome/launcher', () => ({ + getChromeLauncher: jest.fn(() => ({ + isConnected: jest.fn().mockReturnValue(false), + getProfileState: jest.fn().mockReturnValue({ + type: 'temp', + extensionsAvailable: false, + }), + })), +})); + +import { MCPServer } from '../../src/mcp-server'; +import { registerAllTools } from '../../src/tools'; +import { + getDisabledToolsSnapshot, + setDisabledToolsSnapshot, +} from '../../src/resources/tools-disabled'; +import { + ALL_CATEGORIES, + ALWAYS_ON_CATEGORIES, + parseCategoryCsv, + resolveEnabledCategories, + SLIM_CATEGORIES, + ToolCategory, +} from '../../src/tools/_shared/category'; + +describe('resolveEnabledCategories', () => { + test('default selection (no flags) returns the full canonical set', () => { + const result = resolveEnabledCategories(); + expect(Array.from(result)).toEqual(Array.from(ALL_CATEGORIES)); + }); + + test('default selection (empty object) returns the full canonical set', () => { + const result = resolveEnabledCategories({}); + expect(Array.from(result)).toEqual(Array.from(ALL_CATEGORIES)); + }); + + describe('rule 1: slim mode', () => { + test('slim → SLIM_CATEGORIES + always-on, in canonical order', () => { + const result = Array.from(resolveEnabledCategories({ slim: true })); + // Every slim category must be present. + for (const cat of SLIM_CATEGORIES) { + expect(result).toContain(cat); + } + // Every always-on category must be present. + for (const cat of ALWAYS_ON_CATEGORIES) { + expect(result).toContain(cat); + } + // Nothing else should leak in. 
+ const expected = new Set([ + ...SLIM_CATEGORIES, + ...ALWAYS_ON_CATEGORIES, + ]); + expect(result.length).toBe(expected.size); + }); + + test('slim wins over enabled when both supplied', () => { + const result = Array.from( + resolveEnabledCategories({ + slim: true, + enabled: ['vision', 'crawl'], + }), + ); + // vision/crawl must NOT appear — slim path is taken. + expect(result).not.toContain('vision'); + expect(result).not.toContain('crawl'); + // navigation/interact/inspect (slim) MUST appear. + expect(result).toContain('navigation'); + expect(result).toContain('interact'); + expect(result).toContain('inspect'); + }); + + test('emitted order matches ALL_CATEGORIES ordering', () => { + const result = Array.from(resolveEnabledCategories({ slim: true })); + const indexes = result.map((cat) => ALL_CATEGORIES.indexOf(cat)); + const sorted = [...indexes].sort((a, b) => a - b); + expect(indexes).toEqual(sorted); + }); + }); + + describe('rule 2: enable subset', () => { + test('enabled subset → only those + always-on', () => { + const result = Array.from( + resolveEnabledCategories({ enabled: ['vision', 'crawl'] }), + ); + const expected = new Set([ + 'vision', + 'crawl', + ...ALWAYS_ON_CATEGORIES, + ]); + expect(new Set(result)).toEqual(expected); + }); + + test('enabled = [] is treated as default (full set)', () => { + const result = resolveEnabledCategories({ enabled: [] }); + expect(Array.from(result)).toEqual(Array.from(ALL_CATEGORIES)); + }); + }); + + describe('rule 3: disable subtracts', () => { + test('disabled removes specified categories from the full set', () => { + const result = resolveEnabledCategories({ + disabled: ['vision', 'crawl', 'memory'], + }); + expect(result.has('vision')).toBe(false); + expect(result.has('crawl')).toBe(false); + expect(result.has('memory')).toBe(false); + // Sibling categories remain. 
+ expect(result.has('navigation')).toBe(true); + expect(result.has('tabs')).toBe(true); + }); + + test('disabled is applied AFTER enabled', () => { + const result = resolveEnabledCategories({ + enabled: ['vision', 'crawl', 'memory'], + disabled: ['memory'], + }); + expect(result.has('vision')).toBe(true); + expect(result.has('crawl')).toBe(true); + expect(result.has('memory')).toBe(false); + }); + }); + + describe('rule 4: always-on cannot be disabled', () => { + test('reliability + observe survive an explicit --disable-categories', () => { + const result = resolveEnabledCategories({ + disabled: ['reliability', 'observe'], + }); + for (const cat of ALWAYS_ON_CATEGORIES) { + expect(result.has(cat)).toBe(true); + } + }); + + test('reliability + observe survive an --enable-categories that omits them', () => { + const result = resolveEnabledCategories({ + enabled: ['vision'], + }); + for (const cat of ALWAYS_ON_CATEGORIES) { + expect(result.has(cat)).toBe(true); + } + }); + + test('reliability + observe survive --slim + --disable-categories combo', () => { + const result = resolveEnabledCategories({ + slim: true, + disabled: ['reliability', 'observe', 'navigation'], + }); + // Always-on wins. + expect(result.has('reliability')).toBe(true); + expect(result.has('observe')).toBe(true); + // Slim minus navigation is honored. 
+ expect(result.has('navigation')).toBe(false); + expect(result.has('interact')).toBe(true); + expect(result.has('inspect')).toBe(true); + }); + }); +}); + +describe('parseCategoryCsv', () => { + test('parses well-formed csv', () => { + expect(parseCategoryCsv('vision,crawl,memory', 'test')).toEqual([ + 'vision', + 'crawl', + 'memory', + ]); + }); + + test('trims whitespace and skips empty segments', () => { + expect(parseCategoryCsv(' vision , , crawl ', 'test')).toEqual([ + 'vision', + 'crawl', + ]); + }); + + test('deduplicates while preserving first-seen order', () => { + expect(parseCategoryCsv('vision,crawl,vision', 'test')).toEqual([ + 'vision', + 'crawl', + ]); + }); + + test('throws with the source label on unknown category', () => { + expect(() => parseCategoryCsv('vision,bogus', '--enable-categories')).toThrow( + /\[--enable-categories\] Unknown tool category "bogus"/, + ); + }); + + test('returns [] for empty input', () => { + expect(parseCategoryCsv('', 'test')).toEqual([]); + expect(parseCategoryCsv(' ', 'test')).toEqual([]); + }); +}); + +// ─── Per-tool filter regression (PR #944 / Codex P1) ──────────────────────── +// +// Before #944 the registration dispatch was all-or-nothing per registrar: +// if ANY tool produced by a registrar belonged to a disabled category, the +// ENTIRE registrar was skipped. The orchestration registrar emits +// `worker_update` (categorized `tabs`) alongside the `workflow_*` family +// (categorized `workflow`), so --disable-categories=tabs unintentionally +// removed all workflow_* tools. These tests pin the fixed behavior: every +// individual tool is gated by its own category, not its registrar's union. + +describe('registerAllTools — per-tool filter on mixed-category registrars', () => { + let server: MCPServer; + + beforeEach(() => { + // Reset the sidecar disabled-tools snapshot so cross-test state from + // the registration-default snapshot suite (or prior cases here) does + // not bleed in. 
+ setDisabledToolsSnapshot([]); + server = new MCPServer(); + }); + + test('--disable-categories=tabs preserves orchestration workflow_* tools', () => { + registerAllTools(server, { disabled: ['tabs'] }); + const names = new Set(server.getToolNames()); + + // worker_update is the only orchestration tool in the `tabs` category + // and MUST be dropped. + expect(names.has('worker_update')).toBe(false); + // Sibling `tabs` tools also gone. + expect(names.has('worker')).toBe(false); + expect(names.has('tabs_create')).toBe(false); + expect(names.has('tabs_close')).toBe(false); + expect(names.has('tabs_context')).toBe(false); + + // Workflow-category tools live in the SAME registrar + // (registerOrchestrationTools) but must SURVIVE the tabs disable. + expect(names.has('workflow_init')).toBe(true); + expect(names.has('workflow_status')).toBe(true); + expect(names.has('workflow_collect')).toBe(true); + expect(names.has('workflow_collect_partial')).toBe(true); + expect(names.has('workflow_cleanup')).toBe(true); + expect(names.has('worker_complete')).toBe(true); + expect(names.has('execute_plan')).toBe(true); + }); + + test('--disable-categories=tabs,workflow drops both orchestration slices', () => { + registerAllTools(server, { disabled: ['tabs', 'workflow'] }); + const names = new Set(server.getToolNames()); + + // tabs slice + expect(names.has('worker_update')).toBe(false); + expect(names.has('worker')).toBe(false); + expect(names.has('tabs_create')).toBe(false); + + // workflow slice (same orchestration registrar) + expect(names.has('workflow_init')).toBe(false); + expect(names.has('workflow_status')).toBe(false); + expect(names.has('worker_complete')).toBe(false); + expect(names.has('execute_plan')).toBe(false); + // workflow-category tools outside the orchestration registrar also gone. 
+ expect(names.has('batch_execute')).toBe(false); + expect(names.has('batch_paginate')).toBe(false); + expect(names.has('lightweight_scroll')).toBe(false); + + // Sibling categories untouched. + expect(names.has('navigate')).toBe(true); + expect(names.has('vision_find')).toBe(true); + }); + + test('disabled-tools snapshot lists per-tool category for mixed registrars', () => { + registerAllTools(server, { disabled: ['tabs'] }); + const snap = getDisabledToolsSnapshot(); + const byName = new Map(snap.tools.map((t) => [t.name, t])); + + // worker_update is categorized `tabs` and must surface as disabled + // with its OWN category, not the registrar's union. + const workerUpdate = byName.get('worker_update'); + expect(workerUpdate).toBeDefined(); + expect(workerUpdate?.category).toBe('tabs'); + expect(workerUpdate?.hint).toMatch(/--enable-categories=/); + + // workflow_* tools from the SAME registrar are NOT disabled and must + // not appear in the disabled snapshot. + expect(byName.has('workflow_init')).toBe(false); + expect(byName.has('worker_complete')).toBe(false); + expect(byName.has('execute_plan')).toBe(false); + }); + + test('fully disabled registrars are skipped but still recorded in disabled snapshot', () => { + registerAllTools(server, { enabled: ['navigation'] }); + const names = new Set(server.getToolNames()); + const snap = getDisabledToolsSnapshot(); + const byName = new Map(snap.tools.map((t) => [t.name, t])); + + // oc_proxy_hook is a pilot-only registrar with optional side effects. + // A narrow category allow-list must exclude it before invoking the + // registrar, while still documenting the skipped tool for operators. 
+ expect(names.has('oc_proxy_hook')).toBe(false); + expect(byName.get('oc_proxy_hook')?.category).toBe('pilot'); + }); +}); diff --git a/tests/tools/registration-default.snapshot.test.ts b/tests/tools/registration-default.snapshot.test.ts new file mode 100644 index 000000000..d87da4b76 --- /dev/null +++ b/tests/tools/registration-default.snapshot.test.ts @@ -0,0 +1,292 @@ +/// +/** + * Default-registration snapshot test (#847). + * + * Pins the current baseline tools/list payload: with NO category flags or env + * vars set, registerAllTools() must produce the exact same set of tool names + * (and the same count) as the current default registration surface — categorization is a P2 zero-impact + * refactor and any drift is a regression. + * + * Strategy: + * - Construct an MCPServer with sessionManager mocked out (the manager has + * side effects we don't want in a unit test). + * - Call registerAllTools(server) with no selection argument. + * - Compare server.getToolNames() (sorted) against the baseline below. + * + * Updating the baseline: + * When you legitimately add or remove a tool from src/tools/index.ts you + * MUST also: + * 1. Update TOOL_TO_CATEGORY in src/tools/_shared/category.ts (lint + * script enforces this). + * 2. Update EXPECTED_DEFAULT_TOOLS below. + * 3. Confirm the addition is intentional in the PR description. + * The double-edit is a feature: it forces a human to acknowledge surface + * changes that small-context model deployments care about. 
+ */ + +// ─── Mocks (mirrors tests/tools/journal.test.ts) ──────────────────────────── + +// Stub for src/session-manager: getSessionManager() returns an object whose +// getAllSessionInfos() yields [], whose getOrCreateSession() resolves to {}, +// whose cleanupAllSessions()/deleteSession() resolve to undefined, and whose +// addEventListener() is a bare jest.fn(). +jest.mock('../../src/session-manager', () => ({ + getSessionManager: jest.fn(() => ({ + getAllSessionInfos: jest.fn().mockReturnValue([]), + getOrCreateSession: jest.fn().mockResolvedValue({}), + cleanupAllSessions: jest.fn().mockResolvedValue(undefined), + deleteSession: jest.fn().mockResolvedValue(undefined), + addEventListener: jest.fn(), + })), +})); + +// Stub for src/chrome/launcher: getChromeLauncher() returns an object that +// reports isConnected() === false and a profile state of type 'temp' with +// extensionsAvailable: false. +jest.mock('../../src/chrome/launcher', () => ({ + getChromeLauncher: jest.fn(() => ({ + isConnected: jest.fn().mockReturnValue(false), + getProfileState: jest.fn().mockReturnValue({ + type: 'temp', + extensionsAvailable: false, + }), + })), +})); + +import { MCPServer } from '../../src/mcp-server'; +import { registerAllTools } from '../../src/tools'; +import { + resolveEnabledCategories, + SLIM_CATEGORIES, + ALWAYS_ON_CATEGORIES, + TOOL_TO_CATEGORY, +} from '../../src/tools/_shared/category'; +import { + getDisabledToolsSnapshot, + setDisabledToolsSnapshot, +} from '../../src/resources/tools-disabled'; + +// ─── Default tool baseline (sorted) ───────────────────────────────────────── +// +// Sourced from src/tools/index.ts REGISTRATION_ENTRIES — every name appearing +// in any `tools: [...]` array. Sorted alphabetically for stable diff output +// when a tool is added/removed.
+ +/** + * Tool names registerAllTools(server) is expected to register when it is + * called without a selection argument. + * + * The default-selection test compares this list, as written, against + * `server.getToolNames().slice().sort()`, so entries must stay in the order + * that a plain `Array.prototype.sort()` produces. + */ +const EXPECTED_DEFAULT_TOOLS: readonly string[] = [ + 'act', + 'batch_execute', + 'batch_paginate', + 'computer', + 'console_capture', + 'cookies', + 'crawl', + 'crawl_cancel', + 'crawl_sitemap', + 'crawl_start', + 'crawl_status', + 'drag_drop', + 'emulate_device', + 'execute_plan', + 'extract_data', + 'file_upload', + 'fill_form', + 'find', + 'form_input', + 'geolocation', + 'http_auth', + 'inspect', + 'interact', + 'javascript_tool', + 'lightweight_scroll', + 'list_profiles', + 'memory', + 'navigate', + 'network', + 'network_capture_full', + 'network_capture_lite', + 'oc_assert', + 'oc_checkpoint', + 'oc_connection_health', + 'oc_context_export', + 'oc_context_import', + 'oc_copy_to_clipboard', + 'oc_devtools_url', + 'oc_doctor_report', + 'oc_evidence_bundle', + 'oc_get_connection_info', + 'oc_journal', + 'oc_observe', + 'oc_open_host_settings', + 'oc_performance_analyze', + 'oc_performance_insights', + 'oc_profile_status', + 'oc_reap_orphans', + 'oc_recording_export', + 'oc_recording_list', + 'oc_recording_start', + 'oc_recording_stop', + 'oc_session_resume', + 'oc_session_snapshot', + 'oc_skill_recall', + 'oc_skill_record', + 'oc_stop', + 'oc_totp_generate', + 'page_content', + 'page_pdf', + 'page_reload', + 'page_screenshot', + 'performance_metrics', + 'query_dom', + 'read_page', + 'request_intercept', + 'storage', + 'tabs_close', + 'tabs_context', + 'tabs_create', + 'user_agent', + 'validate_page', + 'vision_find', + 'wait_for', + 'worker', + 'worker_complete', + 'worker_update', + 'workflow_cleanup', + 'workflow_collect', + 'workflow_collect_partial', + 'workflow_init', + 'workflow_status', +]; + +// ─── Tests ────────────────────────────────────────────────────────────────── + +describe('registerAllTools — default selection (current parity)', () => { + let server: MCPServer; + + beforeEach(() => { + // Reset the disabled-tools snapshot module-level state so a prior test + // does not bleed into this one.
+ setDisabledToolsSnapshot([]); + server = new MCPServer(); + }); + + test('produces the current tool surface byte-for-byte', () => { + registerAllTools(server); + const sortedNames = [...server.getToolNames()].sort(); + expect(sortedNames).toEqual([...EXPECTED_DEFAULT_TOOLS]); + }); + + test('default surface size matches the current baseline count', () => { + registerAllTools(server); + const registeredCount = server.getToolNames().length; + expect(registeredCount).toBe(EXPECTED_DEFAULT_TOOLS.length); + }); + + test('disabled-tools snapshot is empty by default', () => { + registerAllTools(server); + expect(getDisabledToolsSnapshot().tools).toEqual([]); + }); + + test('every default tool has a category in TOOL_TO_CATEGORY', () => { + // Runtime counterpart of scripts/lint-tool-categories.mjs: the lint + // script flags a missing category pre-merge, while this test flags it + // pre-commit for developers who forget to run the lint. + EXPECTED_DEFAULT_TOOLS.forEach((toolName) => { + expect(TOOL_TO_CATEGORY[toolName]).toBeDefined(); + }); + }); +}); + +describe('registerAllTools — slim selection', () => { + let server: MCPServer; + + beforeEach(() => { + setDisabledToolsSnapshot([]); + server = new MCPServer(); + }); + + test('--slim registers only slim + always-on category tools', () => { + registerAllTools(server, { slim: true }); + const slimCategories = resolveEnabledCategories({ slim: true }); + // A baseline tool belongs to the slim surface when its category is enabled. + const inSlimSurface = (toolName: string) => + slimCategories.has(TOOL_TO_CATEGORY[toolName]); + const expectedNames = EXPECTED_DEFAULT_TOOLS.filter(inSlimSurface).sort(); + const registeredNames = [...server.getToolNames()].sort(); + expect(registeredNames).toEqual(expectedNames); + }); + + test('always-on tools survive --slim (reliability + observe)', () => { + registerAllTools(server, { slim: true }); + const names = new Set(server.getToolNames()); + // Always-on category exemplars.
+ expect(names.has('oc_stop')).toBe(true); // reliability + expect(names.has('validate_page')).toBe(true); // reliability + expect(names.has('console_capture')).toBe(true); // observe + expect(names.has('oc_journal')).toBe(true); // observe + }); + + test('slim drops at least one tool per non-slim, non-always-on category', () => { + registerAllTools(server, { slim: true }); + const enabled = resolveEnabledCategories({ slim: true }); + const slimAndAlwaysOn = new Set([ + ...SLIM_CATEGORIES, + ...ALWAYS_ON_CATEGORIES, + ]); + // Sanity: every enabled category is in the slim+always-on set. + for (const cat of enabled) { + expect(slimAndAlwaysOn.has(cat)).toBe(true); + } + // Per-category check promised by the test title: every baseline category + // outside slim+always-on must have lost at least one of its tools. A + // global size comparison alone would not notice a single category whose + // tools all survived --slim. + const registered = new Set(server.getToolNames()); + const outsideSlim = EXPECTED_DEFAULT_TOOLS.filter( + (name) => !slimAndAlwaysOn.has(TOOL_TO_CATEGORY[name]), + ); + const outsideCategories = new Set( + outsideSlim.map((name) => TOOL_TO_CATEGORY[name]), + ); + for (const cat of outsideCategories) { + const dropped = outsideSlim.filter( + (name) => TOOL_TO_CATEGORY[name] === cat && !registered.has(name), + ); + expect(dropped.length).toBeGreaterThan(0); + } + // And the surface is strictly smaller than the default. + expect(server.getToolNames().length).toBeLessThan( + EXPECTED_DEFAULT_TOOLS.length, + ); + }); + + test('disabled-tools snapshot lists every excluded tool with a restart hint', () => { + registerAllTools(server, { slim: true }); + const snap = getDisabledToolsSnapshot(); + expect(snap.tools.length).toBeGreaterThan(0); + for (const entry of snap.tools) { + expect(entry.name).toBeTruthy(); + expect(entry.category).toBeTruthy(); + // Restart hint must contain the exact flag text an operator can copy. + expect(entry.hint).toMatch(/--enable-categories=/); + } + // No always-on tool should appear as disabled. + const disabledNames = new Set(snap.tools.map((t) => t.name)); + expect(disabledNames.has('oc_stop')).toBe(false); + expect(disabledNames.has('console_capture')).toBe(false); + }); +}); + +describe('registerAllTools — enable / disable subsets', () => { + let server: MCPServer; + + beforeEach(() => { + setDisabledToolsSnapshot([]); + server = new MCPServer(); + }); + + test('--enable-categories=vision registers vision_find + always-on tools only', () => { + registerAllTools(server, { enabled: ['vision'] }); + const names = new Set(server.getToolNames()); + expect(names.has('vision_find')).toBe(true); + // Always-on still present.
+ expect(names.has('oc_stop')).toBe(true); + expect(names.has('console_capture')).toBe(true); + // Tools from out-of-scope categories are gone. + for (const tool of ['navigate', 'crawl']) { + expect(names.has(tool)).toBe(false); + } + }); + + test('--disable-categories=crawl drops crawl tools and keeps everything else', () => { + registerAllTools(server, { disabled: ['crawl'] }); + const registered = new Set(server.getToolNames()); + for (const crawlTool of ['crawl', 'crawl_sitemap']) { + expect(registered.has(crawlTool)).toBe(false); + } + // Tools from sibling categories are still registered. + for (const sibling of ['navigate', 'vision_find']) { + expect(registered.has(sibling)).toBe(true); + } + }); + + test('--disable-categories=reliability,observe is a no-op on always-on tools', () => { + registerAllTools(server, { disabled: ['reliability', 'observe'] }); + const registered = new Set(server.getToolNames()); + // Two reliability tools followed by two observe tools. + const alwaysOnTools = [ + 'oc_stop', + 'validate_page', + 'console_capture', + 'oc_journal', + ]; + for (const tool of alwaysOnTools) { + expect(registered.has(tool)).toBe(true); + } + }); +});