Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion scripts/lint-tool-schemas.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@ const TOOL_NAME_RE = /^[a-z][a-z0-9_]{2,63}$/;

const BASELINE_PATH = resolve(__dirname, 'lint-tool-schemas.baseline.json');

/**
 * Drains standard input and returns everything read as a single string.
 *
 * The stream is switched to UTF-8 decoding before iteration, so chunks are
 * consumed as text.
 *
 * @returns {Promise<string>} The concatenated stdin contents ('' if nothing was read).
 */
async function readStdin() {
  process.stdin.setEncoding('utf8');
  const pieces = [];
  for await (const piece of process.stdin) {
    pieces.push(piece);
  }
  return pieces.join('');
}


// ── parse CLI args ─────────────────────────────────────────────────────────
const args = process.argv.slice(2);
const updateBaseline = args.includes('--update-baseline');
Expand All @@ -46,7 +54,7 @@ if (!inputFile) {
// ── load tools list ────────────────────────────────────────────────────────
let tools;
try {
tools = JSON.parse(readFileSync(inputFile === '-' ? 0 : resolve(inputFile), 'utf8'));
tools = JSON.parse(inputFile === '-' ? await readStdin() : readFileSync(resolve(inputFile), 'utf8'));
} catch (err) {
process.stderr.write(`Error reading tools list: ${err.message}\n`);
process.exit(2);
Expand Down
6 changes: 4 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,10 @@ program
const server = new MCPServer(undefined, { initialToolTier: 3 });
registerAllTools(server);
const manifest = server.getToolManifest();
process.stdout.write(JSON.stringify(manifest.tools) + '\n');
process.exit(0);
await new Promise<void>((resolve) => {
process.stdout.write(JSON.stringify(manifest.tools) + '\n', () => resolve());
});
return;
}

let port = parseInt(options.port, 10);
Expand Down
126 changes: 117 additions & 9 deletions src/mcp-server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,62 @@ const SKIP_RECORDING_TOOLS = new Set([
* Detect if an error is a Chrome/CDP connection error that may be recoverable
* by reconnecting to the browser.
*/
/**
 * Estimates how many output tokens a payload of `chars` characters represents.
 *
 * This is a heuristic only: a flat four-characters-per-token ratio is used so
 * that no provider-specific tokenizer dependency is required. The quotient is
 * rounded up, and negative results are clamped to 0.
 *
 * @param chars - Character count of the payload.
 * @returns The estimated token count.
 */
export function estimateOutputTokensFromChars(chars: number): number {
  const charsPerToken = 4;
  const roundedUp = Math.ceil(chars / charsPerToken);
  return Math.max(0, roundedUp);
}

/**
 * Serializes a tool result into the string used for size and token measurements.
 *
 * JSON serialization is tried first. If it throws (for example on circular
 * references or BigInt values) or produces no string, the function falls back
 * to concatenating the `text` (or, failing that, `data`) of every content item.
 * An empty string is returned when there is no content array to fall back on.
 *
 * @param result - The tool result to measure.
 * @returns A string form of the result; never `undefined`.
 */
function stringifyResultPayload(result: MCPResult): string {
  try {
    // JSON.stringify yields undefined (not a string) when the value has no JSON
    // representation, e.g. a toJSON() that returns undefined. Treat that the same
    // as a thrown error so the declared `string` return type actually holds.
    const json: unknown = JSON.stringify(result);
    if (typeof json === 'string') return json;
  } catch {
    // Not serializable; use the content-based fallback below.
  }
  if (!Array.isArray(result?.content)) return '';
  // Optional chaining keeps a null/undefined entry from throwing on this
  // best-effort path; such entries contribute nothing to the output.
  return result.content.map((item) => item?.text ?? item?.data ?? '').join('');
}

/** Cache statuses accepted as label values; any other status is reported as 'UNKNOWN'. */
const CACHE_STATUS_LABELS = new Set(['HIT', 'MISS', 'BYPASS', 'ERROR']);
/** Accepted key-version labels: one to three digits with an optional `v`/`V` prefix. */
const CACHE_KEY_VERSION_LABEL_RE = /^v?\d{1,3}$/i;

/**
 * Normalizes a raw cache status to a bounded label.
 *
 * @param raw - Status string as reported by a tool result.
 * @returns The trimmed, upper-cased status when it is one of the allowed
 *   labels, otherwise 'UNKNOWN'.
 */
function normalizeCacheStatusLabel(raw: string): string {
  const normalized = raw.trim().toUpperCase();
  return CACHE_STATUS_LABELS.has(normalized) ? normalized : 'UNKNOWN';
}

/**
 * Normalizes a raw cache key version to a bounded label.
 *
 * @param raw - Key version of any type; it is stringified and trimmed.
 * @returns 'unknown' for missing or blank values, the trimmed value when it
 *   matches the accepted version pattern, otherwise 'other'.
 */
function normalizeCacheKeyVersionLabel(raw: unknown): string {
  if (raw === undefined || raw === null) return 'unknown';
  const normalized = String(raw).trim();
  if (normalized === '') return 'unknown';
  return CACHE_KEY_VERSION_LABEL_RE.test(normalized) ? normalized : 'other';
}

/**
 * Extracts a normalized cache status and key version from a tool result.
 *
 * Lookup order:
 * 1. The first non-nullish of the top-level `_cache`, `cache`, `cacheStatus`
 *    fields. A string is taken as the status (key version 'unknown'). An
 *    object is read via its `status`/`cacheStatus` string and its
 *    `keyVersion`/`version`.
 * 2. `structuredContent.cacheStatus` with `structuredContent.cacheKeyVersion`.
 *
 * @param result - The tool result to inspect.
 * @returns The normalized labels, or `null` when no cache status is present.
 */
export function extractCacheStatus(result: MCPResult): { status: string; keyVersion: string } | null {
  const fields = result as Record<string, unknown>;
  const raw = fields._cache ?? fields.cache ?? fields.cacheStatus;
  if (typeof raw === 'string') {
    return { status: normalizeCacheStatusLabel(raw), keyVersion: 'unknown' };
  }
  if (raw && typeof raw === 'object') {
    const obj = raw as Record<string, unknown>;
    const status = typeof obj.status === 'string'
      ? obj.status
      : typeof obj.cacheStatus === 'string' ? obj.cacheStatus : null;
    if (status) {
      return {
        status: normalizeCacheStatusLabel(status),
        keyVersion: normalizeCacheKeyVersionLabel(obj.keyVersion ?? obj.version ?? 'unknown'),
      };
    }
    // Deliberately no early `return null` here: a mixed-shape result such as
    // `cache: { keyVersion: "v2" }` plus `structuredContent.cacheStatus: "miss"`
    // still carries a valid status, so continue to the structuredContent fallback
    // instead of silently dropping the observation.
  }
  const structured = result.structuredContent;
  if (structured && typeof structured.cacheStatus === 'string') {
    return {
      status: normalizeCacheStatusLabel(structured.cacheStatus),
      keyVersion: normalizeCacheKeyVersionLabel(structured.cacheKeyVersion),
    };
  }
  return null;
}

export function isConnectionError(error: unknown): boolean {
if (error instanceof OpenChromeConnectionError) return true;
const message = formatError(error);
Expand Down Expand Up @@ -1236,7 +1292,7 @@ export class MCPServer {
} catch {
// best-effort
}
return {
const deniedResult: MCPResult = {
content: [
{
type: 'text',
Expand All @@ -1245,6 +1301,8 @@ export class MCPServer {
],
isError: true,
};
this.recordToolOutputObservability(toolName, deniedResult);
return deniedResult;
}
}

Expand All @@ -1271,7 +1329,7 @@ export class MCPServer {
} catch {
// best-effort
}
return {
const forbiddenResult: MCPResult = {
content: [
{
type: 'text',
Expand All @@ -1280,6 +1338,8 @@ export class MCPServer {
],
isError: true,
};
this.recordToolOutputObservability(toolName, forbiddenResult);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Bound tool labels before recording denied-call output metrics

`recordToolOutputObservability` is called on the scope-denied path before verifying that `toolName` is a registered tool, so a read-only API key can send arbitrary `params.name` values and create unbounded new time series in `openchrome_tool_output_bytes` / `openchrome_tool_estimated_tokens` (and potentially cache-status metrics). This introduces a cardinality-amplification vector from user input and can degrade Prometheus memory/query performance; normalize unknown names to a fixed bucket (e.g. `unknown`) or only emit these metrics after successful tool lookup.

Useful? React with 👍 / 👎.

return forbiddenResult;
}

// Handle the expand_tools meta-tool before normal tool lookup
Expand All @@ -1305,9 +1365,11 @@ export class MCPServer {
text += `\n\nNewly available tools:\n${JSON.stringify(newTools, null, 2)}\n\nYou can now call these tools directly by name.`;
}

return {
const expandResult: MCPResult = {
content: [{ type: 'text', text }],
};
this.recordToolOutputObservability(toolName, expandResult);
return expandResult;
}

const tool = this.tools.get(toolName);
Expand All @@ -1320,10 +1382,12 @@ export class MCPServer {
if (requiredFields && requiredFields.length > 0) {
const missing = requiredFields.filter((field) => !(field in toolArgs) || toolArgs[field] === undefined || toolArgs[field] === null);
if (missing.length > 0) {
return {
const missingArgsResult: MCPResult = {
content: [{ type: 'text', text: `Error: Missing required argument(s): ${missing.join(', ')}` }],
isError: true,
};
this.recordToolOutputObservability(toolName, missingArgsResult);
return missingArgsResult;
}
}

Expand Down Expand Up @@ -1392,7 +1456,7 @@ export class MCPServer {
if (!rateResult.allowed) {
console.error(`[MCPServer] Rate limit exceeded for session ${sessionId}, retry after ${rateResult.retryAfterSec}s`);
try { getMetricsCollector().inc('openchrome_rate_limit_rejections_total', withTenantLabel({ tool: toolName })); } catch { /* best-effort */ }
return {
const rateLimitResult: MCPResult = {
content: [
{
type: 'text',
Expand All @@ -1401,6 +1465,8 @@ export class MCPServer {
],
isError: true,
};
this.recordToolOutputObservability(toolName, rateLimitResult);
return rateLimitResult;
}
}

Expand Down Expand Up @@ -1440,7 +1506,7 @@ export class MCPServer {
});

if (reconnectResult !== 'reconnected') {
return {
const reconnectResultPayload: MCPResult = {
content: [
{
type: 'text',
Expand All @@ -1449,6 +1515,8 @@ export class MCPServer {
],
isError: true,
};
this.recordToolOutputObservability(toolName, reconnectResultPayload);
return reconnectResultPayload;
}
console.error(`[MCPServer] Reconnection complete, proceeding with "${toolName}"`);
}
Expand Down Expand Up @@ -1812,7 +1880,7 @@ export class MCPServer {
}
}

if (compressionConfig?.enabled && compressionConfig?.trackSavings) {
if (compressionConfig?.enabled && compressionConfig?.trackSavings && !(result as Record<string, unknown>)._compression) {
(result as Record<string, unknown>)._compression = {
level: compressionConfig.level ?? 'light',
verbosity,
Expand All @@ -1825,7 +1893,9 @@ export class MCPServer {
// the substituted input, returned it inside a JSON blob, or surfaced
// it via an error message) with `${SECRET:NAME}` placeholders. No-op
// when --secrets was not passed.
return redactSecrets(result);
const finalResult = redactSecrets(result);
this.recordToolOutputObservability(toolName, finalResult);
return finalResult;
} catch (error) {
const message = formatError(error);
const abortReason = isClientDisconnect(error) ? 'client_disconnect' : null;
Expand Down Expand Up @@ -1978,7 +2048,45 @@ export class MCPServer {

// Secrets redaction (#834) — see success path. Error messages can
// include the literal value (e.g. "type ... failed for value X").
return redactSecrets(errResult);
const finalErrResult = redactSecrets(errResult);
this.recordToolOutputObservability(toolName, finalErrResult);
return finalErrResult;
}
}


private recordToolOutputObservability(toolName: string, result: MCPResult): void {
try {
const metrics = getMetricsCollector();
const payload = stringifyResultPayload(result);
const bytes = Buffer.byteLength(payload, 'utf8');
metrics.observe('openchrome_tool_output_bytes', withTenantLabel({ tool: toolName }), bytes);
metrics.observe('openchrome_tool_estimated_tokens', withTenantLabel({ tool: toolName }), estimateOutputTokensFromChars(payload.length));
Comment on lines +2063 to +2064
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Bucket unregistered tool labels before emitting metrics

`recordToolOutputObservability` emits `tool` labels using raw `toolName`, but this function is now called on tenant/scope-denied paths before tool lookup (`src/mcp-server.ts` around lines 1295-1342), where `toolName` comes directly from request input. That lets callers generate unbounded time-series cardinality by sending many distinct bogus tool names, which can exhaust metrics memory and scrape performance; unregistered names should be normalized to a bounded value (e.g., `unknown`) before emitting `openchrome_tool_output_bytes`, `openchrome_tool_estimated_tokens`, `openchrome_tool_compression_saved_bytes`, and `openchrome_cache_status_total`.

Useful? React with 👍 / 👎.


const compression = (result as Record<string, unknown>)._compression;
if (compression && typeof compression === 'object') {
const originalChars = (compression as Record<string, unknown>).originalChars;
const compressedChars = (compression as Record<string, unknown>).compressedChars;
const level = String((compression as Record<string, unknown>).level ?? 'unknown');
if (typeof originalChars === 'number' && typeof compressedChars === 'number' && originalChars > compressedChars) {
Comment on lines +2068 to +2071
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve compression stats before observing saved bytes

`recordToolOutputObservability` expects `_compression.originalChars` and `_compression.compressedChars`, but the success path currently rewrites `_compression` to `{ level, verbosity }` before this method runs when `compression.trackSavings` is enabled. In that configuration, any tool-provided compression stats are discarded, so `openchrome_tool_compression_saved_bytes` never records even when a tool returns valid savings data. Please merge/retain existing `_compression` fields instead of replacing the object.

Useful? React with 👍 / 👎.

metrics.observe(
'openchrome_tool_compression_saved_bytes',
withTenantLabel({ tool: toolName, mode: level }),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Normalize compression mode labels before emitting metrics

`recordToolOutputObservability` forwards `_compression.level` directly into the `mode` label for `openchrome_tool_compression_saved_bytes`. Because `MCPResult` is free-form, any tool can return request-specific strings in `_compression.level`, which creates unbounded label cardinality and can degrade Prometheus memory/query performance once compression stats are emitted. Please map this label to a small allowlist (and bucket unknowns) before calling `observe`.

Useful? React with 👍 / 👎.

originalChars - compressedChars,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Convert compression savings metric to true byte deltas

`openchrome_tool_compression_saved_bytes` is recorded from `originalChars - compressedChars`, but those `_compression` fields are character counts (for example `read_page` sets them from `.length`). For non-ASCII output (CJK, emoji, etc.), character deltas can differ substantially from UTF-8 byte deltas, so this metric reports incorrect units and can mislead dashboards/alerts that depend on byte savings. Compute the delta from UTF-8 byte lengths (or rename the metric to chars) before observing it.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Measure compression savings in bytes before reporting

This metric is named `openchrome_tool_compression_saved_bytes`, but it records `originalChars - compressedChars` from `_compression`. Since these values are currently based on string `.length` (UTF-16 code units), responses containing multi-byte UTF-8 characters will report incorrect byte savings. Convert both values to UTF-8 byte lengths (or rename the metric) so dashboards reflect actual payload reduction.

Useful? React with 👍 / 👎.

);
}
}

const cache = extractCacheStatus(result);
if (cache) {
metrics.inc('openchrome_cache_status_total', withTenantLabel({
tool: toolName,
status: cache.status,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Constrain cache status labels before emitting metrics

`recordToolOutputObservability` emits `openchrome_cache_status_total` with `status` taken directly from tool output via `extractCacheStatus` (only uppercased), so any tool that includes request-specific detail in cache status (for example reason strings or embedded IDs) will create unbounded label cardinality and degrade Prometheus memory/query performance. Please normalize this to a small allowlist (e.g., `HIT`/`MISS`/`BYPASS`/`ERROR`) or map unknown values to a fixed bucket before incrementing the counter.

Useful? React with 👍 / 👎.

key_version: cache.keyVersion,
}));
}
} catch {
// Metrics are best-effort and must never affect tool responses.
}
}

Expand Down
7 changes: 7 additions & 0 deletions src/metrics/collector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,13 @@ export function getMetricsCollector(): MetricsCollector {
instance.registerCounter('openchrome_tool_calls_total', 'Total MCP tool calls');
instance.registerHistogram('openchrome_tool_duration_seconds', 'Tool call duration in seconds',
[0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120]);
instance.registerHistogram('openchrome_tool_output_bytes', 'Final MCP tool result payload size in bytes',
[128, 512, 1024, 4096, 16384, 65536, 262144, 1048576]);
instance.registerHistogram('openchrome_tool_estimated_tokens', 'Estimated MCP tool result output tokens (chars / 4 heuristic)',
[32, 128, 256, 1024, 4096, 16384, 65536, 262144]);
instance.registerHistogram('openchrome_tool_compression_saved_bytes', 'Estimated response bytes saved by response compression or delta modes',
[128, 512, 1024, 4096, 16384, 65536, 262144]);
instance.registerCounter('openchrome_cache_status_total', 'Cache status observations by tool and key version');
instance.registerCounter('openchrome_reconnect_total', 'Total successful CDP reconnections');
instance.registerGauge('openchrome_heap_bytes', 'Node.js heap usage in bytes');
instance.registerGauge('openchrome_active_sessions', 'Current active MCP sessions');
Expand Down
2 changes: 1 addition & 1 deletion src/tools/crawl-cancel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const definition: MCPToolDefinition = {
inputSchema: {
type: 'object',
properties: {
jobId: { type: 'string', description: 'Job id returned by crawl_start.' },
jobId: { type: 'string', description: 'REQUIRED Job id returned by crawl_start.' },
},
required: ['jobId'],
},
Expand Down
2 changes: 1 addition & 1 deletion src/tools/crawl-start.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const definition: MCPToolDefinition = {
inputSchema: {
type: 'object',
properties: {
url: { type: 'string', description: 'Starting URL to crawl' },
url: { type: 'string', description: 'REQUIRED Starting URL to crawl' },
max_depth: { type: 'number', description: 'Max link-follow depth. Default: 2' },
max_pages: { type: 'number', description: 'Max pages to crawl. Default: 20' },
scope: { type: 'string', description: 'URL glob limiting which URLs to follow. Default: same origin.' },
Expand Down
2 changes: 1 addition & 1 deletion src/tools/crawl-status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const definition: MCPToolDefinition = {
inputSchema: {
type: 'object',
properties: {
jobId: { type: 'string', description: 'Job id returned by crawl_start.' },
jobId: { type: 'string', description: 'REQUIRED Job id returned by crawl_start.' },
advance: {
type: 'number',
description: 'Max pages to fetch in this call. Default OC_CRAWL_ADVANCE_DEFAULT (5). Use 0 for read-only.',
Expand Down
8 changes: 7 additions & 1 deletion src/tools/read-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,14 @@ const handler: ToolHandler = async (
const statsLine = `[page_stats] url: ${result.pageStats.url} | title: ${result.pageStats.title} | scroll: ${result.pageStats.scrollX},${result.pageStats.scrollY} | viewport: ${result.pageStats.viewportWidth}x${result.pageStats.viewportHeight} | docSize: ${result.pageStats.scrollWidth}x${result.pageStats.scrollHeight}\n\n`;
const includePaginationDom = args.includePagination !== false;
const domPaginationSection = includePaginationDom ? formatPaginationSection(await detectPagination(page, tabId)) : '';
const compressedText = statsLine + delta.content + nodeRefsBlock + domPaginationSection;
return {
content: [{ type: 'text', text: statsLine + delta.content + nodeRefsBlock + domPaginationSection }],
content: [{ type: 'text', text: compressedText }],
_compression: {
level: 'delta',
originalChars: outputText.length,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Compute original delta size from full DOM response

In the delta path, `_compression.originalChars` is derived from `outputText.length`, but the fallback non-delta DOM response includes `nodeRefsBlock` and optional pagination text as well. On pages with large `[node_refs]` sections (or pagination metadata), this underestimates the baseline size and can hide real compression savings in observability metrics. Use the same full-response text shape used by the non-delta return when calculating the original size.

Useful? React with 👍 / 👎.

compressedChars: compressedText.length,
Comment on lines +607 to +608
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Count full uncompressed response when reporting delta savings

`_compression.originalChars` is set from `outputText.length`, but the non-delta DOM response for the same call is `outputText + domPaginationSection`. When `includePagination` is left at its default (`true`) and pagination metadata is non-empty, this undercounts the baseline and can make `originalChars <= compressedChars`, so `openchrome_tool_compression_saved_bytes` is underreported or dropped even when delta compression actually saved bytes.

Useful? React with 👍 / 👎.

Comment on lines +607 to +608
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Compute delta compression savings from comparable payloads

The `_compression` metadata for `read_page` delta responses currently compares `originalChars` from `outputText.length` against `compressedChars` from `compressedText.length`, but `compressedText` includes `nodeRefsBlock`/pagination sections while `originalChars` does not. In calls where those shared sections are non-trivial, this undercounts (or can invert) savings, which then suppresses or distorts `openchrome_tool_compression_saved_bytes` in `recordToolOutputObservability`. This makes the new observability metrics inaccurate specifically for DOM delta mode responses with larger node-ref or pagination blocks.

Useful? React with 👍 / 👎.

},
};
}
// If not delta (too many changes), fall through to full response
Expand Down
Loading
Loading