diff --git a/open-sse/config/anthropicHeaders.ts b/open-sse/config/anthropicHeaders.ts index 14dfd83a8..1a322516f 100644 --- a/open-sse/config/anthropicHeaders.ts +++ b/open-sse/config/anthropicHeaders.ts @@ -26,7 +26,10 @@ export const ANTHROPIC_BETA_CLAUDE_OAUTH = [ ...ANTHROPIC_BETA_BASE.slice(3), ].join(","); -export const CLAUDE_CLI_VERSION = "2.1.121"; +// Static-config fallbacks for providerRegistry.ts. Runtime cloak in base.ts +// emits the same values via CLAUDE_CODE_VERSION in claudeIdentity.ts — +// keep both in sync. +export const CLAUDE_CLI_VERSION = "2.1.131"; export const CLAUDE_CLI_USER_AGENT = `claude-cli/${CLAUDE_CLI_VERSION} (external, cli)`; export const CLAUDE_CLI_STAINLESS_PACKAGE_VERSION = "0.81.0"; export const CLAUDE_CLI_STAINLESS_RUNTIME_VERSION = "v24.3.0"; diff --git a/open-sse/config/cliFingerprints.ts b/open-sse/config/cliFingerprints.ts index 444950210..64bcfcf85 100644 --- a/open-sse/config/cliFingerprints.ts +++ b/open-sse/config/cliFingerprints.ts @@ -63,30 +63,32 @@ export const CLI_FINGERPRINTS: Record = { // executor-provided version or user override. }, claude: { + // Header order matching real claude-cli: Title-Case (Stainless) keys + // alphabetically, then lowercase Anthropic keys alphabetically, then + // transport headers added by Node fetch. headerOrder: [ - "Host", + "Accept", + "Authorization", "Content-Type", - "x-api-key", - "anthropic-version", - "anthropic-beta", - "anthropic-dangerous-direct-browser-access", - "x-app", "User-Agent", "X-Claude-Code-Session-Id", - "x-client-request-id", - "X-Stainless-Retry-Count", - "X-Stainless-Timeout", + "X-Stainless-Arch", "X-Stainless-Lang", - "X-Stainless-Package-Version", "X-Stainless-OS", - "X-Stainless-Arch", + "X-Stainless-Package-Version", + "X-Stainless-Retry-Count", "X-Stainless-Runtime", "X-Stainless-Runtime-Version", - "Accept", - "accept-language", - "accept-encoding", - "sec-fetch-mode", + "X-Stainless-Timeout", + "anthropic-beta", + "anthropic-dangerous-direct-browser-access", + "anthropic-version", + "x-app", + "x-client-request-id", "Connection", + "Host", + "Accept-Encoding", + "Content-Length", ], bodyFieldOrder: [ "model", @@ -96,6 +98,7 @@ export const CLI_FINGERPRINTS: Record = { "tool_choice", "metadata", "max_tokens", + "temperature", "thinking", "context_management", "output_config", diff --git a/open-sse/executors/base.ts b/open-sse/executors/base.ts index 00576b774..1f5f8007b 100644 --- a/open-sse/executors/base.ts +++ b/open-sse/executors/base.ts @@ -10,11 +10,25 @@ import { modelSupportsContext1mBeta, } from "../services/claudeCodeCompatible.ts"; import { getClaudeCodeCompatibleRequestDefaults } from "@/lib/providers/requestDefaults"; -import { supportsXHighEffort } from "../config/providerModels.ts"; import { remapToolNamesInRequest } from "../services/claudeCodeToolRemapper.ts"; import { obfuscateInBody } from "../services/claudeCodeObfuscation.ts"; import { randomUUID } from "node:crypto"; -import { createHash } from "node:crypto"; +import { + CLAUDE_CODE_VERSION, + CLAUDE_CODE_STAINLESS_VERSION, + buildHashFor, + buildUserIdJson, + getSessionId, + parseUpstreamMetadataUserId, + passthroughUpstreamSessionId, + resolveAccountUUID, + resolveCliUserID, + selectBetaFlags, + stainlessArch, + stainlessOS, + stainlessRuntimeVersion, + stripProxyToolPrefix, +} from "./claudeIdentity.ts"; /** * Sanitizes a custom API path to prevent path traversal attacks. @@ -494,154 +508,231 @@ export class BaseExecutor { (clientHeaders?.["user-agent"] && clientHeaders["user-agent"].toLowerCase().includes("claude-cli")); + // Anthropic's user:sessions:claude_code OAuth scope expects CLI-shaped + // traffic. Apply the cloak whenever we have an OAuth token, regardless + // of upstream client. + const hasClaudeOAuthToken = + typeof activeCredentials?.accessToken === "string" && + activeCredentials.accessToken.startsWith("sk-ant-oat") && + !activeCredentials?.apiKey; + if ( this.provider === "claude" && - isClaudeCodeClient && + (isClaudeCodeClient || hasClaudeOAuthToken) && typeof transformedBody === "object" && transformedBody !== null ) { const tb = transformedBody as Record; + + stripProxyToolPrefix(tb); remapToolNamesInRequest(tb); obfuscateInBody(tb); - const ccVersion = "2.1.121"; - // Fix #1638: Use a stable fingerprint instead of message-derived one. - // The original computeFingerprint() hashed first-user-message chars, which - // changes every conversation turn. This mutated the system[] prefix on each - // request, invalidating Anthropic's prompt-cache prefix and forcing ~100% - // cache_create (vs 96% cache_read with a stable prefix). Using a per-day - // hash keeps the billing header format while preserving cache affinity. - const dayStamp = new Date().toISOString().slice(0, 10); // YYYY-MM-DD - const fp = createHash("sha256") - .update(`${dayStamp}${ccVersion}`) - .digest("hex") - .slice(0, 3); - const billingLine = `x-anthropic-billing-header: cc_version=${ccVersion}.${fp}; cc_entrypoint=cli; cch=00000;`; - - if (Array.isArray(tb.system)) { - const sysBlocks = tb.system as Array>; - // Fix #1712: Remove any existing billing headers from the client - // to prevent stacking that breaks Anthropic prompt cache prefix matching. - for (let i = sysBlocks.length - 1; i >= 0; i--) { - const block = sysBlocks[i]; - if ( - block && - typeof block.text === "string" && - block.text.startsWith("x-anthropic-billing-header:") - ) { - sysBlocks.splice(i, 1); - } - } - const firstSystemCacheControl = - sysBlocks[0] && - typeof sysBlocks[0] === "object" && - !Array.isArray(sysBlocks[0]) && - sysBlocks[0].cache_control - ? sysBlocks[0].cache_control - : undefined; - const billingBlock: Record = { type: "text", text: billingLine }; - if (firstSystemCacheControl) { - billingBlock.cache_control = firstSystemCacheControl; + // Real CLI never sets cache_control on tools. + if (Array.isArray(tb.tools)) { + for (const t of tb.tools as Array>) { + delete t.cache_control; } - sysBlocks.unshift(billingBlock); - } else if (typeof tb.system === "string") { - tb.system = [ - { type: "text", text: billingLine }, - { type: "text", text: tb.system }, - ]; - } else { - tb.system = [{ type: "text", text: billingLine }]; } - if (!tb.metadata || typeof tb.metadata !== "object") { - tb.metadata = { - user_id: JSON.stringify({ - device_id: createHash("sha256").update("omniroute").digest("hex").slice(0, 24), - account_uuid: "", - session_id: randomUUID(), - }), - }; + // Per-request behavior overrides via custom client headers. + // x-omniroute-effort: low | medium | high | xhigh | off + // x-omniroute-thinking: adaptive | off + // A header value applies only when the corresponding body field is + // not already set; "off" force-strips the field. + const headerEffort = ( + clientHeaders?.["x-omniroute-effort"] ?? + clientHeaders?.["X-OmniRoute-Effort"] + ) + ?.trim() + .toLowerCase(); + const headerThinking = ( + clientHeaders?.["x-omniroute-thinking"] ?? + clientHeaders?.["X-OmniRoute-Thinking"] + ) + ?.trim() + .toLowerCase(); + let appliedEffort: string | null = null; + let appliedThinking: string | null = null; + + if (headerEffort === "off") { + if (tb.output_config && typeof tb.output_config === "object") { + delete (tb.output_config as Record).effort; + } + appliedEffort = "off"; + } else if ( + headerEffort && + ["low", "medium", "high", "xhigh"].includes(headerEffort) + ) { + const oc = + tb.output_config && typeof tb.output_config === "object" + ? (tb.output_config as Record) + : {}; + if (oc.effort === undefined) { + oc.effort = headerEffort; + tb.output_config = oc; + appliedEffort = headerEffort; + } } - const supportsAdaptiveThinking = supportsXHighEffort("claude", model); - - // Fix #1761: Only inject adaptive thinking/high effort if the client didn't - // explicitly set these fields. This allows users to opt-out by sending - // `thinking: null` or `output_config: { effort: "low" }` to prevent forced - // quota drain on Claude Max accounts. - const originalBody = body as Record; - const clientExplicitThinking = originalBody?.thinking !== undefined; - const clientExplicitEffort = originalBody?.output_config !== undefined; - - if (supportsAdaptiveThinking && !tb.thinking && !clientExplicitThinking) { - tb.thinking = { type: "adaptive" }; + if (headerThinking === "adaptive") { + if (tb.thinking === undefined) { + tb.thinking = { type: "adaptive" }; + appliedThinking = "adaptive"; + } + if (tb.context_management === undefined) { + tb.context_management = { + edits: [{ type: "clear_thinking_20251015", keep: "all" }], + }; + } + } else if (headerThinking === "off") { + delete tb.thinking; + delete tb.context_management; + appliedThinking = "off"; } - if (supportsAdaptiveThinking && !tb.context_management && !clientExplicitThinking) { + // Real CLI always pairs context_management with thinking. Mirror + // that invariant so long sessions don't accumulate thinking blocks + // toward the context cap. + if (tb.thinking && !tb.context_management) { tb.context_management = { edits: [{ type: "clear_thinking_20251015", keep: "all" }], }; } - if (supportsAdaptiveThinking && !tb.output_config && !clientExplicitEffort) { - tb.output_config = { effort: "high" }; + const seed = + activeCredentials?.accessToken || activeCredentials?.apiKey || "anon"; + const psd = activeCredentials?.providerSpecificData as + | Record + | undefined; + + let identitySource: "upstream-metadata" | "upstream-header" | "synthesized" = + "synthesized"; + let sessionId: string; + let deviceId: string; + let accountUUID: string; + + const upstreamUserId = parseUpstreamMetadataUserId(tb); + if (upstreamUserId) { + sessionId = upstreamUserId.session_id; + deviceId = upstreamUserId.device_id; + accountUUID = upstreamUserId.account_uuid; + identitySource = "upstream-metadata"; + } else { + const headerSid = passthroughUpstreamSessionId( + clientHeaders as Record | undefined + ); + sessionId = headerSid ?? getSessionId(seed); + deviceId = resolveCliUserID(psd, seed); + accountUUID = resolveAccountUUID(psd, seed, activeCredentials?.accessToken); + identitySource = headerSid ? "upstream-header" : "synthesized"; } + // system[0] (billing) and system[1] (sentinel) must not carry + // cache_control — that belongs on upstream prompt blocks at [2..]. + const dayStamp = new Date().toISOString().slice(0, 10); + const buildHash = buildHashFor(CLAUDE_CODE_VERSION, dayStamp); + const billingLine = `x-anthropic-billing-header: cc_version=${CLAUDE_CODE_VERSION}.${buildHash}; cc_entrypoint=cli; cch=00000;`; + const SENTINEL = "You are Claude Code, Anthropic's official CLI for Claude."; + + const sysBlocks: Array> = Array.isArray(tb.system) + ? (tb.system as Array>) + : typeof tb.system === "string" + ? [{ type: "text", text: tb.system }] + : []; + + // Strip any pre-existing billing/sentinel before re-prepending — keeps + // retries idempotent and avoids stacking that breaks prompt-cache prefix + // matching (see issue #1712). + for (let i = sysBlocks.length - 1; i >= 0; i--) { + const t = sysBlocks[i]?.text; + if (typeof t === "string" && t.startsWith("x-anthropic-billing-header:")) { + sysBlocks.splice(i, 1); + } + } + for (let i = sysBlocks.length - 1; i >= 0; i--) { + const t = sysBlocks[i]?.text; + if (typeof t === "string" && t.startsWith(SENTINEL)) { + sysBlocks.splice(i, 1); + } + } + sysBlocks.unshift( + { type: "text", text: billingLine }, + { type: "text", text: SENTINEL } + ); + tb.system = sysBlocks; + + if (!tb.metadata || typeof tb.metadata !== "object") tb.metadata = {}; + (tb.metadata as Record).user_id = buildUserIdJson({ + deviceId, + accountUUID, + sessionId, + }); + + // Headers. Accept stays application/json even on streams (Stainless + // convention; SSE decoding is gated on body.stream). anthropic-beta + // is selected per request shape; the full set on a quota probe is + // itself a fingerprint. const ccHeaders: Record = { + Accept: "application/json", "anthropic-version": "2023-06-01", - "anthropic-beta": - "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,redact-thinking-2026-02-12,context-management-2025-06-27,prompt-caching-scope-2026-01-05,advisor-tool-2026-03-01,advanced-tool-use-2025-11-20,effort-2025-11-24", + "anthropic-beta": selectBetaFlags(tb), "anthropic-dangerous-direct-browser-access": "true", "x-app": "cli", - "User-Agent": `claude-cli/${ccVersion} (external, cli)`, - "X-Stainless-Package-Version": "0.81.0", + "User-Agent": `claude-cli/${CLAUDE_CODE_VERSION} (external, cli)`, + "X-Stainless-Package-Version": CLAUDE_CODE_STAINLESS_VERSION, "X-Stainless-Timeout": "600", - "accept-language": "*", "accept-encoding": "gzip, deflate, br, zstd", connection: "keep-alive", "x-client-request-id": randomUUID(), - "X-Claude-Code-Session-Id": randomUUID(), + "X-Claude-Code-Session-Id": sessionId, }; - // Remove any existing case variants of ccHeaders keys before merging. - // The claude provider config sets "Anthropic-Version" (Title-Case) while - // ccHeaders uses all-lowercase keys. Both JS keys normalise to the same - // HTTP header name, so undici would combine them into "2023-06-01, 2023-06-01" - // causing a 400 from Anthropic (see issue #1454). + + // Drop case variants of the same header name before merging — undici + // would otherwise concatenate them (issue #1454). const ccKeysLower = new Set(Object.keys(ccHeaders).map((k) => k.toLowerCase())); for (const key of Object.keys(headers)) { - if (ccKeysLower.has(key.toLowerCase())) { - delete headers[key]; - } + if (ccKeysLower.has(key.toLowerCase())) delete headers[key]; } Object.assign(headers, ccHeaders); delete headers["X-Stainless-Helper-Method"]; - // Add X-Stainless headers to match real Claude Code - headers["X-Stainless-Arch"] = "x64"; + // Stainless OS/Arch/Runtime are host-derived (Stainless SDK does the + // same at runtime). Hardcoding them was a unique-per-deployment tell. + headers["X-Stainless-Arch"] = stainlessArch(); headers["X-Stainless-Lang"] = "js"; - headers["X-Stainless-OS"] = "Windows"; + headers["X-Stainless-OS"] = stainlessOS(); headers["X-Stainless-Runtime"] = "node"; - headers["X-Stainless-Runtime-Version"] = "v24.3.0"; + headers["X-Stainless-Runtime-Version"] = stainlessRuntimeVersion(); headers["X-Stainless-Retry-Count"] = "0"; delete headers["X-Stainless-Os"]; - console.log( - `[CLAUDE-PATCH] provider=${this.provider} tools remapped, billing header injected, body fields added, headers patched` + const overrideTag = + appliedEffort || appliedThinking + ? ` overrides=effort:${appliedEffort ?? "-"},thinking:${appliedThinking ?? "-"}` + : ""; + log?.debug?.( + "CLAUDE", + `identity=${identitySource} sid=${sessionId.slice(0, 8)} dev=${deviceId.slice(0, 8)} acct=${accountUUID.slice(0, 8)}${overrideTag}` ); } - // Apply CLI fingerprint ordering if enabled for this provider + // CLI fingerprint ordering — always-on for native Claude OAuth, opt-in + // for other providers. Header + body field order is itself a fingerprint. let finalHeaders = headers; let bodyString = JSON.stringify(transformedBody); - if (isCliCompatEnabled(this.provider)) { + const shouldFingerprint = + isCliCompatEnabled(this.provider) || + (this.provider === "claude" && (isClaudeCodeClient || hasClaudeOAuthToken)); + if (shouldFingerprint) { const fingerprinted = applyFingerprint(this.provider, headers, transformedBody); finalHeaders = fingerprinted.headers; bodyString = fingerprinted.bodyString; } - // CCH signing: Claude Code-compatible providers AND native claude provider - // require an xxHash64 integrity token over the serialized body. + // CCH signing — replaces the cch=00000 placeholder in the billing + // header with an xxHash64 integrity token over the serialized body. if (isClaudeCodeCompatible(this.provider) || this.provider === "claude") { bodyString = await signRequestBody(bodyString); } diff --git a/open-sse/executors/claudeIdentity.ts b/open-sse/executors/claudeIdentity.ts new file mode 100644 index 000000000..baf5a5efe --- /dev/null +++ b/open-sse/executors/claudeIdentity.ts @@ -0,0 +1,337 @@ +/** + * Claude Code identity helpers used by the native `claude` provider when + * authenticating with an OAuth token. Anthropic's user:sessions:claude_code + * scope expects request shape that matches a real claude-cli session; + * everything in this module exists to produce that shape. + * + * Pinned to a captured claude-cli release. Bump in lockstep when a newer + * release is captured. + */ + +import { createHash, randomBytes, randomUUID } from "node:crypto"; + +// ---------- Versions ------------------------------------------------------ + +export const CLAUDE_CODE_VERSION = "2.1.131"; +/** Bundled @anthropic-ai/sdk version for the pinned CLI release. */ +export const CLAUDE_CODE_STAINLESS_VERSION = "0.81.0"; + +// ---------- Stainless OS / Arch / Runtime -------------------------------- + +export function stainlessOS(): string { + switch (process.platform) { + case "win32": + return "Windows"; + case "darwin": + return "MacOS"; + case "linux": + return "Linux"; + case "freebsd": + return "FreeBSD"; + default: + return "Unknown"; + } +} + +export function stainlessArch(): string { + switch (process.arch) { + case "x64": + return "x64"; + case "arm64": + return "arm64"; + case "ia32": + return "x32"; + default: + return process.arch; + } +} + +export function stainlessRuntimeVersion(): string { + return process.version; +} + +// ---------- Bounded-map helper ------------------------------------------- + +const IDENTITY_CACHE_LIMIT = 10_000; +const BOOTSTRAP_FETCH_TIMEOUT_MS = 10_000; + +/** Insert with FIFO eviction once a Map reaches `max`. JS Maps preserve insertion order. */ +function setBounded(m: Map, key: K, value: V, max: number): void { + if (!m.has(key) && m.size >= max) { + const oldest = m.keys().next().value as K | undefined; + if (oldest !== undefined) m.delete(oldest); + } + m.set(key, value); +} + +// ---------- Upstream session-id passthrough ------------------------------ + +const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + +export function passthroughUpstreamSessionId( + clientHeaders: Record | null | undefined +): string | null { + if (!clientHeaders) return null; + const raw = + clientHeaders["x-claude-code-session-id"] ?? clientHeaders["X-Claude-Code-Session-Id"]; + if (typeof raw !== "string") return null; + const v = raw.trim(); + return UUID_RE.test(v) ? v : null; +} + +// ---------- Session ID (per OAuth account, process lifetime) ------------- + +const sessionCache = new Map(); + +/** Same value MUST be emitted as both X-Claude-Code-Session-Id and metadata.user_id.session_id. */ +export function getSessionId(seed: string): string { + let id = sessionCache.get(seed); + if (id) return id; + id = randomUUID(); + setBounded(sessionCache, seed, id, IDENTITY_CACHE_LIMIT); + return id; +} + +// ---------- Device ID (cliUserID) ---------------------------------------- + +/** Real CLI uses crypto.randomBytes(32).toString("hex"), persisted to ~/.claude.json. */ +export function generateCliUserID(): string { + return randomBytes(32).toString("hex"); +} + +const lazyCliUserIDCache = new Map(); + +const HEX64_RE = /^[a-f0-9]{64}$/i; + +/** + * Resolve the cliUserID for an account, in priority order: + * 1. providerSpecificData.cliUserID — persisted at OAuth provisioning. + * 2. providerSpecificData.userID — alt key (matches real CLI's own). + * 3. lazy-random — fresh randomBytes(32), cached for the process lifetime. + * + * Never deterministic from the access token. + */ +export function resolveCliUserID( + providerSpecificData: Record | undefined, + seed: string +): string { + const cli = providerSpecificData?.cliUserID; + if (typeof cli === "string" && HEX64_RE.test(cli)) return cli; + const alt = providerSpecificData?.userID; + if (typeof alt === "string" && HEX64_RE.test(alt)) return alt; + let cached = lazyCliUserIDCache.get(seed); + if (cached) return cached; + cached = generateCliUserID(); + setBounded(lazyCliUserIDCache, seed, cached, IDENTITY_CACHE_LIMIT); + return cached; +} + +// ---------- Account UUID ------------------------------------------------- + +const ACCOUNT_FETCH_RETRY_MS = 5 * 60 * 1000; +const accountUuidCache = new Map(); +const inflightFetches = new Set(); + +async function backgroundFetchAccountUUID(accessToken: string, seed: string): Promise { + if (inflightFetches.has(seed)) return; + const cached = accountUuidCache.get(seed); + if (cached?.uuid) return; + if (cached && Date.now() - cached.fetchedAt < ACCOUNT_FETCH_RETRY_MS) return; + inflightFetches.add(seed); + const ctrl = new AbortController(); + const timer = setTimeout(() => ctrl.abort(), BOOTSTRAP_FETCH_TIMEOUT_MS); + try { + const res = await fetch("https://api.anthropic.com/api/claude_cli/bootstrap", { + method: "GET", + headers: { + Authorization: `Bearer ${accessToken}`, + Accept: "application/json", + "User-Agent": `claude-cli/${CLAUDE_CODE_VERSION} (external, cli)`, + "anthropic-beta": "oauth-2025-04-20", + }, + signal: ctrl.signal, + }); + const data: any = res.ok ? await res.json().catch(() => null) : null; + const uuid: string | null = data?.oauth_account?.account_uuid || null; + setBounded(accountUuidCache, seed, { uuid, fetchedAt: Date.now() }, IDENTITY_CACHE_LIMIT); + } catch { + setBounded(accountUuidCache, seed, { uuid: null, fetchedAt: Date.now() }, IDENTITY_CACHE_LIMIT); + } finally { + clearTimeout(timer); + inflightFetches.delete(seed); + } +} + +/** Format-correct UUIDv4 from a 64-hex hash (deterministic fallback shape). */ +export function uuidV4FromHash(hex64: string): string { + return [ + hex64.slice(0, 8), + hex64.slice(8, 12), + "4" + hex64.slice(13, 16), + ((parseInt(hex64.charAt(16), 16) & 0x3) | 0x8).toString(16) + hex64.slice(17, 20), + hex64.slice(20, 32), + ].join("-"); +} + +/** + * Resolve account_uuid in priority order: + * 1. providerSpecificData.accountUUID / account_uuid (real, from bootstrap). + * 2. in-memory cache from a background bootstrap fetch. + * 3. deterministic UUIDv4 derived from the access token (shape-correct fallback). + * + * Triggers a background bootstrap fetch when no real UUID is known yet. + */ +export function resolveAccountUUID( + providerSpecificData: Record | undefined, + seed: string, + accessToken?: string +): string { + const camel = providerSpecificData?.accountUUID; + if (typeof camel === "string" && camel.length >= 32) return camel; + const snake = providerSpecificData?.account_uuid; + if (typeof snake === "string" && snake.length >= 32) return snake; + + const cached = accountUuidCache.get(seed); + if (cached?.uuid) return cached.uuid; + + if (accessToken) void backgroundFetchAccountUUID(accessToken, seed); + + return uuidV4FromHash(createHash("sha256").update("account:" + seed).digest("hex")); +} + +// ---------- metadata.user_id (the JSON-stringified blob) ----------------- + +/** Real CLI emits this exact key order: device_id, account_uuid, session_id. */ +export function buildUserIdJson(opts: { + deviceId: string; + accountUUID: string; + sessionId: string; +}): string { + return JSON.stringify({ + device_id: opts.deviceId, + account_uuid: opts.accountUUID, + session_id: opts.sessionId, + }); +} + +export function parseUpstreamMetadataUserId( + body: Record | null | undefined +): { device_id: string; account_uuid: string; session_id: string } | null { + if (!body) return null; + const md = body.metadata as Record | undefined; + const raw = md?.user_id; + if (typeof raw !== "string" || raw.length === 0) return null; + let parsed: any; + try { + parsed = JSON.parse(raw); + } catch { + return null; + } + if (!parsed || typeof parsed !== "object") return null; + const { device_id, account_uuid, session_id } = parsed; + if ( + typeof device_id !== "string" || + !HEX64_RE.test(device_id) || + typeof account_uuid !== "string" || + !UUID_RE.test(account_uuid) || + typeof session_id !== "string" || + !UUID_RE.test(session_id) + ) { + return null; + } + return { device_id, account_uuid, session_id }; +} + +// ---------- anthropic-beta selector -------------------------------------- + +/** + * Pick the anthropic-beta flag set that matches the request shape. Real CLI + * uses three patterns: minimal probe, structured-output, and full agent. + * Sending the full set on every shape is itself a fingerprint. + */ +export function selectBetaFlags(body: Record | null | undefined): string { + const b = body || {}; + const hasSystem = + !!b.system && + (typeof b.system === "string" || (Array.isArray(b.system) && b.system.length > 0)); + const tools = b.tools as unknown[] | undefined; + const hasTools = Array.isArray(tools) && tools.length > 0; + const outputCfg = b.output_config as Record | undefined; + const hasStructuredOutput = + !!(outputCfg && (outputCfg.format as { type?: string } | undefined)?.type === "json_schema") || + !!(b.response_format as { type?: string } | undefined)?.type; + const isFullAgent = hasTools && hasSystem; + + const flags: string[] = []; + if (isFullAgent) flags.push("claude-code-20250219"); + flags.push("oauth-2025-04-20"); + if (isFullAgent) flags.push("context-1m-2025-08-07"); + flags.push( + "interleaved-thinking-2025-05-14", + "redact-thinking-2026-02-12", + "context-management-2025-06-27", + "prompt-caching-scope-2026-01-05" + ); + if (hasStructuredOutput || isFullAgent) flags.push("advisor-tool-2026-03-01"); + if (hasStructuredOutput && !isFullAgent) flags.push("structured-outputs-2025-12-15"); + if (isFullAgent) { + flags.push( + "advanced-tool-use-2025-11-20", + "effort-2025-11-24", + "extended-cache-ttl-2025-04-11" + ); + } + return flags.join(","); +} + +// ---------- billing-header build hash ------------------------------------ + +/** + * 3-char build hash for the billing header `cc_version=X.Y.Z.HASH`. Stable + * per (day, version) — Anthropic does not appear to validate the value, so + * we keep prompt-cache prefix stable within a day for a given version + * without coupling to any captured value. + */ +export function buildHashFor(version: string, dayStamp: string): string { + return createHash("sha256").update(`${dayStamp}${version}`).digest("hex").slice(0, 3); +} + +// ---------- Tool-name normalisation -------------------------------------- + +const TOOL_PREFIX = "proxy_"; + +/** Strip OmniRoute's `proxy_` tool-name prefix; real CLI never sends it. */ +export function stripProxyToolPrefix(body: Record): void { + const stripName = (n: unknown): string | undefined => { + if (typeof n !== "string") return undefined; + return n.startsWith(TOOL_PREFIX) ? n.slice(TOOL_PREFIX.length) : n; + }; + + const tools = body.tools as Array> | undefined; + if (Array.isArray(tools)) { + for (const t of tools) { + const stripped = stripName(t.name); + if (stripped !== undefined) t.name = stripped; + } + } + + const tc = body.tool_choice as Record | undefined; + if (tc && typeof tc.name === "string") { + const stripped = stripName(tc.name); + if (stripped !== undefined) tc.name = stripped; + } + + const messages = body.messages as Array> | undefined; + if (Array.isArray(messages)) { + for (const m of messages) { + const content = m.content; + if (!Array.isArray(content)) continue; + for (const block of content as Array>) { + if (block?.type === "tool_use") { + const stripped = stripName(block.name); + if (stripped !== undefined) block.name = stripped; + } + } + } + } +} diff --git a/open-sse/handlers/chatCore.ts b/open-sse/handlers/chatCore.ts index 04e2159af..2ec6a38be 100644 --- a/open-sse/handlers/chatCore.ts +++ b/open-sse/handlers/chatCore.ts @@ -2314,6 +2314,19 @@ export async function handleChatCore({ log?.debug?.("FORMAT", `claude passthrough (preserveCache=${preserveCacheControl})`); + // Migrate deprecated top-level `output_format` → `output_config.format`. + // Anthropic returns a 400 on the legacy field; some clients (e.g. ForgeCode) + // still emit it. Preserves an existing output_config.format if present. + if (translatedBody.output_format !== undefined) { + const oc = + translatedBody.output_config && typeof translatedBody.output_config === "object" + ? (translatedBody.output_config as Record) + : {}; + if (oc.format === undefined) oc.format = translatedBody.output_format; + translatedBody.output_config = oc; + delete translatedBody.output_format; + } + // Fix #1719: Strip output_config.format for non-Anthropic Claude-compatible providers. // Third-party Claude endpoints (MiniMax, DeepSeek via aggregators) reject this field // with 400 errors since they don't support Anthropic's structured output / json_schema. diff --git a/open-sse/services/claudeCodeCompatible.ts b/open-sse/services/claudeCodeCompatible.ts index c79a20fc5..8ef294fe5 100644 --- a/open-sse/services/claudeCodeCompatible.ts +++ b/open-sse/services/claudeCodeCompatible.ts @@ -33,8 +33,11 @@ export const CLAUDE_CODE_COMPATIBLE_ANTHROPIC_BETA = [ "interleaved-thinking-2025-05-14", "effort-2025-11-24", ].join(","); -export const CLAUDE_CODE_COMPATIBLE_VERSION = "2.1.121"; -export const CLAUDE_CODE_COMPATIBLE_USER_AGENT = "claude-cli/2.1.121 (external, sdk-cli)"; +// Keep aligned with CLAUDE_CODE_VERSION in claudeIdentity.ts. The +// "(external, sdk-cli)" suffix here distinguishes SDK-driven CC-compat +// relays from the native (external, cli) path. +export const CLAUDE_CODE_COMPATIBLE_VERSION = "2.1.131"; +export const CLAUDE_CODE_COMPATIBLE_USER_AGENT = "claude-cli/2.1.131 (external, sdk-cli)"; export const CLAUDE_CODE_COMPATIBLE_STAINLESS_PACKAGE_VERSION = "0.81.0"; export const CLAUDE_CODE_COMPATIBLE_STAINLESS_RUNTIME_VERSION = "v24.3.0"; export const CONTEXT_1M_BETA_HEADER = "context-1m-2025-08-07"; diff --git a/open-sse/translator/request/openai-to-claude.ts b/open-sse/translator/request/openai-to-claude.ts index 0b48332a0..78a8f1acb 100644 --- a/open-sse/translator/request/openai-to-claude.ts +++ b/open-sse/translator/request/openai-to-claude.ts @@ -280,8 +280,8 @@ export function openaiToClaudeRequest(model, body, stream) { // Filter out tools with empty names (would cause Claude 400 error) result.tools = result.tools.filter((tool) => tool.name && tool.name?.trim()); - // Add cache_control to last tool that doesn't have defer_loading - // Tools with defer_loading=true cannot have cache_control (API rejects it) + // Cache breakpoint on the last non-defer-loading tool — Anthropic + // rejects cache_control on defer_loading tools. for (let i = result.tools.length - 1; i >= 0; i--) { if (!result.tools[i].defer_loading) { result.tools[i].cache_control = { type: "ephemeral", ttl: "1h" }; diff --git a/src/lib/oauth/providers/claude.ts b/src/lib/oauth/providers/claude.ts index 6f3f3eb56..d1e7404c6 100644 --- a/src/lib/oauth/providers/claude.ts +++ b/src/lib/oauth/providers/claude.ts @@ -1,4 +1,42 @@ +import crypto from "node:crypto"; import { CLAUDE_CONFIG } from "../constants/oauth"; +import { CLAUDE_CODE_VERSION } from "@omniroute/open-sse/executors/claudeIdentity.ts"; + +const BOOTSTRAP_FETCH_TIMEOUT_MS = 10_000; + +// Best-effort: failure must not block OAuth — the access token is valid. +async function fetchClaudeBootstrap(accessToken) { + const ctrl = new AbortController(); + const timer = setTimeout(() => ctrl.abort(), BOOTSTRAP_FETCH_TIMEOUT_MS); + try { + const res = await fetch("https://api.anthropic.com/api/claude_cli/bootstrap", { + method: "GET", + headers: { + Authorization: `Bearer ${accessToken}`, + Accept: "application/json", + "User-Agent": `claude-cli/${CLAUDE_CODE_VERSION} (external, cli)`, + "anthropic-beta": "oauth-2025-04-20", + }, + signal: ctrl.signal, + }); + if (!res.ok) return null; + const data = await res.json(); + const acct = data?.oauth_account; + if (!acct || typeof acct !== "object") return null; + return { + account_uuid: acct.account_uuid || null, + account_email: acct.account_email || null, + organization_uuid: acct.organization_uuid || null, + organization_name: acct.organization_name || null, + organization_type: acct.organization_type || null, + organization_rate_limit_tier: acct.organization_rate_limit_tier || null, + }; + } catch { + return null; + } finally { + clearTimeout(timer); + } +} export const claude = { config: CLAUDE_CONFIG, @@ -48,10 +86,37 @@ export const claude = { return await response.json(); }, - mapTokens: (tokens) => ({ - accessToken: tokens.access_token, - refreshToken: tokens.refresh_token, - expiresIn: tokens.expires_in, - scope: tokens.scope, - }), + // Runs after exchangeToken; result is passed as `extra` to mapTokens. + postExchange: async (tokens) => { + if (!tokens?.access_token) return null; + return await fetchClaudeBootstrap(tokens.access_token); + }, + mapTokens: (tokens, extra) => { + const bs = extra || {}; + const providerSpecificData = { + // Generated once at provisioning; preserved across token refresh. + cliUserID: crypto.randomBytes(32).toString("hex"), + }; + if (bs.account_uuid) providerSpecificData.accountUUID = bs.account_uuid; + if (bs.organization_uuid) providerSpecificData.organizationUUID = bs.organization_uuid; + if (bs.organization_name) providerSpecificData.organizationName = bs.organization_name; + if (bs.organization_type) providerSpecificData.organizationType = bs.organization_type; + if (bs.organization_rate_limit_tier) + providerSpecificData.organizationRateLimitTier = bs.organization_rate_limit_tier; + + const result = { + accessToken: tokens.access_token, + refreshToken: tokens.refresh_token, + expiresIn: tokens.expires_in, + scope: tokens.scope, + }; + if (bs.account_email) { + result.email = bs.account_email; + result.displayName = bs.account_email; + } + if (Object.keys(providerSpecificData).length > 0) { + result.providerSpecificData = providerSpecificData; + } + return result; + }, }; diff --git a/src/lib/providers/validation.ts b/src/lib/providers/validation.ts index 8ea188b5f..6e8933a44 100644 --- a/src/lib/providers/validation.ts +++ b/src/lib/providers/validation.ts @@ -11,6 +11,7 @@ import { stripClaudeCodeCompatibleEndpointSuffix, stripAnthropicMessagesSuffix, } from "@omniroute/open-sse/services/claudeCodeCompatible.ts"; +import { getExecutor } from "@omniroute/open-sse/executors/index.ts"; import { isClaudeCodeCompatibleProvider, isAnthropicCompatibleProvider, @@ -544,6 +545,13 @@ async function validateAnthropicLikeProvider({ return { valid: false, error: "Missing base URL" }; } + // OAuth tokens need the same Claude Code cloak as production traffic in + // base.ts; a bare validation request gets flagged on the user:sessions: + // claude_code scope. + if (typeof apiKey === "string" && apiKey.startsWith("sk-ant-oat")) { + return validateClaudeOAuthInline({ apiKey, modelId, providerSpecificData }); + } + const requestHeaders = applyCustomUserAgent( { "Content-Type": "application/json", @@ -580,6 +588,45 @@ async function validateAnthropicLikeProvider({ return { valid: true, error: null }; } +// Probe a Claude OAuth credential through the same executor that handles +// production traffic so the cloak/signing/identity logic isn't duplicated. +async function validateClaudeOAuthInline({ + apiKey, + modelId, + providerSpecificData = {}, +}: { + apiKey: string; + modelId: string | null | undefined; + providerSpecificData?: Record; +}) { + const testModelId = + providerSpecificData?.validationModelId || modelId || "claude-haiku-4-5-20251001"; + + try { + const { response } = await getExecutor("claude").execute({ + model: testModelId, + body: { + model: testModelId, + max_tokens: 1, + messages: [{ role: "user", content: "test" }], + }, + stream: false, + credentials: { accessToken: apiKey, providerSpecificData }, + }); + + if (response.status === 401 || response.status === 403) { + return { valid: false, error: "Invalid OAuth token" }; + } + if (response.status >= 500) { + return { valid: false, error: `Provider unavailable (${response.status})` }; + } + // 2xx and non-auth 4xx (429 quota, 400 model) both mean the token is valid. + return { valid: true, error: null }; + } catch (error: any) { + return toValidationErrorResult(error); + } +} + async function validateGeminiLikeProvider({ apiKey, baseUrl,