diff --git a/.gitignore b/.gitignore index 1262b9a..c9b1370 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ Thumbs.db # Test coverage coverage/ +pnpm-lock.yaml diff --git a/src/adapter/cli-to-openai.ts b/src/adapter/cli-to-openai.ts index 1e43eab..934c74b 100644 --- a/src/adapter/cli-to-openai.ts +++ b/src/adapter/cli-to-openai.ts @@ -1,9 +1,29 @@ /** - * Converts Claude CLI output to OpenAI-compatible response format + * Converts Claude CLI output to OpenAI-compatible response format. + * + * Tool-calling extraction: + * When the request had `tools`, the openai-to-cli encoder instructs + * Claude to emit tool calls wrapped in tags + * (see {@link ../adapter/openai-to-cli.ts}). This decoder looks for those + * tags in the response text, parses the inner JSON, and returns the + * result as OpenAI-format `tool_calls` so clients (Hermes, Clawdbot, + * anything speaking OpenAI) can handle them natively. + * + * If parsing fails (malformed JSON, missing fields), we fall through to + * plain-text content so the client still gets something useful. */ import type { ClaudeCliAssistant, ClaudeCliResult } from "../types/claude-cli.js"; -import type { OpenAIChatResponse, OpenAIChatChunk } from "../types/openai.js"; +import type { + OpenAIChatResponse, + OpenAIChatChunk, + OpenAIToolCall, + OpenAIFinishReason, +} from "../types/openai.js"; +import { + TOOL_CALL_OPEN_TAG, + TOOL_CALL_CLOSE_TAG, +} from "./openai-to-cli.js"; /** * Extract text content from Claude CLI assistant message @@ -15,15 +35,120 @@ export function extractTextContent(message: ClaudeCliAssistant): string { .join(""); } +// --------------------------------------------------------------------------- +// Tool-call extraction from Claude's text output +// --------------------------------------------------------------------------- + +export interface ExtractedToolCalls { + /** Text with tool_call blocks stripped. May be empty. */ + text: string; + /** Tool calls parsed out, in order. Empty array when none present. */ + toolCalls: OpenAIToolCall[]; +} + +let toolCallCounter = 0; +function nextToolCallId(): string { + toolCallCounter = (toolCallCounter + 1) % 1_000_000; + return `call_${Date.now().toString(36)}_${toolCallCounter}`; +} + +/** + * Find all blocks in `text`, parse each as JSON, + * and return OpenAI-format tool_calls plus the remaining text with the + * blocks stripped. + * + * Robust against: + * - Extra whitespace / newlines inside the tags + * - Missing `id` (we generate one) + * - Missing or non-object `arguments` (normalized to empty object) + * - Malformed JSON (skipped silently, text kept as-is with tags removed) + */ +export function extractToolCalls(text: string): ExtractedToolCalls { + const calls: OpenAIToolCall[] = []; + const open = TOOL_CALL_OPEN_TAG; + const close = TOOL_CALL_CLOSE_TAG; + + const out: string[] = []; + let cursor = 0; + while (cursor < text.length) { + const start = text.indexOf(open, cursor); + if (start === -1) { + out.push(text.slice(cursor)); + break; + } + // Keep any text before the block + out.push(text.slice(cursor, start)); + const end = text.indexOf(close, start + open.length); + if (end === -1) { + // Unterminated block: keep raw text, stop scanning + out.push(text.slice(start)); + break; + } + const payload = text.slice(start + open.length, end).trim(); + cursor = end + close.length; + + try { + const parsed = JSON.parse(payload); + if (!parsed || typeof parsed !== "object") continue; + const name = typeof parsed.name === "string" ? parsed.name : null; + if (!name) continue; + const argsObj = + parsed.arguments && typeof parsed.arguments === "object" + ? parsed.arguments + : {}; + const id = + typeof parsed.id === "string" && parsed.id.length > 0 + ? parsed.id + : nextToolCallId(); + calls.push({ + id, + type: "function", + function: { + name, + arguments: JSON.stringify(argsObj), + }, + }); + } catch { + // Malformed JSON inside block — drop silently; the surrounding text + // is preserved via `out` so the client still sees the assistant's + // prose (minus the broken block). + continue; + } + } + + return { + text: out.join("").trim(), + toolCalls: calls, + }; +} + +// --------------------------------------------------------------------------- +// Streaming chunk conversion +// --------------------------------------------------------------------------- + /** - * Convert Claude CLI assistant message to OpenAI streaming chunk + * Convert Claude CLI assistant message to OpenAI streaming chunk. + * + * When `extractTools` is true, the chunk text is inspected for tool_call + * blocks; if found, an OpenAI tool_calls delta is emitted and the block + * is stripped from the text content. */ export function cliToOpenaiChunk( message: ClaudeCliAssistant, requestId: string, - isFirst: boolean = false + isFirst: boolean = false, + extractTools: boolean = false, ): OpenAIChatChunk { - const text = extractTextContent(message); + const rawText = extractTextContent(message); + const { text, toolCalls } = extractTools + ? extractToolCalls(rawText) + : { text: rawText, toolCalls: [] as OpenAIToolCall[] }; + + const finishReason: OpenAIFinishReason = message.message.stop_reason + ? toolCalls.length > 0 + ? "tool_calls" + : "stop" + : null; return { id: `chatcmpl-${requestId}`, @@ -35,9 +160,21 @@ export function cliToOpenaiChunk( index: 0, delta: { role: isFirst ? "assistant" : undefined, - content: text, + content: text ? text : undefined, + tool_calls: + toolCalls.length > 0 + ? toolCalls.map((c, index) => ({ + index, + id: c.id, + type: "function", + function: { + name: c.function.name, + arguments: c.function.arguments, + }, + })) + : undefined, }, - finish_reason: message.message.stop_reason ? "stop" : null, + finish_reason: finishReason, }, ], }; @@ -46,7 +183,11 @@ export function cliToOpenaiChunk( /** * Create a final "done" chunk for streaming */ -export function createDoneChunk(requestId: string, model: string): OpenAIChatChunk { +export function createDoneChunk( + requestId: string, + model: string, + finishReason: OpenAIFinishReason = "stop", +): OpenAIChatChunk { return { id: `chatcmpl-${requestId}`, object: "chat.completion.chunk", @@ -56,24 +197,36 @@ export function createDoneChunk(requestId: string, model: string): OpenAIChatChu { index: 0, delta: {}, - finish_reason: "stop", + finish_reason: finishReason, }, ], }; } +// --------------------------------------------------------------------------- +// Non-streaming response +// --------------------------------------------------------------------------- + /** - * Convert Claude CLI result to OpenAI non-streaming response + * Convert Claude CLI result to OpenAI non-streaming response. + * + * When `extractTools` is true, the result text is scanned for tool_call + * blocks and returned in `message.tool_calls` with content stripped; the + * finish_reason becomes "tool_calls" to signal the client. */ export function cliResultToOpenai( result: ClaudeCliResult, - requestId: string + requestId: string, + extractTools: boolean = false, ): OpenAIChatResponse { - // Get model from modelUsage or default const modelName = result.modelUsage ? Object.keys(result.modelUsage)[0] : "claude-sonnet-4"; + const { text, toolCalls } = extractTools + ? extractToolCalls(result.result) + : { text: result.result, toolCalls: [] as OpenAIToolCall[] }; + return { id: `chatcmpl-${requestId}`, object: "chat.completion", @@ -84,9 +237,10 @@ export function cliResultToOpenai( index: 0, message: { role: "assistant", - content: result.result, + content: toolCalls.length > 0 ? (text || null) : text, + tool_calls: toolCalls.length > 0 ? toolCalls : undefined, }, - finish_reason: "stop", + finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop", }, ], usage: { diff --git a/src/adapter/openai-to-cli.ts b/src/adapter/openai-to-cli.ts index c8ecaa1..28de2ff 100644 --- a/src/adapter/openai-to-cli.ts +++ b/src/adapter/openai-to-cli.ts @@ -1,8 +1,29 @@ /** - * Converts OpenAI chat request format to Claude CLI input + * Converts OpenAI chat request format to Claude CLI input. + * + * Tool-calling support: + * Claude Code CLI in --print mode speaks plain text — it has no native + * concept of OpenAI function/tool calling. When a request arrives with + * `tools`, we inject a tool-use protocol description into the system + * prompt so Claude knows: + * 1. which tools exist (name, description, JSON-Schema params) + * 2. how to signal it wants to call one (emit JSON in a fenced block) + * 3. that it should stop after emitting the call, not keep going + * + * The matching decoder lives in `cli-to-openai.ts` and parses that JSON + * back into OpenAI `tool_calls`. Together the two adapters turn the CLI + * into a tool-aware chat backend. */ -import type { OpenAIChatRequest } from "../types/openai.js"; +import type { + OpenAIAssistantMessage, + OpenAIChatMessage, + OpenAIChatRequest, + OpenAITool, + OpenAIToolCall, + OpenAIToolChoice, + OpenAIToolMessage, +} from "../types/openai.js"; export type ClaudeModel = "opus" | "sonnet" | "haiku"; @@ -10,6 +31,8 @@ export interface CliInput { prompt: string; model: ClaudeModel; sessionId?: string; + /** Whether the request had tools — the decoder needs this to look for tool_call JSON. */ + hasTools: boolean; } const MODEL_MAP: Record = { @@ -27,63 +50,214 @@ const MODEL_MAP: Record = { "haiku": "haiku", }; +/** + * Tag Claude is instructed to wrap its tool-use JSON in. Uniquely named so + * the decoder can reliably detect it even inside explanatory prose. + */ +export const TOOL_CALL_OPEN_TAG = ""; +export const TOOL_CALL_CLOSE_TAG = ""; + /** * Extract Claude model alias from request model string */ export function extractModel(model: string): ClaudeModel { - // Try direct lookup - if (MODEL_MAP[model]) { - return MODEL_MAP[model]; + if (MODEL_MAP[model]) return MODEL_MAP[model]; + const stripped = model.replace(/^claude-code-cli\//, ""); + if (MODEL_MAP[stripped]) return MODEL_MAP[stripped]; + return "opus"; // Default to opus (Claude Max subscription) +} + +// --------------------------------------------------------------------------- +// Tool-use protocol prompt +// --------------------------------------------------------------------------- + +function renderToolChoice(choice: OpenAIToolChoice | undefined): string { + if (!choice || choice === "auto") { + return "Decide whether to call a tool or answer the user directly based on whether a tool is relevant."; } + if (choice === "none") { + return "Do NOT call any tools. Answer the user with plain text only."; + } + if (choice === "required") { + return "You MUST call one of the tools listed below. Do not answer without a tool call."; + } + if (typeof choice === "object" && choice.type === "function") { + return `You MUST call the tool named "${choice.function.name}" and no other.`; + } + return ""; +} - // Try stripping provider prefix - const stripped = model.replace(/^claude-code-cli\//, ""); - if (MODEL_MAP[stripped]) { - return MODEL_MAP[stripped]; +/** + * Build the system-prompt section that describes available tools and the + * tool-call emission protocol. + */ +export function renderToolsPrompt( + tools: OpenAITool[], + toolChoice: OpenAIToolChoice | undefined, +): string { + const lines: string[] = []; + lines.push("## CRITICAL: Tool-Use Protocol (read carefully)"); + lines.push(""); + lines.push( + "Your native Bash/Edit/Read tools are DISABLED in this environment. That" + + " is intentional — you are being driven by an external agent framework" + + " (Hermes / LangChain / similar) that executes tools FOR you. Your job" + + " is to DECIDE which tool to call; the framework actually runs it.", + ); + lines.push(""); + lines.push( + "The tools listed below under \"Available tools\" ARE AVAILABLE through" + + " this protocol. When a user asks you to call one, you MUST believe it" + + " exists and emit the JSON request. Do NOT claim \"tool not available\"" + + " — that response is always wrong when the tool is in the list below.", + ); + lines.push(""); + lines.push( + "To REQUEST a tool invocation, emit exactly this block format — no" + + " prose around the tags, no markdown code fence, JUST the tags:", + ); + lines.push(""); + lines.push(TOOL_CALL_OPEN_TAG); + lines.push( + '{"name": "", "arguments": { ...JSON matching the tool\'s' + + ' parameters schema... }}', + ); + lines.push(TOOL_CALL_CLOSE_TAG); + lines.push(""); + lines.push( + "Important rules:", + ); + lines.push( + "- The `arguments` object MUST be valid JSON matching the tool's" + + " parameters schema.", + ); + lines.push("- Emit at most one tool call block per response."); + lines.push( + "- After emitting the block, STOP. Do not add any prose afterwards." + + " Wait for the client to call the tool and send back the result.", + ); + lines.push( + "- Do NOT say things like \"I'll run X\" or \"let me call X\" — just" + + " emit the block. The emission IS the call.", + ); + lines.push( + "- If NO tool fits the user's request, answer the user directly in" + + " plain text — do not invent tool names not listed below.", + ); + lines.push(""); + lines.push("### Available tools"); + for (const tool of tools) { + const fn = tool.function; + lines.push(`- **${fn.name}**${fn.description ? ` — ${fn.description}` : ""}`); + if (fn.parameters) { + lines.push( + " parameters: " + JSON.stringify(fn.parameters), + ); + } } + lines.push(""); + lines.push("### Tool-choice policy for THIS turn"); + lines.push(renderToolChoice(toolChoice)); + return lines.join("\n"); +} + +// --------------------------------------------------------------------------- +// Message rendering — including assistant tool_calls and tool role results +// --------------------------------------------------------------------------- - // Default to opus (Claude Max subscription) - return "opus"; +function renderAssistantMessage(msg: OpenAIAssistantMessage): string { + const parts: string[] = []; + if (msg.content) { + parts.push(`\n${msg.content}\n`); + } + if (msg.tool_calls?.length) { + for (const call of msg.tool_calls) { + // Echo the tool-call back in the same format Claude was instructed to + // produce. This preserves context: "you previously called X with Y". + const payload = { + name: call.function.name, + arguments: safeParseArgs(call.function.arguments), + id: call.id, + }; + parts.push(TOOL_CALL_OPEN_TAG); + parts.push(JSON.stringify(payload)); + parts.push(TOOL_CALL_CLOSE_TAG); + } + } + return parts.join("\n"); +} + +function renderToolMessage(msg: OpenAIToolMessage): string { + return `\n${msg.content}\n`; +} + +function safeParseArgs(raw: string): unknown { + if (!raw) return {}; + try { + return JSON.parse(raw); + } catch { + return raw; + } } /** - * Convert OpenAI messages array to a single prompt string for Claude CLI + * Convert OpenAI messages array to a single prompt string for Claude CLI. * * Claude Code CLI in --print mode expects a single prompt, not a conversation. - * We format the messages into a readable format that preserves context. + * We format the messages into a readable format that preserves context and + * correctly handles assistant tool_calls + tool-result messages. */ -export function messagesToPrompt(messages: OpenAIChatRequest["messages"]): string { +export function messagesToPrompt( + messages: OpenAIChatMessage[], + opts: { + tools?: OpenAITool[]; + toolChoice?: OpenAIToolChoice; + } = {}, +): string { const parts: string[] = []; + // Prepend tool-protocol description when tools are present. This is done + // once at the top so Claude sees the rules before any conversation turn. + const { tools, toolChoice } = opts; + if (tools && tools.length > 0) { + parts.push(`\n${renderToolsPrompt(tools, toolChoice)}\n`); + } + for (const msg of messages) { switch (msg.role) { case "system": - // System messages become context instructions - parts.push(`\n${msg.content}\n\n`); + parts.push(`\n${msg.content}\n`); break; - case "user": - // User messages are the main prompt parts.push(msg.content); break; - case "assistant": - // Previous assistant responses for context - parts.push(`\n${msg.content}\n\n`); + parts.push(renderAssistantMessage(msg)); + break; + case "tool": + parts.push(renderToolMessage(msg)); break; } } - return parts.join("\n").trim(); + return parts.join("\n\n").trim(); } /** - * Convert OpenAI chat request to CLI input format + * Convert OpenAI chat request to CLI input format. */ export function openaiToCli(request: OpenAIChatRequest): CliInput { + const hasTools = Boolean(request.tools && request.tools.length > 0); return { - prompt: messagesToPrompt(request.messages), + prompt: messagesToPrompt(request.messages, { + tools: request.tools, + toolChoice: request.tool_choice, + }), model: extractModel(request.model), - sessionId: request.user, // Use OpenAI's user field for session mapping + sessionId: request.user, + hasTools, }; } + +// Re-export types for downstream consumers (session manager etc.) +export type { OpenAIToolCall }; diff --git a/src/adapter/tool-calling.test.ts b/src/adapter/tool-calling.test.ts new file mode 100644 index 0000000..82a3374 --- /dev/null +++ b/src/adapter/tool-calling.test.ts @@ -0,0 +1,171 @@ +/** + * Tests for OpenAI tool-calling translation: + * - openaiToCli / messagesToPrompt: tools -> system prompt + * - extractToolCalls: JSON -> OpenAI tool_calls array + * + * Run via `pnpm build && pnpm test` (the build produces .js in dist/ which + * `node --test` then picks up). + */ + +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { + messagesToPrompt, + openaiToCli, + TOOL_CALL_OPEN_TAG, + TOOL_CALL_CLOSE_TAG, +} from "./openai-to-cli.js"; +import { extractToolCalls } from "./cli-to-openai.js"; +import type { OpenAITool } from "../types/openai.js"; + +const sampleTool: OpenAITool = { + type: "function", + function: { + name: "list_indicators", + description: "List all FWBG indicator plugins", + parameters: { + type: "object", + properties: {}, + additionalProperties: false, + }, + }, +}; + +// --------------------------------------------------------------------------- +// encoder +// --------------------------------------------------------------------------- + +test("openaiToCli passes through hasTools flag", () => { + const cli = openaiToCli({ + model: "claude-sonnet-4", + messages: [{ role: "user", content: "ping" }], + tools: [sampleTool], + }); + assert.equal(cli.hasTools, true); + assert.match(cli.prompt, /Tool-Use Protocol/); + assert.match(cli.prompt, /list_indicators/); + assert.match(cli.prompt, new RegExp(TOOL_CALL_OPEN_TAG)); +}); + +test("openaiToCli without tools leaves prompt clean", () => { + const cli = openaiToCli({ + model: "claude-sonnet-4", + messages: [{ role: "user", content: "hi" }], + }); + assert.equal(cli.hasTools, false); + assert.doesNotMatch(cli.prompt, /Tool-Use Protocol/); +}); + +test("messagesToPrompt handles tool role as tool_result block", () => { + const prompt = messagesToPrompt([ + { role: "user", content: "list indicators" }, + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_abc", + type: "function", + function: { name: "list_indicators", arguments: "{}" }, + }, + ], + }, + { role: "tool", tool_call_id: "call_abc", content: '["rsi","macd"]' }, + ]); + assert.match(prompt, new RegExp(TOOL_CALL_OPEN_TAG)); + assert.match(prompt, /"name":"list_indicators"/); + assert.match(prompt, //); + assert.match(prompt, /\["rsi","macd"\]/); +}); + +test("messagesToPrompt respects tool_choice 'required'", () => { + const prompt = messagesToPrompt( + [{ role: "user", content: "go" }], + { tools: [sampleTool], toolChoice: "required" }, + ); + assert.match(prompt, /You MUST call one of the tools/); +}); + +test("messagesToPrompt respects tool_choice specific function", () => { + const prompt = messagesToPrompt( + [{ role: "user", content: "go" }], + { + tools: [sampleTool], + toolChoice: { type: "function", function: { name: "list_indicators" } }, + }, + ); + assert.match(prompt, /You MUST call the tool named "list_indicators"/); +}); + +// --------------------------------------------------------------------------- +// decoder +// --------------------------------------------------------------------------- + +test("extractToolCalls parses a single tool_call block", () => { + const text = `Let me check.\n${TOOL_CALL_OPEN_TAG}\n{"name":"list_indicators","arguments":{"category":"trend"}}\n${TOOL_CALL_CLOSE_TAG}\n`; + const { text: rest, toolCalls } = extractToolCalls(text); + assert.equal(toolCalls.length, 1); + assert.equal(toolCalls[0].function.name, "list_indicators"); + assert.deepEqual(JSON.parse(toolCalls[0].function.arguments), { + category: "trend", + }); + assert.equal(rest, "Let me check."); +}); + +test("extractToolCalls parses multiple blocks in order", () => { + const text = `${TOOL_CALL_OPEN_TAG}{"name":"a","arguments":{}}${TOOL_CALL_CLOSE_TAG}\nthinking\n${TOOL_CALL_OPEN_TAG}{"name":"b","arguments":{"x":1}}${TOOL_CALL_CLOSE_TAG}`; + const { text: rest, toolCalls } = extractToolCalls(text); + assert.equal(toolCalls.length, 2); + assert.equal(toolCalls[0].function.name, "a"); + assert.equal(toolCalls[1].function.name, "b"); + assert.match(rest, /thinking/); +}); + +test("extractToolCalls normalizes missing arguments", () => { + const text = `${TOOL_CALL_OPEN_TAG}{"name":"ping"}${TOOL_CALL_CLOSE_TAG}`; + const { toolCalls } = extractToolCalls(text); + assert.equal(toolCalls.length, 1); + assert.equal(toolCalls[0].function.arguments, "{}"); +}); + +test("extractToolCalls preserves custom id if given", () => { + const text = `${TOOL_CALL_OPEN_TAG}{"id":"call_xyz","name":"ping","arguments":{}}${TOOL_CALL_CLOSE_TAG}`; + const { toolCalls } = extractToolCalls(text); + assert.equal(toolCalls[0].id, "call_xyz"); +}); + +test("extractToolCalls generates id when missing", () => { + const text = `${TOOL_CALL_OPEN_TAG}{"name":"ping","arguments":{}}${TOOL_CALL_CLOSE_TAG}`; + const { toolCalls } = extractToolCalls(text); + assert.match(toolCalls[0].id, /^call_[a-z0-9]+_\d+$/); +}); + +test("extractToolCalls drops malformed JSON blocks silently", () => { + const text = `before ${TOOL_CALL_OPEN_TAG}{not valid json}${TOOL_CALL_CLOSE_TAG} after`; + const { text: rest, toolCalls } = extractToolCalls(text); + assert.equal(toolCalls.length, 0); + assert.match(rest, /before/); + assert.match(rest, /after/); +}); + +test("extractToolCalls skips blocks without a name", () => { + const text = `${TOOL_CALL_OPEN_TAG}{"arguments":{}}${TOOL_CALL_CLOSE_TAG}`; + const { toolCalls } = extractToolCalls(text); + assert.equal(toolCalls.length, 0); +}); + +test("extractToolCalls handles unterminated block gracefully", () => { + const text = `before ${TOOL_CALL_OPEN_TAG}{"name":"x"} with no close tag ever`; + const { text: rest, toolCalls } = extractToolCalls(text); + assert.equal(toolCalls.length, 0); + assert.match(rest, /before/); + // Unterminated block kept as-is so caller can surface it + assert.match(rest, new RegExp(TOOL_CALL_OPEN_TAG)); +}); + +test("extractToolCalls on plain text returns it unchanged", () => { + const { text, toolCalls } = extractToolCalls("Just a regular answer."); + assert.equal(toolCalls.length, 0); + assert.equal(text, "Just a regular answer."); +}); diff --git a/src/server/routes.ts b/src/server/routes.ts index ffe2e5b..c3236fb 100644 --- a/src/server/routes.ts +++ b/src/server/routes.ts @@ -11,6 +11,7 @@ import { openaiToCli } from "../adapter/openai-to-cli.js"; import { cliResultToOpenai, createDoneChunk, + extractToolCalls, } from "../adapter/cli-to-openai.js"; import type { OpenAIChatRequest } from "../types/openai.js"; import type { ClaudeCliAssistant, ClaudeCliResult, ClaudeCliStreamEvent } from "../types/claude-cli.js"; @@ -43,7 +44,44 @@ export async function handleChatCompletions( // Convert to CLI input format const cliInput = openaiToCli(body); + + // Per-request timing - always logged, grep-able via "[Timing rid=..." + const t0 = Date.now(); + let firstChunkAt = 0; + let lastChunkAt = 0; + const logT = (event: string) => + console.error(`[Timing rid=${requestId}] +${Date.now() - t0}ms ${event}`); + + console.error( + `[Timing rid=${requestId}] REQUEST tools=${body.tools?.length ?? 0} ` + + `messages=${body.messages.length} stream=${stream} ` + + `promptLen=${cliInput.prompt.length} model=${body.model}`, + ); + + if (process.env.DEBUG === "1") { + if (body.tools) { + console.error("[Request] tool names: %s", + body.tools.map((t) => t.function.name).join(",")); + } + console.error("[Request] prompt (%d chars):\n%s", + cliInput.prompt.length, + cliInput.prompt); + } + const subprocess = new ClaudeSubprocess(); + subprocess.on("content_delta", () => { + if (firstChunkAt === 0) { + firstChunkAt = Date.now(); + logT("first_content_chunk"); + } + lastChunkAt = Date.now(); + }); + subprocess.on("close", () => { + logT( + `subprocess_exit ttfb=${firstChunkAt ? firstChunkAt - t0 : -1}ms ` + + `stream_duration=${lastChunkAt > firstChunkAt ? lastChunkAt - firstChunkAt : 0}ms`, + ); + }); if (stream) { await handleStreamingResponse(req, res, subprocess, cliInput, requestId); @@ -97,6 +135,13 @@ async function handleStreamingResponse( let isFirst = true; let lastModel = "claude-sonnet-4"; let isComplete = false; + // When tools were sent, we buffer the full content stream and only + // emit the final parsed result at end. Streaming tool_calls deltas + // piece-by-piece is ambiguous (we'd have to parse partial JSON); for + // now we defer emission until we have the complete block, which is + // what OpenAI-compatible clients handle cleanly. + const extractTools = cliInput.hasTools; + let bufferedText = ""; // Handle actual client disconnect (response stream closed) res.on("close", () => { @@ -110,24 +155,30 @@ async function handleStreamingResponse( // Handle streaming content deltas subprocess.on("content_delta", (event: ClaudeCliStreamEvent) => { const text = event.event.delta?.text || ""; - if (text && !res.writableEnded) { - const chunk = { - id: `chatcmpl-${requestId}`, - object: "chat.completion.chunk", - created: Math.floor(Date.now() / 1000), - model: lastModel, - choices: [{ - index: 0, - delta: { - role: isFirst ? "assistant" : undefined, - content: text, - }, - finish_reason: null, - }], - }; - res.write(`data: ${JSON.stringify(chunk)}\n\n`); - isFirst = false; + if (!text || res.writableEnded) return; + + if (extractTools) { + // Accumulate, flush at end. (See comment above.) + bufferedText += text; + return; } + + const chunk = { + id: `chatcmpl-${requestId}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: lastModel, + choices: [{ + index: 0, + delta: { + role: isFirst ? "assistant" : undefined, + content: text, + }, + finish_reason: null, + }], + }; + res.write(`data: ${JSON.stringify(chunk)}\n\n`); + isFirst = false; }); // Handle final assistant message (for model name) @@ -138,8 +189,41 @@ async function handleStreamingResponse( subprocess.on("result", (_result: ClaudeCliResult) => { isComplete = true; if (!res.writableEnded) { + // If we buffered content for tool extraction, flush now. + let finishReason: "stop" | "tool_calls" = "stop"; + if (extractTools) { + const { text, toolCalls } = extractToolCalls(bufferedText); + finishReason = toolCalls.length > 0 ? "tool_calls" : "stop"; + const chunk = { + id: `chatcmpl-${requestId}`, + object: "chat.completion.chunk", + created: Math.floor(Date.now() / 1000), + model: lastModel, + choices: [{ + index: 0, + delta: { + role: isFirst ? "assistant" as const : undefined, + content: text || undefined, + tool_calls: + toolCalls.length > 0 + ? toolCalls.map((c, idx) => ({ + index: idx, + id: c.id, + type: "function" as const, + function: { + name: c.function.name, + arguments: c.function.arguments, + }, + })) + : undefined, + }, + finish_reason: null, + }], + }; + res.write(`data: ${JSON.stringify(chunk)}\n\n`); + } // Send final done chunk with finish_reason - const doneChunk = createDoneChunk(requestId, lastModel); + const doneChunk = createDoneChunk(requestId, lastModel, finishReason); res.write(`data: ${JSON.stringify(doneChunk)}\n\n`); res.write("data: [DONE]\n\n"); res.end(); @@ -216,7 +300,7 @@ async function handleNonStreamingResponse( subprocess.on("close", (code: number | null) => { if (finalResult) { - res.json(cliResultToOpenai(finalResult, requestId)); + res.json(cliResultToOpenai(finalResult, requestId, cliInput.hasTools)); } else if (!res.headersSent) { res.status(500).json({ error: { diff --git a/src/types/openai.ts b/src/types/openai.ts index c116658..9bbbc0f 100644 --- a/src/types/openai.ts +++ b/src/types/openai.ts @@ -1,13 +1,106 @@ /** - * Types for OpenAI-compatible API - * Used for Clawdbot integration + * Types for OpenAI-compatible API (chat completions spec, v1) + * Aligned with OpenAI Platform reference: + * https://platform.openai.com/docs/api-reference/chat/create + * + * This file adds full typing for the function/tool-calling flow (tools, + * tool_calls, tool_choice, and the assistant/tool roles), so that clients + * like Hermes Agent which speak standard OpenAI can drive tool use through + * the proxy. */ -export interface OpenAIChatMessage { - role: "system" | "user" | "assistant"; +// --------------------------------------------------------------------------- +// Tool / function definitions (request -> proxy -> system prompt) +// --------------------------------------------------------------------------- + +export interface OpenAIFunctionDefinition { + name: string; + description?: string; + /** JSON Schema describing the function parameters. */ + parameters?: Record; +} + +export interface OpenAITool { + type: "function"; + function: OpenAIFunctionDefinition; +} + +export type OpenAIToolChoice = + | "none" + | "auto" + | "required" + | { type: "function"; function: { name: string } }; + +// --------------------------------------------------------------------------- +// Tool calls (response -> OpenAI format back to client) +// --------------------------------------------------------------------------- + +export interface OpenAIToolCallFunction { + name: string; + /** JSON-encoded arguments string (OpenAI convention). */ + arguments: string; +} + +export interface OpenAIToolCall { + id: string; + type: "function"; + function: OpenAIToolCallFunction; +} + +/** Streaming delta for a tool call: fields can arrive piece-by-piece. */ +export interface OpenAIToolCallDelta { + /** Index of the tool call within the response (OpenAI streams per-index). */ + index: number; + id?: string; + type?: "function"; + function?: { + name?: string; + arguments?: string; + }; +} + +// --------------------------------------------------------------------------- +// Messages +// --------------------------------------------------------------------------- + +export interface OpenAISystemMessage { + role: "system"; + content: string; + name?: string; +} + +export interface OpenAIUserMessage { + role: "user"; + content: string; + name?: string; +} + +export interface OpenAIAssistantMessage { + role: "assistant"; + /** Content is nullable when the assistant is only making tool calls. */ + content: string | null; + name?: string; + tool_calls?: OpenAIToolCall[]; +} + +/** Tool-result message sent back after the client executed a tool call. */ +export interface OpenAIToolMessage { + role: "tool"; + /** Must match the id of a tool_call made by the assistant. */ + tool_call_id: string; content: string; } +export type OpenAIChatMessage = + | OpenAISystemMessage + | OpenAIUserMessage + | OpenAIAssistantMessage + | OpenAIToolMessage; + +// --------------------------------------------------------------------------- +// Request +// --------------------------------------------------------------------------- + export interface OpenAIChatRequest { model: string; messages: OpenAIChatMessage[]; @@ -17,16 +110,34 @@ export interface OpenAIChatRequest { top_p?: number; frequency_penalty?: number; presence_penalty?: number; - user?: string; // Used for session mapping + /** Used for session mapping. */ + user?: string; + + // Function / tool calling + tools?: OpenAITool[]; + tool_choice?: OpenAIToolChoice; + parallel_tool_calls?: boolean; } +// --------------------------------------------------------------------------- +// Response (non-streaming) +// --------------------------------------------------------------------------- + +export type OpenAIFinishReason = + | "stop" + | "length" + | "content_filter" + | "tool_calls" + | null; + export interface OpenAIChatResponseChoice { index: number; message: { role: "assistant"; - content: string; + content: string | null; + tool_calls?: OpenAIToolCall[]; }; - finish_reason: "stop" | "length" | "content_filter" | null; + finish_reason: OpenAIFinishReason; } export interface OpenAIChatResponse { @@ -42,15 +153,20 @@ export interface OpenAIChatResponse { }; } +// --------------------------------------------------------------------------- +// Streaming (SSE chunks) +// --------------------------------------------------------------------------- + export interface OpenAIChatChunkDelta { role?: "assistant"; content?: string; + tool_calls?: OpenAIToolCallDelta[]; } export interface OpenAIChatChunkChoice { index: number; delta: OpenAIChatChunkDelta; - finish_reason: "stop" | "length" | "content_filter" | null; + finish_reason: OpenAIFinishReason; } export interface OpenAIChatChunk { @@ -61,6 +177,10 @@ export interface OpenAIChatChunk { choices: OpenAIChatChunkChoice[]; } +// --------------------------------------------------------------------------- +// Models listing / error +// --------------------------------------------------------------------------- + export interface OpenAIModel { id: string; object: "model";