diff --git a/src/api/providers/__tests__/openrouter.spec.ts b/src/api/providers/__tests__/openrouter.spec.ts index ae36fc1399d6..9f0ad71bed76 100644 --- a/src/api/providers/__tests__/openrouter.spec.ts +++ b/src/api/providers/__tests__/openrouter.spec.ts @@ -157,7 +157,15 @@ describe("OpenRouterHandler", () => { // Verify stream chunks expect(chunks).toHaveLength(2) // One text chunk and one usage chunk expect(chunks[0]).toEqual({ type: "text", text: "test response" }) - expect(chunks[1]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 20, totalCost: 0.001 }) + // Cost is now calculated locally: (3/1M * 10) + (15/1M * 20) = 0.00003 + 0.0003 = 0.00033 + expect(chunks[1]).toEqual({ + type: "usage", + inputTokens: 10, + outputTokens: 20, + cacheReadTokens: undefined, + reasoningTokens: undefined, + totalCost: expect.closeTo(0.00033, 5), + }) // Verify OpenAI client was called with correct parameters. expect(mockCreate).toHaveBeenCalledWith( @@ -267,6 +275,57 @@ describe("OpenRouterHandler", () => { const generator = handler.createMessage("test", []) await expect(generator.next()).rejects.toThrow("OpenRouter API Error 500: API Error") }) + + it("calculates cost locally when OpenRouter API returns incorrect cost (issue #8650)", async () => { + const handler = new OpenRouterHandler({ + ...mockOptions, + openRouterModelId: "anthropic/claude-3.5-sonnet", // Use Claude 3.5 Sonnet as in the issue + }) + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + id: "test-id", + choices: [{ delta: { content: "test" } }], + } + // Simulate the issue: OpenRouter returns incorrect cost ($0.46) for 527k input tokens + // Actual cost should be: (527000 * 3 / 1M) + (7700 * 15 / 1M) = 1.581 + 0.1155 = 1.6965 + yield { + id: "test-id", + choices: [{ delta: {} }], + usage: { + prompt_tokens: 527000, + completion_tokens: 7700, + cost: 0.46, // OpenRouter's incorrect cost value + }, + } + }, + } + + const mockCreate = vitest.fn().mockResolvedValue(mockStream) + ;(OpenAI as any).prototype.chat = { + completions: { create: mockCreate }, + } as any + + const generator = handler.createMessage("test", []) + const chunks = [] + + for await (const chunk of generator) { + chunks.push(chunk) + } + + // Verify that we calculate the correct cost locally + // Model pricing: inputPrice: 3, outputPrice: 15 + // Cost = (527000 / 1M * 3) + (7700 / 1M * 15) = 1.581 + 0.1155 = 1.6965 + expect(chunks[1]).toEqual({ + type: "usage", + inputTokens: 527000, + outputTokens: 7700, + cacheReadTokens: undefined, + reasoningTokens: undefined, + totalCost: expect.closeTo(1.6965, 5), + }) + }) }) describe("completePrompt", () => { diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 580b17331194..2d4387258a12 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -10,6 +10,7 @@ import { } from "@roo-code/types" import type { ApiHandlerOptions, ModelRecord } from "../../shared/api" +import { calculateApiCostOpenAI } from "../../shared/cost" import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStreamChunk } from "../transform/stream" @@ -196,13 +197,31 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH } if (lastUsage) { + // Get model info to calculate cost locally + const modelInfo = this.getModel().info + + // Calculate cost locally using model pricing information + // OpenRouter uses OpenAI-style token counting (input tokens include cached tokens) + const localCost = calculateApiCostOpenAI( + modelInfo, + lastUsage.prompt_tokens || 0, + lastUsage.completion_tokens || 0, + undefined, // cache creation tokens - OpenRouter doesn't distinguish this + lastUsage.prompt_tokens_details?.cached_tokens, + ) + + // Use locally calculated cost, but fall back to API response if our calculation fails + // or if model pricing info is not available + const apiCost = (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0) + const totalCost = modelInfo.inputPrice && modelInfo.outputPrice ? localCost : apiCost + yield { type: "usage", inputTokens: lastUsage.prompt_tokens || 0, outputTokens: lastUsage.completion_tokens || 0, cacheReadTokens: lastUsage.prompt_tokens_details?.cached_tokens, reasoningTokens: lastUsage.completion_tokens_details?.reasoning_tokens, - totalCost: (lastUsage.cost_details?.upstream_inference_cost || 0) + (lastUsage.cost || 0), + totalCost, } } }