diff --git a/open-sse/handlers/chatCore/streamingHandler.js b/open-sse/handlers/chatCore/streamingHandler.js index aa907cd5c7..6a558c0c21 100644 --- a/open-sse/handlers/chatCore/streamingHandler.js +++ b/open-sse/handlers/chatCore/streamingHandler.js @@ -5,7 +5,7 @@ import { pipeWithDisconnect } from "../../utils/streamHandler.js"; import { PROVIDERS } from "../../config/providers.js"; import { STREAM_STALL_TIMEOUT_MS } from "../../config/runtimeConfig.js"; import { buildAbortedResponsesTerminalBytes } from "../../utils/responsesStreamHelpers.js"; -import { buildRequestDetail, extractRequestConfig, saveUsageStats } from "./requestDetail.js"; +import { buildRequestDetail, extractRequestConfig } from "./requestDetail.js"; import { saveRequestDetail } from "@/lib/usageDb.js"; import { SSE_HEADERS_CORS as SSE_HEADERS } from "../../utils/sseConstants.js"; @@ -101,7 +101,8 @@ export function buildOnStreamComplete({ provider, model, connectionId, apiKey, r console.error("[RequestDetail] Failed to update streaming content:", err.message); }); - saveUsageStats({ provider, model, tokens: usage, connectionId, apiKey, endpoint: clientRawRequest?.endpoint, label: "STREAM USAGE" }); + // Usage is already persisted by logUsage() in the SSE transform flush. Saving + // again here loses cache/reasoning detail and doubles dashboard costs. }; return { onStreamComplete, streamDetailId }; diff --git a/open-sse/providers/pricing.js b/open-sse/providers/pricing.js index 9e767a8049..94079eeb02 100644 --- a/open-sse/providers/pricing.js +++ b/open-sse/providers/pricing.js @@ -30,30 +30,48 @@ export const MODEL_PRICING = { "claude-opus-4-6-thinking": { input: 5.00, output: 25.00, cached: 0.50, reasoning: 37.50, cache_creation: 5.00 }, // === OpenAI / GPT === - "gpt-3.5-turbo": { input: 0.50, output: 1.50, cached: 0.25, reasoning: 2.25, cache_creation: 0.50 }, - "gpt-4": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 }, - "gpt-4-turbo": { input: 10.00, output: 30.00, cached: 5.00, reasoning: 45.00, cache_creation: 10.00 }, - "gpt-4o": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 }, - "gpt-4o-mini": { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.90, cache_creation: 0.15 }, - "gpt-4.1": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 }, - "gpt-5": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 }, - "gpt-5-mini": { input: 0.75, output: 3.00, cached: 0.375, reasoning: 4.50, cache_creation: 0.75 }, - "gpt-5-codex": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 }, - "gpt-5.1": { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 }, - "gpt-5.1-codex": { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 }, + // Standard API pricing from https://developers.openai.com/api/docs/pricing + // (input/cached input/output in $ per 1M tokens). + "gpt-3.5-turbo": { input: 0.50, output: 1.50, cached: null, reasoning: 1.50, cache_creation: 0.50 }, + "gpt-4": { input: 30.00, output: 60.00, cached: null, reasoning: 60.00, cache_creation: 30.00 }, + "gpt-4-turbo": { input: 10.00, output: 30.00, cached: null, reasoning: 30.00, cache_creation: 10.00 }, + "gpt-4o": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 10.00, cache_creation: 2.50 }, + "gpt-4o-mini": { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.60, cache_creation: 0.15 }, + "gpt-4.1": { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 }, + "gpt-4.1-mini": { input: 0.40, output: 1.60, cached: 0.10, reasoning: 1.60, cache_creation: 0.40 }, + "gpt-4.1-nano": { input: 0.10, output: 0.40, cached: 0.025, reasoning: 0.40, cache_creation: 0.10 }, + "gpt-5.5": { input: 5.00, output: 30.00, cached: 0.50, reasoning: 30.00, cache_creation: 5.00 }, + "gpt-5.5-pro": { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 }, + "gpt-5.4": { input: 2.50, output: 15.00, cached: 0.25, reasoning: 15.00, cache_creation: 2.50 }, + "gpt-5.4-mini": { input: 0.75, output: 4.50, cached: 0.075, reasoning: 4.50, cache_creation: 0.75 }, + "gpt-5.4-nano": { input: 0.20, output: 1.25, cached: 0.02, reasoning: 1.25, cache_creation: 0.20 }, + "gpt-5.4-pro": { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 }, + "gpt-5": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 }, + "gpt-5-mini": { input: 0.25, output: 2.00, cached: 0.025, reasoning: 2.00, cache_creation: 0.25 }, + "gpt-5-nano": { input: 0.05, output: 0.40, cached: 0.005, reasoning: 0.40, cache_creation: 0.05 }, + "gpt-5-pro": { input: 15.00, output: 120.00, cached: null, reasoning: 120.00, cache_creation: 15.00 }, + "gpt-5-codex": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 }, + "gpt-5.1": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 }, + "gpt-5.1-codex": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 }, "gpt-5.1-codex-mini": { input: 1.50, output: 6.00, cached: 0.75, reasoning: 9.00, cache_creation: 1.50 }, "gpt-5.1-codex-mini-high": { input: 2.00, output: 8.00, cached: 1.00, reasoning: 12.00, cache_creation: 2.00 }, "gpt-5.1-codex-max": { input: 8.00, output: 32.00, cached: 4.00, reasoning: 48.00, cache_creation: 8.00 }, - "gpt-5.2": { input: 5.00, output: 20.00, cached: 2.50, reasoning: 30.00, cache_creation: 5.00 }, - "gpt-5.2-codex": { input: 5.00, output: 20.00, cached: 2.50, reasoning: 30.00, cache_creation: 5.00 }, + "gpt-5.2": { input: 1.75, output: 14.00, cached: 0.175, reasoning: 14.00, cache_creation: 1.75 }, + "gpt-5.2-pro": { input: 21.00, output: 168.00, cached: null, reasoning: 168.00, cache_creation: 21.00 }, + "gpt-5.2-codex": { input: 1.75, output: 14.00, cached: 0.175, reasoning: 14.00, cache_creation: 1.75 }, "gpt-5.3-codex": { input: 6.00, output: 24.00, cached: 3.00, reasoning: 36.00, cache_creation: 6.00 }, "gpt-5.3-codex-xhigh": { input: 10.00, output: 40.00, cached: 5.00, reasoning: 60.00, cache_creation: 10.00 }, "gpt-5.3-codex-high": { input: 8.00, output: 32.00, cached: 4.00, reasoning: 48.00, cache_creation: 8.00 }, "gpt-5.3-codex-low": { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 }, "gpt-5.3-codex-none": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 }, "gpt-5.3-codex-spark": { input: 3.00, output: 12.00, cached: 0.30, reasoning: 12.00, cache_creation: 3.00 }, - "o1": { input: 15.00, output: 60.00, cached: 7.50, reasoning: 90.00, cache_creation: 15.00 }, - "o1-mini": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 }, + "o1": { input: 15.00, output: 60.00, cached: 7.50, reasoning: 60.00, cache_creation: 15.00 }, + "o1-mini": { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 }, + "o1-pro": { input: 150.00, output: 600.00, cached: null, reasoning: 600.00, cache_creation: 150.00 }, + "o3": { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 }, + "o3-mini": { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 }, + "o3-pro": { input: 20.00, output: 80.00, cached: null, reasoning: 80.00, cache_creation: 20.00 }, + "o4-mini": { input: 1.10, output: 4.40, cached: 0.275, reasoning: 4.40, cache_creation: 1.10 }, // === Gemini === "gemini-3-flash-preview": { input: 0.50, output: 3.00, cached: 0.03, reasoning: 4.50, cache_creation: 0.50 }, @@ -161,20 +179,40 @@ export const PATTERN_PRICING = [ { pattern: "gemini-*", pricing: { input: 0.50, output: 3.00, cached: 0.03, reasoning: 4.50, cache_creation: 0.50 } }, // --- GPT (specific first, generic last) --- + { pattern: "gpt-5.5-pro", pricing: { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 } }, + { pattern: "gpt-5.5", pricing: { input: 5.00, output: 30.00, cached: 0.50, reasoning: 30.00, cache_creation: 5.00 } }, + { pattern: "gpt-5.4-pro", pricing: { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 } }, + { pattern: "gpt-5.4-mini", pricing: { input: 0.75, output: 4.50, cached: 0.075, reasoning: 4.50, cache_creation: 0.75 } }, + { pattern: "gpt-5.4-nano", pricing: { input: 0.20, output: 1.25, cached: 0.02, reasoning: 1.25, cache_creation: 0.20 } }, + { pattern: "gpt-5.4", pricing: { input: 2.50, output: 15.00, cached: 0.25, reasoning: 15.00, cache_creation: 2.50 } }, { pattern: "gpt-5.3-*", pricing: { input: 6.00, output: 24.00, cached: 3.00, reasoning: 36.00, cache_creation: 6.00 } }, - { pattern: "gpt-5.2-*", pricing: { input: 5.00, output: 20.00, cached: 2.50, reasoning: 30.00, cache_creation: 5.00 } }, - { pattern: "gpt-5.1-*", pricing: { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 } }, - { pattern: "gpt-5-*", pricing: { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 } }, - { pattern: "gpt-5*", pricing: { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 } }, - { pattern: "gpt-4o-*", pricing: { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.90, cache_creation: 0.15 } }, - { pattern: "gpt-4o", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 } }, - { pattern: "gpt-4*", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 } }, + { pattern: "gpt-5.2-pro", pricing: { input: 21.00, output: 168.00, cached: null, reasoning: 168.00, cache_creation: 21.00 } }, + { pattern: "gpt-5.2-*", pricing: { input: 1.75, output: 14.00, cached: 0.175, reasoning: 14.00, cache_creation: 1.75 } }, + { pattern: "gpt-5.1-*", pricing: { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 } }, + { pattern: "gpt-5-pro", pricing: { input: 15.00, output: 120.00, cached: null, reasoning: 120.00, cache_creation: 15.00 } }, + { pattern: "gpt-5-mini", pricing: { input: 0.25, output: 2.00, cached: 0.025, reasoning: 2.00, cache_creation: 0.25 } }, + { pattern: "gpt-5-nano", pricing: { input: 0.05, output: 0.40, cached: 0.005, reasoning: 0.40, cache_creation: 0.05 } }, + { pattern: "gpt-5-*", pricing: { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 } }, + { pattern: "gpt-5*", pricing: { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 } }, + { pattern: "gpt-4.1-mini*", pricing: { input: 0.40, output: 1.60, cached: 0.10, reasoning: 1.60, cache_creation: 0.40 } }, + { pattern: "gpt-4.1-nano*", pricing: { input: 0.10, output: 0.40, cached: 0.025, reasoning: 0.40, cache_creation: 0.10 } }, + { pattern: "gpt-4.1*", pricing: { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 } }, + { pattern: "gpt-4o-mini*", pricing: { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.60, cache_creation: 0.15 } }, + { pattern: "gpt-4o-*", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 10.00, cache_creation: 2.50 } }, + { pattern: "gpt-4o", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 10.00, cache_creation: 2.50 } }, + { pattern: "gpt-4-turbo*", pricing: { input: 10.00, output: 30.00, cached: null, reasoning: 30.00, cache_creation: 10.00 } }, + { pattern: "gpt-4*", pricing: { input: 30.00, output: 60.00, cached: null, reasoning: 60.00, cache_creation: 30.00 } }, // --- o1 / o-series --- - { pattern: "o1-*", pricing: { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 } }, - { pattern: "o1", pricing: { input: 15.00, output: 60.00, cached: 7.50, reasoning: 90.00, cache_creation: 15.00 } }, - { pattern: "o3-*", pricing: { input: 10.00, output: 40.00, cached: 5.00, reasoning: 60.00, cache_creation: 10.00 } }, - { pattern: "o4-*", pricing: { input: 2.00, output: 8.00, cached: 1.00, reasoning: 12.00, cache_creation: 2.00 } }, + { pattern: "o1-pro", pricing: { input: 150.00, output: 600.00, cached: null, reasoning: 600.00, cache_creation: 150.00 } }, + { pattern: "o1-mini", pricing: { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 } }, + { pattern: "o1", pricing: { input: 15.00, output: 60.00, cached: 7.50, reasoning: 60.00, cache_creation: 15.00 } }, + { pattern: "o3-pro", pricing: { input: 20.00, output: 80.00, cached: null, reasoning: 80.00, cache_creation: 20.00 } }, + { pattern: "o3-mini", pricing: { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 } }, + { pattern: "o3-*", pricing: { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 } }, + { pattern: "o3", pricing: { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 } }, + { pattern: "o4-mini", pricing: { input: 1.10, output: 4.40, cached: 0.275, reasoning: 4.40, cache_creation: 1.10 } }, + { pattern: "o4-*", pricing: { input: 1.10, output: 4.40, cached: 0.275, reasoning: 4.40, cache_creation: 1.10 } }, // --- Qwen --- { pattern: "qwen3-coder-*", pricing: { input: 1.00, output: 4.00, cached: 0.50, reasoning: 6.00, cache_creation: 1.00 } }, @@ -263,42 +301,129 @@ export function getDefaultPricing() { */ export function formatCost(cost) { if (cost === null || cost === undefined || isNaN(cost)) return "$0.00"; - return `$${cost.toFixed(2)}`; + const value = Number(cost || 0); + if (!Number.isFinite(value) || value === 0) return "$0.00"; + const abs = Math.abs(value); + if (abs < 0.0001) return `$${value.toFixed(6)}`; + if (abs < 0.01) return `$${value.toFixed(4)}`; + return `$${value.toFixed(2)}`; } /** - * Calculate cost from tokens and pricing + * Calculate itemized cost from tokens and pricing. + * + * Reasoning tokens reported by OpenAI-compatible APIs are output-token details, + * not extra billable tokens. Only add separate reasoning cost when a pricing + * entry explicitly opts into reasoning_billed_separately. + * * @param {object} tokens * @param {object} pricing - * @returns {number} cost in dollars + * @returns {object} cost breakdown in dollars and tokens */ -export function calculateCostFromTokens(tokens, pricing) { - if (!tokens || !pricing) return 0; - - let cost = 0; - - const inputTokens = tokens.prompt_tokens || tokens.input_tokens || 0; - const cachedTokens = tokens.cached_tokens || tokens.cache_read_input_tokens || 0; - const nonCachedInput = Math.max(0, inputTokens - cachedTokens); - - cost += nonCachedInput * (pricing.input / 1000000); - - if (cachedTokens > 0) { - cost += cachedTokens * ((pricing.cached || pricing.input) / 1000000); - } - - const outputTokens = tokens.completion_tokens || tokens.output_tokens || 0; - cost += outputTokens * (pricing.output / 1000000); - - const reasoningTokens = tokens.reasoning_tokens || 0; - if (reasoningTokens > 0) { - cost += reasoningTokens * ((pricing.reasoning || pricing.output) / 1000000); +export function calculateCostBreakdownFromTokens(tokens, pricing) { + if (!tokens || !pricing) { + return { + promptTokens: 0, + completionTokens: 0, + uncachedPromptTokens: 0, + cacheReadTokens: 0, + cacheCreationTokens: 0, + reasoningTokens: 0, + inputCost: 0, + uncachedInputCost: 0, + cachedInputCost: 0, + cacheCreationCost: 0, + outputCost: 0, + visibleOutputCost: 0, + reasoningCost: 0, + totalCost: 0, + cacheSavings: 0, + }; } - const cacheCreationTokens = tokens.cache_creation_input_tokens || 0; - if (cacheCreationTokens > 0) { - cost += cacheCreationTokens * ((pricing.cache_creation || pricing.input) / 1000000); - } + const readNumber = (...values) => { + for (const value of values) { + const n = Number(value); + if (Number.isFinite(n) && n > 0) return n; + } + return 0; + }; + + const reportedPromptTokens = readNumber(tokens.prompt_tokens, tokens.input_tokens); + const cacheReadTokens = readNumber( + tokens.cache_read_input_tokens, + tokens.cached_tokens, + tokens.input_tokens_details?.cached_tokens, + tokens.prompt_tokens_details?.cached_tokens, + ); + const cacheCreationTokens = readNumber( + tokens.cache_creation_input_tokens, + tokens.input_tokens_details?.cache_creation_tokens, + tokens.prompt_tokens_details?.cache_creation_tokens, + ); + const cacheSideTokens = cacheReadTokens + cacheCreationTokens; + + const promptTokensIncludesCache = tokens.prompt_tokens !== undefined || tokens.input_tokens_include_cache === true; + const promptTokens = promptTokensIncludesCache + ? reportedPromptTokens + : reportedPromptTokens + cacheSideTokens; + + const uncachedPromptTokens = promptTokensIncludesCache + ? Math.max(0, reportedPromptTokens - cacheSideTokens) + : reportedPromptTokens; + + const completionTokens = readNumber(tokens.completion_tokens, tokens.output_tokens); + const reasoningTokens = readNumber( + tokens.reasoning_tokens, + tokens.output_tokens_details?.reasoning_tokens, + tokens.completion_tokens_details?.reasoning_tokens, + ); + + const inputRate = pricing.input || 0; + const cachedRate = pricing.cached ?? inputRate; + const cacheCreationRate = pricing.cache_creation ?? inputRate; + const outputRate = pricing.output || 0; + const reasoningRate = pricing.reasoning ?? outputRate; + + const uncachedInputCost = uncachedPromptTokens * (inputRate / 1000000); + const cachedInputCost = cacheReadTokens * (cachedRate / 1000000); + const cacheCreationCost = cacheCreationTokens * (cacheCreationRate / 1000000); + const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost; + + const outputCost = completionTokens * (outputRate / 1000000); + const reasoningBilledSeparately = pricing.reasoning_billed_separately === true; + const reasoningCost = reasoningTokens * ((reasoningBilledSeparately ? reasoningRate : outputRate) / 1000000); + const visibleOutputTokens = Math.max(0, completionTokens - reasoningTokens); + const visibleOutputCost = visibleOutputTokens * (outputRate / 1000000); + const separateReasoningCost = reasoningBilledSeparately ? reasoningTokens * (reasoningRate / 1000000) : 0; + const totalCost = inputCost + outputCost + separateReasoningCost; + const cacheSavings = cacheReadTokens * (Math.max(0, inputRate - cachedRate) / 1000000); + + return { + promptTokens, + completionTokens, + uncachedPromptTokens, + cacheReadTokens, + cacheCreationTokens, + reasoningTokens, + inputCost, + uncachedInputCost, + cachedInputCost, + cacheCreationCost, + outputCost, + visibleOutputCost, + reasoningCost, + totalCost, + cacheSavings, + }; +} - return cost; +/** + * Calculate cost from tokens and pricing + * @param {object} tokens + * @param {object} pricing + * @returns {number} cost in dollars + */ +export function calculateCostFromTokens(tokens, pricing) { + return calculateCostBreakdownFromTokens(tokens, pricing).totalCost; } diff --git a/open-sse/utils/usageTracking.js b/open-sse/utils/usageTracking.js index aed411189f..f6d0a5b651 100644 --- a/open-sse/utils/usageTracking.js +++ b/open-sse/utils/usageTracking.js @@ -174,11 +174,14 @@ export function extractUsage(chunk) { // Claude format (message_delta event) if (chunk.type === "message_delta" && chunk.usage && typeof chunk.usage === "object") { + const inputTokens = chunk.usage.input_tokens || 0; + const cacheReadTokens = chunk.usage.cache_read_input_tokens || 0; + const cacheCreationTokens = chunk.usage.cache_creation_input_tokens || 0; return normalizeUsage({ - prompt_tokens: chunk.usage.input_tokens || 0, + prompt_tokens: inputTokens + cacheReadTokens + cacheCreationTokens, completion_tokens: chunk.usage.output_tokens || 0, - cache_read_input_tokens: chunk.usage.cache_read_input_tokens, - cache_creation_input_tokens: chunk.usage.cache_creation_input_tokens + cache_read_input_tokens: cacheReadTokens, + cache_creation_input_tokens: cacheCreationTokens }); } @@ -186,12 +189,17 @@ export function extractUsage(chunk) { if ((chunk.type === "response.completed" || chunk.type === "response.done") && chunk.response?.usage && typeof chunk.response.usage === "object") { const usage = chunk.response.usage; const cachedTokens = usage.input_tokens_details?.cached_tokens; + const cacheCreationTokens = usage.input_tokens_details?.cache_creation_tokens; return normalizeUsage({ prompt_tokens: usage.input_tokens || usage.prompt_tokens || 0, completion_tokens: usage.output_tokens || usage.completion_tokens || 0, cached_tokens: cachedTokens, + cache_creation_input_tokens: cacheCreationTokens, reasoning_tokens: usage.output_tokens_details?.reasoning_tokens, - prompt_tokens_details: cachedTokens ? { cached_tokens: cachedTokens } : undefined + prompt_tokens_details: (cachedTokens || cacheCreationTokens) ? { + ...(cachedTokens ? { cached_tokens: cachedTokens } : {}), + ...(cacheCreationTokens ? { cache_creation_tokens: cacheCreationTokens } : {}), + } : undefined }); } @@ -308,10 +316,15 @@ export function logUsage(provider, usage, model = null, connectionId = null, api const p = provider?.toUpperCase() || "UNKNOWN"; + // Add cache info if present (unified from different formats) + const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens || usage.prompt_tokens_details?.cached_tokens || usage.input_tokens_details?.cached_tokens; + const cacheCreation = usage.cache_creation_input_tokens || usage.prompt_tokens_details?.cache_creation_tokens || usage.input_tokens_details?.cache_creation_tokens; + const reasoning = usage.reasoning_tokens || usage.completion_tokens_details?.reasoning_tokens || usage.output_tokens_details?.reasoning_tokens; + // Support both formats: // - OpenAI: prompt_tokens, completion_tokens // - Claude: input_tokens, output_tokens - const inTokens = usage?.prompt_tokens || usage?.input_tokens || 0; + const inTokens = usage?.prompt_tokens || ((usage?.input_tokens || 0) + (cacheRead || 0) + (cacheCreation || 0)); const outTokens = usage?.completion_tokens || usage?.output_tokens || 0; const accountPrefix = connectionId ? connectionId.slice(0, 8) + "..." : "unknown"; @@ -322,14 +335,10 @@ export function logUsage(provider, usage, model = null, connectionId = null, api msg += ` ${COLORS.yellow}(estimated)${COLORS.reset}`; } - // Add cache info if present (unified from different formats) - const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens || usage.prompt_tokens_details?.cached_tokens; if (cacheRead) msg += ` | cache_read=${cacheRead}`; - const cacheCreation = usage.cache_creation_input_tokens; if (cacheCreation) msg += ` | cache_create=${cacheCreation}`; - const reasoning = usage.reasoning_tokens; if (reasoning) msg += ` | reasoning=${reasoning}`; console.log(msg); diff --git a/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js b/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js index 5d08933de9..d2e254fc33 100644 --- a/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js +++ b/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js @@ -4,9 +4,32 @@ import PropTypes from "prop-types"; import Card from "@/shared/components/Card"; const fmt = (n) => new Intl.NumberFormat().format(n || 0); -const fmtCost = (n) => `$${(n || 0).toFixed(2)}`; +const fmtCost = (n) => { + const value = Number(n || 0); + if (!Number.isFinite(value) || value === 0) return "$0.00"; + const abs = Math.abs(value); + if (abs < 0.0001) return `$${value.toFixed(6)}`; + if (abs < 0.01) return `$${value.toFixed(4)}`; + return `$${value.toFixed(2)}`; +}; export default function OverviewCards({ stats }) { + const cachedInput = stats.totalCacheReadTokens || 0; + const cachePercent = stats.totalPromptTokens > 0 ? Math.round((cachedInput / stats.totalPromptTokens) * 100) : 0; + const hasTokenBreakdown = (stats.totalUncachedPromptTokens || cachedInput || stats.totalCacheCreationTokens) > 0; + const uncachedInput = hasTokenBreakdown ? (stats.totalUncachedPromptTokens || 0) : (stats.totalPromptTokens || 0); + const hasCostBreakdown = [ + stats.totalInputCost, + stats.totalOutputCost, + stats.totalCachedInputCost, + stats.totalCacheCreationCost, + ].some((value) => Number(value || 0) > 0); + const totalTokens = (stats.totalPromptTokens || 0) + (stats.totalCompletionTokens || 0); + const fallbackInputCost = totalTokens > 0 ? (stats.totalPromptTokens || 0) * ((stats.totalCost || 0) / totalTokens) : 0; + const fallbackOutputCost = totalTokens > 0 ? (stats.totalCompletionTokens || 0) * ((stats.totalCost || 0) / totalTokens) : 0; + const inputCost = hasCostBreakdown ? (stats.totalInputCost || 0) : fallbackInputCost; + const outputCost = hasCostBreakdown ? (stats.totalOutputCost || 0) : fallbackOutputCost; + return (
@@ -16,15 +39,20 @@ export default function OverviewCards({ stats }) { Total Input Tokens {fmt(stats.totalPromptTokens)} + + {fmt(uncachedInput)} uncached | {fmt(cachedInput)} cached ({cachePercent}%) + Output Tokens {fmt(stats.totalCompletionTokens)} + {fmt(stats.totalReasoningTokens)} reasoning Est. Cost ~{fmtCost(stats.totalCost)} - Estimated, not actual billing + Input {fmtCost(inputCost)} | Output {fmtCost(outputCost)} + Cache saved ~{fmtCost(stats.totalCacheSavings)}
); diff --git a/src/app/(dashboard)/dashboard/usage/components/UsageTable.js b/src/app/(dashboard)/dashboard/usage/components/UsageTable.js index 9f3d309922..cf76bb6d7c 100644 --- a/src/app/(dashboard)/dashboard/usage/components/UsageTable.js +++ b/src/app/(dashboard)/dashboard/usage/components/UsageTable.js @@ -6,7 +6,14 @@ import Card from "@/shared/components/Card"; import Badge from "@/shared/components/Badge"; const fmt = (n) => new Intl.NumberFormat().format(n || 0); -const fmtCost = (n) => `$${(n || 0).toFixed(2)}`; +const fmtCost = (n) => { + const value = Number(n || 0); + if (!Number.isFinite(value) || value === 0) return "$0.00"; + const abs = Math.abs(value); + if (abs < 0.0001) return `$${value.toFixed(6)}`; + if (abs < 0.01) return `$${value.toFixed(4)}`; + return `$${value.toFixed(2)}`; +}; function fmtTime(iso) { if (!iso) return "Never"; @@ -32,14 +39,33 @@ SortIcon.propTypes = { * Render 3 token or cost cells based on viewMode */ function ValueCells({ item, viewMode, isSummary = false }) { + const hasInputBreakdown = (item.cacheReadTokens || item.cacheCreationTokens || item.uncachedPromptTokens) > 0; + const hasInputCostBreakdown = (item.cachedInputCost || item.cacheCreationCost || item.uncachedInputCost) > 0; + const inputTokenParts = [ + item.uncachedPromptTokens ? `${fmt(item.uncachedPromptTokens)} uncached` : null, + item.cacheReadTokens ? `${fmt(item.cacheReadTokens)} cached` : null, + item.cacheCreationTokens ? `${fmt(item.cacheCreationTokens)} write` : null, + ].filter(Boolean); + const inputCostParts = [ + item.uncachedInputCost ? `${fmtCost(item.uncachedInputCost)} uncached` : null, + item.cachedInputCost ? `${fmtCost(item.cachedInputCost)} cached` : null, + item.cacheCreationCost ? `${fmtCost(item.cacheCreationCost)} write` : null, + ].filter(Boolean); + if (viewMode === "tokens") { return ( <> - {isSummary && item.promptTokens === undefined ? "—" : fmt(item.promptTokens)} +
{isSummary && item.promptTokens === undefined ? "—" : fmt(item.promptTokens)}
+ {hasInputBreakdown && inputTokenParts.length > 0 && ( +
{inputTokenParts.join(" | ")}
+ )} - {isSummary && item.completionTokens === undefined ? "—" : fmt(item.completionTokens)} +
{isSummary && item.completionTokens === undefined ? "—" : fmt(item.completionTokens)}
+ {item.reasoningTokens > 0 && ( +
{fmt(item.reasoningTokens)} reasoning
+ )} {fmt(item.totalTokens)} @@ -50,13 +76,22 @@ function ValueCells({ item, viewMode, isSummary = false }) { return ( <> - {isSummary && item.inputCost === undefined ? "—" : fmtCost(item.inputCost)} +
{isSummary && item.inputCost === undefined ? "—" : fmtCost(item.inputCost)}
+ {hasInputCostBreakdown && inputCostParts.length > 0 && ( +
{inputCostParts.join(" | ")}
+ )} - {isSummary && item.outputCost === undefined ? "—" : fmtCost(item.outputCost)} +
{isSummary && item.outputCost === undefined ? "—" : fmtCost(item.outputCost)}
+ {item.reasoningCost > 0 && ( +
{fmtCost(item.reasoningCost)} reasoning incl.
+ )} - {fmtCost(item.totalCost || item.cost)} +
{fmtCost(item.totalCost || item.cost)}
+ {item.cacheSavings > 0 && ( +
{fmtCost(item.cacheSavings)} cache saved
+ )} ); diff --git a/src/lib/db/repos/usageRepo.js b/src/lib/db/repos/usageRepo.js index 63d0494eb3..bdc706fcd0 100644 --- a/src/lib/db/repos/usageRepo.js +++ b/src/lib/db/repos/usageRepo.js @@ -2,11 +2,30 @@ import { EventEmitter } from "events"; import { getAdapter } from "../driver.js"; import { parseJson, stringifyJson } from "../helpers/jsonCol.js"; import { getMeta, setMeta } from "../helpers/metaStore.js"; +import { calculateCostBreakdownFromTokens } from "open-sse/providers/pricing.js"; const PENDING_TIMEOUT_MS = 60 * 1000; const RING_CAP = 50; const CONN_CACHE_TTL_MS = 30 * 1000; const PERIOD_MS = { "24h": 86400000, "7d": 604800000, "30d": 2592000000, "60d": 5184000000 }; +const USAGE_NUMERIC_FIELDS = [ + "requests", + "promptTokens", + "completionTokens", + "cost", + "uncachedPromptTokens", + "cacheReadTokens", + "cacheCreationTokens", + "reasoningTokens", + "inputCost", + "uncachedInputCost", + "cachedInputCost", + "cacheCreationCost", + "outputCost", + "visibleOutputCost", + "reasoningCost", + "cacheSavings", +]; // In-memory state shared across Next.js modules if (!global._pendingRequests) global._pendingRequests = { byModel: {}, byAccount: {} }; @@ -32,25 +51,107 @@ function getLocalDateKey(timestamp) { return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`; } +function usageCounter(extra = {}) { + const base = {}; + for (const field of USAGE_NUMERIC_FIELDS) base[field] = 0; + return { ...base, ...extra }; +} + +function addUsageFields(target, values = {}) { + for (const field of USAGE_NUMERIC_FIELDS) { + target[field] = (target[field] || 0) + (values[field] || 0); + } +} + +function readNumber(...values) { + for (const value of values) { + const n = Number(value); + if (Number.isFinite(n) && n > 0) return n; + } + return 0; +} + +function summarizeTokens(tokens = {}, breakdown = null) { + const cb = breakdown || tokens.cost_breakdown || tokens.costBreakdown || {}; + const cacheReadTokens = readNumber( + cb.cacheReadTokens, + tokens.cache_read_input_tokens, + tokens.cached_tokens, + tokens.input_tokens_details?.cached_tokens, + tokens.prompt_tokens_details?.cached_tokens, + ); + const cacheCreationTokens = readNumber( + cb.cacheCreationTokens, + tokens.cache_creation_input_tokens, + tokens.input_tokens_details?.cache_creation_tokens, + tokens.prompt_tokens_details?.cache_creation_tokens, + ); + const reportedPromptTokens = readNumber(tokens.prompt_tokens, tokens.input_tokens); + const promptTokens = readNumber( + cb.promptTokens, + tokens.prompt_tokens !== undefined ? reportedPromptTokens : reportedPromptTokens + cacheReadTokens + cacheCreationTokens, + ); + const completionTokens = readNumber(cb.completionTokens, tokens.completion_tokens, tokens.output_tokens); + const uncachedPromptTokens = readNumber( + cb.uncachedPromptTokens, + Math.max(0, promptTokens - cacheReadTokens - cacheCreationTokens), + ); + const reasoningTokens = readNumber( + cb.reasoningTokens, + tokens.reasoning_tokens, + tokens.output_tokens_details?.reasoning_tokens, + tokens.completion_tokens_details?.reasoning_tokens, + ); + + return usageCounter({ + requests: 1, + promptTokens, + completionTokens, + cost: readNumber(cb.totalCost), + uncachedPromptTokens, + cacheReadTokens, + cacheCreationTokens, + reasoningTokens, + inputCost: readNumber(cb.inputCost), + uncachedInputCost: readNumber(cb.uncachedInputCost), + cachedInputCost: readNumber(cb.cachedInputCost), + cacheCreationCost: readNumber(cb.cacheCreationCost), + outputCost: readNumber(cb.outputCost), + visibleOutputCost: readNumber(cb.visibleOutputCost), + reasoningCost: readNumber(cb.reasoningCost), + cacheSavings: readNumber(cb.cacheSavings), + }); +} + +function addStatsTotals(stats, values = {}) { + stats.totalPromptTokens += values.promptTokens || 0; + stats.totalCompletionTokens += values.completionTokens || 0; + stats.totalCost += values.cost || 0; + stats.totalUncachedPromptTokens += values.uncachedPromptTokens || 0; + stats.totalCacheReadTokens += values.cacheReadTokens || 0; + stats.totalCacheCreationTokens += values.cacheCreationTokens || 0; + stats.totalReasoningTokens += values.reasoningTokens || 0; + stats.totalInputCost += values.inputCost || 0; + stats.totalUncachedInputCost += values.uncachedInputCost || 0; + stats.totalCachedInputCost += values.cachedInputCost || 0; + stats.totalCacheCreationCost += values.cacheCreationCost || 0; + stats.totalOutputCost += values.outputCost || 0; + stats.totalVisibleOutputCost += values.visibleOutputCost || 0; + stats.totalReasoningCost += values.reasoningCost || 0; + stats.totalCacheSavings += values.cacheSavings || 0; +} + function addToCounter(target, key, values) { - if (!target[key]) target[key] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0 }; - target[key].requests += values.requests || 1; - target[key].promptTokens += values.promptTokens || 0; - target[key].completionTokens += values.completionTokens || 0; - target[key].cost += values.cost || 0; + if (!target[key]) target[key] = usageCounter(); + addUsageFields(target[key], values); if (values.meta) Object.assign(target[key], values.meta); } function aggregateEntryToDay(day, entry) { - const promptTokens = entry.tokens?.prompt_tokens || entry.tokens?.input_tokens || 0; - const completionTokens = entry.tokens?.completion_tokens || entry.tokens?.output_tokens || 0; - const cost = entry.cost || 0; - const vals = { promptTokens, completionTokens, cost }; + const vals = summarizeTokens(entry.tokens || {}, entry.costBreakdown || entry.tokens?.cost_breakdown); + vals.cost = entry.cost || vals.cost || 0; - day.requests = (day.requests || 0) + 1; - day.promptTokens = (day.promptTokens || 0) + promptTokens; - day.completionTokens = (day.completionTokens || 0) + completionTokens; - day.cost = (day.cost || 0) + cost; + addUsageFields(day, vals); day.byProvider ||= {}; day.byModel ||= {}; @@ -110,46 +211,23 @@ async function ensureRingInitialized() { } catch {} } -async function calculateCost(provider, model, tokens) { - if (!tokens || !provider || !model) return 0; +async function calculateCostBreakdown(provider, model, tokens) { + if (!tokens || !provider || !model) return summarizeTokens(tokens || {}); try { const { getPricingForModel } = await import("./pricingRepo.js"); const pricing = await getPricingForModel(provider, model); - if (!pricing) return 0; - - let cost = 0; - const inputTokens = tokens.prompt_tokens || tokens.input_tokens || 0; - const cachedTokens = tokens.cached_tokens || tokens.cache_read_input_tokens || 0; - const nonCachedInput = Math.max(0, inputTokens - cachedTokens); - cost += nonCachedInput * (pricing.input / 1000000); - - if (cachedTokens > 0) { - const cachedRate = pricing.cached || pricing.input; - cost += cachedTokens * (cachedRate / 1000000); - } - - const outputTokens = tokens.completion_tokens || tokens.output_tokens || 0; - cost += outputTokens * (pricing.output / 1000000); - - const reasoningTokens = tokens.reasoning_tokens || 0; - if (reasoningTokens > 0) { - const rate = pricing.reasoning || pricing.output; - cost += reasoningTokens * (rate / 1000000); - } - - const cacheCreationTokens = tokens.cache_creation_input_tokens || 0; - if (cacheCreationTokens > 0) { - const rate = pricing.cache_creation || pricing.input; - cost += cacheCreationTokens * (rate / 1000000); - } - - return cost; + if (!pricing) return summarizeTokens(tokens || {}); + return calculateCostBreakdownFromTokens(tokens, pricing); } catch (e) { console.error("Error calculating cost:", e); - return 0; + return summarizeTokens(tokens || {}); } } +async function calculateCost(provider, model, tokens) { + return (await calculateCostBreakdown(provider, model, tokens)).totalCost || 0; +} + export function trackPendingRequest(model, provider, connectionId, started, error = false) { const modelKey = provider ? `${model} (${provider})` : model; const timerKey = `${connectionId}|${modelKey}`; @@ -219,10 +297,15 @@ export async function getActiveRequests() { .sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp)) .map((e) => { const t = e.tokens || {}; + const usage = summarizeTokens(t); return { timestamp: e.timestamp, model: e.model, provider: e.provider || "", - promptTokens: t.prompt_tokens || t.input_tokens || 0, - completionTokens: t.completion_tokens || t.output_tokens || 0, + promptTokens: usage.promptTokens, + completionTokens: usage.completionTokens, + cacheReadTokens: usage.cacheReadTokens, + cacheCreationTokens: usage.cacheCreationTokens, + reasoningTokens: usage.reasoningTokens, + cost: e.cost || usage.cost || 0, status: e.status || "ok", }; }) @@ -245,11 +328,16 @@ export async function saveRequestUsage(entry) { const db = await getAdapter(); if (!entry.timestamp) entry.timestamp = new Date().toISOString(); - entry.cost = await calculateCost(entry.provider, entry.model, entry.tokens); + const tokens = { ...(entry.tokens || {}) }; + const costBreakdown = await calculateCostBreakdown(entry.provider, entry.model, tokens); + tokens.cost_breakdown = costBreakdown; + entry.tokens = tokens; + entry.costBreakdown = costBreakdown; + entry.cost = costBreakdown.totalCost || await calculateCost(entry.provider, entry.model, tokens); - const tokens = entry.tokens || {}; - const promptTokens = tokens.prompt_tokens || tokens.input_tokens || 0; - const completionTokens = tokens.completion_tokens || tokens.output_tokens || 0; + const tokenSummary = summarizeTokens(tokens, costBreakdown); + const promptTokens = tokenSummary.promptTokens || 0; + const completionTokens = tokenSummary.completionTokens || 0; // All 3 writes (history insert, daily upsert, lifetime counter) in ONE transaction. // better-sqlite3 is sync → no JS yield mid-transaction → no race in same process. @@ -347,10 +435,15 @@ export async function getUsageStats(period = "all") { const recentRequests = recentRows .map((r) => { const t = parseJson(r.tokens, {}) || {}; + const usage = summarizeTokens(t); return { timestamp: r.timestamp, model: r.model, provider: r.provider || "", - promptTokens: t.prompt_tokens || t.input_tokens || 0, - completionTokens: t.completion_tokens || t.output_tokens || 0, + promptTokens: usage.promptTokens, + completionTokens: usage.completionTokens, + cacheReadTokens: usage.cacheReadTokens, + cacheCreationTokens: usage.cacheCreationTokens, + reasoningTokens: usage.reasoningTokens, + cost: r.cost || usage.cost || 0, status: r.status || "ok", }; }) @@ -367,6 +460,9 @@ export async function getUsageStats(period = "all") { const stats = { totalRequests: 0, totalPromptTokens: 0, totalCompletionTokens: 0, totalCost: 0, + totalUncachedPromptTokens: 0, totalCacheReadTokens: 0, totalCacheCreationTokens: 0, totalReasoningTokens: 0, + totalInputCost: 0, totalUncachedInputCost: 0, totalCachedInputCost: 0, totalCacheCreationCost: 0, + totalOutputCost: 0, totalVisibleOutputCost: 0, totalReasoningCost: 0, totalCacheSavings: 0, byProvider: {}, byModel: {}, byAccount: {}, byApiKey: {}, byEndpoint: {}, last10Minutes: [], pending: pendingRequests, @@ -425,16 +521,11 @@ export async function getUsageStats(period = "all") { for (const dr of dayRows) { const dateKey = dr.dateKey; const day = parseJson(dr.data, {}); - stats.totalPromptTokens += day.promptTokens || 0; - stats.totalCompletionTokens += day.completionTokens || 0; - stats.totalCost += day.cost || 0; + addStatsTotals(stats, day); for (const [prov, p] of Object.entries(day.byProvider || {})) { - if (!stats.byProvider[prov]) stats.byProvider[prov] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0 }; - stats.byProvider[prov].requests += p.requests || 0; - stats.byProvider[prov].promptTokens += p.promptTokens || 0; - stats.byProvider[prov].completionTokens += p.completionTokens || 0; - stats.byProvider[prov].cost += p.cost || 0; + if (!stats.byProvider[prov]) stats.byProvider[prov] = usageCounter(); + addUsageFields(stats.byProvider[prov], p); } for (const [mk, m] of Object.entries(day.byModel || {})) { @@ -443,12 +534,9 @@ export async function getUsageStats(period = "all") { const statsKey = provider ? `${rawModel} (${provider})` : rawModel; const providerDisplayName = providerNodeNameMap[provider] || provider; if (!stats.byModel[statsKey]) { - stats.byModel[statsKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel, provider: providerDisplayName, lastUsed: dateKey }; + stats.byModel[statsKey] = usageCounter({ rawModel, provider: providerDisplayName, lastUsed: dateKey }); } - stats.byModel[statsKey].requests += m.requests || 0; - stats.byModel[statsKey].promptTokens += m.promptTokens || 0; - stats.byModel[statsKey].completionTokens += m.completionTokens || 0; - stats.byModel[statsKey].cost += m.cost || 0; + addUsageFields(stats.byModel[statsKey], m); if (dateKey > (stats.byModel[statsKey].lastUsed || "")) stats.byModel[statsKey].lastUsed = dateKey; } @@ -459,12 +547,9 @@ export async function getUsageStats(period = "all") { const providerDisplayName = providerNodeNameMap[provider] || provider; const accountKey = `${rawModel} (${provider} - ${accountName})`; if (!stats.byAccount[accountKey]) { - stats.byAccount[accountKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel, provider: providerDisplayName, connectionId: connId, accountName, lastUsed: dateKey }; + stats.byAccount[accountKey] = usageCounter({ rawModel, provider: providerDisplayName, connectionId: connId, accountName, lastUsed: dateKey }); } - stats.byAccount[accountKey].requests += a.requests || 0; - stats.byAccount[accountKey].promptTokens += a.promptTokens || 0; - stats.byAccount[accountKey].completionTokens += a.completionTokens || 0; - stats.byAccount[accountKey].cost += a.cost || 0; + addUsageFields(stats.byAccount[accountKey], a); if (dateKey > (stats.byAccount[accountKey].lastUsed || "")) stats.byAccount[accountKey].lastUsed = dateKey; } @@ -477,12 +562,9 @@ export async function getUsageStats(period = "all") { const keyName = keyInfo?.name || (apiKeyVal ? apiKeyVal.slice(0, 8) + "..." : "Local (No API Key)"); const apiKeyKey = apiKeyVal || "local-no-key"; if (!stats.byApiKey[akKey]) { - stats.byApiKey[akKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel, provider: providerDisplayName, apiKey: apiKeyVal, keyName, apiKeyKey, lastUsed: dateKey }; + stats.byApiKey[akKey] = usageCounter({ rawModel, provider: providerDisplayName, apiKey: apiKeyVal, keyName, apiKeyKey, lastUsed: dateKey }); } - stats.byApiKey[akKey].requests += ak.requests || 0; - stats.byApiKey[akKey].promptTokens += ak.promptTokens || 0; - stats.byApiKey[akKey].completionTokens += ak.completionTokens || 0; - stats.byApiKey[akKey].cost += ak.cost || 0; + addUsageFields(stats.byApiKey[akKey], ak); if (dateKey > (stats.byApiKey[akKey].lastUsed || "")) stats.byApiKey[akKey].lastUsed = dateKey; } @@ -492,12 +574,9 @@ export async function getUsageStats(period = "all") { const provider = ep.provider || ""; const providerDisplayName = providerNodeNameMap[provider] || provider; if (!stats.byEndpoint[epKey]) { - stats.byEndpoint[epKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, endpoint, rawModel, provider: providerDisplayName, lastUsed: dateKey }; + stats.byEndpoint[epKey] = usageCounter({ endpoint, rawModel, provider: providerDisplayName, lastUsed: dateKey }); } - stats.byEndpoint[epKey].requests += ep.requests || 0; - stats.byEndpoint[epKey].promptTokens += ep.promptTokens || 0; - stats.byEndpoint[epKey].completionTokens += ep.completionTokens || 0; - stats.byEndpoint[epKey].cost += ep.cost || 0; + addUsageFields(stats.byEndpoint[epKey], ep); if (dateKey > (stats.byEndpoint[epKey].lastUsed || "")) stats.byEndpoint[epKey].lastUsed = dateKey; } } @@ -545,41 +624,29 @@ export async function getUsageStats(period = "all") { for (const r of filtered) { const tokens = parseJson(r.tokens, {}) || {}; - const promptTokens = tokens.prompt_tokens || 0; - const completionTokens = tokens.completion_tokens || 0; - const entryCost = r.cost || 0; + const vals = summarizeTokens(tokens); + vals.cost = r.cost || vals.cost || 0; const providerDisplayName = providerNodeNameMap[r.provider] || r.provider; - stats.totalPromptTokens += promptTokens; - stats.totalCompletionTokens += completionTokens; - stats.totalCost += entryCost; + addStatsTotals(stats, vals); - if (!stats.byProvider[r.provider]) stats.byProvider[r.provider] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0 }; - stats.byProvider[r.provider].requests++; - stats.byProvider[r.provider].promptTokens += promptTokens; - stats.byProvider[r.provider].completionTokens += completionTokens; - stats.byProvider[r.provider].cost += entryCost; + if (!stats.byProvider[r.provider]) stats.byProvider[r.provider] = usageCounter(); + addUsageFields(stats.byProvider[r.provider], vals); const modelKey = r.provider ? `${r.model} (${r.provider})` : r.model; if (!stats.byModel[modelKey]) { - stats.byModel[modelKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp }; + stats.byModel[modelKey] = usageCounter({ rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp }); } - stats.byModel[modelKey].requests++; - stats.byModel[modelKey].promptTokens += promptTokens; - stats.byModel[modelKey].completionTokens += completionTokens; - stats.byModel[modelKey].cost += entryCost; + addUsageFields(stats.byModel[modelKey], vals); if (new Date(r.timestamp) > new Date(stats.byModel[modelKey].lastUsed)) stats.byModel[modelKey].lastUsed = r.timestamp; if (r.connectionId) { const accountName = connectionMap[r.connectionId] || `Account ${r.connectionId.slice(0, 8)}...`; const accountKey = `${r.model} (${r.provider} - ${accountName})`; if (!stats.byAccount[accountKey]) { - stats.byAccount[accountKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, connectionId: r.connectionId, accountName, lastUsed: r.timestamp }; + stats.byAccount[accountKey] = usageCounter({ rawModel: r.model, provider: providerDisplayName, connectionId: r.connectionId, accountName, lastUsed: r.timestamp }); } - stats.byAccount[accountKey].requests++; - stats.byAccount[accountKey].promptTokens += promptTokens; - stats.byAccount[accountKey].completionTokens += completionTokens; - stats.byAccount[accountKey].cost += entryCost; + addUsageFields(stats.byAccount[accountKey], vals); if (new Date(r.timestamp) > new Date(stats.byAccount[accountKey].lastUsed)) stats.byAccount[accountKey].lastUsed = r.timestamp; } @@ -588,27 +655,27 @@ export async function getUsageStats(period = "all") { const keyName = keyInfo?.name || r.apiKey.slice(0, 8) + "..."; const akKey = `${r.apiKey}|${r.model}|${r.provider || "unknown"}`; if (!stats.byApiKey[akKey]) { - stats.byApiKey[akKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, apiKey: r.apiKey, keyName, apiKeyKey: r.apiKey, lastUsed: r.timestamp }; + stats.byApiKey[akKey] = usageCounter({ rawModel: r.model, provider: providerDisplayName, apiKey: r.apiKey, keyName, apiKeyKey: r.apiKey, lastUsed: r.timestamp }); } const ake = stats.byApiKey[akKey]; - ake.requests++; ake.promptTokens += promptTokens; ake.completionTokens += completionTokens; ake.cost += entryCost; + addUsageFields(ake, vals); if (new Date(r.timestamp) > new Date(ake.lastUsed)) ake.lastUsed = r.timestamp; } else { if (!stats.byApiKey["local-no-key"]) { - stats.byApiKey["local-no-key"] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, apiKey: null, keyName: "Local (No API Key)", apiKeyKey: "local-no-key", lastUsed: r.timestamp }; + stats.byApiKey["local-no-key"] = usageCounter({ rawModel: r.model, provider: providerDisplayName, apiKey: null, keyName: "Local (No API Key)", apiKeyKey: "local-no-key", lastUsed: r.timestamp }); } const ake = stats.byApiKey["local-no-key"]; - ake.requests++; ake.promptTokens += promptTokens; ake.completionTokens += completionTokens; ake.cost += entryCost; + addUsageFields(ake, vals); if (new Date(r.timestamp) > new Date(ake.lastUsed)) ake.lastUsed = r.timestamp; } const endpoint = r.endpoint || "Unknown"; const epKey = `${endpoint}|${r.model}|${r.provider || "unknown"}`; if (!stats.byEndpoint[epKey]) { - stats.byEndpoint[epKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, endpoint, rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp }; + stats.byEndpoint[epKey] = usageCounter({ endpoint, rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp }); } const epe = stats.byEndpoint[epKey]; - epe.requests++; epe.promptTokens += promptTokens; epe.completionTokens += completionTokens; epe.cost += entryCost; + addUsageFields(epe, vals); if (new Date(r.timestamp) > new Date(epe.lastUsed)) epe.lastUsed = r.timestamp; } } @@ -720,9 +787,12 @@ export async function getRecentLogs(limit = 200) { const m = r.model || "-"; const account = connMap[r.connectionId] || (r.connectionId ? r.connectionId.slice(0, 8) : "-"); const tk = r.tokens ? parseJson(r.tokens, {}) : {}; - const sent = r.promptTokens ?? tk.prompt_tokens ?? "-"; - const received = r.completionTokens ?? tk.completion_tokens ?? "-"; - return `${ts} | ${m} | ${p} | ${account} | ${sent} | ${received} | ${r.status || "-"}`; + const usage = summarizeTokens(tk); + const sent = usage.promptTokens || r.promptTokens || tk.prompt_tokens || "-"; + const received = usage.completionTokens || r.completionTokens || tk.completion_tokens || "-"; + const cache = usage.cacheReadTokens ? ` | cache_read=${usage.cacheReadTokens}` : ""; + const reasoning = usage.reasoningTokens ? ` | reasoning=${usage.reasoningTokens}` : ""; + return `${ts} | ${m} | ${p} | ${account} | ${sent} | ${received}${cache}${reasoning} | ${r.status || "-"}`; }); } catch (e) { console.error("[usageRepo] getRecentLogs failed:", e.message); diff --git a/src/shared/components/UsageStats.js b/src/shared/components/UsageStats.js index 950a7af9d6..f0bd4b8c7f 100644 --- a/src/shared/components/UsageStats.js +++ b/src/shared/components/UsageStats.js @@ -61,6 +61,10 @@ function RecentRequests({ requests = [] }) { {requests.map((r, i) => { const ok = !r.status || r.status === "ok" || r.status === "success"; + const cacheInfo = [ + r.cacheReadTokens ? `${fmt(r.cacheReadTokens)} cached` : null, + r.reasoningTokens ? `${fmt(r.reasoningTokens)} reasoning` : null, + ].filter(Boolean).join(" | "); return ( @@ -71,6 +75,7 @@ function RecentRequests({ requests = [] }) { {fmt(r.promptTokens)}↑ {" "} {fmt(r.completionTokens)}↓ + {cacheInfo &&
{cacheInfo}
} @@ -89,9 +94,35 @@ function sortData(dataMap, pendingMap = {}, sortBy, sortOrder) { .map(([key, data]) => { const totalTokens = (data.promptTokens || 0) + (data.completionTokens || 0); const totalCost = data.cost || 0; - const inputCost = totalTokens > 0 ? (data.promptTokens || 0) * (totalCost / totalTokens) : 0; - const outputCost = totalTokens > 0 ? (data.completionTokens || 0) * (totalCost / totalTokens) : 0; - return { ...data, key, totalTokens, totalCost, inputCost, outputCost, pending: pendingMap[key] || 0 }; + const fallbackInputCost = totalTokens > 0 ? (data.promptTokens || 0) * (totalCost / totalTokens) : 0; + const fallbackOutputCost = totalTokens > 0 ? (data.completionTokens || 0) * (totalCost / totalTokens) : 0; + const hasItemizedCost = [ + data.inputCost, + data.outputCost, + data.uncachedInputCost, + data.cachedInputCost, + data.cacheCreationCost, + ].some((value) => Number(value || 0) > 0); + const inputCost = hasItemizedCost ? (data.inputCost || 0) : fallbackInputCost; + const outputCost = hasItemizedCost ? (data.outputCost || 0) : fallbackOutputCost; + const cacheReadTokens = data.cacheReadTokens || 0; + const cacheCreationTokens = data.cacheCreationTokens || 0; + const hasTokenBreakdown = (data.uncachedPromptTokens || cacheReadTokens || cacheCreationTokens) > 0; + const uncachedPromptTokens = hasTokenBreakdown + ? (data.uncachedPromptTokens || 0) + : Math.max(0, (data.promptTokens || 0) - cacheReadTokens - cacheCreationTokens); + return { + ...data, + key, + totalTokens, + totalCost, + inputCost, + outputCost, + uncachedPromptTokens, + cacheReadTokens, + cacheCreationTokens, + pending: pendingMap[key] || 0, + }; }) .sort((a, b) => { let valA = a[sortBy]; @@ -114,6 +145,19 @@ function getGroupKey(item, keyField) { } } +const USAGE_SUMMARY_FIELDS = [ + "uncachedPromptTokens", + "cacheReadTokens", + "cacheCreationTokens", + "reasoningTokens", + "uncachedInputCost", + "cachedInputCost", + "cacheCreationCost", + "visibleOutputCost", + "reasoningCost", + "cacheSavings", +]; + function groupDataByKey(data, keyField) { if (!Array.isArray(data)) return []; const groups = {}; @@ -135,6 +179,9 @@ function groupDataByKey(data, keyField) { s.inputCost += item.inputCost || 0; s.outputCost += item.outputCost || 0; s.pending += item.pending || 0; + for (const field of USAGE_SUMMARY_FIELDS) { + s[field] = (s[field] || 0) + (item[field] || 0); + } if (item.lastUsed && (!s.lastUsed || new Date(item.lastUsed) > new Date(s.lastUsed))) { s.lastUsed = item.lastUsed; } diff --git a/tests/unit/pricing-cache-breakdown.test.js b/tests/unit/pricing-cache-breakdown.test.js new file mode 100644 index 0000000000..07e6a8efc7 --- /dev/null +++ b/tests/unit/pricing-cache-breakdown.test.js @@ -0,0 +1,71 @@ +import { describe, expect, it } from "vitest"; +import { + MODEL_PRICING, + calculateCostBreakdownFromTokens, + calculateCostFromTokens, + getPricingForModel, +} from "../../open-sse/providers/pricing.js"; + +describe("pricing cache breakdown", () => { + it("uses exact standard API pricing for current OpenAI models", () => { + expect(MODEL_PRICING["gpt-5.5"]).toMatchObject({ input: 5.00, cached: 0.50, output: 30.00 }); + expect(MODEL_PRICING["gpt-5.4"]).toMatchObject({ input: 2.50, cached: 0.25, output: 15.00 }); + expect(MODEL_PRICING["gpt-5.4-mini"]).toMatchObject({ input: 0.75, cached: 0.075, output: 4.50 }); + expect(MODEL_PRICING["gpt-5"]).toMatchObject({ input: 1.25, cached: 0.125, output: 10.00 }); + expect(MODEL_PRICING["gpt-4.1"]).toMatchObject({ input: 2.00, cached: 0.50, output: 8.00 }); + }); + + it("does not price full gpt-4o variants as gpt-4o-mini", () => { + expect(getPricingForModel("openai", "gpt-4o-2024-08-06")).toMatchObject({ input: 2.50, cached: 1.25, output: 10.00 }); + expect(getPricingForModel("openai", "gpt-4o-mini-2024-07-18")).toMatchObject({ input: 0.15, cached: 0.075, output: 0.60 }); + }); + + it("charges cached and uncached input at different rates", () => { + const pricing = getPricingForModel("openai", "gpt-5.4-mini"); + const breakdown = calculateCostBreakdownFromTokens({ + prompt_tokens: 1000, + completion_tokens: 100, + cache_read_input_tokens: 800, + }, pricing); + + expect(breakdown.promptTokens).toBe(1000); + expect(breakdown.uncachedPromptTokens).toBe(200); + expect(breakdown.cacheReadTokens).toBe(800); + expect(breakdown.uncachedInputCost).toBeCloseTo((200 * 0.75) / 1_000_000, 12); + expect(breakdown.cachedInputCost).toBeCloseTo((800 * 0.075) / 1_000_000, 12); + expect(breakdown.outputCost).toBeCloseTo((100 * 4.50) / 1_000_000, 12); + expect(calculateCostFromTokens({ + prompt_tokens: 1000, + completion_tokens: 100, + cache_read_input_tokens: 800, + }, pricing)).toBeCloseTo(((200 * 0.75) + (800 * 0.075) + (100 * 4.50)) / 1_000_000, 12); + }); + + it("treats reasoning tokens as included output details by default", () => { + const pricing = getPricingForModel("openai", "gpt-5.5"); + const breakdown = calculateCostBreakdownFromTokens({ + prompt_tokens: 1000, + completion_tokens: 100, + reasoning_tokens: 40, + }, pricing); + + expect(breakdown.reasoningTokens).toBe(40); + expect(breakdown.outputCost).toBeCloseTo((100 * 30.00) / 1_000_000, 12); + expect(breakdown.totalCost).toBeCloseTo(((1000 * 5.00) + (100 * 30.00)) / 1_000_000, 12); + }); + + it("supports providers that report input and cache buckets separately", () => { + const breakdown = calculateCostBreakdownFromTokens({ + input_tokens: 1000, + output_tokens: 100, + cache_read_input_tokens: 800, + cache_creation_input_tokens: 200, + }, { input: 3.00, cached: 0.30, cache_creation: 3.75, output: 15.00 }); + + expect(breakdown.promptTokens).toBe(2000); + expect(breakdown.uncachedPromptTokens).toBe(1000); + expect(breakdown.cacheReadTokens).toBe(800); + expect(breakdown.cacheCreationTokens).toBe(200); + expect(breakdown.inputCost).toBeCloseTo(((1000 * 3.00) + (800 * 0.30) + (200 * 3.75)) / 1_000_000, 12); + }); +});