diff --git a/open-sse/handlers/chatCore/streamingHandler.js b/open-sse/handlers/chatCore/streamingHandler.js
index aa907cd5c7..6a558c0c21 100644
--- a/open-sse/handlers/chatCore/streamingHandler.js
+++ b/open-sse/handlers/chatCore/streamingHandler.js
@@ -5,7 +5,7 @@ import { pipeWithDisconnect } from "../../utils/streamHandler.js";
import { PROVIDERS } from "../../config/providers.js";
import { STREAM_STALL_TIMEOUT_MS } from "../../config/runtimeConfig.js";
import { buildAbortedResponsesTerminalBytes } from "../../utils/responsesStreamHelpers.js";
-import { buildRequestDetail, extractRequestConfig, saveUsageStats } from "./requestDetail.js";
+import { buildRequestDetail, extractRequestConfig } from "./requestDetail.js";
import { saveRequestDetail } from "@/lib/usageDb.js";
import { SSE_HEADERS_CORS as SSE_HEADERS } from "../../utils/sseConstants.js";
@@ -101,7 +101,8 @@ export function buildOnStreamComplete({ provider, model, connectionId, apiKey, r
console.error("[RequestDetail] Failed to update streaming content:", err.message);
});
- saveUsageStats({ provider, model, tokens: usage, connectionId, apiKey, endpoint: clientRawRequest?.endpoint, label: "STREAM USAGE" });
+ // Usage is already persisted by logUsage() in the SSE transform flush. Saving
+ // again here loses cache/reasoning detail and doubles dashboard costs.
};
return { onStreamComplete, streamDetailId };
diff --git a/open-sse/providers/pricing.js b/open-sse/providers/pricing.js
index 9e767a8049..94079eeb02 100644
--- a/open-sse/providers/pricing.js
+++ b/open-sse/providers/pricing.js
@@ -30,30 +30,48 @@ export const MODEL_PRICING = {
"claude-opus-4-6-thinking": { input: 5.00, output: 25.00, cached: 0.50, reasoning: 37.50, cache_creation: 5.00 },
// === OpenAI / GPT ===
- "gpt-3.5-turbo": { input: 0.50, output: 1.50, cached: 0.25, reasoning: 2.25, cache_creation: 0.50 },
- "gpt-4": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 },
- "gpt-4-turbo": { input: 10.00, output: 30.00, cached: 5.00, reasoning: 45.00, cache_creation: 10.00 },
- "gpt-4o": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 },
- "gpt-4o-mini": { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.90, cache_creation: 0.15 },
- "gpt-4.1": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 },
- "gpt-5": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 },
- "gpt-5-mini": { input: 0.75, output: 3.00, cached: 0.375, reasoning: 4.50, cache_creation: 0.75 },
- "gpt-5-codex": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 },
- "gpt-5.1": { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 },
- "gpt-5.1-codex": { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 },
+ // Standard API pricing from https://developers.openai.com/api/docs/pricing
+ // (input/cached input/output in $ per 1M tokens).
+ "gpt-3.5-turbo": { input: 0.50, output: 1.50, cached: null, reasoning: 1.50, cache_creation: 0.50 },
+ "gpt-4": { input: 30.00, output: 60.00, cached: null, reasoning: 60.00, cache_creation: 30.00 },
+ "gpt-4-turbo": { input: 10.00, output: 30.00, cached: null, reasoning: 30.00, cache_creation: 10.00 },
+ "gpt-4o": { input: 2.50, output: 10.00, cached: 1.25, reasoning: 10.00, cache_creation: 2.50 },
+ "gpt-4o-mini": { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.60, cache_creation: 0.15 },
+ "gpt-4.1": { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 },
+ "gpt-4.1-mini": { input: 0.40, output: 1.60, cached: 0.10, reasoning: 1.60, cache_creation: 0.40 },
+ "gpt-4.1-nano": { input: 0.10, output: 0.40, cached: 0.025, reasoning: 0.40, cache_creation: 0.10 },
+ "gpt-5.5": { input: 5.00, output: 30.00, cached: 0.50, reasoning: 30.00, cache_creation: 5.00 },
+ "gpt-5.5-pro": { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 },
+ "gpt-5.4": { input: 2.50, output: 15.00, cached: 0.25, reasoning: 15.00, cache_creation: 2.50 },
+ "gpt-5.4-mini": { input: 0.75, output: 4.50, cached: 0.075, reasoning: 4.50, cache_creation: 0.75 },
+ "gpt-5.4-nano": { input: 0.20, output: 1.25, cached: 0.02, reasoning: 1.25, cache_creation: 0.20 },
+ "gpt-5.4-pro": { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 },
+ "gpt-5": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 },
+ "gpt-5-mini": { input: 0.25, output: 2.00, cached: 0.025, reasoning: 2.00, cache_creation: 0.25 },
+ "gpt-5-nano": { input: 0.05, output: 0.40, cached: 0.005, reasoning: 0.40, cache_creation: 0.05 },
+ "gpt-5-pro": { input: 15.00, output: 120.00, cached: null, reasoning: 120.00, cache_creation: 15.00 },
+ "gpt-5-codex": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 },
+ "gpt-5.1": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 },
+ "gpt-5.1-codex": { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 },
"gpt-5.1-codex-mini": { input: 1.50, output: 6.00, cached: 0.75, reasoning: 9.00, cache_creation: 1.50 },
"gpt-5.1-codex-mini-high": { input: 2.00, output: 8.00, cached: 1.00, reasoning: 12.00, cache_creation: 2.00 },
"gpt-5.1-codex-max": { input: 8.00, output: 32.00, cached: 4.00, reasoning: 48.00, cache_creation: 8.00 },
- "gpt-5.2": { input: 5.00, output: 20.00, cached: 2.50, reasoning: 30.00, cache_creation: 5.00 },
- "gpt-5.2-codex": { input: 5.00, output: 20.00, cached: 2.50, reasoning: 30.00, cache_creation: 5.00 },
+ "gpt-5.2": { input: 1.75, output: 14.00, cached: 0.175, reasoning: 14.00, cache_creation: 1.75 },
+ "gpt-5.2-pro": { input: 21.00, output: 168.00, cached: null, reasoning: 168.00, cache_creation: 21.00 },
+ "gpt-5.2-codex": { input: 1.75, output: 14.00, cached: 0.175, reasoning: 14.00, cache_creation: 1.75 },
"gpt-5.3-codex": { input: 6.00, output: 24.00, cached: 3.00, reasoning: 36.00, cache_creation: 6.00 },
"gpt-5.3-codex-xhigh": { input: 10.00, output: 40.00, cached: 5.00, reasoning: 60.00, cache_creation: 10.00 },
"gpt-5.3-codex-high": { input: 8.00, output: 32.00, cached: 4.00, reasoning: 48.00, cache_creation: 8.00 },
"gpt-5.3-codex-low": { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 },
"gpt-5.3-codex-none": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 },
"gpt-5.3-codex-spark": { input: 3.00, output: 12.00, cached: 0.30, reasoning: 12.00, cache_creation: 3.00 },
- "o1": { input: 15.00, output: 60.00, cached: 7.50, reasoning: 90.00, cache_creation: 15.00 },
- "o1-mini": { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 },
+ "o1": { input: 15.00, output: 60.00, cached: 7.50, reasoning: 60.00, cache_creation: 15.00 },
+ "o1-mini": { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 },
+ "o1-pro": { input: 150.00, output: 600.00, cached: null, reasoning: 600.00, cache_creation: 150.00 },
+ "o3": { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 },
+ "o3-mini": { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 },
+ "o3-pro": { input: 20.00, output: 80.00, cached: null, reasoning: 80.00, cache_creation: 20.00 },
+ "o4-mini": { input: 1.10, output: 4.40, cached: 0.275, reasoning: 4.40, cache_creation: 1.10 },
// === Gemini ===
"gemini-3-flash-preview": { input: 0.50, output: 3.00, cached: 0.03, reasoning: 4.50, cache_creation: 0.50 },
@@ -161,20 +179,40 @@ export const PATTERN_PRICING = [
{ pattern: "gemini-*", pricing: { input: 0.50, output: 3.00, cached: 0.03, reasoning: 4.50, cache_creation: 0.50 } },
// --- GPT (specific first, generic last) ---
+ { pattern: "gpt-5.5-pro", pricing: { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 } },
+ { pattern: "gpt-5.5", pricing: { input: 5.00, output: 30.00, cached: 0.50, reasoning: 30.00, cache_creation: 5.00 } },
+ { pattern: "gpt-5.4-pro", pricing: { input: 30.00, output: 180.00, cached: null, reasoning: 180.00, cache_creation: 30.00 } },
+ { pattern: "gpt-5.4-mini", pricing: { input: 0.75, output: 4.50, cached: 0.075, reasoning: 4.50, cache_creation: 0.75 } },
+ { pattern: "gpt-5.4-nano", pricing: { input: 0.20, output: 1.25, cached: 0.02, reasoning: 1.25, cache_creation: 0.20 } },
+ { pattern: "gpt-5.4", pricing: { input: 2.50, output: 15.00, cached: 0.25, reasoning: 15.00, cache_creation: 2.50 } },
{ pattern: "gpt-5.3-*", pricing: { input: 6.00, output: 24.00, cached: 3.00, reasoning: 36.00, cache_creation: 6.00 } },
- { pattern: "gpt-5.2-*", pricing: { input: 5.00, output: 20.00, cached: 2.50, reasoning: 30.00, cache_creation: 5.00 } },
- { pattern: "gpt-5.1-*", pricing: { input: 4.00, output: 16.00, cached: 2.00, reasoning: 24.00, cache_creation: 4.00 } },
- { pattern: "gpt-5-*", pricing: { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 } },
- { pattern: "gpt-5*", pricing: { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 } },
- { pattern: "gpt-4o-*", pricing: { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.90, cache_creation: 0.15 } },
- { pattern: "gpt-4o", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 } },
- { pattern: "gpt-4*", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 15.00, cache_creation: 2.50 } },
+ { pattern: "gpt-5.2-pro", pricing: { input: 21.00, output: 168.00, cached: null, reasoning: 168.00, cache_creation: 21.00 } },
+ { pattern: "gpt-5.2-*", pricing: { input: 1.75, output: 14.00, cached: 0.175, reasoning: 14.00, cache_creation: 1.75 } },
+ { pattern: "gpt-5.1-*", pricing: { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 } },
+ { pattern: "gpt-5-pro", pricing: { input: 15.00, output: 120.00, cached: null, reasoning: 120.00, cache_creation: 15.00 } },
+ { pattern: "gpt-5-mini", pricing: { input: 0.25, output: 2.00, cached: 0.025, reasoning: 2.00, cache_creation: 0.25 } },
+ { pattern: "gpt-5-nano", pricing: { input: 0.05, output: 0.40, cached: 0.005, reasoning: 0.40, cache_creation: 0.05 } },
+ { pattern: "gpt-5-*", pricing: { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 } },
+ { pattern: "gpt-5*", pricing: { input: 1.25, output: 10.00, cached: 0.125, reasoning: 10.00, cache_creation: 1.25 } },
+ { pattern: "gpt-4.1-mini*", pricing: { input: 0.40, output: 1.60, cached: 0.10, reasoning: 1.60, cache_creation: 0.40 } },
+ { pattern: "gpt-4.1-nano*", pricing: { input: 0.10, output: 0.40, cached: 0.025, reasoning: 0.40, cache_creation: 0.10 } },
+ { pattern: "gpt-4.1*", pricing: { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 } },
+ { pattern: "gpt-4o-mini*", pricing: { input: 0.15, output: 0.60, cached: 0.075, reasoning: 0.60, cache_creation: 0.15 } },
+ { pattern: "gpt-4o-*", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 10.00, cache_creation: 2.50 } },
+ { pattern: "gpt-4o", pricing: { input: 2.50, output: 10.00, cached: 1.25, reasoning: 10.00, cache_creation: 2.50 } },
+ { pattern: "gpt-4-turbo*", pricing: { input: 10.00, output: 30.00, cached: null, reasoning: 30.00, cache_creation: 10.00 } },
+ { pattern: "gpt-4*", pricing: { input: 30.00, output: 60.00, cached: null, reasoning: 60.00, cache_creation: 30.00 } },
// --- o1 / o-series ---
- { pattern: "o1-*", pricing: { input: 3.00, output: 12.00, cached: 1.50, reasoning: 18.00, cache_creation: 3.00 } },
- { pattern: "o1", pricing: { input: 15.00, output: 60.00, cached: 7.50, reasoning: 90.00, cache_creation: 15.00 } },
- { pattern: "o3-*", pricing: { input: 10.00, output: 40.00, cached: 5.00, reasoning: 60.00, cache_creation: 10.00 } },
- { pattern: "o4-*", pricing: { input: 2.00, output: 8.00, cached: 1.00, reasoning: 12.00, cache_creation: 2.00 } },
+ { pattern: "o1-pro", pricing: { input: 150.00, output: 600.00, cached: null, reasoning: 600.00, cache_creation: 150.00 } },
+ { pattern: "o1-mini", pricing: { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 } },
+ { pattern: "o1", pricing: { input: 15.00, output: 60.00, cached: 7.50, reasoning: 60.00, cache_creation: 15.00 } },
+ { pattern: "o3-pro", pricing: { input: 20.00, output: 80.00, cached: null, reasoning: 80.00, cache_creation: 20.00 } },
+ { pattern: "o3-mini", pricing: { input: 1.10, output: 4.40, cached: 0.55, reasoning: 4.40, cache_creation: 1.10 } },
+ { pattern: "o3-*", pricing: { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 } },
+ { pattern: "o3", pricing: { input: 2.00, output: 8.00, cached: 0.50, reasoning: 8.00, cache_creation: 2.00 } },
+ { pattern: "o4-mini", pricing: { input: 1.10, output: 4.40, cached: 0.275, reasoning: 4.40, cache_creation: 1.10 } },
+ { pattern: "o4-*", pricing: { input: 1.10, output: 4.40, cached: 0.275, reasoning: 4.40, cache_creation: 1.10 } },
// --- Qwen ---
{ pattern: "qwen3-coder-*", pricing: { input: 1.00, output: 4.00, cached: 0.50, reasoning: 6.00, cache_creation: 1.00 } },
@@ -263,42 +301,129 @@ export function getDefaultPricing() {
*/
export function formatCost(cost) {
if (cost === null || cost === undefined || isNaN(cost)) return "$0.00";
- return `$${cost.toFixed(2)}`;
+  // Coerce once so numeric strings are formatted the same way as numbers.
+  const amount = Number(cost || 0);
+  if (amount === 0 || !Number.isFinite(amount)) return "$0.00";
+  // Sub-cent amounts get extra decimals so they do not collapse to "$0.00".
+  const magnitude = Math.abs(amount);
+  let digits = 2;
+  if (magnitude < 0.0001) digits = 6;
+  else if (magnitude < 0.01) digits = 4;
+  return `$${amount.toFixed(digits)}`;
}
/**
- * Calculate cost from tokens and pricing
+ * Calculate itemized cost from tokens and pricing.
+ *
+ * Reasoning tokens reported by OpenAI-compatible APIs are output-token details,
+ * not extra billable tokens. Only add separate reasoning cost when a pricing
+ * entry explicitly opts into reasoning_billed_separately.
+ *
* @param {object} tokens
* @param {object} pricing
- * @returns {number} cost in dollars
+ * @returns {object} cost breakdown in dollars and tokens
*/
-export function calculateCostFromTokens(tokens, pricing) {
- if (!tokens || !pricing) return 0;
-
- let cost = 0;
-
- const inputTokens = tokens.prompt_tokens || tokens.input_tokens || 0;
- const cachedTokens = tokens.cached_tokens || tokens.cache_read_input_tokens || 0;
- const nonCachedInput = Math.max(0, inputTokens - cachedTokens);
-
- cost += nonCachedInput * (pricing.input / 1000000);
-
- if (cachedTokens > 0) {
- cost += cachedTokens * ((pricing.cached || pricing.input) / 1000000);
- }
-
- const outputTokens = tokens.completion_tokens || tokens.output_tokens || 0;
- cost += outputTokens * (pricing.output / 1000000);
-
- const reasoningTokens = tokens.reasoning_tokens || 0;
- if (reasoningTokens > 0) {
- cost += reasoningTokens * ((pricing.reasoning || pricing.output) / 1000000);
+export function calculateCostBreakdownFromTokens(tokens, pricing) {
+  // Nothing to price: return an all-zero breakdown so every field is readable.
+  if (!tokens || !pricing) {
+    return {
+      promptTokens: 0,
+      completionTokens: 0,
+      uncachedPromptTokens: 0,
+      cacheReadTokens: 0,
+      cacheCreationTokens: 0,
+      reasoningTokens: 0,
+      inputCost: 0,
+      uncachedInputCost: 0,
+      cachedInputCost: 0,
+      cacheCreationCost: 0,
+      outputCost: 0,
+      visibleOutputCost: 0,
+      reasoningCost: 0,
+      totalCost: 0,
+      cacheSavings: 0,
+    };
}
- const cacheCreationTokens = tokens.cache_creation_input_tokens || 0;
- if (cacheCreationTokens > 0) {
- cost += cacheCreationTokens * ((pricing.cache_creation || pricing.input) / 1000000);
- }
+  // First finite, strictly positive candidate, or 0 when none qualifies.
+  const readNumber = (...values) => {
+    for (const value of values) {
+      const n = Number(value);
+      if (Number.isFinite(n) && n > 0) return n;
+    }
+    return 0;
+  };
+
+  // Remember which field supplied the prompt count: the inclusive/exclusive
+  // decision below must describe the value actually used, not merely whether
+  // prompt_tokens is defined (it can be null or 0 while input_tokens is set).
+  const inclusivePromptCount = readNumber(tokens.prompt_tokens);
+  const reportedPromptTokens = inclusivePromptCount || readNumber(tokens.input_tokens);
+  const cacheReadTokens = readNumber(
+    tokens.cache_read_input_tokens,
+    tokens.cached_tokens,
+    tokens.input_tokens_details?.cached_tokens,
+    tokens.prompt_tokens_details?.cached_tokens,
+  );
+  const cacheCreationTokens = readNumber(
+    tokens.cache_creation_input_tokens,
+    tokens.input_tokens_details?.cache_creation_tokens,
+    tokens.prompt_tokens_details?.cache_creation_tokens,
+  );
+  const cacheSideTokens = cacheReadTokens + cacheCreationTokens;
+
+  // A usable prompt_tokens value is treated as already containing cache
+  // reads/writes; input_tokens is treated as excluding them unless the
+  // producer sets input_tokens_include_cache.
+  const promptTokensIncludesCache = inclusivePromptCount > 0 || tokens.input_tokens_include_cache === true;
+  const promptTokens = promptTokensIncludesCache
+    ? reportedPromptTokens
+    : reportedPromptTokens + cacheSideTokens;
+
+  // Clamp at 0 in case the reported cache counts exceed the prompt count.
+  const uncachedPromptTokens = promptTokensIncludesCache
+    ? Math.max(0, reportedPromptTokens - cacheSideTokens)
+    : reportedPromptTokens;
+
+  const completionTokens = readNumber(tokens.completion_tokens, tokens.output_tokens);
+  const reasoningTokens = readNumber(
+    tokens.reasoning_tokens,
+    tokens.output_tokens_details?.reasoning_tokens,
+    tokens.completion_tokens_details?.reasoning_tokens,
+  );
+
+  // Rates are dollars per 1M tokens. `??` (not `||`) so only a missing/null
+  // rate falls back; an explicit 0 rate is kept.
+  const inputRate = pricing.input || 0;
+  const cachedRate = pricing.cached ?? inputRate;
+  const cacheCreationRate = pricing.cache_creation ?? inputRate;
+  const outputRate = pricing.output || 0;
+  const reasoningRate = pricing.reasoning ?? outputRate;
+
+  const uncachedInputCost = uncachedPromptTokens * (inputRate / 1000000);
+  const cachedInputCost = cacheReadTokens * (cachedRate / 1000000);
+  const cacheCreationCost = cacheCreationTokens * (cacheCreationRate / 1000000);
+  const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
+
+  const outputCost = completionTokens * (outputRate / 1000000);
+  const reasoningBilledSeparately = pricing.reasoning_billed_separately === true;
+  // reasoningCost is informational: unless billed separately it is the share
+  // of outputCost attributable to reasoning tokens and is NOT added to the total.
+  const reasoningCost = reasoningTokens * ((reasoningBilledSeparately ? reasoningRate : outputRate) / 1000000);
+  const visibleOutputTokens = Math.max(0, completionTokens - reasoningTokens);
+  const visibleOutputCost = visibleOutputTokens * (outputRate / 1000000);
+  const separateReasoningCost = reasoningBilledSeparately ? reasoningTokens * (reasoningRate / 1000000) : 0;
+  const totalCost = inputCost + outputCost + separateReasoningCost;
+  // What the cache reads would have cost extra at the full input rate.
+  const cacheSavings = cacheReadTokens * (Math.max(0, inputRate - cachedRate) / 1000000);
+
+  return {
+    promptTokens,
+    completionTokens,
+    uncachedPromptTokens,
+    cacheReadTokens,
+    cacheCreationTokens,
+    reasoningTokens,
+    inputCost,
+    uncachedInputCost,
+    cachedInputCost,
+    cacheCreationCost,
+    outputCost,
+    visibleOutputCost,
+    reasoningCost,
+    totalCost,
+    cacheSavings,
+  };
+}
- return cost;
+/**
+ * Total estimated cost for one usage record.
+ *
+ * Returns only the totalCost field of calculateCostBreakdownFromTokens.
+ * @param {object} tokens - usage counters for one request
+ * @param {object} pricing - rates in dollars per 1M tokens
+ * @returns {number} cost in dollars
+ */
+export function calculateCostFromTokens(tokens, pricing) {
+  const { totalCost } = calculateCostBreakdownFromTokens(tokens, pricing);
+  return totalCost;
}
diff --git a/open-sse/utils/usageTracking.js b/open-sse/utils/usageTracking.js
index aed411189f..f6d0a5b651 100644
--- a/open-sse/utils/usageTracking.js
+++ b/open-sse/utils/usageTracking.js
@@ -174,11 +174,14 @@ export function extractUsage(chunk) {
// Claude format (message_delta event)
if (chunk.type === "message_delta" && chunk.usage && typeof chunk.usage === "object") {
+ const inputTokens = chunk.usage.input_tokens || 0;
+ const cacheReadTokens = chunk.usage.cache_read_input_tokens || 0;
+ const cacheCreationTokens = chunk.usage.cache_creation_input_tokens || 0;
return normalizeUsage({
- prompt_tokens: chunk.usage.input_tokens || 0,
+ prompt_tokens: inputTokens + cacheReadTokens + cacheCreationTokens,
completion_tokens: chunk.usage.output_tokens || 0,
- cache_read_input_tokens: chunk.usage.cache_read_input_tokens,
- cache_creation_input_tokens: chunk.usage.cache_creation_input_tokens
+ cache_read_input_tokens: cacheReadTokens,
+ cache_creation_input_tokens: cacheCreationTokens
});
}
@@ -186,12 +189,17 @@ export function extractUsage(chunk) {
if ((chunk.type === "response.completed" || chunk.type === "response.done") && chunk.response?.usage && typeof chunk.response.usage === "object") {
const usage = chunk.response.usage;
const cachedTokens = usage.input_tokens_details?.cached_tokens;
+ const cacheCreationTokens = usage.input_tokens_details?.cache_creation_tokens;
return normalizeUsage({
prompt_tokens: usage.input_tokens || usage.prompt_tokens || 0,
completion_tokens: usage.output_tokens || usage.completion_tokens || 0,
cached_tokens: cachedTokens,
+ cache_creation_input_tokens: cacheCreationTokens,
reasoning_tokens: usage.output_tokens_details?.reasoning_tokens,
- prompt_tokens_details: cachedTokens ? { cached_tokens: cachedTokens } : undefined
+ prompt_tokens_details: (cachedTokens || cacheCreationTokens) ? {
+ ...(cachedTokens ? { cached_tokens: cachedTokens } : {}),
+ ...(cacheCreationTokens ? { cache_creation_tokens: cacheCreationTokens } : {}),
+ } : undefined
});
}
@@ -308,10 +316,15 @@ export function logUsage(provider, usage, model = null, connectionId = null, api
const p = provider?.toUpperCase() || "UNKNOWN";
+ // Read cache/reasoning counts up front (unified from different formats); inTokens below needs them
+ const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens || usage.prompt_tokens_details?.cached_tokens || usage.input_tokens_details?.cached_tokens;
+ const cacheCreation = usage.cache_creation_input_tokens || usage.prompt_tokens_details?.cache_creation_tokens || usage.input_tokens_details?.cache_creation_tokens;
+ const reasoning = usage.reasoning_tokens || usage.completion_tokens_details?.reasoning_tokens || usage.output_tokens_details?.reasoning_tokens;
+
// Support both formats:
// - OpenAI: prompt_tokens, completion_tokens
// - Claude: input_tokens, output_tokens
- const inTokens = usage?.prompt_tokens || usage?.input_tokens || 0;
+ const inTokens = usage?.prompt_tokens || ((usage?.input_tokens || 0) + (cacheRead || 0) + (cacheCreation || 0));
const outTokens = usage?.completion_tokens || usage?.output_tokens || 0;
const accountPrefix = connectionId ? connectionId.slice(0, 8) + "..." : "unknown";
@@ -322,14 +335,10 @@ export function logUsage(provider, usage, model = null, connectionId = null, api
msg += ` ${COLORS.yellow}(estimated)${COLORS.reset}`;
}
- // Add cache info if present (unified from different formats)
- const cacheRead = usage.cache_read_input_tokens || usage.cached_tokens || usage.prompt_tokens_details?.cached_tokens;
if (cacheRead) msg += ` | cache_read=${cacheRead}`;
- const cacheCreation = usage.cache_creation_input_tokens;
if (cacheCreation) msg += ` | cache_create=${cacheCreation}`;
- const reasoning = usage.reasoning_tokens;
if (reasoning) msg += ` | reasoning=${reasoning}`;
console.log(msg);
diff --git a/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js b/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js
index 5d08933de9..d2e254fc33 100644
--- a/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js
+++ b/src/app/(dashboard)/dashboard/usage/components/OverviewCards.js
@@ -4,9 +4,32 @@ import PropTypes from "prop-types";
import Card from "@/shared/components/Card";
const fmt = (n) => new Intl.NumberFormat().format(n || 0);
-const fmtCost = (n) => `$${(n || 0).toFixed(2)}`;
+// Format a dollar amount, widening precision for sub-cent values:
+// 6 decimals below $0.0001, 4 below $0.01, otherwise 2.
+const fmtCost = (n) => {
+  const amount = Number(n || 0);
+  if (amount === 0 || !Number.isFinite(amount)) return "$0.00";
+  const magnitude = Math.abs(amount);
+  const decimals = magnitude < 0.0001 ? 6 : magnitude < 0.01 ? 4 : 2;
+  return `$${amount.toFixed(decimals)}`;
+};
export default function OverviewCards({ stats }) {
+ const cachedInput = stats.totalCacheReadTokens || 0;
+ const cachePercent = stats.totalPromptTokens > 0 ? Math.round((cachedInput / stats.totalPromptTokens) * 100) : 0;
+ const hasTokenBreakdown = (stats.totalUncachedPromptTokens || cachedInput || stats.totalCacheCreationTokens) > 0;
+ const uncachedInput = hasTokenBreakdown ? (stats.totalUncachedPromptTokens || 0) : (stats.totalPromptTokens || 0);
+ const hasCostBreakdown = [
+ stats.totalInputCost,
+ stats.totalOutputCost,
+ stats.totalCachedInputCost,
+ stats.totalCacheCreationCost,
+ ].some((value) => Number(value || 0) > 0);
+ const totalTokens = (stats.totalPromptTokens || 0) + (stats.totalCompletionTokens || 0);
+ const fallbackInputCost = totalTokens > 0 ? (stats.totalPromptTokens || 0) * ((stats.totalCost || 0) / totalTokens) : 0;
+ const fallbackOutputCost = totalTokens > 0 ? (stats.totalCompletionTokens || 0) * ((stats.totalCost || 0) / totalTokens) : 0;
+ const inputCost = hasCostBreakdown ? (stats.totalInputCost || 0) : fallbackInputCost;
+ const outputCost = hasCostBreakdown ? (stats.totalOutputCost || 0) : fallbackOutputCost;
+
return (
@@ -16,15 +39,20 @@ export default function OverviewCards({ stats }) {
Total Input Tokens
{fmt(stats.totalPromptTokens)}
+
+ {fmt(uncachedInput)} uncached | {fmt(cachedInput)} cached ({cachePercent}%)
+
Output Tokens
{fmt(stats.totalCompletionTokens)}
+ {fmt(stats.totalReasoningTokens)} reasoning
Est. Cost
~{fmtCost(stats.totalCost)}
- Estimated, not actual billing
+ Input {fmtCost(inputCost)} | Output {fmtCost(outputCost)}
+ Cache saved ~{fmtCost(stats.totalCacheSavings)}
);
diff --git a/src/app/(dashboard)/dashboard/usage/components/UsageTable.js b/src/app/(dashboard)/dashboard/usage/components/UsageTable.js
index 9f3d309922..cf76bb6d7c 100644
--- a/src/app/(dashboard)/dashboard/usage/components/UsageTable.js
+++ b/src/app/(dashboard)/dashboard/usage/components/UsageTable.js
@@ -6,7 +6,14 @@ import Card from "@/shared/components/Card";
import Badge from "@/shared/components/Badge";
const fmt = (n) => new Intl.NumberFormat().format(n || 0);
-const fmtCost = (n) => `$${(n || 0).toFixed(2)}`;
+// Dollar formatter for table cells; amounts under one cent keep extra
+// decimals (4, or 6 below $0.0001) instead of rounding to "$0.00".
+const fmtCost = (n) => {
+  const dollars = Number(n || 0);
+  if (dollars === 0 || !Number.isFinite(dollars)) return "$0.00";
+  const size = Math.abs(dollars);
+  let precision = 2;
+  if (size < 0.01) precision = size < 0.0001 ? 6 : 4;
+  return `$${dollars.toFixed(precision)}`;
+};
function fmtTime(iso) {
if (!iso) return "Never";
@@ -32,14 +39,33 @@ SortIcon.propTypes = {
* Render 3 token or cost cells based on viewMode
*/
function ValueCells({ item, viewMode, isSummary = false }) {
+ const hasInputBreakdown = (item.cacheReadTokens || item.cacheCreationTokens || item.uncachedPromptTokens) > 0;
+ const hasInputCostBreakdown = (item.cachedInputCost || item.cacheCreationCost || item.uncachedInputCost) > 0;
+ const inputTokenParts = [
+ item.uncachedPromptTokens ? `${fmt(item.uncachedPromptTokens)} uncached` : null,
+ item.cacheReadTokens ? `${fmt(item.cacheReadTokens)} cached` : null,
+ item.cacheCreationTokens ? `${fmt(item.cacheCreationTokens)} write` : null,
+ ].filter(Boolean);
+ const inputCostParts = [
+ item.uncachedInputCost ? `${fmtCost(item.uncachedInputCost)} uncached` : null,
+ item.cachedInputCost ? `${fmtCost(item.cachedInputCost)} cached` : null,
+ item.cacheCreationCost ? `${fmtCost(item.cacheCreationCost)} write` : null,
+ ].filter(Boolean);
+
if (viewMode === "tokens") {
return (
<>
- {isSummary && item.promptTokens === undefined ? "—" : fmt(item.promptTokens)}
+ {isSummary && item.promptTokens === undefined ? "—" : fmt(item.promptTokens)}
+ {hasInputBreakdown && inputTokenParts.length > 0 && (
+ {inputTokenParts.join(" | ")}
+ )}
|
- {isSummary && item.completionTokens === undefined ? "—" : fmt(item.completionTokens)}
+ {isSummary && item.completionTokens === undefined ? "—" : fmt(item.completionTokens)}
+ {item.reasoningTokens > 0 && (
+ {fmt(item.reasoningTokens)} reasoning
+ )}
|
{fmt(item.totalTokens)}
@@ -50,13 +76,22 @@ function ValueCells({ item, viewMode, isSummary = false }) {
return (
<>
|
- {isSummary && item.inputCost === undefined ? "—" : fmtCost(item.inputCost)}
+ {isSummary && item.inputCost === undefined ? "—" : fmtCost(item.inputCost)}
+ {hasInputCostBreakdown && inputCostParts.length > 0 && (
+ {inputCostParts.join(" | ")}
+ )}
|
- {isSummary && item.outputCost === undefined ? "—" : fmtCost(item.outputCost)}
+ {isSummary && item.outputCost === undefined ? "—" : fmtCost(item.outputCost)}
+ {item.reasoningCost > 0 && (
+ {fmtCost(item.reasoningCost)} reasoning incl.
+ )}
|
- {fmtCost(item.totalCost || item.cost)}
+ {fmtCost(item.totalCost || item.cost)}
+ {item.cacheSavings > 0 && (
+ {fmtCost(item.cacheSavings)} cache saved
+ )}
|
>
);
diff --git a/src/lib/db/repos/usageRepo.js b/src/lib/db/repos/usageRepo.js
index 63d0494eb3..bdc706fcd0 100644
--- a/src/lib/db/repos/usageRepo.js
+++ b/src/lib/db/repos/usageRepo.js
@@ -2,11 +2,30 @@ import { EventEmitter } from "events";
import { getAdapter } from "../driver.js";
import { parseJson, stringifyJson } from "../helpers/jsonCol.js";
import { getMeta, setMeta } from "../helpers/metaStore.js";
+import { calculateCostBreakdownFromTokens } from "open-sse/providers/pricing.js";
const PENDING_TIMEOUT_MS = 60 * 1000;
const RING_CAP = 50;
const CONN_CACHE_TTL_MS = 30 * 1000;
const PERIOD_MS = { "24h": 86400000, "7d": 604800000, "30d": 2592000000, "60d": 5184000000 };
+// Numeric fields carried by every usage counter. usageCounter() zero-fills
+// them in this order and addUsageFields() sums exactly these keys, so the
+// list is frozen to keep it from being mutated at runtime.
+const USAGE_NUMERIC_FIELDS = Object.freeze([
+  // request and token counts
+  "requests",
+  "promptTokens",
+  "completionTokens",
+  "cost",
+  "uncachedPromptTokens",
+  "cacheReadTokens",
+  "cacheCreationTokens",
+  "reasoningTokens",
+  // itemized costs in dollars
+  "inputCost",
+  "uncachedInputCost",
+  "cachedInputCost",
+  "cacheCreationCost",
+  "outputCost",
+  "visibleOutputCost",
+  "reasoningCost",
+  "cacheSavings",
+]);
// In-memory state shared across Next.js modules
if (!global._pendingRequests) global._pendingRequests = { byModel: {}, byAccount: {} };
@@ -32,25 +51,107 @@ function getLocalDateKey(timestamp) {
return `${d.getFullYear()}-${String(d.getMonth() + 1).padStart(2, "0")}-${String(d.getDate()).padStart(2, "0")}`;
}
+// Build a counter with every usage field set to 0; entries in `extra`
+// override or extend those zeroed defaults.
+function usageCounter(extra = {}) {
+  const zeroed = Object.fromEntries(USAGE_NUMERIC_FIELDS.map((name) => [name, 0]));
+  return { ...zeroed, ...extra };
+}
+
+// Add each usage field of `values` onto `target` in place; a missing or
+// falsy field on either side counts as 0.
+function addUsageFields(target, values = {}) {
+  USAGE_NUMERIC_FIELDS.forEach((name) => {
+    const current = target[name] || 0;
+    const delta = values[name] || 0;
+    target[name] = current + delta;
+  });
+}
+
+// Return the first argument that converts to a finite number greater than 0,
+// or 0 when no argument qualifies.
+function readNumber(...values) {
+  for (let i = 0; i < values.length; i += 1) {
+    const candidate = Number(values[i]);
+    if (!Number.isFinite(candidate) || candidate <= 0) continue;
+    return candidate;
+  }
+  return 0;
+}
+
+/**
+ * Collapse one usage record into a zero-filled counter with requests = 1.
+ *
+ * Values from a cost breakdown (the `breakdown` argument, or one stored on
+ * `tokens` as cost_breakdown / costBreakdown) take precedence; otherwise the
+ * counts are re-derived from the raw token fields. Cost fields come only from
+ * the breakdown and stay 0 when none is available.
+ * @param {object} tokens - raw usage fields for one request
+ * @param {object|null} breakdown - optional precomputed cost breakdown
+ * @returns {object} counter keyed by the usage fields
+ */
+function summarizeTokens(tokens = {}, breakdown = null) {
+  const cb = breakdown || tokens.cost_breakdown || tokens.costBreakdown || {};
+  const cacheReadTokens = readNumber(
+    cb.cacheReadTokens,
+    tokens.cache_read_input_tokens,
+    tokens.cached_tokens,
+    tokens.input_tokens_details?.cached_tokens,
+    tokens.prompt_tokens_details?.cached_tokens,
+  );
+  const cacheCreationTokens = readNumber(
+    cb.cacheCreationTokens,
+    tokens.cache_creation_input_tokens,
+    tokens.input_tokens_details?.cache_creation_tokens,
+    tokens.prompt_tokens_details?.cache_creation_tokens,
+  );
+
+  // Track which field supplied the prompt count so the inclusive/exclusive
+  // decision describes the value actually used.
+  const inclusivePromptCount = readNumber(tokens.prompt_tokens);
+  const reportedPromptTokens = inclusivePromptCount || readNumber(tokens.input_tokens);
+  // Honor input_tokens_include_cache, as calculateCostBreakdownFromTokens
+  // does; otherwise cache tokens are added on top of a count that already
+  // contains them.
+  const promptIncludesCache = inclusivePromptCount > 0 || tokens.input_tokens_include_cache === true;
+  const promptTokens = readNumber(
+    cb.promptTokens,
+    promptIncludesCache ? reportedPromptTokens : reportedPromptTokens + cacheReadTokens + cacheCreationTokens,
+  );
+  const completionTokens = readNumber(cb.completionTokens, tokens.completion_tokens, tokens.output_tokens);
+  const uncachedPromptTokens = readNumber(
+    cb.uncachedPromptTokens,
+    Math.max(0, promptTokens - cacheReadTokens - cacheCreationTokens),
+  );
+  const reasoningTokens = readNumber(
+    cb.reasoningTokens,
+    tokens.reasoning_tokens,
+    tokens.output_tokens_details?.reasoning_tokens,
+    tokens.completion_tokens_details?.reasoning_tokens,
+  );
+
+  return usageCounter({
+    requests: 1,
+    promptTokens,
+    completionTokens,
+    cost: readNumber(cb.totalCost),
+    uncachedPromptTokens,
+    cacheReadTokens,
+    cacheCreationTokens,
+    reasoningTokens,
+    inputCost: readNumber(cb.inputCost),
+    uncachedInputCost: readNumber(cb.uncachedInputCost),
+    cachedInputCost: readNumber(cb.cachedInputCost),
+    cacheCreationCost: readNumber(cb.cacheCreationCost),
+    outputCost: readNumber(cb.outputCost),
+    visibleOutputCost: readNumber(cb.visibleOutputCost),
+    reasoningCost: readNumber(cb.reasoningCost),
+    cacheSavings: readNumber(cb.cacheSavings),
+  });
+}
+
+// [counter field, stats total field] pairs folded by addStatsTotals.
+const STATS_TOTAL_FIELDS = [
+  ["promptTokens", "totalPromptTokens"],
+  ["completionTokens", "totalCompletionTokens"],
+  ["cost", "totalCost"],
+  ["uncachedPromptTokens", "totalUncachedPromptTokens"],
+  ["cacheReadTokens", "totalCacheReadTokens"],
+  ["cacheCreationTokens", "totalCacheCreationTokens"],
+  ["reasoningTokens", "totalReasoningTokens"],
+  ["inputCost", "totalInputCost"],
+  ["uncachedInputCost", "totalUncachedInputCost"],
+  ["cachedInputCost", "totalCachedInputCost"],
+  ["cacheCreationCost", "totalCacheCreationCost"],
+  ["outputCost", "totalOutputCost"],
+  ["visibleOutputCost", "totalVisibleOutputCost"],
+  ["reasoningCost", "totalReasoningCost"],
+  ["cacheSavings", "totalCacheSavings"],
+];
+
+/**
+ * Add one counter's values onto the running totals held in `stats`.
+ * `requests` is intentionally not part of the mapping.
+ * @param {object} stats - object holding the total* fields; mutated in place
+ * @param {object} values - counter to add; missing fields count as 0
+ */
+function addStatsTotals(stats, values = {}) {
+  for (const [field, totalField] of STATS_TOTAL_FIELDS) {
+    // `|| 0` on the existing total: a stats object created without one of
+    // these fields would otherwise become NaN on its first update.
+    stats[totalField] = (stats[totalField] || 0) + (values[field] || 0);
+  }
+}
+
function addToCounter(target, key, values) {
- if (!target[key]) target[key] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0 };
- target[key].requests += values.requests || 1;
- target[key].promptTokens += values.promptTokens || 0;
- target[key].completionTokens += values.completionTokens || 0;
- target[key].cost += values.cost || 0;
+ // Create the bucket on first use with usageCounter(), then merge `values` into it through
+ // addUsageFields (both helpers are defined outside this hunk; presumably addUsageFields sums
+ // the numeric usage fields — verify there how a missing `values.requests` is treated, since
+ // the removed code defaulted it to 1).
+ if (!target[key]) target[key] = usageCounter();
+ addUsageFields(target[key], values);
// Optional descriptive properties in `values.meta` are copied onto the bucket as-is.
if (values.meta) Object.assign(target[key], values.meta);
}
function aggregateEntryToDay(day, entry) {
- const promptTokens = entry.tokens?.prompt_tokens || entry.tokens?.input_tokens || 0;
- const completionTokens = entry.tokens?.completion_tokens || entry.tokens?.output_tokens || 0;
- const cost = entry.cost || 0;
- const vals = { promptTokens, completionTokens, cost };
+ const vals = summarizeTokens(entry.tokens || {}, entry.costBreakdown || entry.tokens?.cost_breakdown);
+ vals.cost = entry.cost || vals.cost || 0;
- day.requests = (day.requests || 0) + 1;
- day.promptTokens = (day.promptTokens || 0) + promptTokens;
- day.completionTokens = (day.completionTokens || 0) + completionTokens;
- day.cost = (day.cost || 0) + cost;
+ addUsageFields(day, vals);
day.byProvider ||= {};
day.byModel ||= {};
@@ -110,46 +211,23 @@ async function ensureRingInitialized() {
} catch {}
}
-async function calculateCost(provider, model, tokens) {
- if (!tokens || !provider || !model) return 0;
+// Resolves pricing for (provider, model) through ./pricingRepo.js (imported lazily) and returns
+// the itemized breakdown produced by calculateCostBreakdownFromTokens(tokens, pricing).
+// Falls back to summarizeTokens(tokens || {}) when an argument is missing, when no pricing is
+// found, or when the lookup throws (the error is logged, not rethrown).
+// NOTE(review): the fallback object comes from summarizeTokens, whose result is built with a
+// `cost` field rather than `totalCost`; callers reading `.totalCost` presumably see undefined
+// on that path — confirm this asymmetry is intended.
+async function calculateCostBreakdown(provider, model, tokens) {
+ if (!tokens || !provider || !model) return summarizeTokens(tokens || {});
try {
const { getPricingForModel } = await import("./pricingRepo.js");
const pricing = await getPricingForModel(provider, model);
- if (!pricing) return 0;
-
- let cost = 0;
- const inputTokens = tokens.prompt_tokens || tokens.input_tokens || 0;
- const cachedTokens = tokens.cached_tokens || tokens.cache_read_input_tokens || 0;
- const nonCachedInput = Math.max(0, inputTokens - cachedTokens);
- cost += nonCachedInput * (pricing.input / 1000000);
-
- if (cachedTokens > 0) {
- const cachedRate = pricing.cached || pricing.input;
- cost += cachedTokens * (cachedRate / 1000000);
- }
-
- const outputTokens = tokens.completion_tokens || tokens.output_tokens || 0;
- cost += outputTokens * (pricing.output / 1000000);
-
- const reasoningTokens = tokens.reasoning_tokens || 0;
- if (reasoningTokens > 0) {
- const rate = pricing.reasoning || pricing.output;
- cost += reasoningTokens * (rate / 1000000);
- }
-
- const cacheCreationTokens = tokens.cache_creation_input_tokens || 0;
- if (cacheCreationTokens > 0) {
- const rate = pricing.cache_creation || pricing.input;
- cost += cacheCreationTokens * (rate / 1000000);
- }
-
- return cost;
+ if (!pricing) return summarizeTokens(tokens || {});
+ return calculateCostBreakdownFromTokens(tokens, pricing);
} catch (e) {
console.error("Error calculating cost:", e);
- return 0;
+ return summarizeTokens(tokens || {});
}
}
+async function calculateCost(provider, model, tokens) {
+  // Scalar wrapper around calculateCostBreakdown: yields only the breakdown's total,
+  // or 0 when the breakdown carries no (truthy) totalCost.
+  const breakdown = await calculateCostBreakdown(provider, model, tokens);
+  return breakdown.totalCost || 0;
+}
+
export function trackPendingRequest(model, provider, connectionId, started, error = false) {
const modelKey = provider ? `${model} (${provider})` : model;
const timerKey = `${connectionId}|${modelKey}`;
@@ -219,10 +297,15 @@ export async function getActiveRequests() {
.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp))
.map((e) => {
const t = e.tokens || {};
+ const usage = summarizeTokens(t);
return {
timestamp: e.timestamp, model: e.model, provider: e.provider || "",
- promptTokens: t.prompt_tokens || t.input_tokens || 0,
- completionTokens: t.completion_tokens || t.output_tokens || 0,
+ promptTokens: usage.promptTokens,
+ completionTokens: usage.completionTokens,
+ cacheReadTokens: usage.cacheReadTokens,
+ cacheCreationTokens: usage.cacheCreationTokens,
+ reasoningTokens: usage.reasoningTokens,
+ cost: e.cost || usage.cost || 0,
status: e.status || "ok",
};
})
@@ -245,11 +328,16 @@ export async function saveRequestUsage(entry) {
const db = await getAdapter();
if (!entry.timestamp) entry.timestamp = new Date().toISOString();
- entry.cost = await calculateCost(entry.provider, entry.model, entry.tokens);
+ const tokens = { ...(entry.tokens || {}) };
+ const costBreakdown = await calculateCostBreakdown(entry.provider, entry.model, tokens);
+ tokens.cost_breakdown = costBreakdown;
+ entry.tokens = tokens;
+ entry.costBreakdown = costBreakdown;
+ entry.cost = costBreakdown.totalCost || await calculateCost(entry.provider, entry.model, tokens);
- const tokens = entry.tokens || {};
- const promptTokens = tokens.prompt_tokens || tokens.input_tokens || 0;
- const completionTokens = tokens.completion_tokens || tokens.output_tokens || 0;
+ const tokenSummary = summarizeTokens(tokens, costBreakdown);
+ const promptTokens = tokenSummary.promptTokens || 0;
+ const completionTokens = tokenSummary.completionTokens || 0;
// All 3 writes (history insert, daily upsert, lifetime counter) in ONE transaction.
// better-sqlite3 is sync → no JS yield mid-transaction → no race in same process.
@@ -347,10 +435,15 @@ export async function getUsageStats(period = "all") {
const recentRequests = recentRows
.map((r) => {
const t = parseJson(r.tokens, {}) || {};
+ const usage = summarizeTokens(t);
return {
timestamp: r.timestamp, model: r.model, provider: r.provider || "",
- promptTokens: t.prompt_tokens || t.input_tokens || 0,
- completionTokens: t.completion_tokens || t.output_tokens || 0,
+ promptTokens: usage.promptTokens,
+ completionTokens: usage.completionTokens,
+ cacheReadTokens: usage.cacheReadTokens,
+ cacheCreationTokens: usage.cacheCreationTokens,
+ reasoningTokens: usage.reasoningTokens,
+ cost: r.cost || usage.cost || 0,
status: r.status || "ok",
};
})
@@ -367,6 +460,9 @@ export async function getUsageStats(period = "all") {
const stats = {
totalRequests: 0,
totalPromptTokens: 0, totalCompletionTokens: 0, totalCost: 0,
+ totalUncachedPromptTokens: 0, totalCacheReadTokens: 0, totalCacheCreationTokens: 0, totalReasoningTokens: 0,
+ totalInputCost: 0, totalUncachedInputCost: 0, totalCachedInputCost: 0, totalCacheCreationCost: 0,
+ totalOutputCost: 0, totalVisibleOutputCost: 0, totalReasoningCost: 0, totalCacheSavings: 0,
byProvider: {}, byModel: {}, byAccount: {}, byApiKey: {}, byEndpoint: {},
last10Minutes: [],
pending: pendingRequests,
@@ -425,16 +521,11 @@ export async function getUsageStats(period = "all") {
for (const dr of dayRows) {
const dateKey = dr.dateKey;
const day = parseJson(dr.data, {});
- stats.totalPromptTokens += day.promptTokens || 0;
- stats.totalCompletionTokens += day.completionTokens || 0;
- stats.totalCost += day.cost || 0;
+ addStatsTotals(stats, day);
for (const [prov, p] of Object.entries(day.byProvider || {})) {
- if (!stats.byProvider[prov]) stats.byProvider[prov] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0 };
- stats.byProvider[prov].requests += p.requests || 0;
- stats.byProvider[prov].promptTokens += p.promptTokens || 0;
- stats.byProvider[prov].completionTokens += p.completionTokens || 0;
- stats.byProvider[prov].cost += p.cost || 0;
+ if (!stats.byProvider[prov]) stats.byProvider[prov] = usageCounter();
+ addUsageFields(stats.byProvider[prov], p);
}
for (const [mk, m] of Object.entries(day.byModel || {})) {
@@ -443,12 +534,9 @@ export async function getUsageStats(period = "all") {
const statsKey = provider ? `${rawModel} (${provider})` : rawModel;
const providerDisplayName = providerNodeNameMap[provider] || provider;
if (!stats.byModel[statsKey]) {
- stats.byModel[statsKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel, provider: providerDisplayName, lastUsed: dateKey };
+ stats.byModel[statsKey] = usageCounter({ rawModel, provider: providerDisplayName, lastUsed: dateKey });
}
- stats.byModel[statsKey].requests += m.requests || 0;
- stats.byModel[statsKey].promptTokens += m.promptTokens || 0;
- stats.byModel[statsKey].completionTokens += m.completionTokens || 0;
- stats.byModel[statsKey].cost += m.cost || 0;
+ addUsageFields(stats.byModel[statsKey], m);
if (dateKey > (stats.byModel[statsKey].lastUsed || "")) stats.byModel[statsKey].lastUsed = dateKey;
}
@@ -459,12 +547,9 @@ export async function getUsageStats(period = "all") {
const providerDisplayName = providerNodeNameMap[provider] || provider;
const accountKey = `${rawModel} (${provider} - ${accountName})`;
if (!stats.byAccount[accountKey]) {
- stats.byAccount[accountKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel, provider: providerDisplayName, connectionId: connId, accountName, lastUsed: dateKey };
+ stats.byAccount[accountKey] = usageCounter({ rawModel, provider: providerDisplayName, connectionId: connId, accountName, lastUsed: dateKey });
}
- stats.byAccount[accountKey].requests += a.requests || 0;
- stats.byAccount[accountKey].promptTokens += a.promptTokens || 0;
- stats.byAccount[accountKey].completionTokens += a.completionTokens || 0;
- stats.byAccount[accountKey].cost += a.cost || 0;
+ addUsageFields(stats.byAccount[accountKey], a);
if (dateKey > (stats.byAccount[accountKey].lastUsed || "")) stats.byAccount[accountKey].lastUsed = dateKey;
}
@@ -477,12 +562,9 @@ export async function getUsageStats(period = "all") {
const keyName = keyInfo?.name || (apiKeyVal ? apiKeyVal.slice(0, 8) + "..." : "Local (No API Key)");
const apiKeyKey = apiKeyVal || "local-no-key";
if (!stats.byApiKey[akKey]) {
- stats.byApiKey[akKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel, provider: providerDisplayName, apiKey: apiKeyVal, keyName, apiKeyKey, lastUsed: dateKey };
+ stats.byApiKey[akKey] = usageCounter({ rawModel, provider: providerDisplayName, apiKey: apiKeyVal, keyName, apiKeyKey, lastUsed: dateKey });
}
- stats.byApiKey[akKey].requests += ak.requests || 0;
- stats.byApiKey[akKey].promptTokens += ak.promptTokens || 0;
- stats.byApiKey[akKey].completionTokens += ak.completionTokens || 0;
- stats.byApiKey[akKey].cost += ak.cost || 0;
+ addUsageFields(stats.byApiKey[akKey], ak);
if (dateKey > (stats.byApiKey[akKey].lastUsed || "")) stats.byApiKey[akKey].lastUsed = dateKey;
}
@@ -492,12 +574,9 @@ export async function getUsageStats(period = "all") {
const provider = ep.provider || "";
const providerDisplayName = providerNodeNameMap[provider] || provider;
if (!stats.byEndpoint[epKey]) {
- stats.byEndpoint[epKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, endpoint, rawModel, provider: providerDisplayName, lastUsed: dateKey };
+ stats.byEndpoint[epKey] = usageCounter({ endpoint, rawModel, provider: providerDisplayName, lastUsed: dateKey });
}
- stats.byEndpoint[epKey].requests += ep.requests || 0;
- stats.byEndpoint[epKey].promptTokens += ep.promptTokens || 0;
- stats.byEndpoint[epKey].completionTokens += ep.completionTokens || 0;
- stats.byEndpoint[epKey].cost += ep.cost || 0;
+ addUsageFields(stats.byEndpoint[epKey], ep);
if (dateKey > (stats.byEndpoint[epKey].lastUsed || "")) stats.byEndpoint[epKey].lastUsed = dateKey;
}
}
@@ -545,41 +624,29 @@ export async function getUsageStats(period = "all") {
for (const r of filtered) {
const tokens = parseJson(r.tokens, {}) || {};
- const promptTokens = tokens.prompt_tokens || 0;
- const completionTokens = tokens.completion_tokens || 0;
- const entryCost = r.cost || 0;
+ const vals = summarizeTokens(tokens);
+ vals.cost = r.cost || vals.cost || 0;
const providerDisplayName = providerNodeNameMap[r.provider] || r.provider;
- stats.totalPromptTokens += promptTokens;
- stats.totalCompletionTokens += completionTokens;
- stats.totalCost += entryCost;
+ addStatsTotals(stats, vals);
- if (!stats.byProvider[r.provider]) stats.byProvider[r.provider] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0 };
- stats.byProvider[r.provider].requests++;
- stats.byProvider[r.provider].promptTokens += promptTokens;
- stats.byProvider[r.provider].completionTokens += completionTokens;
- stats.byProvider[r.provider].cost += entryCost;
+ if (!stats.byProvider[r.provider]) stats.byProvider[r.provider] = usageCounter();
+ addUsageFields(stats.byProvider[r.provider], vals);
const modelKey = r.provider ? `${r.model} (${r.provider})` : r.model;
if (!stats.byModel[modelKey]) {
- stats.byModel[modelKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp };
+ stats.byModel[modelKey] = usageCounter({ rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp });
}
- stats.byModel[modelKey].requests++;
- stats.byModel[modelKey].promptTokens += promptTokens;
- stats.byModel[modelKey].completionTokens += completionTokens;
- stats.byModel[modelKey].cost += entryCost;
+ addUsageFields(stats.byModel[modelKey], vals);
if (new Date(r.timestamp) > new Date(stats.byModel[modelKey].lastUsed)) stats.byModel[modelKey].lastUsed = r.timestamp;
if (r.connectionId) {
const accountName = connectionMap[r.connectionId] || `Account ${r.connectionId.slice(0, 8)}...`;
const accountKey = `${r.model} (${r.provider} - ${accountName})`;
if (!stats.byAccount[accountKey]) {
- stats.byAccount[accountKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, connectionId: r.connectionId, accountName, lastUsed: r.timestamp };
+ stats.byAccount[accountKey] = usageCounter({ rawModel: r.model, provider: providerDisplayName, connectionId: r.connectionId, accountName, lastUsed: r.timestamp });
}
- stats.byAccount[accountKey].requests++;
- stats.byAccount[accountKey].promptTokens += promptTokens;
- stats.byAccount[accountKey].completionTokens += completionTokens;
- stats.byAccount[accountKey].cost += entryCost;
+ addUsageFields(stats.byAccount[accountKey], vals);
if (new Date(r.timestamp) > new Date(stats.byAccount[accountKey].lastUsed)) stats.byAccount[accountKey].lastUsed = r.timestamp;
}
@@ -588,27 +655,27 @@ export async function getUsageStats(period = "all") {
const keyName = keyInfo?.name || r.apiKey.slice(0, 8) + "...";
const akKey = `${r.apiKey}|${r.model}|${r.provider || "unknown"}`;
if (!stats.byApiKey[akKey]) {
- stats.byApiKey[akKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, apiKey: r.apiKey, keyName, apiKeyKey: r.apiKey, lastUsed: r.timestamp };
+ stats.byApiKey[akKey] = usageCounter({ rawModel: r.model, provider: providerDisplayName, apiKey: r.apiKey, keyName, apiKeyKey: r.apiKey, lastUsed: r.timestamp });
}
const ake = stats.byApiKey[akKey];
- ake.requests++; ake.promptTokens += promptTokens; ake.completionTokens += completionTokens; ake.cost += entryCost;
+ addUsageFields(ake, vals);
if (new Date(r.timestamp) > new Date(ake.lastUsed)) ake.lastUsed = r.timestamp;
} else {
if (!stats.byApiKey["local-no-key"]) {
- stats.byApiKey["local-no-key"] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, rawModel: r.model, provider: providerDisplayName, apiKey: null, keyName: "Local (No API Key)", apiKeyKey: "local-no-key", lastUsed: r.timestamp };
+ stats.byApiKey["local-no-key"] = usageCounter({ rawModel: r.model, provider: providerDisplayName, apiKey: null, keyName: "Local (No API Key)", apiKeyKey: "local-no-key", lastUsed: r.timestamp });
}
const ake = stats.byApiKey["local-no-key"];
- ake.requests++; ake.promptTokens += promptTokens; ake.completionTokens += completionTokens; ake.cost += entryCost;
+ addUsageFields(ake, vals);
if (new Date(r.timestamp) > new Date(ake.lastUsed)) ake.lastUsed = r.timestamp;
}
const endpoint = r.endpoint || "Unknown";
const epKey = `${endpoint}|${r.model}|${r.provider || "unknown"}`;
if (!stats.byEndpoint[epKey]) {
- stats.byEndpoint[epKey] = { requests: 0, promptTokens: 0, completionTokens: 0, cost: 0, endpoint, rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp };
+ stats.byEndpoint[epKey] = usageCounter({ endpoint, rawModel: r.model, provider: providerDisplayName, lastUsed: r.timestamp });
}
const epe = stats.byEndpoint[epKey];
- epe.requests++; epe.promptTokens += promptTokens; epe.completionTokens += completionTokens; epe.cost += entryCost;
+ addUsageFields(epe, vals);
if (new Date(r.timestamp) > new Date(epe.lastUsed)) epe.lastUsed = r.timestamp;
}
}
@@ -720,9 +787,12 @@ export async function getRecentLogs(limit = 200) {
const m = r.model || "-";
const account = connMap[r.connectionId] || (r.connectionId ? r.connectionId.slice(0, 8) : "-");
const tk = r.tokens ? parseJson(r.tokens, {}) : {};
- const sent = r.promptTokens ?? tk.prompt_tokens ?? "-";
- const received = r.completionTokens ?? tk.completion_tokens ?? "-";
- return `${ts} | ${m} | ${p} | ${account} | ${sent} | ${received} | ${r.status || "-"}`;
+ const usage = summarizeTokens(tk);
+ const sent = usage.promptTokens || r.promptTokens || tk.prompt_tokens || "-";
+ const received = usage.completionTokens || r.completionTokens || tk.completion_tokens || "-";
+ const cache = usage.cacheReadTokens ? ` | cache_read=${usage.cacheReadTokens}` : "";
+ const reasoning = usage.reasoningTokens ? ` | reasoning=${usage.reasoningTokens}` : "";
+ return `${ts} | ${m} | ${p} | ${account} | ${sent} | ${received}${cache}${reasoning} | ${r.status || "-"}`;
});
} catch (e) {
console.error("[usageRepo] getRecentLogs failed:", e.message);
diff --git a/src/shared/components/UsageStats.js b/src/shared/components/UsageStats.js
index 950a7af9d6..f0bd4b8c7f 100644
--- a/src/shared/components/UsageStats.js
+++ b/src/shared/components/UsageStats.js
@@ -61,6 +61,10 @@ function RecentRequests({ requests = [] }) {
{requests.map((r, i) => {
const ok = !r.status || r.status === "ok" || r.status === "success";
+ const cacheInfo = [
+ r.cacheReadTokens ? `${fmt(r.cacheReadTokens)} cached` : null,
+ r.reasoningTokens ? `${fmt(r.reasoningTokens)} reasoning` : null,
+ ].filter(Boolean).join(" | ");
return (
|
@@ -71,6 +75,7 @@ function RecentRequests({ requests = [] }) {
{fmt(r.promptTokens)}↑
{" "}
{fmt(r.completionTokens)}↓
+ {cacheInfo && {cacheInfo} }
|
|
@@ -89,9 +94,35 @@ function sortData(dataMap, pendingMap = {}, sortBy, sortOrder) {
.map(([key, data]) => {
const totalTokens = (data.promptTokens || 0) + (data.completionTokens || 0);
const totalCost = data.cost || 0;
- const inputCost = totalTokens > 0 ? (data.promptTokens || 0) * (totalCost / totalTokens) : 0;
- const outputCost = totalTokens > 0 ? (data.completionTokens || 0) * (totalCost / totalTokens) : 0;
- return { ...data, key, totalTokens, totalCost, inputCost, outputCost, pending: pendingMap[key] || 0 };
+ const fallbackInputCost = totalTokens > 0 ? (data.promptTokens || 0) * (totalCost / totalTokens) : 0;
+ const fallbackOutputCost = totalTokens > 0 ? (data.completionTokens || 0) * (totalCost / totalTokens) : 0;
+ const hasItemizedCost = [
+ data.inputCost,
+ data.outputCost,
+ data.uncachedInputCost,
+ data.cachedInputCost,
+ data.cacheCreationCost,
+ ].some((value) => Number(value || 0) > 0);
+ const inputCost = hasItemizedCost ? (data.inputCost || 0) : fallbackInputCost;
+ const outputCost = hasItemizedCost ? (data.outputCost || 0) : fallbackOutputCost;
+ const cacheReadTokens = data.cacheReadTokens || 0;
+ const cacheCreationTokens = data.cacheCreationTokens || 0;
+ const hasTokenBreakdown = (data.uncachedPromptTokens || cacheReadTokens || cacheCreationTokens) > 0;
+ const uncachedPromptTokens = hasTokenBreakdown
+ ? (data.uncachedPromptTokens || 0)
+ : Math.max(0, (data.promptTokens || 0) - cacheReadTokens - cacheCreationTokens);
+ return {
+ ...data,
+ key,
+ totalTokens,
+ totalCost,
+ inputCost,
+ outputCost,
+ uncachedPromptTokens,
+ cacheReadTokens,
+ cacheCreationTokens,
+ pending: pendingMap[key] || 0,
+ };
})
.sort((a, b) => {
let valA = a[sortBy];
@@ -114,6 +145,19 @@ function getGroupKey(item, keyField) {
}
}
+// Per-row cache/reasoning token and cost fields that groupDataByKey sums into each group summary.
+const USAGE_SUMMARY_FIELDS = [
+  // token buckets
+  "uncachedPromptTokens", "cacheReadTokens", "cacheCreationTokens", "reasoningTokens",
+  // itemized costs
+  "uncachedInputCost", "cachedInputCost", "cacheCreationCost", "visibleOutputCost", "reasoningCost",
+  // savings
+  "cacheSavings",
+];
+
function groupDataByKey(data, keyField) {
if (!Array.isArray(data)) return [];
const groups = {};
@@ -135,6 +179,9 @@ function groupDataByKey(data, keyField) {
s.inputCost += item.inputCost || 0;
s.outputCost += item.outputCost || 0;
s.pending += item.pending || 0;
+ for (const field of USAGE_SUMMARY_FIELDS) {
+ s[field] = (s[field] || 0) + (item[field] || 0);
+ }
if (item.lastUsed && (!s.lastUsed || new Date(item.lastUsed) > new Date(s.lastUsed))) {
s.lastUsed = item.lastUsed;
}
diff --git a/tests/unit/pricing-cache-breakdown.test.js b/tests/unit/pricing-cache-breakdown.test.js
new file mode 100644
index 0000000000..07e6a8efc7
--- /dev/null
+++ b/tests/unit/pricing-cache-breakdown.test.js
@@ -0,0 +1,71 @@
+import { describe, expect, it } from "vitest";
+import {
+ MODEL_PRICING,
+ calculateCostBreakdownFromTokens,
+ calculateCostFromTokens,
+ getPricingForModel,
+} from "../../open-sse/providers/pricing.js";
+
+// Unit tests for the pricing table and the cached/uncached cost breakdown helpers
+// exported from open-sse/providers/pricing.js. Rates are expressed per 1,000,000 tokens.
+describe("pricing cache breakdown", () => {
+ it("uses exact standard API pricing for current OpenAI models", () => {
+ expect(MODEL_PRICING["gpt-5.5"]).toMatchObject({ input: 5.00, cached: 0.50, output: 30.00 });
+ expect(MODEL_PRICING["gpt-5.4"]).toMatchObject({ input: 2.50, cached: 0.25, output: 15.00 });
+ expect(MODEL_PRICING["gpt-5.4-mini"]).toMatchObject({ input: 0.75, cached: 0.075, output: 4.50 });
+ expect(MODEL_PRICING["gpt-5"]).toMatchObject({ input: 1.25, cached: 0.125, output: 10.00 });
+ expect(MODEL_PRICING["gpt-4.1"]).toMatchObject({ input: 2.00, cached: 0.50, output: 8.00 });
+ });
+
+ it("does not price full gpt-4o variants as gpt-4o-mini", () => {
+ // Dated snapshot names must resolve to their own family, not to the "-mini" entry.
+ expect(getPricingForModel("openai", "gpt-4o-2024-08-06")).toMatchObject({ input: 2.50, cached: 1.25, output: 10.00 });
+ expect(getPricingForModel("openai", "gpt-4o-mini-2024-07-18")).toMatchObject({ input: 0.15, cached: 0.075, output: 0.60 });
+ });
+
+ it("charges cached and uncached input at different rates", () => {
+ // 800 of the 1000 prompt tokens are cache reads, so only 200 are billed at the full input rate.
+ const pricing = getPricingForModel("openai", "gpt-5.4-mini");
+ const breakdown = calculateCostBreakdownFromTokens({
+ prompt_tokens: 1000,
+ completion_tokens: 100,
+ cache_read_input_tokens: 800,
+ }, pricing);
+
+ expect(breakdown.promptTokens).toBe(1000);
+ expect(breakdown.uncachedPromptTokens).toBe(200);
+ expect(breakdown.cacheReadTokens).toBe(800);
+ expect(breakdown.uncachedInputCost).toBeCloseTo((200 * 0.75) / 1_000_000, 12);
+ expect(breakdown.cachedInputCost).toBeCloseTo((800 * 0.075) / 1_000_000, 12);
+ expect(breakdown.outputCost).toBeCloseTo((100 * 4.50) / 1_000_000, 12);
+ // The scalar helper must agree with the sum of the itemized parts.
+ expect(calculateCostFromTokens({
+ prompt_tokens: 1000,
+ completion_tokens: 100,
+ cache_read_input_tokens: 800,
+ }, pricing)).toBeCloseTo(((200 * 0.75) + (800 * 0.075) + (100 * 4.50)) / 1_000_000, 12);
+ });
+
+ it("treats reasoning tokens as included output details by default", () => {
+ // reasoning_tokens are reported but not billed on top of completion_tokens:
+ // the expected total is input + output only.
+ const pricing = getPricingForModel("openai", "gpt-5.5");
+ const breakdown = calculateCostBreakdownFromTokens({
+ prompt_tokens: 1000,
+ completion_tokens: 100,
+ reasoning_tokens: 40,
+ }, pricing);
+
+ expect(breakdown.reasoningTokens).toBe(40);
+ expect(breakdown.outputCost).toBeCloseTo((100 * 30.00) / 1_000_000, 12);
+ expect(breakdown.totalCost).toBeCloseTo(((1000 * 5.00) + (100 * 30.00)) / 1_000_000, 12);
+ });
+
+ it("supports providers that report input and cache buckets separately", () => {
+ // With input_tokens/output_tokens naming the cache buckets are not part of input_tokens,
+ // so promptTokens is expected to be the sum 1000 + 800 + 200.
+ const breakdown = calculateCostBreakdownFromTokens({
+ input_tokens: 1000,
+ output_tokens: 100,
+ cache_read_input_tokens: 800,
+ cache_creation_input_tokens: 200,
+ }, { input: 3.00, cached: 0.30, cache_creation: 3.75, output: 15.00 });
+
+ expect(breakdown.promptTokens).toBe(2000);
+ expect(breakdown.uncachedPromptTokens).toBe(1000);
+ expect(breakdown.cacheReadTokens).toBe(800);
+ expect(breakdown.cacheCreationTokens).toBe(200);
+ expect(breakdown.inputCost).toBeCloseTo(((1000 * 3.00) + (800 * 0.30) + (200 * 3.75)) / 1_000_000, 12);
+ });
+});