From 643fd1293d75fe4ec5200fcd620f1238e3aca8ba Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Sun, 24 May 2026 16:43:11 +0000 Subject: [PATCH] feat: daily budget throttling with rate-aware proxy sleep --- packages/gateway/src/cost-tracker.ts | 303 +++++++++++++++++- packages/gateway/src/pipeline.ts | 49 +++ packages/gateway/src/server.ts | 6 + packages/gateway/src/translate/types.ts | 3 + packages/gateway/src/ui.ts | 74 +++++ packages/gateway/test/budget-throttle.test.ts | 272 ++++++++++++++++ 6 files changed, 705 insertions(+), 2 deletions(-) create mode 100644 packages/gateway/test/budget-throttle.test.ts diff --git a/packages/gateway/src/cost-tracker.ts b/packages/gateway/src/cost-tracker.ts index d2639cad..4852c676 100644 --- a/packages/gateway/src/cost-tracker.ts +++ b/packages/gateway/src/cost-tracker.ts @@ -12,7 +12,7 @@ import { getModelEntrySync, getWorkerModel } from "./worker-model"; import { AUTOCOMPACT_THRESHOLD } from "./compaction"; -import { log, data, temporal, loadAllSessionCosts } from "@loreai/core"; +import { log, data, temporal, loadAllSessionCosts, db, getKV, setKV } from "@loreai/core"; // --------------------------------------------------------------------------- // Types @@ -72,6 +72,14 @@ export type SessionCosts = { avoidedCompactionCost: number; }; + // --- Budget throttle diagnostics --- + throttle: { + /** Number of requests that were throttled (delayed). */ + events: number; + /** Total delay imposed in milliseconds. */ + totalDelayMs: number; + }; + /** Shadow context counter — tracks virtual uncompressed context growth for compaction estimation. */ _shadowContextTokens: number; /** Previous turn's actual (compressed) input tokens — for delta estimation. 
*/
@@ -153,6 +161,305 @@ let historicalCache: HistoricalEstimates | null = null;
 let historicalCacheAt = 0;
 const HISTORICAL_CACHE_TTL_MS = 60_000; // 1 minute
 
+// ---------------------------------------------------------------------------
+// Daily budget throttle state
+// ---------------------------------------------------------------------------
+
+/** Cumulative USD spend for the current UTC day (conversation + worker + warmup). */
+let dailySpend = 0;
+
+/** UTC date string (YYYY-MM-DD) for which `dailySpend` is valid. */
+let dailySpendDate = "";
+
+/** EMA of cost-per-hour (USD/hr) — tracks spending velocity across all sessions. */
+let costRateEMA = 0;
+
+/** Timestamp (ms) of the last conversation turn that updated the EMA. */
+let costRateLastUpdate = 0;
+
+/** Whether the cost-rate EMA has been seeded (first turn sets it directly). */
+let costRateSeeded = false;
+
+/**
+ * Conversation cost (USD) recorded since `costRateLastUpdate` that has not been
+ * blended into the EMA yet because the turns arrived in a rapid burst.
+ */
+let costRatePendingCost = 0;
+
+/**
+ * Base alpha for cost-rate EMA. Slower than the output-token EMA (0.3)
+ * for spike resistance — one expensive turn only moves the EMA ~15%.
+ */
+const COST_RATE_ALPHA = 0.15;
+
+/** Maximum throttle delay in seconds. */
+const MAX_THROTTLE_DELAY = 60;
+
+/**
+ * Budget fraction below which no throttling occurs, regardless of rate.
+ * At 50% spend, no friction is applied even if the rate is high.
+ */
+const THROTTLE_FLOOR = 0.50;
+
+/**
+ * Reset the daily spend counter if the UTC day has changed.
+ * Called before every cost increment — a single string comparison.
+ */
+function maybeResetDay(): void {
+  const today = new Date().toISOString().slice(0, 10);
+  if (today !== dailySpendDate) {
+    dailySpend = 0;
+    dailySpendDate = today;
+  }
+}
+
+/**
+ * Update the cost-rate EMA after a conversation turn.
+ *
+ * Computes instantaneous rate as (pendingCost / hoursSinceLastUpdate),
+ * then blends into the EMA with time-gap-adjusted alpha.
+ * Worker costs are excluded — we only track the user-facing request velocity.
+ *
+ * Turns arriving less than 0.0001 h (~0.36 s) after the last EMA update are
+ * folded into the next update: their cost is kept pending and the clock is
+ * left untouched, so burst spend still counts toward the velocity.
+ *
+ * @param turnCost - USD cost of the conversation turn that just completed
+ */
+function updateCostRate(turnCost: number): void {
+  const now = Date.now();
+
+  if (!costRateSeeded) {
+    // First turn: seed with a conservative estimate.
+    // Assume 1 turn per 2 minutes = 30 turns/hr as baseline cadence.
+    costRateEMA = turnCost * 30;
+    costRateLastUpdate = now;
+    costRatePendingCost = 0;
+    costRateSeeded = true;
+    return;
+  }
+
+  costRatePendingCost += turnCost;
+  const elapsedHours = (now - costRateLastUpdate) / 3_600_000;
+
+  if (elapsedHours < 0.0001) {
+    // Burst (tool-use auto-continuation): too short an interval to yield a
+    // meaningful rate. Keep the cost pending instead of discarding it.
+    return;
+  }
+
+  // Instantaneous rate: cost accrued since the last update / elapsed time
+  const instantRate = costRatePendingCost / elapsedHours;
+  costRateLastUpdate = now;
+  costRatePendingCost = 0;
+
+  // Time-gap adjusted alpha: after long gaps, the EMA should decay toward
+  // the (low) instantaneous rate faster. After a 1-hour gap where no money
+  // was spent, the old EMA is stale.
+  // Reference interval = 1/30 hr ≈ 2 min (typical turn cadence).
+  const referenceHours = 1 / 30;
+  const effectiveAlpha = 1 - Math.pow(1 - COST_RATE_ALPHA, elapsedHours / referenceHours);
+
+  costRateEMA = costRateEMA * (1 - effectiveAlpha) + instantRate * effectiveAlpha;
+}
+
+/**
+ * Bootstrap the daily spend counter from persisted DB data on startup.
+ *
+ * Queries `session_state` for the summed cost of sessions updated since
+ * midnight UTC and overwrites the in-memory counter with that total.
+ * Live in-memory sessions are not consulted. On a query failure the error
+ * is logged and the counter starts from 0.
+ * Call once during gateway startup.
+ */
+export function bootstrapDailySpend(): void {
+  const today = new Date();
+  const todayStr = today.toISOString().slice(0, 10);
+  dailySpendDate = todayStr;
+
+  // Midnight UTC today as epoch ms
+  const midnightMs = Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), today.getUTCDate());
+
+  // Sum persisted session costs updated today.
+  // Note: conversation_cost and worker_cost are cumulative session totals,
+  // so multi-day sessions may overestimate today's spend. In practice most
+  // sessions don't span midnight boundaries (they idle out).
+  try {
+    const row = db()
+      .query(
+        `SELECT COALESCE(SUM(conversation_cost + worker_cost), 0) as total
+         FROM session_state
+         WHERE updated_at >= ?`,
+      )
+      .get(midnightMs) as { total: number } | null;
+    dailySpend = row?.total ?? 0;
+  } catch (err) {
+    log.error("budget-throttle: failed to bootstrap daily spend from DB", err);
+    dailySpend = 0;
+  }
+
+  if (dailySpend > 0) {
+    log.info(`budget-throttle: bootstrapped daily spend=$${dailySpend.toFixed(4)} for ${todayStr}`);
+  }
+}
+
+/**
+ * Estimate the USD cost of a request before sending it upstream.
+ *
+ * Input cost is exact (token count known from gradient transform).
+ * Output cost uses a conservative heuristic: 25% of input tokens, capped
+ * at 16K tokens. Actual median is 1-4% — the estimate deliberately
+ * overestimates to avoid budget overshoot.
+ */
+export function estimateRequestCost(model: string, inputTokens: number): number {
+  const pricing = getPricingSync(model);
+  const inputCost = (inputTokens / 1_000_000) * pricing.input;
+  // Conservative output estimate — 25% of input, capped at 16K tokens
+  const estOutputTokens = Math.min(inputTokens * 0.25, 16_384);
+  const outputCost = (estOutputTokens / 1_000_000) * pricing.output;
+  return inputCost + outputCost;
+}
+
+/**
+ * Compute the throttle delay for a request given current budget state.
+ *
+ * Two factors multiplied together (then scaled by MAX_THROTTLE_DELAY):
+ *   1. Budget pressure: ((spendFraction - THROTTLE_FLOOR) / (1 - THROTTLE_FLOOR))² — gentle ramp
+ *   2. Rate overshoot: tanh(min(costRatePerHour / targetRate - 1, 10) / 3) — smooth S-curve
+ *
+ * Returns 0 when:
+ *   - No budget configured (dailyBudget ≤ 0)
+ *   - Spend below THROTTLE_FLOOR (50%)
+ *   - Current rate is sustainable (projected spend ≤ budget)
+ *
+ * Returns MAX_THROTTLE_DELAY when the budget is exhausted (spend ≥ budget).
+ *
+ * @param dailySpendUSD - Spend so far today in USD
+ * @param dailyBudget - Daily budget in USD (≤ 0 = disabled)
+ * @param costRatePerHour - Current spend velocity in USD/hr
+ * @param hoursRemaining - Hours left in the budget day (floored at 0.5)
+ * @returns Delay in seconds (0 = no throttle, max MAX_THROTTLE_DELAY)
+ */
+export function computeThrottleDelay(
+  dailySpendUSD: number,
+  dailyBudget: number,
+  costRatePerHour: number,
+  hoursRemaining: number,
+): number {
+  if (dailyBudget <= 0) return 0;
+
+  const spendFraction = dailySpendUSD / dailyBudget;
+  if (spendFraction < THROTTLE_FLOOR) return 0;
+
+  // Budget fully exhausted — apply max delay regardless of rate.
+  // Without this, a user who exhausts their budget then goes idle (EMA → 0)
+  // would get zero delay on return because 0 <= targetRate(0).
+  if (spendFraction >= 1.0) return MAX_THROTTLE_DELAY;
+
+  // Target rate = remaining budget / remaining hours.
+  // Floor hoursRemaining at 0.5 to avoid division explosion near midnight.
+  const remainingBudget = Math.max(0, dailyBudget - dailySpendUSD);
+  const safeHours = Math.max(hoursRemaining, 0.5);
+  const targetRate = remainingBudget / safeHours;
+
+  // If current rate is sustainable, no throttle
+  if (costRatePerHour <= targetRate) return 0;
+
+  // Overshoot ratio: how much faster than sustainable (clamped to [0, 10])
+  const overshoot = Math.min((costRatePerHour / targetRate) - 1, 10);
+
+  // Budget pressure: maps [THROTTLE_FLOOR, 1.0] → [0, 1], squared for gentle ramp
+  const pressure = (spendFraction - THROTTLE_FLOOR) / (1 - THROTTLE_FLOOR);
+
+  // delay = MAX_THROTTLE_DELAY × pressure² × tanh(overshoot / 3)
+  // tanh provides smooth S-curve: overshoot=1 → 0.32, 3 → 0.76, 10 → ~1.0
+  const delay = MAX_THROTTLE_DELAY * pressure * pressure * Math.tanh(overshoot / 3);
+
+  return Math.min(Math.round(delay * 10) / 10, MAX_THROTTLE_DELAY);
+}
+
+/**
+ * Get the throttle delay for the next request, factoring in current daily
+ * spend, cost-rate EMA, and time remaining in the UTC day.
+ *
+ * @param dailyBudget - Configured daily budget in USD (0 = disabled)
+ * @param estimatedCost - Estimated cost of the upcoming request
+ * @returns Delay in seconds (0 = no throttle)
+ */
+export function getDailyThrottleDelay(dailyBudget: number, estimatedCost: number): number {
+  if (dailyBudget <= 0) return 0;
+
+  maybeResetDay();
+  const projectedSpend = dailySpend + estimatedCost;
+
+  // Hours remaining in the UTC day
+  const now = new Date();
+  const endOfDay = new Date(now);
+  endOfDay.setUTCHours(24, 0, 0, 0);
+  const hoursRemaining = (endOfDay.getTime() - now.getTime()) / 3_600_000;
+
+  return computeThrottleDelay(projectedSpend, dailyBudget, costRateEMA, hoursRemaining);
+}
+
+/** Get current daily spend and date (for UI / diagnostics). */
+export function getDailySpend(): { date: string; spend: number } {
+  maybeResetDay();
+  return { date: dailySpendDate, spend: dailySpend };
+}
+
+/** Get current cost-rate EMA in USD/hr (for UI / diagnostics). */
+export function getCostRate(): number {
+  return costRateEMA;
+}
+
+/** KV key for the persisted daily budget value. */
+const DAILY_BUDGET_KV_KEY = "daily_budget";
+
+/**
+ * Get the effective daily budget in USD.
+ *
+ * Resolution priority:
+ *   1. `LORE_DAILY_BUDGET` env var (override for automation / CI)
+ *   2. DB-persisted value from `kv_meta` (set via UI)
+ *   3. 0 (disabled)
+ */
+export function getDailyBudget(): number {
+  const envVal = process.env.LORE_DAILY_BUDGET;
+  if (envVal) {
+    const parsed = parseFloat(envVal);
+    if (Number.isFinite(parsed) && parsed > 0) return parsed;
+  }
+  try {
+    const dbVal = getKV(DAILY_BUDGET_KV_KEY);
+    if (dbVal) {
+      const parsed = parseFloat(dbVal);
+      if (Number.isFinite(parsed) && parsed > 0) return parsed;
+    }
+  } catch {
+    // DB not initialized yet (e.g., early startup) — fall through
+  }
+  return 0;
+}
+
+/**
+ * Set the daily budget in the DB (persisted across restarts).
+ * Pass 0 to disable.
+ */
+export function setDailyBudget(budgetUSD: number): void {
+  if (!Number.isFinite(budgetUSD) || budgetUSD < 0) budgetUSD = 0;
+  setKV(DAILY_BUDGET_KV_KEY, String(budgetUSD));
+}
+
+/** Reset daily budget throttle state (for testing). */
+export function resetDailyBudgetState(): void {
+  dailySpend = 0;
+  dailySpendDate = "";
+  costRateEMA = 0;
+  costRateLastUpdate = 0;
+  costRateSeeded = false;
+  costRatePendingCost = 0;
+}
+
 // ---------------------------------------------------------------------------
 // Initialization
 // ---------------------------------------------------------------------------
@@ -183,6 +466,7 @@ function emptyCosts(): SessionCosts { avoidedCompactions: 0, avoidedCompactionCost: 0, }, + throttle: { events: 0, totalDelayMs: 0 }, _shadowContextTokens: 0, _lastActualInput: 0, _lastOutputTokens: 0, @@ -293,6 +577,11 @@ export function recordConversationCost( costs.conversation.cacheReadTokens += usage.cache_read_input_tokens ?? 0; costs.conversation.cacheWriteTokens += usage.cache_creation_input_tokens ?? 
0; costs.conversation.turns++; + + // Daily budget throttle: accumulate spend and update velocity EMA + maybeResetDay(); + dailySpend += call.total; + updateCostRate(call.total); } /** Worker ID → cost bucket mapping. */ @@ -328,6 +617,10 @@ export function recordWorkerCost( const fullCost = computeCallCost(model, usage, "direct"); costs.batchSavings += fullCost.total - call.total; } + + // Daily budget throttle: accumulate worker spend (no EMA update — workers excluded from velocity) + maybeResetDay(); + dailySpend += call.total; } /** @@ -349,8 +642,13 @@ export function recordWarmupCost( // Anthropic doubles cache_write pricing for 1h TTL const cacheWriteRate = ttl === "1h" ? pricing.cache_write * 2 : pricing.cache_write; const writeCost = (cacheCreationTokens / 1_000_000) * cacheWriteRate; - costs.workers.warmup.cost += readCost + writeCost; + const warmupTotal = readCost + writeCost; + costs.workers.warmup.cost += warmupTotal; costs.workers.warmup.calls++; + + // Daily budget throttle: accumulate warmup spend (no EMA update) + maybeResetDay(); + dailySpend += warmupTotal; } // --------------------------------------------------------------------------- @@ -581,6 +879,7 @@ export function deleteSessionCosts(sessionID: string): void { /** Clear all sessions (for testing). 
*/ export function clearAllCosts(): void { sessions.clear(); + resetDailyBudgetState(); } // --------------------------------------------------------------------------- diff --git a/packages/gateway/src/pipeline.ts b/packages/gateway/src/pipeline.ts index b96c8f9d..655bf546 100644 --- a/packages/gateway/src/pipeline.ts +++ b/packages/gateway/src/pipeline.ts @@ -33,6 +33,7 @@ import { recordCacheUsage, calibrate, getLastTransformedCount, + getLastTransformEstimate, getLastLayer, onIdleResume, consumeCameOutOfIdle, @@ -158,6 +159,12 @@ import { updateShadowContext, recordWarmupHit, recordTTLSavings, + getDailyThrottleDelay, + estimateRequestCost, + getDailySpend, + getDailyBudget, + getCostRate, + getSessionCosts, } from "./cost-tracker"; import { RECALL_GATEWAY_TOOL, @@ -949,6 +956,7 @@ function getOrCreateSession( } sessions.set(sessionID, state); } + state.prevRequestTime = state.lastRequestTime; state.lastRequestTime = Date.now(); // Ensure recallStore exists (upgrade from older session state) @@ -3493,6 +3501,47 @@ async function handleConversationTurn( conversationTTL: resolvedConversationTTL, }; + // --- Daily budget throttle --- + // Apply an invisible proxy-level sleep to slow the agent when approaching + // the daily budget. The sleep is capped to avoid causing cache busts + // (which would be self-defeating — costing more than the throttle saved). + const dailyBudget = getDailyBudget(); + if (dailyBudget > 0) { + const inputTokens = getLastTransformEstimate(sessionID) + || Math.ceil(JSON.stringify(modifiedReq.messages).length / 3); + const estimatedCost = estimateRequestCost(req.model, inputTokens); + const delay = getDailyThrottleDelay(dailyBudget, estimatedCost); + + if (delay > 0) { + // Cap delay to avoid pushing the next request past the cache TTL boundary. + // Use prevRequestTime (the request before this one) to compute how much + // of the cache TTL window has already been consumed. NOTE(review): once elapsed >= ttlMs the cap below evaluates to 0, so the throttle is skipped entirely — even when getDailyThrottleDelay returned the maximum for an exhausted budget. Presumably the cache is already cold at that point, so there is nothing left to protect; confirm the zero cap is intended. + const ttlMs = resolvedConversationTTL === "1h" ? 
3_600_000 : 300_000; + const elapsed = sessionState.prevRequestTime + ? Date.now() - sessionState.prevRequestTime + : 0; // first request — no prior timing, full TTL available + const maxSafe = Math.max(0, (ttlMs - elapsed) * 0.50) / 1000; + const actualDelay = Math.min(delay, maxSafe); + + if (actualDelay > 0.5) { // don't bother sleeping < 500ms + log.info( + `budget-throttle: sleeping ${actualDelay.toFixed(1)}s ` + + `session=${sessionID.slice(0, 16)} ` + + `spend=$${getDailySpend().spend.toFixed(2)} ` + + `rate=$${getCostRate().toFixed(2)}/hr`, + ); + await new Promise((resolve) => setTimeout(resolve, actualDelay * 1000)); + + // Track throttle event on session costs + const costs = getSessionCosts(sessionID); + if (costs) { + costs.throttle.events++; + costs.throttle.totalDelayMs += actualDelay * 1000; + } + } + } + } + // Start gen_ai.chat span before the upstream call so it captures real // wall-clock duration (including network latency and streaming time). // The span is ended in postResponse() after usage attributes are set. diff --git a/packages/gateway/src/server.ts b/packages/gateway/src/server.ts index 61263ab7..c809053e 100644 --- a/packages/gateway/src/server.ts +++ b/packages/gateway/src/server.ts @@ -12,6 +12,7 @@ * Uses `Bun.serve()` — this package targets Bun exclusively. */ import { DEFAULT_PORT, type GatewayConfig } from "./config"; +import { bootstrapDailySpend, getDailyBudget } from "./cost-tracker"; import type { GatewayRequest } from "./translate/types"; import { parseAnthropicRequest } from "./translate/anthropic"; import { parseOpenAIRequest, buildOpenAIResponse } from "./translate/openai"; @@ -278,6 +279,11 @@ export function startServer(config: GatewayConfig): { config = { ...config, port: DEFAULT_PORT }; } + // Bootstrap the daily spend counter from DB (recovers today's spend after restart). NOTE(review): skipped when no budget is configured at startup, so a budget set later via the UI will not include spend persisted before this process started — confirm that is acceptable. + if (getDailyBudget() > 0) { + bootstrapDailySpend(); + } + // Shared fetch handler for all server instances. 
const fetch = async (req: Request): Promise => { const url = new URL(req.url); diff --git a/packages/gateway/src/translate/types.ts b/packages/gateway/src/translate/types.ts index 9661363d..c538b3c3 100644 --- a/packages/gateway/src/translate/types.ts +++ b/packages/gateway/src/translate/types.ts @@ -227,6 +227,9 @@ export type SessionState = { fingerprint: string; /** Unix timestamp (ms) of the last request in this session. */ lastRequestTime: number; + /** Unix timestamp (ms) of the request before the current one — used by budget + * throttle to compute elapsed time since the previous turn for cache TTL safety. */ + prevRequestTime?: number; /** Unix timestamp (ms) of the last user-initiated turn — excludes tool-use * auto-continuations. Used exclusively for inter-turn gap histogram * recording (survival analysis). */ diff --git a/packages/gateway/src/ui.ts b/packages/gateway/src/ui.ts index 0b7b9685..d598f1a9 100644 --- a/packages/gateway/src/ui.ts +++ b/packages/gateway/src/ui.ts @@ -29,6 +29,10 @@ import { totalWorkerCost, totalSavings, costWithoutLore, + getDailySpend, + getDailyBudget, + setDailyBudget, + getCostRate, type SessionCosts, } from "./cost-tracker"; import { getActiveSessions } from "./pipeline"; @@ -336,6 +340,14 @@ function renderCostSummary(sessionId: string): string { `; } + // Budget throttle diagnostics + if (costs.throttle.events > 0) { + const totalDelaySec = (costs.throttle.totalDelayMs / 1000).toFixed(1); + html += `
+ Budget throttle: ${costs.throttle.events} event${costs.throttle.events === 1 ? "" : "s"}, ${totalDelaySec}s total delay +
`; + } + html += ``; return html; } @@ -2109,6 +2121,59 @@ function pageCosts(): string { `; } + // --- Daily budget status + settings --- + const currentBudget = getDailyBudget(); + { + const { spend, date } = getDailySpend(); + const rate = getCostRate(); + + body += `
+

Daily Budget

`; + + if (currentBudget > 0) { + const budgetPct = Math.min((spend / currentBudget) * 100, 100); + + // Count total throttle events across live sessions + let totalThrottleEvents = 0; + let totalThrottleDelayMs = 0; + for (const [, c] of allCosts) { + totalThrottleEvents += c.throttle.events; + totalThrottleDelayMs += c.throttle.totalDelayMs; + } + + body += renderCostBar({ + title: `Budget (${date})`, + value: `${formatUSD(spend)} / ${formatUSD(currentBudget)}`, + percent: budgetPct, + tint: budgetPct < 60 ? "bar-green" : budgetPct < 85 ? "bar-amber" : "bar-red", + detailLeftHtml: `Rate: ${formatUSD(rate)}/hr`, + detailRightHtml: totalThrottleEvents > 0 + ? `Throttled: ${totalThrottleEvents} req, ${(totalThrottleDelayMs / 1000).toFixed(1)}s delay` + : "", + }); + } else { + body += `

No daily budget set. Configure one to automatically throttle spending.

`; + } + + // Budget settings form + const envOverride = process.env.LORE_DAILY_BUDGET; + if (envOverride) { + body += `
+ Overridden by env var LORE_DAILY_BUDGET=${esc(envOverride)} +
`; + } else { + body += `
+ + + + ${currentBudget > 0 ? `` : ""} +
`; + } + + body += `
`; + } + // Summary stats (compact pills for secondary metrics) // Trend arrow: compare live savings rate vs historical average. // Both rates use the same formula: netSavings / counterfactual, @@ -2691,6 +2756,15 @@ export async function handleUIRequest( return redirect(`/ui/projects/${renameProjectMatch.id}`); } + // Set daily budget + if (pathname === "/ui/api/budget") { + const formData = await req.formData(); + const budgetStr = formData.get("budget"); + const budgetVal = parseFloat(typeof budgetStr === "string" ? budgetStr : "0") || 0; + setDailyBudget(budgetVal); + return redirect("/ui/costs"); + } + // Set warming mode for a live session const warmingMode = matchRoute(pathname, "/ui/api/warming/:sessionId/:mode"); if (warmingMode) { diff --git a/packages/gateway/test/budget-throttle.test.ts b/packages/gateway/test/budget-throttle.test.ts new file mode 100644 index 00000000..9c21f5ff --- /dev/null +++ b/packages/gateway/test/budget-throttle.test.ts @@ -0,0 +1,272 @@ +import { describe, test, expect, beforeEach } from "bun:test"; +import { + computeThrottleDelay, + getDailyThrottleDelay, + getDailySpend, + getCostRate, + estimateRequestCost, + resetDailyBudgetState, + recordConversationCost, + clearAllCosts, + getSessionCosts, +} from "../src/cost-tracker"; + +describe("budget-throttle", () => { + beforeEach(() => { + clearAllCosts(); + }); + + // --------------------------------------------------------------------------- + // computeThrottleDelay — pure function, no global state dependency + // --------------------------------------------------------------------------- + describe("computeThrottleDelay", () => { + test("returns 0 when budget is 0 (disabled)", () => { + expect(computeThrottleDelay(5, 0, 10, 12)).toBe(0); + }); + + test("returns 0 when budget is negative (disabled)", () => { + expect(computeThrottleDelay(5, -1, 10, 12)).toBe(0); + }); + + test("returns 0 when spend is below 50% floor", () => { + // $4 of $10 = 40% — below THROTTLE_FLOOR (50%) + 
expect(computeThrottleDelay(4, 10, 100, 12)).toBe(0); + }); + + test("returns 0 when spend is exactly at 50% floor", () => { + // $5 of $10 = 50% — exactly at floor + expect(computeThrottleDelay(5, 10, 100, 12)).toBe(0); + }); + + test("returns 0 when rate is sustainable", () => { + // $7 of $10 = 70%, remaining = $3 over 12h = $0.25/hr target + // Current rate = $0.10/hr — sustainable + expect(computeThrottleDelay(7, 10, 0.1, 12)).toBe(0); + }); + + test("applies small delay at 60% spend with 2x overshoot", () => { + // $6 of $10 = 60%, remaining = $4 over 12h = $0.33/hr target + // Current rate = $0.67/hr (~2x overshoot) + const delay = computeThrottleDelay(6, 10, 0.67, 12); + expect(delay).toBeGreaterThan(0); + expect(delay).toBeLessThan(3); // should be well under 3s + }); + + test("applies moderate delay at 80% spend with 2x overshoot", () => { + // $8 of $10 = 80%, remaining = $2 over 10h = $0.20/hr target + // Current rate = $0.40/hr (2x overshoot) + const delay = computeThrottleDelay(8, 10, 0.4, 10); + expect(delay).toBeGreaterThan(3); + expect(delay).toBeLessThan(15); + }); + + test("applies large delay at 80% spend with 5x overshoot", () => { + // $8 of $10 = 80%, remaining = $2 over 10h = $0.20/hr target + // Current rate = $1.00/hr (5x overshoot) + const delay = computeThrottleDelay(8, 10, 1.0, 10); + expect(delay).toBeGreaterThan(10); + expect(delay).toBeLessThan(30); + }); + + test("approaches max delay at 95% spend with 3x overshoot", () => { + // $9.50 of $10 = 95%, remaining = $0.50 over 10h = $0.05/hr target + // Current rate = $0.15/hr (3x overshoot) + const delay = computeThrottleDelay(9.5, 10, 0.15, 10); + expect(delay).toBeGreaterThan(20); + expect(delay).toBeLessThanOrEqual(60); + }); + + test("never exceeds MAX_THROTTLE_DELAY (60s)", () => { + // Extreme: 100% spend, 10x overshoot + const delay = computeThrottleDelay(10, 10, 100, 1); + expect(delay).toBeLessThanOrEqual(60); + }); + + test("is monotonically increasing with spend fraction", () 
=> { + const rate = 2; + const hours = 10; + const budget = 10; + let prevDelay = 0; + for (let spend = 5; spend <= 10; spend += 0.5) { + const delay = computeThrottleDelay(spend, budget, rate, hours); + expect(delay).toBeGreaterThanOrEqual(prevDelay); + prevDelay = delay; + } + }); + + test("is monotonically increasing with cost rate", () => { + const spend = 7; + const budget = 10; + const hours = 10; + // Target rate = $3 / 10h = $0.30/hr + let prevDelay = 0; + for (let rate = 0.3; rate <= 5; rate += 0.5) { + const delay = computeThrottleDelay(spend, budget, rate, hours); + expect(delay).toBeGreaterThanOrEqual(prevDelay); + prevDelay = delay; + } + }); + + test("floors hoursRemaining at 0.5 to avoid division explosion", () => { + // Near midnight: only 0.01 hours remaining (36 seconds) + // Without floor this would make targetRate insanely high + const delay = computeThrottleDelay(9, 10, 5, 0.01); + // Should still compute a reasonable delay, not NaN or Infinity + expect(Number.isFinite(delay)).toBe(true); + expect(delay).toBeGreaterThanOrEqual(0); + expect(delay).toBeLessThanOrEqual(60); + }); + + test("returns max delay when budget is exhausted regardless of rate", () => { + // Spent more than budget — even if rate is 0 (idle return), max delay applies + expect(computeThrottleDelay(12, 10, 0, 10)).toBe(60); + expect(computeThrottleDelay(10, 10, 0, 10)).toBe(60); + }); + + test("smooth curve — no cliff edges between adjacent inputs", () => { + const budget = 10; + const rate = 2; + const hours = 10; + // Check that adjacent 0.1% spend increments don't produce >5s jumps + for (let pct = 0.50; pct < 1.0; pct += 0.001) { + const d1 = computeThrottleDelay(pct * budget, budget, rate, hours); + const d2 = computeThrottleDelay((pct + 0.001) * budget, budget, rate, hours); + expect(Math.abs(d2 - d1)).toBeLessThan(5); + } + }); + }); + + // --------------------------------------------------------------------------- + // estimateRequestCost + // 
--------------------------------------------------------------------------- + describe("estimateRequestCost", () => { + test("returns positive cost for known model", () => { + const cost = estimateRequestCost("claude-sonnet-4-20250514", 100_000); + expect(cost).toBeGreaterThan(0); + }); + + test("input tokens contribute to cost", () => { + const small = estimateRequestCost("claude-sonnet-4-20250514", 10_000); + const large = estimateRequestCost("claude-sonnet-4-20250514", 100_000); + expect(large).toBeGreaterThan(small); + }); + + test("output estimate is capped at 16K tokens", () => { + // With 1M input tokens, 25% would be 250K — should be capped at 16K + const cost1M = estimateRequestCost("claude-sonnet-4-20250514", 1_000_000); + const cost500K = estimateRequestCost("claude-sonnet-4-20250514", 500_000); + // The difference should come only from input cost, not output + // (both hit the 16K cap) + const costRatio = cost1M / cost500K; + // Should be close to 2x (input doubles), not 2x+ (if output also doubled) + expect(costRatio).toBeGreaterThan(1.5); + expect(costRatio).toBeLessThan(2.5); + }); + }); + + // --------------------------------------------------------------------------- + // updateCostRate (tested via recordConversationCost + getCostRate) + // --------------------------------------------------------------------------- + describe("cost rate EMA", () => { + const mockUsage = { + input_tokens: 50_000, + output_tokens: 1_000, + cache_read_input_tokens: 0, + cache_creation_input_tokens: 0, + }; + + test("seeds on first turn", () => { + expect(getCostRate()).toBe(0); + recordConversationCost("session-1", "claude-sonnet-4-20250514", mockUsage); + expect(getCostRate()).toBeGreaterThan(0); + }); + + test("EMA is finite after multiple turns", async () => { + for (let i = 0; i < 5; i++) { + recordConversationCost("session-1", "claude-sonnet-4-20250514", mockUsage); + // 10 ms gap between turns. NOTE(review): this is shorter than the 0.0001 h (~360 ms) burst window in updateCostRate, so turns 2-5 never blend into the EMA — the loop only checks that the seeded value stays finite and positive. Stub Date.now (or wait > 360 ms) to exercise the blend. + await new Promise((r) => setTimeout(r, 10)); + } + 
const rate = getCostRate(); + expect(Number.isFinite(rate)).toBe(true); + expect(rate).toBeGreaterThan(0); + }); + }); + + // --------------------------------------------------------------------------- + // Daily spend accumulator + // --------------------------------------------------------------------------- + describe("daily spend accumulator", () => { + test("starts at zero", () => { + const { spend } = getDailySpend(); + expect(spend).toBe(0); + }); + + test("accumulates conversation costs", () => { + recordConversationCost("session-1", "claude-sonnet-4-20250514", { + input_tokens: 50_000, + output_tokens: 1_000, + }); + const { spend } = getDailySpend(); + expect(spend).toBeGreaterThan(0); + }); + + test("returns today's date", () => { + const { date } = getDailySpend(); + const today = new Date().toISOString().slice(0, 10); + expect(date).toBe(today); + }); + }); + + // --------------------------------------------------------------------------- + // getDailyThrottleDelay — integrates accumulator + EMA + computeThrottleDelay + // --------------------------------------------------------------------------- + describe("getDailyThrottleDelay", () => { + test("returns 0 when budget is 0 (disabled)", () => { + expect(getDailyThrottleDelay(0, 0.01)).toBe(0); + }); + + test("returns 0 when no spend has occurred", () => { + // Even with a budget, no spend + no EMA = no throttle + expect(getDailyThrottleDelay(10, 0.01)).toBe(0); + }); + + test("returns 0 for small estimated cost with fresh state", () => { + expect(getDailyThrottleDelay(10, 0.001)).toBe(0); + }); + }); + + // --------------------------------------------------------------------------- + // resetDailyBudgetState + // --------------------------------------------------------------------------- + describe("resetDailyBudgetState", () => { + test("clears all budget state", () => { + recordConversationCost("session-1", "claude-sonnet-4-20250514", { + input_tokens: 50_000, + output_tokens: 1_000, + }); + 
expect(getDailySpend().spend).toBeGreaterThan(0); + expect(getCostRate()).toBeGreaterThan(0); + + resetDailyBudgetState(); + expect(getDailySpend().spend).toBe(0); + expect(getCostRate()).toBe(0); + }); + }); + + // --------------------------------------------------------------------------- + // SessionCosts.throttle field + // --------------------------------------------------------------------------- + describe("SessionCosts throttle field", () => { + test("initialized to zero in new sessions", () => { + recordConversationCost("new-session", "claude-sonnet-4-20250514", { + input_tokens: 100, + output_tokens: 100, + }); + const costs = getSessionCosts("new-session"); + expect(costs).not.toBeNull(); + expect(costs!.throttle.events).toBe(0); + expect(costs!.throttle.totalDelayMs).toBe(0); + }); + }); +});