From 643fd1293d75fe4ec5200fcd620f1238e3aca8ba Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <ben@byk.im>
Date: Sun, 24 May 2026 16:43:11 +0000
Subject: [PATCH] feat: daily budget throttling with rate-aware proxy sleep

---
 packages/gateway/src/cost-tracker.ts          | 303 +++++++++++++++++-
 packages/gateway/src/pipeline.ts              |  49 +++
 packages/gateway/src/server.ts                |   6 +
 packages/gateway/src/translate/types.ts       |   3 +
 packages/gateway/src/ui.ts                    |  74 +++++
 packages/gateway/test/budget-throttle.test.ts | 272 ++++++++++++++++
 6 files changed, 705 insertions(+), 2 deletions(-)
 create mode 100644 packages/gateway/test/budget-throttle.test.ts

diff --git a/packages/gateway/src/cost-tracker.ts b/packages/gateway/src/cost-tracker.ts
index d2639cad..4852c676 100644
--- a/packages/gateway/src/cost-tracker.ts
+++ b/packages/gateway/src/cost-tracker.ts
@@ -12,7 +12,7 @@
 
 import { getModelEntrySync, getWorkerModel } from "./worker-model";
 import { AUTOCOMPACT_THRESHOLD } from "./compaction";
-import { log, data, temporal, loadAllSessionCosts } from "@loreai/core";
+import { log, data, temporal, loadAllSessionCosts, db, getKV, setKV } from "@loreai/core";
 
 // ---------------------------------------------------------------------------
 // Types
@@ -72,6 +72,14 @@ export type SessionCosts = {
     avoidedCompactionCost: number;
   };
 
+  // --- Budget throttle diagnostics ---
+  throttle: {
+    /** Number of requests that were throttled (delayed). */
+    events: number;
+    /** Total delay imposed in milliseconds. */
+    totalDelayMs: number;
+  };
+
   /** Shadow context counter — tracks virtual uncompressed context growth for compaction estimation. */
   _shadowContextTokens: number;
   /** Previous turn's actual (compressed) input tokens — for delta estimation. */
@@ -153,6 +161,281 @@ let historicalCache: HistoricalEstimates | null = null;
 let historicalCacheAt = 0;
 const HISTORICAL_CACHE_TTL_MS = 60_000; // 1 minute
 
+// ---------------------------------------------------------------------------
+// Daily budget throttle state
+// ---------------------------------------------------------------------------
+
+/** Cumulative USD spend for the current UTC day (conversation + worker + warmup). */
+let dailySpend = 0;
+
+/** UTC date string (YYYY-MM-DD) for which `dailySpend` is valid; "" until first use. */
+let dailySpendDate = "";
+
+/** EMA of cost-per-hour (USD/hr) — tracks spending velocity across all sessions. */
+let costRateEMA = 0;
+
+/** Timestamp (ms) of the most recent `updateCostRate` call, including calls that leave the EMA unchanged. */
+let costRateLastUpdate = 0;
+
+/** Whether the cost-rate EMA has been seeded (first turn sets it directly). */
+let costRateSeeded = false;
+
+/**
+ * Base alpha for cost-rate EMA (lower than the output-token EMA's 0.3, for spike
+ * resistance). `updateCostRate` scales it by the gap between turns vs. a 2-minute reference.
+ */
+const COST_RATE_ALPHA = 0.15;
+
+/** Maximum throttle delay in seconds. */
+const MAX_THROTTLE_DELAY = 60;
+
+/**
+ * Budget fraction below which no throttling occurs, regardless of rate.
+ * At 50% spend, no friction is applied even if the rate is high.
+ */
+const THROTTLE_FLOOR = 0.50;
+
+/**
+ * Roll the daily accumulator over when the UTC calendar date changes.
+ * Cheap enough (one string compare) to run ahead of every spend update.
+ */
+function maybeResetDay(): void {
+  const utcDate = new Date().toISOString().slice(0, 10);
+  if (utcDate === dailySpendDate) return;
+  // New UTC day (or very first call): start counting from zero.
+  dailySpendDate = utcDate;
+  dailySpend = 0;
+}
+
+/**
+ * Fold one conversation turn into the spending-velocity EMA (USD/hr).
+ *
+ * The turn's instantaneous rate is turnCost divided by the hours elapsed
+ * since the previous call; it is blended in with an alpha that grows with
+ * that gap. The very first call seeds the EMA instead of blending.
+ *
+ * Called from recordConversationCost; worker and warmup recording skip it.
+ */
+function updateCostRate(turnCost: number): void {
+  const nowMs = Date.now();
+
+  if (costRateSeeded) {
+    const gapHours = (nowMs - costRateLastUpdate) / 3_600_000;
+    costRateLastUpdate = nowMs;
+
+    // Gaps under 0.0001 h (0.36 s) count as the same logical turn (e.g. a
+    // tool-use auto-continuation): leave the EMA alone rather than let a
+    // near-zero divisor spike it.
+    if (gapHours < 0.0001) return;
+
+    // Rate implied by this turn on its own.
+    const turnRate = turnCost / gapHours;
+
+    // Scale the base alpha by the gap relative to a 2-minute reference
+    // cadence (1/30 h): the longer the silence, the more weight the new
+    // (low) sample gets, so a stale EMA decays instead of lingering.
+    const cadenceHours = 1 / 30;
+    const blend = 1 - Math.pow(1 - COST_RATE_ALPHA, gapHours / cadenceHours);
+
+    costRateEMA = costRateEMA * (1 - blend) + turnRate * blend;
+    return;
+  }
+
+  // First turn ever: there is no gap to measure, so seed the EMA by
+  // assuming a baseline cadence of one turn per 2 minutes (30 turns/hr).
+  costRateEMA = turnCost * 30;
+  costRateLastUpdate = nowMs;
+  costRateSeeded = true;
+}
+
+/**
+ * Bootstrap the daily spend counter from persisted DB data on startup.
+ *
+ * Sums `conversation_cost + worker_cost` over `session_state` rows whose
+ * `updated_at` is at or after today's UTC midnight and OVERWRITES `dailySpend`
+ * with it (0 on query failure). Live in-memory sessions are not consulted.
+ */
+export function bootstrapDailySpend(): void {
+  const today = new Date();
+  const todayStr = today.toISOString().slice(0, 10);
+  dailySpendDate = todayStr;
+
+  // Midnight UTC today as epoch ms
+  const midnightMs = Date.UTC(today.getUTCFullYear(), today.getUTCMonth(), today.getUTCDate());
+
+  // Sum persisted session costs updated today.
+  // Note: conversation_cost and worker_cost are cumulative session totals,
+  // so multi-day sessions may overestimate today's spend. In practice most
+  // sessions don't span midnight boundaries (they idle out).
+  try {
+    const row = db()
+      .query(
+        `SELECT COALESCE(SUM(conversation_cost + worker_cost), 0) as total
+         FROM session_state
+         WHERE updated_at >= ?`,
+      )
+      .get(midnightMs) as { total: number } | null;
+    dailySpend = row?.total ?? 0;
+  } catch (err) {
+    log.error("budget-throttle: failed to bootstrap daily spend from DB", err);
+    dailySpend = 0;
+  }
+
+  if (dailySpend > 0) {
+    log.info(`budget-throttle: bootstrapped daily spend=$${dailySpend.toFixed(4)} for ${todayStr}`);
+  }
+}
+
+/**
+ * Pre-flight USD estimate for a request that has not gone upstream yet.
+ *
+ * All input tokens are priced at the model's input rate. Output size is
+ * unknown at this point, so it is guessed deliberately high — 25% of the
+ * input count, clamped to 16,384 tokens — to err on the side of
+ * overestimating rather than overshooting the budget.
+ */
+export function estimateRequestCost(model: string, inputTokens: number): number {
+  const { input: inputRate, output: outputRate } = getPricingSync(model);
+  // Pessimistic output guess: a quarter of the input, never above 16K.
+  const guessedOutputTokens = Math.min(inputTokens * 0.25, 16_384);
+  const inputUSD = (inputTokens / 1_000_000) * inputRate;
+  const outputUSD = (guessedOutputTokens / 1_000_000) * outputRate;
+  return inputUSD + outputUSD;
+}
+
+/**
+ * Pure throttle curve: translate budget state into a delay in seconds.
+ *
+ * delay = MAX_THROTTLE_DELAY × pressure² × tanh(overshoot / 3), where
+ * - pressure rescales spend fraction from [THROTTLE_FLOOR, 1] onto [0, 1]
+ * - overshoot = costRatePerHour / targetRate − 1, capped at 10
+ * - targetRate = unspent budget / hours left (hours floored at 0.5)
+ *
+ * Short-circuits: 0 when the budget is disabled (≤ 0), when spend is under
+ * THROTTLE_FLOOR, or when the rate is at or below targetRate; the maximum
+ * when spend has reached or passed the budget.
+ *
+ * @returns Delay in seconds, rounded to 0.1s, within [0, MAX_THROTTLE_DELAY]
+ */
+export function computeThrottleDelay(
+  dailySpendUSD: number,
+  dailyBudget: number,
+  costRatePerHour: number,
+  hoursRemaining: number,
+): number {
+  // Throttling is off entirely without a positive budget.
+  if (dailyBudget <= 0) return 0;
+
+  // The first half of the budget is friction-free no matter how fast it goes.
+  const usedFraction = dailySpendUSD / dailyBudget;
+  if (usedFraction < THROTTLE_FLOOR) return 0;
+
+  // At or over budget: always the full delay. The rate check further down
+  // cannot be relied on here, because an idle spell lets the rate drop to
+  // 0 and 0 <= targetRate (also 0) would otherwise mean "no throttle".
+  if (usedFraction >= 1.0) return MAX_THROTTLE_DELAY;
+
+  // Sustainable pace = what is left to spend ÷ time left to spend it in.
+  // The 0.5h floor keeps the divisor from collapsing close to midnight.
+  const unspent = Math.max(0, dailyBudget - dailySpendUSD);
+  const sustainableRate = unspent / Math.max(hoursRemaining, 0.5);
+
+  // Spending no faster than the sustainable pace needs no brake.
+  if (costRatePerHour <= sustainableRate) return 0;
+
+  // Relative excess over the sustainable pace, capped at 10.
+  const excess = Math.min(costRatePerHour / sustainableRate - 1, 10);
+
+  // Position within the throttled band [THROTTLE_FLOOR, 1.0] as 0..1.
+  const pressure = (usedFraction - THROTTLE_FLOOR) / (1 - THROTTLE_FLOOR);
+
+  // Squaring pressure keeps the ramp gentle; tanh saturates the excess term.
+  const rawDelay = MAX_THROTTLE_DELAY * pressure * pressure * Math.tanh(excess / 3);
+
+  return Math.min(Math.round(rawDelay * 10) / 10, MAX_THROTTLE_DELAY);
+}
+
+/**
+ * Throttle delay for the upcoming request from live state: today's spend
+ * plus this request's estimate, the cost-rate EMA, and hours to UTC midnight.
+ *
+ * @param dailyBudget - Configured daily budget in USD (0 = disabled)
+ * @param estimatedCost - Estimated cost of the upcoming request
+ * @returns Delay in seconds (0 = no throttle)
+ */
+export function getDailyThrottleDelay(dailyBudget: number, estimatedCost: number): number {
+  if (dailyBudget <= 0) return 0;
+
+  // Make sure dailySpend belongs to the current UTC day before reading it.
+  maybeResetDay();
+
+  // setUTCHours(24, …) rolls forward to 00:00 UTC of the following day.
+  const current = new Date();
+  const nextMidnight = new Date(current);
+  nextMidnight.setUTCHours(24, 0, 0, 0);
+  const hoursLeft = (nextMidnight.getTime() - current.getTime()) / 3_600_000;
+
+  return computeThrottleDelay(dailySpend + estimatedCost, dailyBudget, costRateEMA, hoursLeft);
+}
+
+/** Current UTC date and the spend accumulated for it (UI / diagnostics); zeroes the counter first if the UTC day rolled over. */
+export function getDailySpend(): { date: string; spend: number } {
+  maybeResetDay();
+  return { date: dailySpendDate, spend: dailySpend };
+}
+
+/** Current cost-rate EMA in USD/hr (UI / diagnostics); 0 until the first conversation turn seeds it. */
+export function getCostRate(): number {
+  return costRateEMA;
+}
+
+/** KV key for the persisted daily budget value. */
+const DAILY_BUDGET_KV_KEY = "daily_budget";
+
+/**
+ * Resolve the daily budget (USD) that is currently in effect.
+ *
+ * Sources, first positive number wins:
+ * 1. `LORE_DAILY_BUDGET` environment variable
+ * 2. value stored under the `daily_budget` KV key (see setDailyBudget)
+ * 3. otherwise 0, meaning throttling is disabled
+ */
+export function getDailyBudget(): number {
+  // 1. Env override. Unset, empty, unparsable or non-positive → fall through.
+  const fromEnv = parseFloat(process.env.LORE_DAILY_BUDGET || "");
+  if (fromEnv > 0) return fromEnv;
+
+  // 2. Persisted value.
+  try {
+    const fromStore = parseFloat(getKV(DAILY_BUDGET_KV_KEY) || "");
+    if (fromStore > 0) return fromStore;
+  } catch {
+    // Best effort: the DB may not be initialized yet (e.g. early startup),
+    // so any failure here is treated the same as "nothing stored".
+  }
+
+  // 3. Nothing configured.
+  return 0;
+}
+
+/**
+ * Persist the daily budget to the DB so it survives restarts.
+ * Negative or non-finite input is stored as 0, which disables throttling.
+ */
+export function setDailyBudget(budgetUSD: number): void {
+  const sanitized = Number.isFinite(budgetUSD) && budgetUSD >= 0 ? budgetUSD : 0;
+  setKV(DAILY_BUDGET_KV_KEY, String(sanitized));
+}
+
+/** Test helper: put every daily-budget module variable back to its initial value. */
+export function resetDailyBudgetState(): void {
+  costRateSeeded = false;
+  costRateLastUpdate = 0;
+  costRateEMA = 0;
+  dailySpendDate = "";
+  dailySpend = 0;
+}
+
 // ---------------------------------------------------------------------------
 // Initialization
 // ---------------------------------------------------------------------------
@@ -183,6 +466,7 @@ function emptyCosts(): SessionCosts {
       avoidedCompactions: 0,
       avoidedCompactionCost: 0,
     },
+    throttle: { events: 0, totalDelayMs: 0 },
     _shadowContextTokens: 0,
     _lastActualInput: 0,
     _lastOutputTokens: 0,
@@ -293,6 +577,11 @@ export function recordConversationCost(
   costs.conversation.cacheReadTokens += usage.cache_read_input_tokens ?? 0;
   costs.conversation.cacheWriteTokens += usage.cache_creation_input_tokens ?? 0;
   costs.conversation.turns++;
+
+  // Daily budget throttle: accumulate spend and update velocity EMA
+  maybeResetDay();
+  dailySpend += call.total;
+  updateCostRate(call.total);
 }
 
 /** Worker ID → cost bucket mapping. */
@@ -328,6 +617,10 @@ export function recordWorkerCost(
     const fullCost = computeCallCost(model, usage, "direct");
     costs.batchSavings += fullCost.total - call.total;
   }
+
+  // Daily budget throttle: accumulate worker spend (no EMA update — workers excluded from velocity)
+  maybeResetDay();
+  dailySpend += call.total;
 }
 
 /**
@@ -349,8 +642,13 @@ export function recordWarmupCost(
   // Anthropic doubles cache_write pricing for 1h TTL
   const cacheWriteRate = ttl === "1h" ? pricing.cache_write * 2 : pricing.cache_write;
   const writeCost = (cacheCreationTokens / 1_000_000) * cacheWriteRate;
-  costs.workers.warmup.cost += readCost + writeCost;
+  const warmupTotal = readCost + writeCost;
+  costs.workers.warmup.cost += warmupTotal;
   costs.workers.warmup.calls++;
+
+  // Daily budget throttle: accumulate warmup spend (no EMA update)
+  maybeResetDay();
+  dailySpend += warmupTotal;
 }
 
 // ---------------------------------------------------------------------------
@@ -581,6 +879,7 @@ export function deleteSessionCosts(sessionID: string): void {
 /** Clear all sessions (for testing). */
 export function clearAllCosts(): void {
   sessions.clear();
+  resetDailyBudgetState();
 }
 
 // ---------------------------------------------------------------------------
diff --git a/packages/gateway/src/pipeline.ts b/packages/gateway/src/pipeline.ts
index b96c8f9d..655bf546 100644
--- a/packages/gateway/src/pipeline.ts
+++ b/packages/gateway/src/pipeline.ts
@@ -33,6 +33,7 @@ import {
   recordCacheUsage,
   calibrate,
   getLastTransformedCount,
+  getLastTransformEstimate,
   getLastLayer,
   onIdleResume,
   consumeCameOutOfIdle,
@@ -158,6 +159,12 @@ import {
   updateShadowContext,
   recordWarmupHit,
   recordTTLSavings,
+  getDailyThrottleDelay,
+  estimateRequestCost,
+  getDailySpend,
+  getDailyBudget,
+  getCostRate,
+  getSessionCosts,
 } from "./cost-tracker";
 import {
   RECALL_GATEWAY_TOOL,
@@ -949,6 +956,7 @@ function getOrCreateSession(
     }
     sessions.set(sessionID, state);
   }
+  state.prevRequestTime = state.lastRequestTime;
   state.lastRequestTime = Date.now();
 
   // Ensure recallStore exists (upgrade from older session state)
@@ -3493,6 +3501,47 @@ async function handleConversationTurn(
     conversationTTL: resolvedConversationTTL,
   };
 
+  // --- Daily budget throttle ---
+  // Apply an invisible proxy-level sleep to slow the agent when approaching
+  // the daily budget. The sleep is capped to avoid causing cache busts
+  // (which would be self-defeating — costing more than the throttle saved).
+  const dailyBudget = getDailyBudget();
+  if (dailyBudget > 0) {
+    const inputTokens = getLastTransformEstimate(sessionID)
+      || Math.ceil(JSON.stringify(modifiedReq.messages).length / 3);
+    const estimatedCost = estimateRequestCost(req.model, inputTokens);
+    const delay = getDailyThrottleDelay(dailyBudget, estimatedCost);
+
+    if (delay > 0) {
+      // Cap at half the remaining cache-TTL window (measured from prevRequestTime)
+      // so the sleep cannot bust the cache. Once that window has lapsed there is
+      // nothing left to protect, so the cap is lifted instead of collapsing to 0.
+      const ttlMs = resolvedConversationTTL === "1h" ? 3_600_000 : 300_000;
+      const elapsed = sessionState.prevRequestTime
+        ? Date.now() - sessionState.prevRequestTime
+        : 0; // first request — no prior timing, full TTL available
+      const maxSafe = elapsed >= ttlMs ? delay : ((ttlMs - elapsed) * 0.50) / 1000;
+      const actualDelay = Math.min(delay, maxSafe);
+
+      if (actualDelay > 0.5) { // don't bother sleeping < 500ms
+        log.info(
+          `budget-throttle: sleeping ${actualDelay.toFixed(1)}s ` +
+            `session=${sessionID.slice(0, 16)} ` +
+            `spend=$${getDailySpend().spend.toFixed(2)} ` +
+            `rate=$${getCostRate().toFixed(2)}/hr`,
+        );
+        await new Promise((resolve) => setTimeout(resolve, actualDelay * 1000));
+
+        // Track throttle event on session costs
+        const costs = getSessionCosts(sessionID);
+        if (costs) {
+          costs.throttle.events++;
+          costs.throttle.totalDelayMs += actualDelay * 1000;
+        }
+      }
+    }
+  }
+
   // Start gen_ai.chat span before the upstream call so it captures real
   // wall-clock duration (including network latency and streaming time).
   // The span is ended in postResponse() after usage attributes are set.
diff --git a/packages/gateway/src/server.ts b/packages/gateway/src/server.ts
index 61263ab7..c809053e 100644
--- a/packages/gateway/src/server.ts
+++ b/packages/gateway/src/server.ts
@@ -12,6 +12,7 @@
  * Uses `Bun.serve()` — this package targets Bun exclusively.
  */
 import { DEFAULT_PORT, type GatewayConfig } from "./config";
+import { bootstrapDailySpend, getDailyBudget } from "./cost-tracker";
 import type { GatewayRequest } from "./translate/types";
 import { parseAnthropicRequest } from "./translate/anthropic";
 import { parseOpenAIRequest, buildOpenAIResponse } from "./translate/openai";
@@ -278,6 +279,11 @@ export function startServer(config: GatewayConfig): {
     config = { ...config, port: DEFAULT_PORT };
   }
 
+  // Bootstrap the daily spend counter from DB (recovers today's spend after restart)
+  if (getDailyBudget() > 0) {
+    bootstrapDailySpend();
+  }
+
   // Shared fetch handler for all server instances.
   const fetch = async (req: Request): Promise<Response> => {
     const url = new URL(req.url);
diff --git a/packages/gateway/src/translate/types.ts b/packages/gateway/src/translate/types.ts
index 9661363d..c538b3c3 100644
--- a/packages/gateway/src/translate/types.ts
+++ b/packages/gateway/src/translate/types.ts
@@ -227,6 +227,9 @@ export type SessionState = {
   fingerprint: string;
   /** Unix timestamp (ms) of the last request in this session. */
   lastRequestTime: number;
+  /** Unix timestamp (ms) of the request before the current one — used by budget
+   *  throttle to compute elapsed time since the previous turn for cache TTL safety. */
+  prevRequestTime?: number;
   /** Unix timestamp (ms) of the last user-initiated turn — excludes tool-use
    *  auto-continuations. Used exclusively for inter-turn gap histogram
    *  recording (survival analysis). */
diff --git a/packages/gateway/src/ui.ts b/packages/gateway/src/ui.ts
index 0b7b9685..d598f1a9 100644
--- a/packages/gateway/src/ui.ts
+++ b/packages/gateway/src/ui.ts
@@ -29,6 +29,10 @@ import {
   totalWorkerCost,
   totalSavings,
   costWithoutLore,
+  getDailySpend,
+  getDailyBudget,
+  setDailyBudget,
+  getCostRate,
   type SessionCosts,
 } from "./cost-tracker";
 import { getActiveSessions } from "./pipeline";
@@ -336,6 +340,14 @@ function renderCostSummary(sessionId: string): string {
     </div>`;
   }
 
+  // Budget throttle diagnostics
+  if (costs.throttle.events > 0) {
+    const totalDelaySec = (costs.throttle.totalDelayMs / 1000).toFixed(1);
+    html += `<div style="margin-top:10px;font-size:0.85em;color:var(--fg2)">
+      <strong style="color:#f59e0b">Budget throttle:</strong> ${costs.throttle.events} event${costs.throttle.events === 1 ? "" : "s"}, ${totalDelaySec}s total delay
+    </div>`;
+  }
+
   html += `</div>`;
   return html;
 }
@@ -2109,6 +2121,59 @@ function pageCosts(): string {
     </div>`;
   }
 
+  // --- Daily budget status + settings ---
+  const currentBudget = getDailyBudget();
+  {
+    const { spend, date } = getDailySpend();
+    const rate = getCostRate();
+
+    body += `<div class="card" style="margin-bottom:1em">
+      <h3 style="margin-top:0;margin-bottom:0.5em">Daily Budget</h3>`;
+
+    if (currentBudget > 0) {
+      const budgetPct = Math.min((spend / currentBudget) * 100, 100);
+
+      // Count total throttle events across live sessions
+      let totalThrottleEvents = 0;
+      let totalThrottleDelayMs = 0;
+      for (const [, c] of allCosts) {
+        totalThrottleEvents += c.throttle.events;
+        totalThrottleDelayMs += c.throttle.totalDelayMs;
+      }
+
+      body += renderCostBar({
+        title: `Budget (${date})`,
+        value: `${formatUSD(spend)} / ${formatUSD(currentBudget)}`,
+        percent: budgetPct,
+        tint: budgetPct < 60 ? "bar-green" : budgetPct < 85 ? "bar-amber" : "bar-red",
+        detailLeftHtml: `Rate: ${formatUSD(rate)}/hr`,
+        detailRightHtml: totalThrottleEvents > 0
+          ? `Throttled: ${totalThrottleEvents} req, ${(totalThrottleDelayMs / 1000).toFixed(1)}s delay`
+          : "",
+      });
+    } else {
+      body += `<p style="color:var(--fg2);margin:0 0 8px">No daily budget set. Configure one to automatically throttle spending.</p>`;
+    }
+
+    // Budget settings form
+    const envOverride = process.env.LORE_DAILY_BUDGET;
+    if (envOverride) {
+      body += `<div style="margin-top:8px;font-size:0.85em;color:var(--fg2)">
+        Overridden by env var <code>LORE_DAILY_BUDGET=${esc(envOverride)}</code>
+      </div>`;
+    } else {
+      body += `<form method="POST" action="/ui/api/budget" style="margin-top:8px;display:flex;gap:8px;align-items:center">
+        <label style="font-size:0.85em;color:var(--fg2)">Budget (USD/day):</label>
+        <input type="number" name="budget" step="0.01" min="0" value="${currentBudget || ""}"
+          placeholder="e.g. 10.00" style="width:100px;padding:4px 8px;border:1px solid var(--border);border-radius:4px;background:var(--bg2);color:var(--fg)">
+        <button type="submit" class="btn">Save</button>
+        ${currentBudget > 0 ? `<button type="submit" name="budget" value="0" class="btn" style="background:var(--bg2);color:var(--fg2)">Disable</button>` : ""}
+      </form>`;
+    }
+
+    body += `</div>`;
+  }
+
   // Summary stats (compact pills for secondary metrics)
   // Trend arrow: compare live savings rate vs historical average.
   // Both rates use the same formula: netSavings / counterfactual,
@@ -2691,6 +2756,15 @@ export async function handleUIRequest(
       return redirect(`/ui/projects/${renameProjectMatch.id}`);
     }
 
+    // Set daily budget. The Disable button posts "budget" twice (input value, then "0"), so the LAST value wins.
+    if (pathname === "/ui/api/budget") {
+      const formData = await req.formData();
+      const budgetStr = formData.getAll("budget").pop();
+      const budgetVal = parseFloat(typeof budgetStr === "string" ? budgetStr : "0") || 0;
+      setDailyBudget(budgetVal);
+      return redirect("/ui/costs");
+    }
+
     // Set warming mode for a live session
     const warmingMode = matchRoute(pathname, "/ui/api/warming/:sessionId/:mode");
     if (warmingMode) {
diff --git a/packages/gateway/test/budget-throttle.test.ts b/packages/gateway/test/budget-throttle.test.ts
new file mode 100644
index 00000000..9c21f5ff
--- /dev/null
+++ b/packages/gateway/test/budget-throttle.test.ts
@@ -0,0 +1,272 @@
+import { describe, test, expect, beforeEach } from "bun:test";
+import {
+  computeThrottleDelay,
+  getDailyThrottleDelay,
+  getDailySpend,
+  getCostRate,
+  estimateRequestCost,
+  resetDailyBudgetState,
+  recordConversationCost,
+  clearAllCosts,
+  getSessionCosts,
+} from "../src/cost-tracker";
+
+describe("budget-throttle", () => {
+  beforeEach(() => {
+    clearAllCosts();
+  });
+
+  // ---------------------------------------------------------------------------
+  // computeThrottleDelay — pure function, no global state dependency
+  // ---------------------------------------------------------------------------
+  describe("computeThrottleDelay", () => {
+    test("returns 0 when budget is 0 (disabled)", () => {
+      expect(computeThrottleDelay(5, 0, 10, 12)).toBe(0);
+    });
+
+    test("returns 0 when budget is negative (disabled)", () => {
+      expect(computeThrottleDelay(5, -1, 10, 12)).toBe(0);
+    });
+
+    test("returns 0 when spend is below 50% floor", () => {
+      // $4 of $10 = 40% — below THROTTLE_FLOOR (50%)
+      expect(computeThrottleDelay(4, 10, 100, 12)).toBe(0);
+    });
+
+    test("returns 0 when spend is exactly at 50% floor", () => {
+      // $5 of $10 = 50% — exactly at floor
+      expect(computeThrottleDelay(5, 10, 100, 12)).toBe(0);
+    });
+
+    test("returns 0 when rate is sustainable", () => {
+      // $7 of $10 = 70%, remaining = $3 over 12h = $0.25/hr target
+      // Current rate = $0.10/hr — sustainable
+      expect(computeThrottleDelay(7, 10, 0.1, 12)).toBe(0);
+    });
+
+    test("applies small delay at 60% spend with 2x overshoot", () => {
+      // $6 of $10 = 60%, remaining = $4 over 12h = $0.33/hr target
+      // Current rate = $0.67/hr (~2x overshoot)
+      const delay = computeThrottleDelay(6, 10, 0.67, 12);
+      expect(delay).toBeGreaterThan(0);
+      expect(delay).toBeLessThan(3); // should be well under 3s
+    });
+
+    test("applies moderate delay at 80% spend with 2x overshoot", () => {
+      // $8 of $10 = 80%, remaining = $2 over 10h = $0.20/hr target
+      // Current rate = $0.40/hr (2x overshoot)
+      const delay = computeThrottleDelay(8, 10, 0.4, 10);
+      expect(delay).toBeGreaterThan(3);
+      expect(delay).toBeLessThan(15);
+    });
+
+    test("applies large delay at 80% spend with 5x overshoot", () => {
+      // $8 of $10 = 80%, remaining = $2 over 10h = $0.20/hr target
+      // Current rate = $1.00/hr (5x overshoot)
+      const delay = computeThrottleDelay(8, 10, 1.0, 10);
+      expect(delay).toBeGreaterThan(10);
+      expect(delay).toBeLessThan(30);
+    });
+
+    test("approaches max delay at 95% spend with 3x overshoot", () => {
+      // $9.50 of $10 = 95%, remaining = $0.50 over 10h = $0.05/hr target
+      // Current rate = $0.15/hr (3x overshoot)
+      const delay = computeThrottleDelay(9.5, 10, 0.15, 10);
+      expect(delay).toBeGreaterThan(20);
+      expect(delay).toBeLessThanOrEqual(60);
+    });
+
+    test("never exceeds MAX_THROTTLE_DELAY (60s)", () => {
+      // Extreme: 100% spend, 10x overshoot
+      const delay = computeThrottleDelay(10, 10, 100, 1);
+      expect(delay).toBeLessThanOrEqual(60);
+    });
+
+    test("is monotonically increasing with spend fraction", () => {
+      const rate = 2;
+      const hours = 10;
+      const budget = 10;
+      let prevDelay = 0;
+      for (let spend = 5; spend <= 10; spend += 0.5) {
+        const delay = computeThrottleDelay(spend, budget, rate, hours);
+        expect(delay).toBeGreaterThanOrEqual(prevDelay);
+        prevDelay = delay;
+      }
+    });
+
+    test("is monotonically increasing with cost rate", () => {
+      const spend = 7;
+      const budget = 10;
+      const hours = 10;
+      // Target rate = $3 / 10h = $0.30/hr
+      let prevDelay = 0;
+      for (let rate = 0.3; rate <= 5; rate += 0.5) {
+        const delay = computeThrottleDelay(spend, budget, rate, hours);
+        expect(delay).toBeGreaterThanOrEqual(prevDelay);
+        prevDelay = delay;
+      }
+    });
+
+    test("floors hoursRemaining at 0.5 to avoid division explosion", () => {
+      // Near midnight: only 0.01 hours remaining (36 seconds)
+      // Without floor this would make targetRate insanely high
+      const delay = computeThrottleDelay(9, 10, 5, 0.01);
+      // Should still compute a reasonable delay, not NaN or Infinity
+      expect(Number.isFinite(delay)).toBe(true);
+      expect(delay).toBeGreaterThanOrEqual(0);
+      expect(delay).toBeLessThanOrEqual(60);
+    });
+
+    test("returns max delay when budget is exhausted regardless of rate", () => {
+      // Spent more than budget — even if rate is 0 (idle return), max delay applies
+      expect(computeThrottleDelay(12, 10, 0, 10)).toBe(60);
+      expect(computeThrottleDelay(10, 10, 0, 10)).toBe(60);
+    });
+
+    test("smooth curve — no cliff edges between adjacent inputs", () => {
+      const budget = 10;
+      const rate = 2;
+      const hours = 10;
+      // Check that adjacent 0.1% spend increments don't produce >5s jumps
+      for (let pct = 0.50; pct < 1.0; pct += 0.001) {
+        const d1 = computeThrottleDelay(pct * budget, budget, rate, hours);
+        const d2 = computeThrottleDelay((pct + 0.001) * budget, budget, rate, hours);
+        expect(Math.abs(d2 - d1)).toBeLessThan(5);
+      }
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // estimateRequestCost
+  // ---------------------------------------------------------------------------
+  describe("estimateRequestCost", () => {
+    test("returns positive cost for known model", () => {
+      const cost = estimateRequestCost("claude-sonnet-4-20250514", 100_000);
+      expect(cost).toBeGreaterThan(0);
+    });
+
+    test("input tokens contribute to cost", () => {
+      const small = estimateRequestCost("claude-sonnet-4-20250514", 10_000);
+      const large = estimateRequestCost("claude-sonnet-4-20250514", 100_000);
+      expect(large).toBeGreaterThan(small);
+    });
+
+    test("output estimate is capped at 16K tokens", () => {
+      // With 1M input tokens, 25% would be 250K — should be capped at 16K
+      const cost1M = estimateRequestCost("claude-sonnet-4-20250514", 1_000_000);
+      const cost500K = estimateRequestCost("claude-sonnet-4-20250514", 500_000);
+      // The difference should come only from input cost, not output
+      // (both hit the 16K cap)
+      const costRatio = cost1M / cost500K;
+      // Should be close to 2x (input doubles), not 2x+ (if output also doubled)
+      expect(costRatio).toBeGreaterThan(1.5);
+      expect(costRatio).toBeLessThan(2.5);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // updateCostRate (tested via recordConversationCost + getCostRate)
+  // ---------------------------------------------------------------------------
+  describe("cost rate EMA", () => {
+    const mockUsage = {
+      input_tokens: 50_000,
+      output_tokens: 1_000,
+      cache_read_input_tokens: 0,
+      cache_creation_input_tokens: 0,
+    };
+
+    test("seeds on first turn", () => {
+      expect(getCostRate()).toBe(0);
+      recordConversationCost("session-1", "claude-sonnet-4-20250514", mockUsage);
+      expect(getCostRate()).toBeGreaterThan(0);
+    });
+
+    test("EMA is finite after multiple turns", async () => {
+      for (let i = 0; i < 5; i++) {
+        recordConversationCost("session-1", "claude-sonnet-4-20250514", mockUsage);
+        // NOTE: 10 ms is below updateCostRate's 0.0001 h (0.36 s) minimum gap, so only the seeded value is exercised.
+        await new Promise((r) => setTimeout(r, 10));
+      }
+      const rate = getCostRate();
+      expect(Number.isFinite(rate)).toBe(true);
+      expect(rate).toBeGreaterThan(0);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // Daily spend accumulator
+  // ---------------------------------------------------------------------------
+  describe("daily spend accumulator", () => {
+    test("starts at zero", () => {
+      const { spend } = getDailySpend();
+      expect(spend).toBe(0);
+    });
+
+    test("accumulates conversation costs", () => {
+      recordConversationCost("session-1", "claude-sonnet-4-20250514", {
+        input_tokens: 50_000,
+        output_tokens: 1_000,
+      });
+      const { spend } = getDailySpend();
+      expect(spend).toBeGreaterThan(0);
+    });
+
+    test("returns today's date", () => {
+      const { date } = getDailySpend();
+      const today = new Date().toISOString().slice(0, 10);
+      expect(date).toBe(today);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // getDailyThrottleDelay — integrates accumulator + EMA + computeThrottleDelay
+  // ---------------------------------------------------------------------------
+  describe("getDailyThrottleDelay", () => {
+    test("returns 0 when budget is 0 (disabled)", () => {
+      expect(getDailyThrottleDelay(0, 0.01)).toBe(0);
+    });
+
+    test("returns 0 when no spend has occurred", () => {
+      // Even with a budget, no spend + no EMA = no throttle
+      expect(getDailyThrottleDelay(10, 0.01)).toBe(0);
+    });
+
+    test("returns 0 for small estimated cost with fresh state", () => {
+      expect(getDailyThrottleDelay(10, 0.001)).toBe(0);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // resetDailyBudgetState
+  // ---------------------------------------------------------------------------
+  describe("resetDailyBudgetState", () => {
+    test("clears all budget state", () => {
+      recordConversationCost("session-1", "claude-sonnet-4-20250514", {
+        input_tokens: 50_000,
+        output_tokens: 1_000,
+      });
+      expect(getDailySpend().spend).toBeGreaterThan(0);
+      expect(getCostRate()).toBeGreaterThan(0);
+
+      resetDailyBudgetState();
+      expect(getDailySpend().spend).toBe(0);
+      expect(getCostRate()).toBe(0);
+    });
+  });
+
+  // ---------------------------------------------------------------------------
+  // SessionCosts.throttle field
+  // ---------------------------------------------------------------------------
+  describe("SessionCosts throttle field", () => {
+    test("initialized to zero in new sessions", () => {
+      recordConversationCost("new-session", "claude-sonnet-4-20250514", {
+        input_tokens: 100,
+        output_tokens: 100,
+      });
+      const costs = getSessionCosts("new-session");
+      expect(costs).not.toBeNull();
+      expect(costs!.throttle.events).toBe(0);
+      expect(costs!.throttle.totalDelayMs).toBe(0);
+    });
+  });
+});