diff --git a/packages/gateway/src/cache-warmer.ts b/packages/gateway/src/cache-warmer.ts
index 000b43ef..cc2c79a6 100644
--- a/packages/gateway/src/cache-warmer.ts
+++ b/packages/gateway/src/cache-warmer.ts
@@ -78,12 +78,17 @@ export const BLEND_PSEUDOCOUNT = 20;
 export const DEAD_SESSION_THRESHOLD = 0.02;
 
 /** Minimum completed turns before warming is eligible. Filters out one-shot
- *  sessions and ensures the survival model has ≥2 gap observations. */
-export const MIN_TURNS_FOR_WARMING = 3;
+ *  sessions and ensures the survival model has ≥4 gap observations. */
+export const MIN_TURNS_FOR_WARMING = 5;
 
 /** Maximum duration (ms) to keep warming during a tool call before
- *  falling back to normal survival analysis. 30 min ≈ 6 cycles at 5m TTL. */
-export const MAX_TOOL_CALL_WARMING_MS = 30 * 60 * 1000;
+ *  falling back to normal survival analysis. 10 min ≈ 2 cycles at 5m TTL. */
+export const MAX_TOOL_CALL_WARMING_MS = 10 * 60 * 1000;
+
+/** Maximum warmup cycles during a single tool-call break. Most tool
+ *  calls complete in <5 minutes; 2 cycles covers a 10-minute operation
+ *  at 5m TTL, sufficient for 95%+ of tool calls. */
+export const TOOL_CALL_MAX_CYCLES = 2;
 
 /** Max uncached warmup responses before the global circuit breaker trips. */
 const CIRCUIT_BREAKER_MAX_FAILURES = 3;
@@ -94,12 +99,24 @@ const CIRCUIT_BREAKER_MAX_FAILURES = 3;
 export const BREAK_FLOOR_MS = 180_000;
 
 /** Minimum total warmups before session-level hit-rate ROI check kicks in. */
-export const MIN_WARMUPS_FOR_ROI_CHECK = 10;
+export const MIN_WARMUPS_FOR_ROI_CHECK = 5;
 
 /** Minimum session-level hit rate to continue warming. Below this,
- *  warming is empirically unprofitable and we stop. 20% means at least
- *  1 in 5 warmups must result in a confirmed user return. */
-export const MIN_SESSION_HIT_RATE = 0.20;
+ *  warming is empirically unprofitable and we stop. 25% means at least
+ *  1 in 4 warmups must result in a confirmed user return. */
+export const MIN_SESSION_HIT_RATE = 0.25;
+
+/** Minimum total input tokens (input + cache_read + cache_creation) before
+ *  warming is eligible. Below this threshold the absolute savings per hit
+ *  are too small to justify the risk of wasted warmups. At 50K tokens with
+ *  Opus 5m, a hit saves ~$0.29 and a warmup costs ~$0.025. */
+export const MIN_INPUT_TOKENS_FOR_WARMING = 50_000;
+
+/** Minimum P(returns) floor for the initial warming commitment. The
+ *  break-even threshold read/(write-read) is often very low (4–9%),
+ *  which causes nearly every non-dead session to get warmed. This floor
+ *  ensures at least 30% return probability before the first warmup. */
+export const MIN_RETURN_PROBABILITY_FLOOR = 0.30;
 
 // ---------------------------------------------------------------------------
 // Global circuit breaker
@@ -749,11 +766,16 @@ export function shouldWarm(
 
     const maxCycles = maxProfitableCycles(cacheReadCostPerMTok, cacheMissCostPerMTok);
     const cyclesSpent = state.warmup?.warmupCount ?? 0;
-    if (cyclesSpent >= maxCycles) return false;
+    // Tool-call-specific cap: most tools complete in <10min (2 cycles at 5m TTL)
+    const effectiveMax = Math.min(maxCycles, TOOL_CALL_MAX_CYCLES);
+    if (cyclesSpent >= effectiveMax) return false;
 
     // Still require some history to have a stored body worth warming
     if (state.messageCount < MIN_TURNS_FOR_WARMING * 2) return false;
 
+    // Context too small — absolute savings per hit don't justify risk
+    if ((state.lastInputTokens ?? 0) < MIN_INPUT_TOKENS_FOR_WARMING) return false;
+
     // Only warm in the margin window of the current TTL cycle
     const intoWindow = elapsed % ttlMs;
     if (intoWindow < ttlMs - warmupMarginMs) return false;
@@ -768,6 +790,9 @@ export function shouldWarm(
   // wasted warmup at 200K Opus tokens).
   if (state.messageCount < MIN_TURNS_FOR_WARMING * 2) return false;
 
+  // Context too small — absolute savings per hit don't justify risk
+  if ((state.lastInputTokens ?? 0) < MIN_INPUT_TOKENS_FOR_WARMING) return false;
+
   // Session marked dead
   if (state.warmup?.disabled) return false;
 
@@ -796,9 +821,13 @@ export function shouldWarm(
   });
   const pReturns = 1.0 - pFinished;
 
-  // Corrected cost threshold: read / (write - read)
+  // Corrected cost threshold: read / (write - read), with a floor to prevent
+  // warming sessions with trivially low return probability (8.7% for 5m TTL).
+  // NOTE: an explicit minReturnProbability config override intentionally
+  // bypasses the floor — it's a user-controlled knob for tuning.
   const autoThreshold = costThreshold(cacheReadCostPerMTok, cacheMissCostPerMTok);
-  const threshold = cfg.cache.warming.minReturnProbability ?? autoThreshold;
+  const threshold = cfg.cache.warming.minReturnProbability
+    ?? Math.max(MIN_RETURN_PROBABILITY_FLOOR, autoThreshold);
 
   // Max cycles before warming becomes unprofitable
   const maxCycles = maxProfitableCycles(cacheReadCostPerMTok, cacheMissCostPerMTok);
@@ -841,6 +870,9 @@ export function shouldWarm(
     // Rising cost threshold: after k cycles, the accumulated warmup cost
     // means we need a higher P(returns) to justify the next one.
     // k=1: 8.7%, k=3: 26%, k=5: 43%, k=6: 52% for Opus 5m.
+    // NOTE: intentionally does NOT use MIN_RETURN_PROBABILITY_FLOOR — the
+    // floor only gates the initial commitment (Phase A). Once committed,
+    // the rising threshold handles profitability based on sunk costs.
     const risingThreshold = cumulativeCostThreshold(
       cyclesSpent + 1, // +1 because we're deciding whether to do the NEXT cycle
       cacheReadCostPerMTok,
@@ -978,11 +1010,12 @@ export function computeWarmingSnapshot(
   });
   const pReturns = 1.0 - pFinished;
 
-  // Corrected threshold
+  // Corrected threshold with floor
   const autoThreshold = profile
     ? costThreshold(profile.cacheReadCostPerMTok, profile.cacheMissCostPerMTok)
     : 0.1;
-  const thresholdVal = cfg.cache.warming.minReturnProbability ?? autoThreshold;
+  const thresholdVal = cfg.cache.warming.minReturnProbability
+    ?? Math.max(MIN_RETURN_PROBABILITY_FLOOR, autoThreshold);
 
   // Commitment model cost analysis
   const maxCyclesVal = profile
@@ -1028,21 +1061,25 @@ export function computeWarmingSnapshot(
           notWarmingReason = "Force-keep: cooldown active";
         }
       }
-    } else if (state.lastStopReason === "tool_use") {
-      if (state.warmup?.disabled) {
-        notWarmingReason = "Warming stopped (/lore:warm:stop)";
-      } else if (state.messageCount < MIN_TURNS_FOR_WARMING * 2) {
+    } else if (state.lastStopReason === "tool_use" && !state.warmup?.disabled) {
+      // Mirror shouldWarm()'s tool-call entry: `toolCallActive && !disabled`.
+      // If disabled=true, fall through to the normal path below.
+      if (state.messageCount < MIN_TURNS_FOR_WARMING * 2) {
         notWarmingReason = `Too few turns (${state.messageCount} < ${MIN_TURNS_FOR_WARMING * 2})`;
+      } else if ((state.lastInputTokens ?? 0) < MIN_INPUT_TOKENS_FOR_WARMING) {
+        const tokK = Math.round((state.lastInputTokens ?? 0) / 1000);
+        notWarmingReason = `Context too small (${tokK}k < ${MIN_INPUT_TOKENS_FOR_WARMING / 1000}k tokens)`;
       } else if (idleMs > MAX_TOOL_CALL_WARMING_MS) {
-        notWarmingReason = `Tool call exceeded max duration (${Math.round(idleMs / 60_000)}min > 30min)`;
+        notWarmingReason = `Tool call exceeded max duration (${Math.round(idleMs / 60_000)}min > ${Math.round(MAX_TOOL_CALL_WARMING_MS / 60_000)}min)`;
       } else if ((state.warmup?.totalWarmups ?? 0) >= MIN_WARMUPS_FOR_ROI_CHECK &&
                  (state.warmup?.warmupHits ?? 0) / (state.warmup?.totalWarmups ?? 1) < MIN_SESSION_HIT_RATE) {
         const hitRate = ((state.warmup?.warmupHits ?? 0) / (state.warmup?.totalWarmups ?? 1) * 100).toFixed(0);
         notWarmingReason = `Tool call: session hit rate too low (${hitRate}% < ${(MIN_SESSION_HIT_RATE * 100).toFixed(0)}%)`;
       } else {
         const maxCyc = maxProfitableCycles(profile.cacheReadCostPerMTok, profile.cacheMissCostPerMTok);
-        if ((state.warmup?.warmupCount ?? 0) >= maxCyc) {
-          notWarmingReason = "Tool call: break-even exceeded";
+        const effectiveMax = Math.min(maxCyc, TOOL_CALL_MAX_CYCLES);
+        if ((state.warmup?.warmupCount ?? 0) >= effectiveMax) {
+          notWarmingReason = `Tool call: cycle cap reached (${state.warmup?.warmupCount ?? 0} >= ${effectiveMax})`;
         } else {
           const intoWindow = idleMs % ttlMs;
           notWarmingReason = intoWindow < ttlMs - warmupMarginMs
@@ -1054,6 +1091,9 @@ export function computeWarmingSnapshot(
       notWarmingReason = "Already warmed in this TTL window";
     } else if (state.messageCount < MIN_TURNS_FOR_WARMING * 2) {
       notWarmingReason = `Too few turns (${state.messageCount} < ${MIN_TURNS_FOR_WARMING * 2})`;
+    } else if ((state.lastInputTokens ?? 0) < MIN_INPUT_TOKENS_FOR_WARMING) {
+      const tokK = Math.round((state.lastInputTokens ?? 0) / 1000);
+      notWarmingReason = `Context too small (${tokK}k < ${MIN_INPUT_TOKENS_FOR_WARMING / 1000}k tokens)`;
     } else if (state.warmup?.disabled) {
       notWarmingReason = "Warming stopped (/lore:warm:stop)";
     } else if ((state.warmup?.totalWarmups ?? 0) >= MIN_WARMUPS_FOR_ROI_CHECK &&
diff --git a/packages/gateway/src/idle.ts b/packages/gateway/src/idle.ts
index 8f0f41c3..cd0360e2 100644
--- a/packages/gateway/src/idle.ts
+++ b/packages/gateway/src/idle.ts
@@ -42,6 +42,7 @@ import {
   loadGlobalHistograms,
   flushGlobalHistograms,
   MIN_TURNS_FOR_WARMING,
+  MIN_INPUT_TOKENS_FOR_WARMING,
 } from "./cache-warmer";
 import * as Sentry from "@sentry/bun";
 import { runBackground } from "./background-limiter";
@@ -109,6 +110,10 @@ export function startIdleScheduler(
       // work before shouldWarm() rejects them anyway.
       if (state.messageCount < MIN_TURNS_FOR_WARMING * 2) continue;
 
+      // Skip sessions with small context — absolute savings per hit
+      // don't justify the risk of wasted warmups.
+      if ((state.lastInputTokens ?? 0) < MIN_INPUT_TOKENS_FOR_WARMING) continue;
+
       // Ensure global histograms are loaded from SQLite for this project
       loadGlobalHistograms(state.projectPath);
 
diff --git a/packages/gateway/test/cache-warmer.test.ts b/packages/gateway/test/cache-warmer.test.ts
index bf935030..53048e14 100644
--- a/packages/gateway/test/cache-warmer.test.ts
+++ b/packages/gateway/test/cache-warmer.test.ts
@@ -19,6 +19,10 @@ import {
   MAX_TOOL_CALL_WARMING_MS,
   MIN_WARMUPS_FOR_ROI_CHECK,
   MIN_SESSION_HIT_RATE,
+  MIN_TURNS_FOR_WARMING,
+  MIN_INPUT_TOKENS_FOR_WARMING,
+  MIN_RETURN_PROBABILITY_FLOOR,
+  TOOL_CALL_MAX_CYCLES,
   HISTOGRAM_BINS,
   BREAK_FLOOR_MS,
   _resetForTest,
@@ -52,7 +56,7 @@ function makeSessionState(overrides: Partial<SessionState> = {}): SessionState {
     fingerprint: "abc123",
     lastRequestTime: Date.now() - 270_000, // 4.5 min ago (inside 5m warmup window)
     lastUserTurnTime: Date.now() - 270_000,
-    messageCount: 10,
+    messageCount: 20,
     turnsSinceCuration: 2,
     consecutiveTextOnlyTurns: 0,
     recallStore: new Map(),
@@ -60,6 +64,7 @@ function makeSessionState(overrides: Partial<SessionState> = {}): SessionState {
     lastModel: "claude-sonnet-4-20250514",
     lastProtocol: "anthropic",
     resolvedConversationTTL: "5m",
+    lastInputTokens: 100_000, // above MIN_INPUT_TOKENS_FOR_WARMING
     ...overrides,
   };
 }
@@ -549,7 +554,7 @@ describe("shouldWarm", () => {
     const now = Date.now();
     const state = makeSessionState({
       lastRequestTime: now - 270_000,
-      messageCount: 4, // 2 turns (user+assistant each) — below threshold of 3 turns (6 messages)
+      messageCount: 8, // 4 turns (user+assistant each) — below threshold of 5 turns (10 messages)
       cacheAnalytics: {
         ...makeCacheAnalytics(),
         lastRequestBody: compressBody('{"test": true}'),
@@ -1670,15 +1675,16 @@ describe("shouldWarm tool-call warming", () => {
     expect(shouldWarm(state, profile, hist, now)).toBe(false);
   });
 
-  test("works across multiple TTL windows", () => {
+  test("tool-call warming stops at TOOL_CALL_MAX_CYCLES even across TTL windows", () => {
     const now = Date.now();
     // 14.5 min — in 3rd 5m window's warmup margin (14:15-15:00)
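+    // warmupCount is already at TOOL_CALL_MAX_CYCLES, so this should be rejected. NOTE(review): 14.5 min idle also exceeds MAX_TOOL_CALL_WARMING_MS (10 min) — confirm the cycle cap, not the max-duration fallback, is what rejects it.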
     const state = makeToolCallState({
       lastRequestTime: now - 870_000,
       warmup: {
         lastWarmupAt: now - 270_000, // past cooldown
-        warmupCount: 2,
-        totalWarmups: 2,
+        warmupCount: TOOL_CALL_MAX_CYCLES,
+        totalWarmups: TOOL_CALL_MAX_CYCLES,
         warmupHits: 0,
         disabled: false,
       },
@@ -1686,7 +1692,8 @@ describe("shouldWarm tool-call warming", () => {
     const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
     const hist = createHistogram();
 
-    expect(shouldWarm(state, profile, hist, now)).toBe(true);
+    // Tool-call warming is now capped at TOOL_CALL_MAX_CYCLES
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
   });
 
   test("requires minimum turns", () => {
@@ -1844,7 +1851,7 @@ describe("shouldWarm session ROI guard", () => {
       warmup: {
         lastWarmupAt: 0,
         warmupCount: 0,
-        totalWarmups: 12,  // >= MIN_WARMUPS_FOR_ROI_CHECK (10)
+        totalWarmups: 12,  // >= MIN_WARMUPS_FOR_ROI_CHECK (5)
-        warmupHits: 1,     // 8.3% < MIN_SESSION_HIT_RATE (20%)
+        warmupHits: 1,     // 8.3% < MIN_SESSION_HIT_RATE (25%)
         disabled: false,
       },
@@ -1890,7 +1897,7 @@ describe("shouldWarm session ROI guard", () => {
       warmup: {
         lastWarmupAt: 0,
         warmupCount: 0,
-        totalWarmups: 5,   // < MIN_WARMUPS_FOR_ROI_CHECK (10)
+        totalWarmups: MIN_WARMUPS_FOR_ROI_CHECK - 1, // below threshold
         warmupHits: 0,     // 0% hit rate, but too few warmups to judge
         disabled: false,
       },
@@ -1915,7 +1922,201 @@ describe("shouldWarm session ROI guard", () => {
         lastWarmupAt: 0,
         warmupCount: 0,
         totalWarmups: 15,
-        warmupHits: 1,     // 6.7% < MIN_SESSION_HIT_RATE (20%)
+        warmupHits: 1,     // 6.7% < MIN_SESSION_HIT_RATE (25%)
+        disabled: false,
+      },
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+    for (let i = 0; i < 50; i++) recordGap(hist, 360_000);
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Cost optimization fixes (context size gate, threshold floor, tool-call cap)
+// ---------------------------------------------------------------------------
+
+describe("shouldWarm cost optimization gates", () => {
+  beforeEach(() => {
+    _resetForTest();
+  });
+
+  test("returns false when lastInputTokens < MIN_INPUT_TOKENS_FOR_WARMING", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      lastInputTokens: 30_000, // below 50K threshold
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+    for (let i = 0; i < 50; i++) recordGap(hist, 360_000);
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
+  });
+
+  test("returns true when lastInputTokens >= MIN_INPUT_TOKENS_FOR_WARMING", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      lastInputTokens: 100_000, // above 50K threshold
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    // Histogram with many breaks — high P(returns)
+    const hist = createHistogram();
+    for (let i = 0; i < 50; i++) recordGap(hist, 360_000);
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(true);
+  });
+
+  test("context size gate applies to tool-call path", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      lastStopReason: "tool_use",
+      lastInputTokens: 20_000, // below 50K threshold
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
+  });
+
+  test("context size gate does NOT apply to forceKeepWarm path", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      lastInputTokens: 10_000, // well below 50K — but force-keep overrides
+      warmup: {
+        lastWarmupAt: 0,
+        warmupCount: 0,
+        totalWarmups: 0,
+        warmupHits: 0,
+        disabled: false,
+        forceKeepWarm: true,
+      },
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"test": true}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(true);
+  });
+
+  test("tool-call warming stops after TOOL_CALL_MAX_CYCLES", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 570_000, // 9.5 min — in warmup margin of 2nd window
+      lastStopReason: "tool_use",
+      warmup: {
+        lastWarmupAt: now - 310_000, // past cooldown
+        warmupCount: TOOL_CALL_MAX_CYCLES, // already at cap
+        totalWarmups: TOOL_CALL_MAX_CYCLES,
+        warmupHits: 0,
+        disabled: false,
+      },
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
+  });
+
+  test("tool-call warming allowed when below TOOL_CALL_MAX_CYCLES", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      lastStopReason: "tool_use",
+      warmup: {
+        lastWarmupAt: 0,
+        warmupCount: TOOL_CALL_MAX_CYCLES - 1, // one below cap
+        totalWarmups: TOOL_CALL_MAX_CYCLES - 1,
+        warmupHits: TOOL_CALL_MAX_CYCLES - 1, // good hit rate
+        disabled: false,
+      },
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(true);
+  });
+
+  test("initial commitment requires P(returns) >= MIN_RETURN_PROBABILITY_FLOOR", () => {
+    const now = Date.now();
+    // Histogram with mostly short gaps and very few breaks — survival drops
+    // at 4.5m but not to zero. This creates P(returns) ~29%, above the old
+    // 8.7% threshold but below the new 30% floor.
+    const hist = createHistogram();
+    for (let i = 0; i < 98; i++) recordGap(hist, 30_000);  // 30s — active
+    for (let i = 0; i < 2; i++) recordGap(hist, 360_000);  // 6m — break
+
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      consecutiveTextOnlyTurns: 0,
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+
+    // Verify this session has P(returns) below the 30% floor
+    const survivalAtIdle = survivalFunction(hist, 270_000);
+    const breakFrac = breakFraction(hist);
+    const pFinished = pSessionFinished({
+      survivalAtIdle,
+      consecutiveTextOnlyTurns: 0,
+      breakFraction: breakFrac,
+      totalTurns: 10,
+    });
+    const pReturns = 1.0 - pFinished;
+    expect(pReturns).toBeLessThan(MIN_RETURN_PROBABILITY_FLOOR);
+    // But it would have passed the old break-even threshold
+    const oldThreshold = costThreshold(profile.cacheReadCostPerMTok, profile.cacheMissCostPerMTok);
+    expect(pReturns).toBeGreaterThan(oldThreshold);
+
+    // With the new floor, warming should be rejected
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
+  });
+
+  test("session-level ROI check kicks in at 5 warmups", () => {
+    const now = Date.now();
+    // Session with 5 warmups and 0 hits → hit rate 0% < 25%
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      warmup: {
+        lastWarmupAt: 0,
+        warmupCount: 0,
+        totalWarmups: MIN_WARMUPS_FOR_ROI_CHECK, // exactly at threshold
+        warmupHits: 0, // 0% hit rate
         disabled: false,
       },
       cacheAnalytics: {
@@ -1929,4 +2130,100 @@ describe("shouldWarm session ROI guard", () => {
 
     expect(shouldWarm(state, profile, hist, now)).toBe(false);
   });
+
+  test("session-level ROI check passes when hit rate is above threshold", () => {
+    const now = Date.now();
+    // Session with 5 warmups and 2 hits → hit rate 40% > 25%
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      warmup: {
+        lastWarmupAt: 0,
+        warmupCount: 0,
+        totalWarmups: MIN_WARMUPS_FOR_ROI_CHECK,
+        warmupHits: 2, // 40% hit rate > 25%
+        disabled: false,
+      },
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+    for (let i = 0; i < 50; i++) recordGap(hist, 360_000);
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(true);
+  });
+
+  test("returns false when lastInputTokens is undefined (first turn)", () => {
+    const now = Date.now();
+    const state = makeSessionState({
+      lastRequestTime: now - 270_000,
+      lastInputTokens: undefined, // no response yet — the gate treats undefined as 0 tokens, which is below the 50K minimum
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+    const hist = createHistogram();
+    for (let i = 0; i < 50; i++) recordGap(hist, 360_000);
+
+    expect(shouldWarm(state, profile, hist, now)).toBe(false);
+  });
+
+  test("Phase B continuation uses rising threshold, not the floor", () => {
+    const now = Date.now();
+    // Session has been idle for 9.5 min (past first 5m TTL window).
+    // In warmup margin of 2nd window: 9.5min % 5min = 4.5min > 4.25min.
+    // With 1 cycle already spent, risingThreshold(k=2) ≈ 17.4% for Sonnet 5m.
+    // We need P(returns) between 17.4% and 30% to prove Phase B does NOT
+    // use the 30% floor.
+    const state = makeSessionState({
+      lastRequestTime: now - 570_000, // 9.5 min ago
+      warmup: {
+        lastWarmupAt: now - 310_000, // past cooldown
+        warmupCount: 1,
+        totalWarmups: 1,
+        warmupHits: 0,
+        disabled: false,
+      },
+      cacheAnalytics: {
+        ...makeCacheAnalytics(),
+        lastRequestBody: compressBody('{"model":"claude-sonnet-4-20250514","max_tokens":16384,"stream":true,"messages":[{"role":"user","content":"test"}]}'),
+      },
+    });
+    const profile = buildAnthropicProfile("claude-sonnet-4-20250514", "5m");
+
+    // Histogram: 98% short gaps, 2% long breaks. At 9.5 min idle,
+    // survival is ~2% — P(returns) should land ~29%, between the rising
+    // threshold at k=2 (~17.4%) and the 30% floor.
+    const hist = createHistogram();
+    for (let i = 0; i < 98; i++) recordGap(hist, 30_000);  // 30s
+    for (let i = 0; i < 2; i++) recordGap(hist, 600_000);  // 10m
+
+    // Verify P(returns) is in the interesting range: above rising threshold
+    // at k=2 but below the 30% floor
+    const survivalAtIdle = survivalFunction(hist, 570_000);
+    const breakFrac = breakFraction(hist);
+    const pFinished = pSessionFinished({
+      survivalAtIdle,
+      consecutiveTextOnlyTurns: 0,
+      breakFraction: breakFrac,
+      totalTurns: 10,
+    });
+    const pReturns = 1.0 - pFinished;
+    const risingThresh = cumulativeCostThreshold(
+      2, // cyclesSpent(1) + 1
+      profile.cacheReadCostPerMTok,
+      profile.cacheMissCostPerMTok,
+    );
+    // P(returns) should be above the rising threshold (continuation is profitable)
+    expect(pReturns).toBeGreaterThan(risingThresh);
+    // But below the floor (Phase A would reject this)
+    expect(pReturns).toBeLessThan(MIN_RETURN_PROBABILITY_FLOOR);
+
+    // Phase B should allow warming because it uses risingThreshold, not the floor
+    expect(shouldWarm(state, profile, hist, now)).toBe(true);
+  });
 });
diff --git a/packages/gateway/test/helpers/idle-worker.ts b/packages/gateway/test/helpers/idle-worker.ts
index e19de0dc..7dc84750 100644
--- a/packages/gateway/test/helpers/idle-worker.ts
+++ b/packages/gateway/test/helpers/idle-worker.ts
@@ -88,7 +88,8 @@ mock.module("../../src/cache-warmer", () => ({
   executeWarmup: async () => ({}),
   loadGlobalHistograms: () => {},
   flushGlobalHistograms: () => {},
-  MIN_TURNS_FOR_WARMING: 3,
+  MIN_TURNS_FOR_WARMING: 5,
+  MIN_INPUT_TOKENS_FOR_WARMING: 50_000,
 }));
 
 mock.module("../../src/worker-model", () => ({