feat(retry): add validateResponse to detect empty model responses and trigger fallback

jiuxingwang · jiuxingwang · commit d497fe534142 · 2026-06-19T07:19:44.000+08:00
Some model providers return HTTP 200 with zero output tokens on quota
exhaustion (e.g. shared-quota plans) instead of a proper error code.
promptSyncWithModelSuggestionRetry only triggered fallback on thrown
exceptions, so these empty responses were treated as success and the
fallback chain was never activated. Dreamer/historian/sidekick tasks
then failed later with a post-hoc 'no assistant output' error, after
the fallback opportunity was already lost.

This adds an optional validateResponse callback to PromptRetryOptions
that is invoked after each successful prompt attempt (both primary and
every fallback). If the validator throws, the error is treated as
retryable: the next fallback model is tried, or the error propagates
to the caller if no fallbacks remain.

The dreamer runner now wires validateResponse at its two direct
promptSyncWithModelSuggestionRetry call sites (task runner + smart
notes evaluator). Each validator re-fetches the session messages and
throws when no assistant text is found, which triggers the fallback.

Fully backward-compatible: callers that don't provide validateResponse
behave exactly as before.
diff --git a/packages/plugin/src/features/magic-context/dreamer/runner.ts b/packages/plugin/src/features/magic-context/dreamer/runner.ts
@@ -386,6 +386,30 @@ export async function runDream(args: {
                         signal: taskAbortController.signal,
                         fallbackModels: args.fallbackModels,
                         callContext: `dreamer:${taskName}`,
+                        validateResponse: async (validateClient, validateSessionId) => {
+                            // Detect empty responses (0 output tokens) that some
+                            // providers return on quota exhaustion with HTTP 200
+                            // instead of a proper error code. Without this, the
+                            // fallback chain is never activated for such responses.
+                            const messagesResponse = await validateClient.session.messages({
+                                path: { id: validateSessionId },
+                                query: {
+                                    directory: args.sessionDirectory ?? args.projectIdentity,
+                                    limit: 50,
+                                },
+                            });
+                            const messages = shared.normalizeSDKResponse(
+                                messagesResponse,
+                                [] as unknown[],
+                                { preferResponseOnMissingData: true },
+                            );
+                            const output = extractLatestAssistantText(messages);
+                            if (!output) {
+                                throw new Error(
+                                    `[dreamer:${taskName}] model returned empty response (0 tokens) — possible quota exhaustion`,
+                                );
+                            }
+                        },
                     },
                 );
                 if (lostLease) {
@@ -859,6 +883,26 @@ Only include notes whose conditions you could definitively evaluate against exte
                 signal: abortController.signal,
                 fallbackModels: args.fallbackModels,
                 callContext: "dreamer:smart-notes",
+                validateResponse: async (validateClient, validateSessionId) => {
+                    const messagesResponse = await validateClient.session.messages({
+                        path: { id: validateSessionId },
+                        query: {
+                            directory: args.sessionDirectory ?? args.projectIdentity,
+                            limit: 50,
+                        },
+                    });
+                    const messages = shared.normalizeSDKResponse(
+                        messagesResponse,
+                        [] as unknown[],
+                        { preferResponseOnMissingData: true },
+                    );
+                    const output = extractLatestAssistantText(messages);
+                    if (!output) {
+                        throw new Error(
+                            "[dreamer:smart-notes] model returned empty response (0 tokens) — possible quota exhaustion",
+                        );
+                    }
+                },
             },
         );
 
diff --git a/packages/plugin/src/shared/model-suggestion-retry.test.ts b/packages/plugin/src/shared/model-suggestion-retry.test.ts
@@ -308,4 +308,75 @@ describe("promptSyncWithModelSuggestionRetry", () => {
         ).rejects.toBe(originalError);
         expect(prompt).toHaveBeenCalledTimes(1);
     });
+
+    // --- validateResponse: empty-response detection ---
+
+    test("validateResponse: primary returns empty → fallback[0] succeeds", async () => {
+        const prompt = mock(async () => ({})); // both succeed (HTTP 200)
+        const client = createClient(prompt);
+        const validate = mock(async () => {
+            // primary attempt (call 1): empty → throw
+            if (validate.mock.calls.length === 1) {
+                throw new Error("empty response (0 tokens)");
+            }
+            // fallback attempt (call 2): non-empty → ok
+        });
+
+        await promptSyncWithModelSuggestionRetry(client, createArgs(), {
+            fallbackModels: ["anthropic/claude-sonnet-4-6"],
+            validateResponse: validate,
+        });
+
+        expect(prompt).toHaveBeenCalledTimes(2);
+        expect(validate).toHaveBeenCalledTimes(2);
+        expect((prompt.mock.calls[1]?.[0] as PromptCall).body.model).toEqual({
+            providerID: "anthropic",
+            modelID: "claude-sonnet-4-6",
+        });
+    });
+
+    test("validateResponse: all attempts return empty → throws last error", async () => {
+        const prompt = mock(async () => ({}));
+        const client = createClient(prompt);
+        const emptyError = new Error("empty response (0 tokens)");
+        const validate = mock(async () => {
+            throw emptyError;
+        });
+
+        await expect(
+            promptSyncWithModelSuggestionRetry(client, createArgs(), {
+                fallbackModels: ["anthropic/claude-sonnet-4-6", "google/gemini-3-flash"],
+                validateResponse: validate,
+            }),
+        ).rejects.toBe(emptyError);
+
+        expect(prompt).toHaveBeenCalledTimes(3); // primary + 2 fallbacks
+        expect(validate).toHaveBeenCalledTimes(3);
+    });
+
+    test("validateResponse: primary non-empty → no fallback tried", async () => {
+        const prompt = mock(async () => ({}));
+        const client = createClient(prompt);
+        const validate = mock(async () => {}); // always passes
+
+        await promptSyncWithModelSuggestionRetry(client, createArgs(), {
+            fallbackModels: ["anthropic/claude-sonnet-4-6"],
+            validateResponse: validate,
+        });
+
+        expect(prompt).toHaveBeenCalledTimes(1);
+        expect(validate).toHaveBeenCalledTimes(1);
+    });
+
+    test("validateResponse absent → backward compatible", async () => {
+        const prompt = mock(async () => ({}));
+        const client = createClient(prompt);
+
+        await promptSyncWithModelSuggestionRetry(client, createArgs(), {
+            fallbackModels: ["anthropic/claude-sonnet-4-6"],
+            // no validateResponse — legacy behavior
+        });
+
+        expect(prompt).toHaveBeenCalledTimes(1);
+    });
 });
diff --git a/packages/plugin/src/shared/model-suggestion-retry.ts b/packages/plugin/src/shared/model-suggestion-retry.ts
@@ -45,6 +45,24 @@ export interface PromptRetryOptions {
      * "subagent" if not provided.
      */
     callContext?: string;
+    /**
+     * Optional validator invoked after each successful prompt attempt (both
+     * the primary and every fallback). If it throws, the error is treated as
+     * retryable — the next fallback model is tried (or the error propagates
+     * if no fallbacks remain).
+     *
+     * Use this to detect "empty response" conditions where the model API
+     * returns HTTP 200 with zero output tokens instead of a proper error
+     * (e.g. shared-quota providers that return empty bodies on quota
+     * exhaustion). Without this validator, such responses are indistinguishable
+     * from success and the fallback chain is never activated — the task
+     * silently fails and only surfaces a post-hoc "no assistant output" error
+     * in the caller's catch block, after the fallback opportunity is lost.
+     *
+     * The validator receives the client and the session ID so it can fetch
+     * messages and inspect the model's output.
+     */
+    validateResponse?: (client: Client, sessionId: string) => Promise<void>;
 }
 
 export interface ModelSuggestionInfo {
@@ -310,6 +328,9 @@ export async function promptSyncWithModelSuggestionRetry(
             callContext,
             explicitPrimaryLabel,
         );
+        if (options.validateResponse) {
+            await options.validateResponse(client, args.path.id);
+        }
         return;
     } catch (error) {
         lastError = error;
@@ -343,6 +364,9 @@ export async function promptSyncWithModelSuggestionRetry(
 
         try {
             await attemptOnce(client, attemptArgs, timeoutMs, options.signal, callContext, label);
+            if (options.validateResponse) {
+                await options.validateResponse(client, args.path.id);
+            }
             log(
                 `[${callContext}] fallback succeeded with ${label} (attempt ${i + 2}/${fallbacks.length + 1})`,
             );