Skip to content

Commit d497fe5

Browse files
author
jiuxingwang
committed
feat(retry): add validateResponse to detect empty model responses and trigger fallback
Some model providers return HTTP 200 with zero output tokens on quota exhaustion (e.g. shared-quota plans) instead of a proper error code. promptSyncWithModelSuggestionRetry only triggered fallback on thrown exceptions, so these empty responses were treated as success and the fallback chain was never activated — dreamer/historian/sidekick tasks silently failed with post-hoc 'no assistant output' errors after the fallback opportunity was already lost. This adds an optional validateResponse callback to PromptRetryOptions that is invoked after each successful prompt attempt (both primary and every fallback). If the validator throws, the error is treated as retryable and the next fallback model is tried. The dreamer runner now wires validateResponse at its two direct promptSyncWithModelSuggestionRetry call sites (task runner + smart notes evaluator) to fetch messages and reject empty responses. Fully backward-compatible: callers that don't provide validateResponse behave exactly as before.
1 parent e82b478 commit d497fe5

3 files changed

Lines changed: 139 additions & 0 deletions

File tree

packages/plugin/src/features/magic-context/dreamer/runner.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,30 @@ export async function runDream(args: {
386386
signal: taskAbortController.signal,
387387
fallbackModels: args.fallbackModels,
388388
callContext: `dreamer:${taskName}`,
389+
validateResponse: async (validateClient, validateSessionId) => {
390+
// Detect empty responses (0 output tokens) that some
391+
// providers return on quota exhaustion with HTTP 200
392+
// instead of a proper error code. Without this, the
393+
// fallback chain is never activated for such responses.
394+
const messagesResponse = await validateClient.session.messages({
395+
path: { id: validateSessionId },
396+
query: {
397+
directory: args.sessionDirectory ?? args.projectIdentity,
398+
limit: 50,
399+
},
400+
});
401+
const messages = shared.normalizeSDKResponse(
402+
messagesResponse,
403+
[] as unknown[],
404+
{ preferResponseOnMissingData: true },
405+
);
406+
const output = extractLatestAssistantText(messages);
407+
if (!output) {
408+
throw new Error(
409+
`[dreamer:${taskName}] model returned empty response (0 tokens) — possible quota exhaustion`,
410+
);
411+
}
412+
},
389413
},
390414
);
391415
if (lostLease) {
@@ -859,6 +883,26 @@ Only include notes whose conditions you could definitively evaluate against exte
859883
signal: abortController.signal,
860884
fallbackModels: args.fallbackModels,
861885
callContext: "dreamer:smart-notes",
886+
validateResponse: async (validateClient, validateSessionId) => {
887+
const messagesResponse = await validateClient.session.messages({
888+
path: { id: validateSessionId },
889+
query: {
890+
directory: args.sessionDirectory ?? args.projectIdentity,
891+
limit: 50,
892+
},
893+
});
894+
const messages = shared.normalizeSDKResponse(
895+
messagesResponse,
896+
[] as unknown[],
897+
{ preferResponseOnMissingData: true },
898+
);
899+
const output = extractLatestAssistantText(messages);
900+
if (!output) {
901+
throw new Error(
902+
"[dreamer:smart-notes] model returned empty response (0 tokens) — possible quota exhaustion",
903+
);
904+
}
905+
},
862906
},
863907
);
864908

packages/plugin/src/shared/model-suggestion-retry.test.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,4 +308,75 @@ describe("promptSyncWithModelSuggestionRetry", () => {
308308
).rejects.toBe(originalError);
309309
expect(prompt).toHaveBeenCalledTimes(1);
310310
});
311+
312+
// --- validateResponse: empty-response detection ---
313+
314+
test("validateResponse: primary returns empty → fallback[0] succeeds", async () => {
315+
const prompt = mock(async () => ({})); // both succeed (HTTP 200)
316+
const client = createClient(prompt);
317+
const validate = mock(async () => {
318+
// primary attempt (call 1): empty → throw
319+
if (validate.mock.calls.length === 1) {
320+
throw new Error("empty response (0 tokens)");
321+
}
322+
// fallback attempt (call 2): non-empty → ok
323+
});
324+
325+
await promptSyncWithModelSuggestionRetry(client, createArgs(), {
326+
fallbackModels: ["anthropic/claude-sonnet-4-6"],
327+
validateResponse: validate,
328+
});
329+
330+
expect(prompt).toHaveBeenCalledTimes(2);
331+
expect(validate).toHaveBeenCalledTimes(2);
332+
expect((prompt.mock.calls[1]?.[0] as PromptCall).body.model).toEqual({
333+
providerID: "anthropic",
334+
modelID: "claude-sonnet-4-6",
335+
});
336+
});
337+
338+
test("validateResponse: all attempts return empty → throws last error", async () => {
339+
const prompt = mock(async () => ({}));
340+
const client = createClient(prompt);
341+
const emptyError = new Error("empty response (0 tokens)");
342+
const validate = mock(async () => {
343+
throw emptyError;
344+
});
345+
346+
await expect(
347+
promptSyncWithModelSuggestionRetry(client, createArgs(), {
348+
fallbackModels: ["anthropic/claude-sonnet-4-6", "google/gemini-3-flash"],
349+
validateResponse: validate,
350+
}),
351+
).rejects.toBe(emptyError);
352+
353+
expect(prompt).toHaveBeenCalledTimes(3); // primary + 2 fallbacks
354+
expect(validate).toHaveBeenCalledTimes(3);
355+
});
356+
357+
test("validateResponse: primary non-empty → no fallback tried", async () => {
358+
const prompt = mock(async () => ({}));
359+
const client = createClient(prompt);
360+
const validate = mock(async () => {}); // always passes
361+
362+
await promptSyncWithModelSuggestionRetry(client, createArgs(), {
363+
fallbackModels: ["anthropic/claude-sonnet-4-6"],
364+
validateResponse: validate,
365+
});
366+
367+
expect(prompt).toHaveBeenCalledTimes(1);
368+
expect(validate).toHaveBeenCalledTimes(1);
369+
});
370+
371+
test("validateResponse absent → backward compatible", async () => {
372+
const prompt = mock(async () => ({}));
373+
const client = createClient(prompt);
374+
375+
await promptSyncWithModelSuggestionRetry(client, createArgs(), {
376+
fallbackModels: ["anthropic/claude-sonnet-4-6"],
377+
// no validateResponse — legacy behavior
378+
});
379+
380+
expect(prompt).toHaveBeenCalledTimes(1);
381+
});
311382
});

packages/plugin/src/shared/model-suggestion-retry.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,24 @@ export interface PromptRetryOptions {
4545
* "subagent" if not provided.
4646
*/
4747
callContext?: string;
48+
/**
49+
* Optional validator invoked after each successful prompt attempt (both
50+
* the primary and every fallback). If it throws, the error is treated as
51+
* retryable — the next fallback model is tried (or the error propagates
52+
* if no fallbacks remain).
53+
*
54+
* Use this to detect "empty response" conditions where the model API
55+
* returns HTTP 200 with zero output tokens instead of a proper error
56+
* (e.g. shared-quota providers that return empty bodies on quota
57+
* exhaustion). Without this validator, such responses are indistinguishable
58+
* from success and the fallback chain is never activated — the task
59+
* silently fails and only surfaces a post-hoc "no assistant output" error
60+
* in the caller's catch block, after the fallback opportunity is lost.
61+
*
62+
* The validator receives the client and the session ID so it can fetch
63+
* messages and inspect the model's output.
64+
*/
65+
validateResponse?: (client: Client, sessionId: string) => Promise<void>;
4866
}
4967

5068
export interface ModelSuggestionInfo {
@@ -310,6 +328,9 @@ export async function promptSyncWithModelSuggestionRetry(
310328
callContext,
311329
explicitPrimaryLabel,
312330
);
331+
if (options.validateResponse) {
332+
await options.validateResponse(client, args.path.id);
333+
}
313334
return;
314335
} catch (error) {
315336
lastError = error;
@@ -343,6 +364,9 @@ export async function promptSyncWithModelSuggestionRetry(
343364

344365
try {
345366
await attemptOnce(client, attemptArgs, timeoutMs, options.signal, callContext, label);
367+
if (options.validateResponse) {
368+
await options.validateResponse(client, args.path.id);
369+
}
346370
log(
347371
`[${callContext}] fallback succeeded with ${label} (attempt ${i + 2}/${fallbacks.length + 1})`,
348372
);

0 commit comments

Comments
 (0)