Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions src/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ async function persistAccountPool(
});
}

function retryAfterMsFromResponse(response: Response): number {
function retryAfterMsFromResponse(response: Response, defaultRetryMs: number = 60_000): number {
const retryAfterMsHeader = response.headers.get("retry-after-ms");
if (retryAfterMsHeader) {
const parsed = Number.parseInt(retryAfterMsHeader, 10);
Expand All @@ -369,7 +369,7 @@ function retryAfterMsFromResponse(response: Response): number {
}
}

return 60_000;
return defaultRetryMs;
}

function parseDurationToMs(duration: string): number | null {
Expand Down Expand Up @@ -602,12 +602,14 @@ function recordAndGetCapacityBackoff(family: ModelFamily, model?: string | null)
* @param accountIndex - The account index
* @param quotaKey - The quota key (e.g., "gemini-cli", "gemini-antigravity", "claude")
* @param serverRetryAfterMs - Server-provided retry delay (if any)
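* @param maxBackoffMs - Cap, in milliseconds, on the exponential backoff term (default 60000). The returned delay is never lower than the base delay, so a base delay above this cap is still used as-is.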
* @returns { attempt, delayMs, isDuplicate } - isDuplicate=true if within dedup window
*/
function getRateLimitBackoff(
accountIndex: number,
quotaKey: string,
serverRetryAfterMs: number | null
serverRetryAfterMs: number | null,
maxBackoffMs: number = 60_000
): { attempt: number; delayMs: number; isDuplicate: boolean } {
const now = Date.now();
const stateKey = `${accountIndex}:${quotaKey}`;
Expand All @@ -617,7 +619,7 @@ function getRateLimitBackoff(
if (previous && (now - previous.lastAt < RATE_LIMIT_DEDUP_WINDOW_MS)) {
// Same rate limit event from concurrent request - don't increment
const baseDelay = serverRetryAfterMs ?? 1000;
const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), 60_000);
const backoffDelay = Math.min(baseDelay * Math.pow(2, previous.consecutive429 - 1), maxBackoffMs);
return {
attempt: previous.consecutive429,
delayMs: Math.max(baseDelay, backoffDelay),
Expand All @@ -637,7 +639,7 @@ function getRateLimitBackoff(
});

const baseDelay = serverRetryAfterMs ?? 1000;
const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), 60_000);
const backoffDelay = Math.min(baseDelay * Math.pow(2, attempt - 1), maxBackoffMs);
return { attempt, delayMs: Math.max(baseDelay, backoffDelay), isDuplicate: false };
}

Expand Down Expand Up @@ -1325,11 +1327,13 @@ export const createAntigravityPlugin = (providerId: string) => async (
tokenConsumed = false;
}

const headerRetryMs = retryAfterMsFromResponse(response);
const defaultRetryMs = (config.default_retry_after_seconds ?? 60) * 1000;
const maxBackoffMs = (config.max_backoff_seconds ?? 60) * 1000;
const headerRetryMs = retryAfterMsFromResponse(response, defaultRetryMs);
const bodyInfo = await extractRetryInfoFromBody(response);
const serverRetryMs = bodyInfo.retryDelayMs ?? headerRetryMs;
const quotaKey = headerStyleToQuotaKey(headerStyle, family);
const { attempt, delayMs, isDuplicate } = getRateLimitBackoff(account.index, quotaKey, serverRetryMs);
const { attempt, delayMs, isDuplicate } = getRateLimitBackoff(account.index, quotaKey, serverRetryMs, maxBackoffMs);

const rateLimitReason = parseRateLimitReason(bodyInfo.reason, bodyInfo.message, bodyInfo.quotaResetTime);
const isServiceCapacityExhausted = rateLimitReason === "SERVICE_CAPACITY_EXHAUSTED";
Expand Down
26 changes: 22 additions & 4 deletions src/plugin/config/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,28 @@ export const AntigravityConfigSchema = z.object({
pid_offset_enabled: z.boolean().default(false),

/**
* Switch to another account immediately on first rate limit (after 1s delay).
* When disabled, retries same account first, then switches on second rate limit.
* Switch to another account immediately on first rate limit (after 1s delay).
* When disabled, retries same account first, then switches on second rate limit.
*
* @default true
*/
switch_on_first_rate_limit: z.boolean().default(true),

/**
* Default retry delay in seconds when API doesn't return a retry-after header.
* Lower values allow faster retries but may trigger more 429 errors.
*
* @default 60
*/
default_retry_after_seconds: z.number().min(1).max(300).default(60),

/**
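* Maximum backoff delay in seconds for exponential retry.
* This caps how far the exponential backoff can grow. It does not shorten the
* base retry delay: if the server-provided delay (or default_retry_after_seconds,
* when the server gives none) exceeds this value, that longer delay is still used.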
*
* @default true
* @default 60
*/
switch_on_first_rate_limit: z.boolean().default(true),
max_backoff_seconds: z.number().min(5).max(300).default(60),

// =========================================================================
// Health Score (used by hybrid and priority-queue strategies)
Expand Down Expand Up @@ -345,6 +361,8 @@ export const DEFAULT_CONFIG: AntigravityConfig = {
account_selection_strategy: 'hybrid',
pid_offset_enabled: false,
switch_on_first_rate_limit: true,
default_retry_after_seconds: 60,
max_backoff_seconds: 60,
auto_update: true,
signature_cache: {
enabled: true,
Expand Down