Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 249 additions & 1 deletion open-sse/services/combo.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
/**
* Shared combo (model combo) handling with fallback support
* Supports: priority, weighted, round-robin, random, least-used, cost-optimized,
* reset-aware, strict-random, auto, fill-first, p2c, lkgp, context-optimized,
* and context-relay strategies
*/

import {
Expand Down Expand Up @@ -86,6 +87,16 @@ const DEFAULT_MODEL_P95_MS = {
"deepseek-chat": 2000,
};
const MIN_HISTORY_SAMPLES = 10;
// --- Reset-aware strategy tuning ---
// Span of the short ("session") quota window: 5 hours in milliseconds.
// Used to normalise time-until-reset when scoring the window5h quota window.
const RESET_AWARE_SESSION_WINDOW_MS = 5 * 60 * 60 * 1000;
// Span of the weekly quota window: 7 days in milliseconds.
// Used the same way when scoring the window7d quota window.
const RESET_AWARE_WEEKLY_WINDOW_MS = 7 * 24 * 60 * 60 * 1000;
// Inside a single window score: share given to remaining quota versus reset
// urgency. The two weights sum to 1.
const RESET_AWARE_REMAINING_WEIGHT = 0.55;
const RESET_AWARE_RESET_WEIGHT = 0.45;
// Fallbacks used by resolveResetAwareConfig when a setting is absent or invalid.
// Same values as the resetAware* entries of DEFAULT_COMBO_CONFIG in comboConfig.ts.
const RESET_AWARE_DEFAULTS = {
// Relative weights of the session-window and weekly-window scores.
sessionWeight: 0.35,
weeklyWeight: 0.65,
// Percent values; resolveResetAwareConfig divides them by 100.
tieBandPercent: 5,
exhaustionGuardPercent: 10,
};

type ResolvedComboTarget = {
kind: "model";
Expand Down Expand Up @@ -697,6 +708,237 @@ function orderTargetsByPowerOfTwoChoices(targets: ResolvedComboTarget[], comboNa
return [targets[selectedIndex], ...targets.filter((_, index) => index !== selectedIndex)];
}

/**
 * Restricts a number to the closed interval [0, 1].
 *
 * @param value Number to clamp.
 * @returns 0 for NaN, ±Infinity and anything at or below 0; 1 for anything at
 *          or above 1; otherwise `value` itself.
 */
function clamp01(value: number): number {
  if (!Number.isFinite(value) || value <= 0) return 0;
  return value >= 1 ? 1 : value;
}

/**
 * Converts a loosely-typed value to a finite number, or `null` when it does
 * not carry one.
 *
 * Only genuine numbers, bigints and non-blank numeric strings are accepted.
 * A bare `Number(value)` would turn `null`, `""`, whitespace, `false` and `[]`
 * into 0 (and `true` into 1), which makes "no data" indistinguishable from a
 * real zero; those inputs yield `null` here so callers can apply a fallback.
 *
 * @param value Arbitrary input (config entry, quota field, ...).
 * @returns The finite numeric value, or `null` for anything else, including
 *          NaN and ±Infinity.
 */
function finiteNumberOrNull(value: unknown): number | null {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "bigint") {
    candidate = Number(value);
  } else if (typeof value === "string" && value.trim().length > 0) {
    // Blank strings are rejected above because Number("") is 0.
    candidate = Number(value);
  } else {
    return null;
  }
  return Number.isFinite(candidate) ? candidate : null;
}

/**
 * Reads a percentage setting and limits it to the range 0..100.
 *
 * @param value    Raw setting of unknown type.
 * @param fallback Returned as-is when finiteNumberOrNull yields `null` for
 *                 `value`; it is not range-checked.
 * @returns The setting limited to [0, 100], or `fallback`.
 */
function getPercentConfig(value: unknown, fallback: number): number {
  const parsed = finiteNumberOrNull(value);
  if (parsed === null) {
    return fallback;
  }
  if (parsed <= 0) return 0;
  return parsed >= 100 ? 100 : parsed;
}

/**
 * Reads a non-negative weight setting.
 *
 * @param value    Raw setting of unknown type.
 * @param fallback Returned when finiteNumberOrNull yields `null` for `value`
 *                 or when the number is negative.
 * @returns The weight (no upper bound is applied), or `fallback`.
 */
function getWeightConfig(value: unknown, fallback: number): number {
  const parsed = finiteNumberOrNull(value);
  return parsed !== null && parsed >= 0 ? parsed : fallback;
}

/**
 * Resolves the reset-aware tuning values from a raw combo config.
 *
 * Reads `resetAwareSessionWeight`, `resetAwareWeeklyWeight`,
 * `resetAwareTieBandPercent` and `resetAwareExhaustionGuardPercent`, falling
 * back to RESET_AWARE_DEFAULTS for anything missing or invalid.
 *
 * @param config Raw config record; `null`/`undefined` means "all defaults".
 * @returns `sessionWeight` and `weeklyWeight` normalised so they sum to 1,
 *          plus `tieBand` and `exhaustionGuard` as 0..1 fractions.
 */
function resolveResetAwareConfig(config: Record<string, unknown> | null | undefined) {
  const sessionWeight = getWeightConfig(
    config?.resetAwareSessionWeight,
    RESET_AWARE_DEFAULTS.sessionWeight
  );
  const weeklyWeight = getWeightConfig(
    config?.resetAwareWeeklyWeight,
    RESET_AWARE_DEFAULTS.weeklyWeight
  );

  // The weights can only be normalised when their sum is a usable divisor.
  // Both being 0, or the sum overflowing to Infinity, falls back to the
  // default split, derived from RESET_AWARE_DEFAULTS rather than a literal so
  // it cannot drift from the defaults.
  const totalWeight = sessionWeight + weeklyWeight;
  const defaultTotalWeight = RESET_AWARE_DEFAULTS.sessionWeight + RESET_AWARE_DEFAULTS.weeklyWeight;
  const normalizedSessionWeight =
    totalWeight > 0 && Number.isFinite(totalWeight)
      ? sessionWeight / totalWeight
      : RESET_AWARE_DEFAULTS.sessionWeight / defaultTotalWeight;

  return {
    sessionWeight: normalizedSessionWeight,
    weeklyWeight: 1 - normalizedSessionWeight,
    // Percent settings are converted to 0..1 fractions for the scorer.
    tieBand:
      getPercentConfig(config?.resetAwareTieBandPercent, RESET_AWARE_DEFAULTS.tieBandPercent) / 100,
    exhaustionGuard:
      getPercentConfig(
        config?.resetAwareExhaustionGuardPercent,
        RESET_AWARE_DEFAULTS.exhaustionGuardPercent
      ) / 100,
  };
}

/**
 * Tells whether a combo target is served by the Codex provider.
 *
 * @param target Resolved combo target.
 * @returns `true` when `providerId` (or, failing that, `provider`) equals
 *          "codex" ignoring case, or when `modelStr` starts with "codex/"
 *          ignoring case.
 */
function isCodexTarget(target: ResolvedComboTarget): boolean {
  const providerName = (target.providerId || target.provider || "").toLowerCase();
  if (providerName === "codex") return true;
  return target.modelStr.toLowerCase().startsWith("codex/");
}

/**
 * Extracts one quota window (`window5h` or `window7d`) from a quota payload.
 *
 * @param quota Quota payload of unknown shape.
 * @param key   Which window to read.
 * @returns `null` when `quota` or the requested window is not a record;
 *          otherwise the window's `percentUsed` (as parsed by
 *          finiteNumberOrNull) and its `resetAt` (kept only when it is a
 *          non-empty string, else `null`).
 */
function getQuotaWindow(
  quota: unknown,
  key: "window5h" | "window7d"
): { percentUsed: number | null; resetAt: string | null } | null {
  if (!isRecord(quota)) return null;

  const entry = quota[key];
  if (!isRecord(entry)) return null;

  const percentUsed = finiteNumberOrNull(entry.percentUsed);
  let resetAt: string | null = null;
  if (typeof entry.resetAt === "string" && entry.resetAt.length > 0) {
    resetAt = entry.resetAt;
  }
  return { percentUsed, resetAt };
}

/**
 * Rates how close a quota window is to resetting, on a 0..1 scale.
 *
 * @param resetAt  Date string for the reset time; may be missing.
 * @param windowMs Length of the quota window in milliseconds, used to
 *                 normalise the time left until the reset.
 * @returns 0.5 when `resetAt` is missing or cannot be parsed, 1 when the reset
 *          time is now or in the past, otherwise `1 - timeLeft / windowMs`
 *          clamped to [0, 1]. Compared against the local clock (`Date.now()`).
 */
function getResetUrgency(resetAt: string | null | undefined, windowMs: number): number {
  const UNKNOWN_URGENCY = 0.5;
  if (!resetAt) return UNKNOWN_URGENCY;

  const resetTime = new Date(resetAt).getTime();
  if (!Number.isFinite(resetTime)) return UNKNOWN_URGENCY;

  const remainingMs = resetTime - Date.now();
  return remainingMs > 0 ? clamp01(1 - remainingMs / windowMs) : 1;
}
Comment on lines +776 to +783
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The getResetUrgency function relies on Date.now(), which uses the system clock. If the server's system time is significantly out of sync with the Codex provider's clock, the urgency calculation (and thus the routing) may be inaccurate. While codexQuotaFetcher.ts attempts to normalize resetAt using Date.now() when reset_after_seconds is provided, a direct comparison with system time is still a potential point of drift.


/**
 * Scores a single quota window by combining remaining quota and reset urgency.
 *
 * @param remaining Remaining quota as a fraction; clamped to [0, 1].
 * @param resetAt   Reset time passed through to getResetUrgency.
 * @param windowMs  Window length in milliseconds passed to getResetUrgency.
 * @returns RESET_AWARE_REMAINING_WEIGHT × remaining plus
 *          RESET_AWARE_RESET_WEIGHT × urgency.
 */
function scoreQuotaWindow(
  remaining: number,
  resetAt: string | null | undefined,
  windowMs: number
): number {
  const remainingTerm = RESET_AWARE_REMAINING_WEIGHT * clamp01(remaining);
  const urgencyTerm = RESET_AWARE_RESET_WEIGHT * getResetUrgency(resetAt, windowMs);
  return remainingTerm + urgencyTerm;
}

/**
 * Scores one quota snapshot for reset-aware ordering; higher scores are
 * ordered earlier by the caller.
 *
 * - Missing or non-record quota gives a neutral score of 0.5.
 * - `limitReached === true` gives -Infinity.
 * - Otherwise the session (`window5h`) and weekly (`window7d`) window scores
 *   are blended using `config.sessionWeight` and `config.weeklyWeight`.
 *
 * @param quota  Quota payload of unknown shape. Only `limitReached`,
 *               `percentUsed`, `resetAt`, `window5h` and `window7d` are read.
 * @param config Resolved tuning values from resolveResetAwareConfig.
 * @returns An object holding the numeric `score`.
 */
function scoreResetAwareQuota(quota: unknown, config: ReturnType<typeof resolveResetAwareConfig>) {
if (!quota || !isRecord(quota)) return { score: 0.5 };
if (quota.limitReached === true) return { score: -Infinity };

// Overall usage stands in for a window that reports no percentUsed; 0.5 is
// used when the overall figure is missing as well.
// NOTE(review): percentUsed is subtracted from 1 below, so it is treated as a
// 0..1 fraction — confirm that matches the scale the quota fetcher reports.
const overallPercentUsed = clamp01(finiteNumberOrNull(quota.percentUsed) ?? 0.5);
const sessionWindow = getQuotaWindow(quota, "window5h");
const weeklyWindow = getQuotaWindow(quota, "window7d");
const sessionRemaining = clamp01(1 - (sessionWindow?.percentUsed ?? overallPercentUsed));
const weeklyRemaining = clamp01(1 - (weeklyWindow?.percentUsed ?? overallPercentUsed));
const sessionScore = scoreQuotaWindow(
sessionRemaining,
sessionWindow?.resetAt,
RESET_AWARE_SESSION_WINDOW_MS
);
// The weekly window falls back to the top-level quota.resetAt when it has no
// reset time of its own; the session window has no such fallback.
const weeklyScore = scoreQuotaWindow(
weeklyRemaining,
weeklyWindow?.resetAt ?? (typeof quota.resetAt === "string" ? quota.resetAt : null),
RESET_AWARE_WEEKLY_WINDOW_MS
);
let score = config.sessionWeight * sessionScore + config.weeklyWeight * weeklyScore;

// Exhaustion guard: once session quota drops below the guard fraction the
// score is scaled down in proportion, never by more than a factor of 0.05.
// Only sessionRemaining is checked; weeklyRemaining is not penalised here.
if (config.exhaustionGuard > 0 && sessionRemaining < config.exhaustionGuard) {
score *= Math.max(0.05, sessionRemaining / config.exhaustionGuard);
}
Comment on lines +817 to +819
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Deprioritize weekly-exhausted Codex accounts

When a Codex account is at or above the weekly quota preflight threshold but its weekly reset is soon, this score can still rank it ahead of a usable account because the exhaustion guard only applies to sessionRemaining. In that scenario getProviderCredentialsWithQuotaPreflight will reject the forced connection as quota-exhausted and the combo then falls back on every request, adding avoidable latency/noisy failures even though the reset-aware sorter already had the weekly window data needed to skip or heavily penalize it.

Useful? React with 👍 / 👎.


return { score };
}

/**
 * Returns the active provider connections for a Codex target, memoised per
 * provider in `connectionCache`.
 *
 * Best effort: if the lookup throws, an empty list is cached and returned.
 *
 * @param target          Resolved combo target.
 * @param connectionCache Cache keyed by provider; filled in by this function.
 * @returns The provider's connection records, or an empty array when the
 *          target is not a Codex target, has no provider, or the lookup failed
 *          or did not return an array.
 */
async function getCodexConnectionsForTarget(
  target: ResolvedComboTarget,
  connectionCache: Map<string, Array<Record<string, unknown>>>
) {
  if (!isCodexTarget(target)) return [];

  const provider = target.providerId || target.provider;
  if (!provider) return [];

  if (connectionCache.has(provider)) {
    return connectionCache.get(provider) || [];
  }

  let fetched: Array<Record<string, unknown>> = [];
  try {
    const result = await getProviderConnections({ provider, isActive: true });
    if (Array.isArray(result)) {
      fetched = result as Array<Record<string, unknown>>;
    }
  } catch {
    // Deliberately swallowed: the provider is treated as having no connections.
  }
  connectionCache.set(provider, fetched);
  return fetched;
}

/**
 * Picks the connection ids a target may run on, in order of precedence:
 *
 * 1. the target's own `connectionId`, when set;
 * 2. its non-empty `allowedConnectionIds`, keeping only non-blank strings;
 * 3. the string `id` of every record in `connections`.
 *
 * @param target      Resolved combo target.
 * @param connections Connection records available for the target's provider.
 * @returns The candidate connection ids; may be empty.
 */
function getTargetConnectionIds(
  target: ResolvedComboTarget,
  connections: Array<Record<string, unknown>>
): string[] {
  if (target.connectionId) return [target.connectionId];

  const allowed = target.allowedConnectionIds;
  if (Array.isArray(allowed) && allowed.length > 0) {
    // An allow-list is authoritative even if filtering leaves it empty.
    return allowed.filter(
      (candidate): candidate is string =>
        typeof candidate === "string" && candidate.trim().length > 0
    );
  }

  const ids: string[] = [];
  for (const connection of connections) {
    const id = connection.id;
    if (typeof id === "string" && id) ids.push(id);
  }
  return ids;
}

/**
 * Orders combo targets for the "reset-aware" strategy.
 *
 * Steps:
 * 1. Expand each target into one entry per connection id returned by
 *    getTargetConnectionIds; a target with no connection ids passes through
 *    unchanged.
 * 2. Fetch the quota of every expanded Codex target concurrently and score it
 *    with scoreResetAwareQuota. A failed fetch is reported through `log.warn`
 *    and scored like a missing quota; non-Codex targets are scored the same way.
 * 3. Sort by score, highest first, with the expanded position breaking ties.
 * 4. Entries whose score is within `tieBand` of the best one are rotated with
 *    a per-combo counter kept in `rrCounters` and placed ahead of the rest.
 *
 * @param targets      Targets to order.
 * @param comboName    Combo name; part of the rotation counter key.
 * @param configSource Raw config handed to resolveResetAwareConfig.
 * @param log          Logger; only the optional `warn` method is used.
 * @returns The expanded targets in execution order. An empty input array is
 *          returned as-is.
 */
async function orderTargetsByResetAwareQuota(
targets: ResolvedComboTarget[],
comboName: string,
configSource: Record<string, unknown> | null | undefined,
log: { warn?: (...args: unknown[]) => void }
) {
if (targets.length === 0) return targets;

const config = resolveResetAwareConfig(configSource);
// Per-call caches: connections by provider, and connection records by id.
const connectionCache = new Map<string, Array<Record<string, unknown>>>();
const connectionById = new Map<string, Record<string, unknown>>();
const expandedTargets: ResolvedComboTarget[] = [];

for (const target of targets) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The loop iterates through targets and performs sequential await on getCodexConnectionsForTarget. While there is a local connectionCache, the initial calls for each unique provider will be sequential. Consider using Promise.all to fetch connections for all targets concurrently to improve performance, especially for combos with multiple providers.

const connections = await getCodexConnectionsForTarget(target, connectionCache);
for (const connection of connections) {
if (typeof connection.id === "string") connectionById.set(connection.id, connection);
}

const connectionIds = getTargetConnectionIds(target, connections);
if (connectionIds.length === 0) {
expandedTargets.push(target);
continue;
}

for (const connectionId of connectionIds) {
expandedTargets.push({
...target,
connectionId,
// A target already pinned to this connection keeps its executionKey; any
// other expansion gets the connection id appended as a suffix.
executionKey:
target.connectionId === connectionId
? target.executionKey
: `${target.executionKey}@${connectionId}`,
});
}
}

const scoredTargets = await Promise.all(
expandedTargets.map(async (target, index) => {
// quota stays null unless a Codex quota fetch succeeds.
let quota: unknown = null;
if (isCodexTarget(target) && target.connectionId) {
try {
quota = await fetchCodexQuota(
target.connectionId,
connectionById.get(target.connectionId)
);
} catch (error) {
log.warn?.(
"COMBO",
`Reset-aware quota fetch failed for connection=${target.connectionId}: ${error instanceof Error ? error.message : String(error)}`
);
}
}
const { score } = scoreResetAwareQuota(quota, config);
return { target, score, index };
})
);
Comment on lines +898 to +917
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using Promise.all to fetch quotas for all expandedTargets concurrently is efficient, but it could lead to a burst of network requests if a combo contains a large number of Codex accounts. Since fetchCodexQuota has a 60s cache, this is mostly an issue on the first request or after cache expiration. If the number of accounts is expected to be very high, consider implementing a concurrency limit for these background fetches.


// Highest score first. Equal scores (including two -Infinity scores) keep
// their expanded order through the index comparison.
scoredTargets.sort((a, b) => {
if (b.score !== a.score) return b.score - a.score;
return a.index - b.index;
});

// Entries within tieBand of the best score count as tied. When the best
// score is -Infinity the subtraction yields NaN, so nothing is tied and the
// sorted order is returned unchanged.
const bestScore = scoredTargets[0]?.score ?? 0;
const tiedTargets = scoredTargets.filter((entry) => bestScore - entry.score <= config.tieBand);
let orderedTiedTargets = tiedTargets;
if (tiedTargets.length > 1) {
// Rotate the tied group by a counter that advances once per call, so a
// different tied entry leads each time.
const key = `reset-aware:${comboName}`;
const counter = rrCounters.get(key) || 0;
rrCounters.set(key, counter + 1);
const startIndex = counter % tiedTargets.length;
orderedTiedTargets = [...tiedTargets.slice(startIndex), ...tiedTargets.slice(0, startIndex)];
}

// Tied entries go first; the remainder follows in sorted order, skipping any
// entry whose executionKey already appears in the tied group.
const tiedExecutionKeys = new Set(orderedTiedTargets.map((entry) => entry.target.executionKey));
return [
...orderedTiedTargets,
...scoredTargets.filter((entry) => !tiedExecutionKeys.has(entry.target.executionKey)),
].map((entry) => entry.target);
}

function toTextContent(content) {
if (typeof content === "string") return content;
if (!Array.isArray(content)) return "";
Expand Down Expand Up @@ -1482,6 +1724,12 @@ export async function handleComboChat({
} else if (strategy === "cost-optimized") {
orderedTargets = await sortTargetsByCost(orderedTargets);
log.info("COMBO", `Cost-optimized ordering: cheapest first (${orderedTargets[0]?.modelStr})`);
} else if (strategy === "reset-aware") {
orderedTargets = await orderTargetsByResetAwareQuota(orderedTargets, combo.name, config, log);
log.info(
"COMBO",
`Reset-aware ordering: ${orderedTargets[0]?.modelStr}${orderedTargets[0]?.connectionId ? ` (${orderedTargets[0].connectionId})` : ""} first`
);
} else if (strategy === "context-optimized") {
orderedTargets = sortTargetsByContextSize(orderedTargets);
log.info("COMBO", `Context-optimized ordering: largest first (${orderedTargets[0]?.modelStr})`);
Expand Down
4 changes: 4 additions & 0 deletions open-sse/services/comboConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ const DEFAULT_COMBO_CONFIG = {
maxMessagesForSummary: 30,
maxComboDepth: 3,
trackMetrics: true,
resetAwareSessionWeight: 0.35,
resetAwareWeeklyWeight: 0.65,
resetAwareTieBandPercent: 5,
resetAwareExhaustionGuardPercent: 10,
};

const LEGACY_COMBO_RESILIENCE_KEYS = new Set([
Expand Down
18 changes: 18 additions & 0 deletions src/app/(dashboard)/dashboard/combos/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,14 @@ const STRATEGY_OPTIONS = ROUTING_STRATEGIES.map((strategy) => ({

// Display labels keyed by routing-strategy id.
// NOTE(review): presumably used when no translated label exists — the lookup
// is not visible in this chunk, confirm against the caller.
const STRATEGY_LABEL_FALLBACK = {
"context-relay": "Context Relay",
"reset-aware": "Reset-Aware RR",
};

// One-sentence descriptions keyed by routing-strategy id.
// NOTE(review): presumably used when no translated description exists — the
// lookup is not visible in this chunk, confirm against the caller.
const STRATEGY_DESC_FALLBACK = {
"context-relay":
"Priority-style routing with automatic context handoffs when account rotation happens.",
"reset-aware":
"Quota remaining and reset windows decide the order; similar scores rotate round-robin.",
};

const STRATEGY_GUIDANCE_FALLBACK = {
Expand Down Expand Up @@ -108,6 +111,11 @@ const STRATEGY_GUIDANCE_FALLBACK = {
avoid: "Avoid when pricing data is missing or outdated.",
example: "Example: Batch or background jobs where lower cost matters most.",
},
"reset-aware": {
when: "Use when multiple Codex accounts have different 5h and weekly reset windows.",
avoid: "Avoid when quota telemetry is unavailable for most accounts.",
example: "Example: Prefer a 60% weekly account resetting tomorrow over 80% that resets later.",
},
"fill-first": {
when: "Use when you want to drain one provider's quota fully before moving to the next.",
avoid: "Avoid when you need request-level load balancing across providers.",
Expand Down Expand Up @@ -230,6 +238,15 @@ const STRATEGY_RECOMMENDATIONS_FALLBACK = {
"Use for batch/background jobs where cost is the main KPI.",
],
},
"reset-aware": {
title: "Reset-aware account rotation",
description: "Balances remaining Codex quota against 5h and weekly reset timing.",
tips: [
"Use explicit Codex account steps or account-tag routing.",
"Tune session vs weekly weights when short-term exhaustion is more risky.",
"Keep the tie band small so equivalent accounts still rotate fairly.",
],
},
"fill-first": {
title: "Quota drain strategy",
description: "Exhausts one provider's quota before moving to the next in chain.",
Expand Down Expand Up @@ -439,6 +456,7 @@ function getStrategyBadgeClass(strategy) {
if (strategy === "random") return "bg-purple-500/15 text-purple-600 dark:text-purple-400";
if (strategy === "least-used") return "bg-cyan-500/15 text-cyan-600 dark:text-cyan-400";
if (strategy === "cost-optimized") return "bg-teal-500/15 text-teal-600 dark:text-teal-400";
if (strategy === "reset-aware") return "bg-lime-500/15 text-lime-700 dark:text-lime-300";
if (strategy === "fill-first") return "bg-orange-500/15 text-orange-600 dark:text-orange-400";
if (strategy === "p2c") return "bg-indigo-500/15 text-indigo-600 dark:text-indigo-400";
return "bg-blue-500/15 text-blue-600 dark:text-blue-400";
Expand Down
16 changes: 16 additions & 0 deletions src/i18n/messages/de.json
Original file line number Diff line number Diff line change
Expand Up @@ -1396,6 +1396,8 @@
"randomDesc": "Einheitliche Zufallsauswahl, dann Rückgriff auf verbleibende Modelle",
"leastUsedDesc": "Wählt das Modell mit den wenigsten Anfragen aus und gleicht die Last über die Zeit aus",
"costOptimizedDesc": "Leitet basierend auf dem Preis zuerst zum günstigsten Modell weiter",
"resetAware": "Reset-Aware RR",
"resetAwareDesc": "Gewichtet Restquote gegen 5h- und Wochen-Resets und rotiert ähnliche Scores per Round Robin",
"strictRandom": "Strict Random",
"strictRandomDesc": "Shuffle deck — uses each model once before reshuffling",
"models": "Modelle",
Expand Down Expand Up @@ -1447,6 +1449,11 @@
"avoid": "Preisdaten fehlen oder sind veraltet.",
"example": "Hintergrund- oder Batch-Jobs, bei denen geringere Kosten bevorzugt werden."
},
"reset-aware": {
"when": "Du routest über mehrere Codex-Konten mit unterschiedlichen 5h- und Wochen-Reset-Fenstern.",
"avoid": "Für die meisten Konten fehlen Quota-Telemetriedaten.",
"example": "Bevorzuge ein Konto mit 60 % Wochen-Restquote und Reset morgen vor 80 % mit späterem Reset."
},
"strict-random": {
"when": "Use when you want perfectly even spread — each model used once before repeating.",
"avoid": "Avoid when models have different quality or latency and order matters.",
Expand Down Expand Up @@ -1555,6 +1562,13 @@
"tip2": "Behalte einen Qualitäts-Fallback für schwierige Prompts.",
"tip3": "Ideal für Batch/Hintergrundjobs, bei denen Kosten das Haupt-KPI sind."
},
"reset-aware": {
"title": "Reset-bewusste Kontorotation",
"description": "Gewichtet verbleibende Codex-Quote gegen 5h- und Wochen-Reset-Zeitpunkte.",
"tip1": "Nutze explizite Codex-Kontoschritte oder kontobasiertes Tag-Routing.",
"tip2": "Passe 5h- und Wochengewichtung an, wenn kurzfristige Erschöpfung riskant ist.",
"tip3": "Halte das Tie-Band klein, damit gleichwertige Konten fair rotieren."
},
"strict-random": {
"title": "Shuffle deck distribution",
"description": "Each model is used exactly once per cycle before reshuffling.",
Expand Down Expand Up @@ -2906,6 +2920,8 @@
"leastUsedDesc": "Wählen Sie das zuletzt verwendete Konto aus",
"costOpt": "Kosten Opt",
"costOptDesc": "Bevorzugen Sie das günstigste verfügbare Konto",
"resetAware": "Reset-Aware RR",
"resetAwareDesc": "Bevorzugt Konten mit gesunder Restquote und näherem Reset",
"strictRandom": "Strict Random",
"strictRandomDesc": "Shuffle deck — uses each account once before reshuffling",
"stickyLimit": "Sticky-Limit",
Expand Down
Loading
Loading