Merged
4 changes: 2 additions & 2 deletions src/cli/ccproxy.ts
@@ -5,7 +5,7 @@ import { randomUUID } from 'node:crypto';
import fs from 'node:fs/promises';
import fastify, { FastifyRequest, FastifyReply } from 'fastify';

-import { Claude3_5_Haiku_Vertex, Claude4_1_Opus_Vertex, Claude4_5_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
+import { Claude4_1_Opus_Vertex, Claude4_5_Haiku_Vertex, Claude4_5_Sonnet_Vertex } from '#llm/services/anthropic-vertex';
import type { AssistantContentExt, LlmMessage, TextPartExt } from '#shared/llm/llm.model';

const PROXY_PORT = Number(process.env.PROXY_PORT ?? 8080);
@@ -15,7 +15,7 @@ const LOG_FILE = process.env.LLM_PROXY_LOG ?? 'llm-proxy.log';
* Anthropic ↔ internal model name mapping
*/
function pickLLM(modelName: string) {
-if (modelName.includes('haiku')) return Claude3_5_Haiku_Vertex();
+if (modelName.includes('haiku')) return Claude4_5_Haiku_Vertex();
if (modelName.includes('sonnet')) return Claude4_5_Sonnet_Vertex();
if (modelName.includes('opus')) return Claude4_1_Opus_Vertex();
return undefined;
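A small illustrative check, not part of this diff, assuming pickLLM were exported (it is module-private in ccproxy.ts, and the '#cli/ccproxy' path alias is hypothetical): any model name containing 'haiku' now resolves to the Claude 4.5 Haiku Vertex client, while unrecognised names resolve to undefined so the caller can reject them.

import { pickLLM } from '#cli/ccproxy'; // hypothetical export for illustration only

const haiku = pickLLM('claude-haiku-4-5'); // matches the 'haiku' substring, returns Claude4_5_Haiku_Vertex()
const unknown = pickLLM('some-other-model'); // no substring match, returns undefined
console.log(haiku?.getId()); // exact id string is not shown in this diff
console.log(unknown === undefined); // true; the proxy can reject unmapped model names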
2 changes: 1 addition & 1 deletion src/llm/multi-agent/fastEasy.ts
@@ -21,7 +21,7 @@ export class FastEasyLLM extends BaseLLM {

constructor() {
super({
-displayName: 'Fast Easy (Cerebras GPT OSS 120B - Groq Llama Scout - Gemini 2.5 Flash)',
+displayName: 'Fast Easy (Cerebras GPT OSS 120B - Groq Llama Scout - Gemini 2.5 Flash lite - GPT5 nano)',
service: 'multi',
modelId: 'fast-easy',
maxInputTokens: 0,
22 changes: 12 additions & 10 deletions src/llm/multi-agent/fastMedium.ts
@@ -1,4 +1,7 @@
-import { cerebrasQwen3_235b_Thinking } from '#llm/services/cerebras';
+import { anthropicClaude4_5_Haiku } from '#llm/services/anthropic';
+import { Claude4_5_Haiku_Vertex } from '#llm/services/anthropic-vertex';
+import { cerebrasQwen3_235b_Thinking, cerebrasQwen3_Coder } from '#llm/services/cerebras';
+import { groqKimiK2, groqQwen3_32b } from '#llm/services/groq';
import { openaiGPT5mini } from '#llm/services/openai';
import { vertexGemini_2_5_Flash } from '#llm/services/vertexai';
import { countTokens } from '#llm/tokens';
@@ -12,13 +15,16 @@ import { BaseLLM } from '../base-llm';
*/
export class FastMediumLLM extends BaseLLM {
private readonly providers: LLM[];
-private readonly cerebras: LLM;
-private readonly openai: LLM;
-private readonly gemini: LLM;
+private readonly cerebras = cerebrasQwen3_Coder();
+private readonly groq = groqQwen3_32b();
+private readonly openai = openaiGPT5mini();
+private readonly gemini = vertexGemini_2_5_Flash({ thinking: 'high' });
+private readonly haiku = anthropicClaude4_5_Haiku();
+private readonly vertexHaiku = Claude4_5_Haiku_Vertex();

constructor() {
super({
-displayName: 'Fast Medium (Qwen3 235b (Cerebras) - GPT-5 Mini - Gemini 2.5 Flash)',
+displayName: 'Fast Medium (Cerebras/Groq Qwen3, Gemini 2.5 Flash, GPT-5 Mini)',
service: 'multi',
modelId: 'fast-medium',
maxInputTokens: 0,
@@ -28,11 +34,7 @@ totalCost: 0,
totalCost: 0,
}),
});
-this.providers = [cerebrasQwen3_235b_Thinking(), openaiGPT5mini(), vertexGemini_2_5_Flash({ thinking: 'high' })];
-this.cerebras = this.providers[0]!;
-this.openai = this.providers[1]!;
-this.gemini = this.providers[2]!;
-
+this.providers = [this.vertexHaiku, this.haiku, this.cerebras, this.groq, this.gemini, this.openai];
this.maxInputTokens = Math.max(...this.providers.map((p) => p.getMaxInputTokens()));
}

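A minimal sketch, not part of this diff, of how the reordered provider list could be used to select the first configured provider at request time; the actual routing logic inside FastMediumLLM sits outside these hunks and may differ.

import type { LLM } from '#shared/llm/llm.model';

// Walk the providers in priority order (Vertex Haiku first, OpenAI GPT-5 Mini last)
// and return the first one whose credentials are configured.
function firstConfiguredProvider(providers: LLM[]): LLM {
  const llm = providers.find((p) => p.isConfigured());
  if (!llm) throw new Error('No configured provider available for fast-medium');
  return llm;
}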
244 changes: 244 additions & 0 deletions src/llm/multi-agent/openaiFlex.test.ts
@@ -0,0 +1,244 @@
import type { TextStreamPart } from 'ai';
import { expect } from 'chai';
import type { GenerateTextOptions, GenerationStats, LLM, LlmMessage } from '#shared/llm/llm.model';
import { type FlexMetricsSnapshot, OPENAI_FLEX_SERVICE, OpenAIFlex } from './openaiFlex';

type GenerateHandler = (messages: LlmMessage[], opts?: GenerateTextOptions) => Promise<string>;
type StreamHandler = (messages: LlmMessage[], onChunk: (chunk: TextStreamPart<any>) => void, opts?: GenerateTextOptions) => Promise<GenerationStats>;

const DEFAULT_STATS: GenerationStats = {
llmId: 'test:model',
cost: 0,
inputTokens: 0,
outputTokens: 0,
totalTime: 0,
timeToFirstToken: 0,
requestTime: 0,
finishReason: 'stop',
};

class TestLLM implements LLM {
constructor(
private readonly name: string,
private readonly model: string,
private readonly generateHandler: GenerateHandler,
private readonly streamHandler: StreamHandler,
private readonly configured = true,
) {}

async generateText(
userOrSystemOrMessages: string | LlmMessage[] | ReadonlyArray<LlmMessage>,
userOrOpts?: string | GenerateTextOptions,
opts?: GenerateTextOptions,
): Promise<string> {
const messages = this.toMessages(userOrSystemOrMessages, userOrOpts, opts);
return this.generateHandler(messages, this.toOptions(userOrSystemOrMessages, userOrOpts, opts));
}

async generateTextWithJson(): Promise<any> {
throw new Error('Not implemented in TestLLM');
}

async generateJson(): Promise<any> {
throw new Error('Not implemented in TestLLM');
}

async generateTextWithResult(): Promise<string> {
throw new Error('Not implemented in TestLLM');
}

async generateMessage(): Promise<LlmMessage> {
throw new Error('Not implemented in TestLLM');
}

streamText(
messages: LlmMessage[] | ReadonlyArray<LlmMessage>,
onChunk: (chunk: TextStreamPart<any>) => void,
opts?: GenerateTextOptions,
): Promise<GenerationStats> {
return this.streamHandler(messages as LlmMessage[], onChunk, opts);
}

getService(): string {
return OPENAI_FLEX_SERVICE;
}

getModel(): string {
return this.model;
}

getDisplayName(): string {
return this.name;
}

getId(): string {
return `${this.getService()}:${this.model}`;
}

getMaxInputTokens(): number {
return 100_000;
}

getMaxOutputTokens(): number {
return 100_000;
}

countTokens(): Promise<number> {
return Promise.resolve(0);
}

isConfigured(): boolean {
return this.configured;
}

getOldModels(): string[] {
return [];
}

private toMessages(
userOrSystemOrMessages: string | LlmMessage[] | ReadonlyArray<LlmMessage>,
userOrOpts?: string | GenerateTextOptions,
opts?: GenerateTextOptions,
): LlmMessage[] {
if (Array.isArray(userOrSystemOrMessages)) return [...userOrSystemOrMessages];
if (typeof userOrOpts === 'string') {
return [
{ role: 'system', content: userOrSystemOrMessages as string },
{ role: 'user', content: userOrOpts },
];
}
return [{ role: 'user', content: userOrSystemOrMessages as string }];
}

private toOptions(
userOrSystemOrMessages: string | LlmMessage[] | ReadonlyArray<LlmMessage>,
userOrOpts?: string | GenerateTextOptions,
opts?: GenerateTextOptions,
): GenerateTextOptions | undefined {
if (Array.isArray(userOrSystemOrMessages)) return userOrOpts as GenerateTextOptions | undefined;
if (typeof userOrOpts === 'string') return opts;
return userOrOpts as GenerateTextOptions | undefined;
}
}

describe('OpenAIFlex', () => {
const messages: LlmMessage[] = [{ role: 'user', content: 'hello' }];

it('uses flex response when first chunk arrives before timeout', async () => {
let streamed = '';
const flexLLM = new TestLLM(
'flex',
'flex-model',
async () => 'unused',
async (_msgs, onChunk) => {
onChunk({ type: 'text-delta', id: '1', text: 'flex-response' });
streamed += 'flex-response';
return DEFAULT_STATS;
},
);
const standardLLM = new TestLLM(
'standard',
'std-model',
async () => 'standard-response',
async (_msgs, _onChunk) => DEFAULT_STATS,
);

const flex = new OpenAIFlex('Flex Under Test', 'flex-test', standardLLM, flexLLM, 200);
const response = await flex.generateTextFromMessages(messages);
const metrics = flex.getMetrics();

expect(response).to.equal('flex-response');
expect(streamed).to.equal('flex-response');
expect(metrics.flexAttempts).to.equal(1);
expect(metrics.flexFallbacks).to.equal(0);
expect(metrics.flexResponses).to.equal(1);
expect(metrics.lastFlexResponseMs).to.be.a('number');
});

it('falls back to standard when flex times out before first chunk', async () => {
const flexLLM = new TestLLM(
'flex',
'flex-model',
async () => 'unused',
async (_msgs, _onChunk, opts) =>
await new Promise<GenerationStats>((_resolve, reject) => {
opts?.abortSignal?.addEventListener('abort', () => reject(new Error('aborted')));
}),
);
const standardLLM = new TestLLM(
'standard',
'std-model',
async () => 'standard-response',
async (_msgs, _onChunk) => DEFAULT_STATS,
);

const flex = new OpenAIFlex('Flex Under Test', 'flex-test', standardLLM, flexLLM, 50);
const response = await flex.generateTextFromMessages(messages);
const metrics = flex.getMetrics();

expect(response).to.equal('standard-response');
expect(metrics.flexAttempts).to.equal(1);
expect(metrics.flexFallbacks).to.equal(1);
expect(metrics.flexResponses).to.equal(0);
});

it('falls back if flex fails after first chunk', async () => {
const flexLLM = new TestLLM(
'flex',
'flex-model',
async () => 'unused',
async (_msgs, onChunk) =>
await new Promise<GenerationStats>((_resolve, reject) => {
onChunk({ type: 'text-delta', id: '1', text: 'partial' });
setTimeout(() => reject(new Error('boom')), 0);
}),
);
const standardLLM = new TestLLM(
'standard',
'std-model',
async () => 'standard-response',
async (_msgs, _onChunk) => DEFAULT_STATS,
);

const flex = new OpenAIFlex('Flex Under Test', 'flex-test', standardLLM, flexLLM, 200);
const response = await flex.generateTextFromMessages(messages);
const metrics: FlexMetricsSnapshot = flex.getMetrics();

expect(response).to.equal('standard-response');
expect(metrics.flexFallbacks).to.equal(1);
expect(metrics.flexResponses).to.equal(1);
});

it('streams from standard when flex times out', async () => {
const flexLLM = new TestLLM(
'flex',
'flex-model',
async () => 'unused',
async (_msgs, _onChunk, opts) =>
await new Promise<GenerationStats>((_resolve, reject) => {
opts?.abortSignal?.addEventListener('abort', () => reject(new Error('aborted')));
}),
);

const standardLLM = new TestLLM(
'standard',
'std-model',
async () => 'standard-response',
async (_msgs, onChunk) => {
onChunk({ type: 'text-delta', id: '1', text: 'S' });
return DEFAULT_STATS;
},
);

let streamed = '';
const flex = new OpenAIFlex('Flex Under Test', 'flex-test', standardLLM, flexLLM, 30);
const stats = await flex.streamText(messages, (chunk) => {
if (chunk.type === 'text-delta') streamed += chunk.text;
});

expect(streamed).to.equal('S');
expect(stats.llmId).to.equal('test:model');
const metrics = flex.getMetrics();
expect(metrics.flexFallbacks).to.equal(1);
});
});
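A hedged usage sketch, not part of this diff: constructing OpenAIFlex with the argument order the tests above use (display name, model id, standard LLM, flex LLM, first-chunk timeout in ms). Passing openaiGPT5mini() for both tiers is an assumption for illustration; the real flex-tier factory is not shown here, and the relative import mirrors the test file's location next to openaiFlex.ts.

import { openaiGPT5mini } from '#llm/services/openai';
import type { LlmMessage } from '#shared/llm/llm.model';
import { OpenAIFlex } from './openaiFlex';

// The standard-tier model answers whenever the flex-tier stream fails to deliver a first chunk within 200 ms.
const llm = new OpenAIFlex('GPT-5 Mini (flex first)', 'gpt5-mini-flex', openaiGPT5mini(), openaiGPT5mini(), 200);

const messages: LlmMessage[] = [{ role: 'user', content: 'hello' }];
const text = await llm.generateTextFromMessages(messages);
console.log(text, llm.getMetrics()); // getMetrics() returns the FlexMetricsSnapshot exercised in the tests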