diff --git a/packages/openai-adapters/src/apis/OpenAI.ts b/packages/openai-adapters/src/apis/OpenAI.ts
index 310e5ef4028..86452706ad8 100644
--- a/packages/openai-adapters/src/apis/OpenAI.ts
+++ b/packages/openai-adapters/src/apis/OpenAI.ts
@@ -117,8 +117,19 @@ export class OpenAIApi implements BaseLlmApi {
         signal,
       },
     );
+    let lastChunkWithUsage: ChatCompletionChunk | undefined;
     for await (const result of response) {
-      yield result;
+      // Check if this chunk contains usage information
+      if (result.usage) {
+        // Store it to emit after all content chunks
+        lastChunkWithUsage = result;
+      } else {
+        yield result;
+      }
+    }
+    // Emit the usage chunk at the end if we have one
+    if (lastChunkWithUsage) {
+      yield lastChunkWithUsage;
     }
   }
   async completionNonStream(
diff --git a/packages/openai-adapters/src/test/util.ts b/packages/openai-adapters/src/test/util.ts
index c36e568480e..d921306d16a 100644
--- a/packages/openai-adapters/src/test/util.ts
+++ b/packages/openai-adapters/src/test/util.ts
@@ -207,6 +207,23 @@ export function testChat(
     const completion = response.choices[0].message.content;
     expect(typeof completion).toBe("string");
     expect(completion?.length).toBeGreaterThan(0);
+
+    if (options?.expectUsage === true) {
+      expect(response.usage).toBeDefined();
+      expect(response.usage!.completion_tokens).toBeGreaterThan(0);
+      expect(response.usage!.prompt_tokens).toBeGreaterThan(0);
+      // Gemini 2.5 models have thinking tokens, so total_tokens >= prompt + completion
+      // Other models should have total_tokens = prompt + completion
+      if (model.includes("gemini-2.5") || model.includes("gemini-2.0")) {
+        expect(response.usage!.total_tokens).toBeGreaterThanOrEqual(
+          response.usage!.prompt_tokens + response.usage!.completion_tokens,
+        );
+      } else {
+        expect(response.usage!.total_tokens).toEqual(
+          response.usage!.prompt_tokens + response.usage!.completion_tokens,
+        );
+      }
+    }
   });

   test("should acknowledge system message in chat", async () => {
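
Note on consuming the reordered stream: the adapter only ever sees a usage chunk if the request opts into usage reporting for streaming (for the OpenAI API, stream_options: { include_usage: true }); otherwise lastChunkWithUsage stays undefined and nothing extra is yielded. Below is a rough sketch of a caller that drains chatCompletionStream and picks up the trailing usage chunk. The drainStream helper and the import path for ChatCompletionChunk are illustrative assumptions, not part of this patch.

import type { ChatCompletionChunk } from "openai/resources/chat/completions";

// Drain an adapter stream, accumulating text and capturing the usage chunk
// that OpenAIApi.chatCompletionStream now yields last.
async function drainStream(stream: AsyncIterable<ChatCompletionChunk>) {
  let text = "";
  let usage: ChatCompletionChunk["usage"] | undefined;
  for await (const chunk of stream) {
    // Content chunks carry deltas; the usage chunk carries empty choices.
    text += chunk.choices[0]?.delta?.content ?? "";
    if (chunk.usage) {
      usage = chunk.usage;
    }
  }
  return { text, usage };
}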