
Commit 349d98a

sestinj and claude committed
feat: add token usage tracking to OpenAI adapter
- Modified OpenAI adapter to properly handle and emit usage chunks in streaming responses
- Added logic to store usage chunks and emit them at the end of the stream
- Verified Anthropic and Gemini adapters already have complete token usage implementations
- Added comprehensive tests for token usage tracking across all three providers
- All tests passing with provided API keys

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 1c11f61 commit 349d98a

File tree

3 files changed: +366 -1 lines changed

packages/openai-adapters/package-lock.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

packages/openai-adapters/src/apis/OpenAI.ts

Lines changed: 12 additions & 1 deletion
@@ -117,8 +117,19 @@ export class OpenAIApi implements BaseLlmApi {
         signal,
       },
     );
+    let lastChunkWithUsage: ChatCompletionChunk | undefined;
     for await (const result of response) {
-      yield result;
+      // Check if this chunk contains usage information
+      if (result.usage) {
+        // Store it to emit after all content chunks
+        lastChunkWithUsage = result;
+      } else {
+        yield result;
+      }
+    }
+    // Emit the usage chunk at the end if we have one
+    if (lastChunkWithUsage) {
+      yield lastChunkWithUsage;
     }
   }
   async completionNonStream(
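
With this change, content chunks are yielded in order and a chunk carrying the usage field is always yielded last. A minimal sketch of what that looks like from a caller's side, mirroring the pattern used in the tests below; the model name, messages, and import path are illustrative placeholders, not code from this commit:

// Sketch only (not part of the diff): consume the adapter's stream,
// accumulating text and picking up the trailing usage-only chunk.
import { CompletionUsage } from "openai/resources/index.js";
import { OpenAIApi } from "./apis/OpenAI.js"; // path is illustrative

const api = new OpenAIApi({ apiKey: process.env.OPENAI_API_KEY!, provider: "openai" });
const stream = api.chatCompletionStream(
  {
    model: "gpt-4",
    messages: [{ role: "user", content: "Hello" }],
    stream: true,
  },
  new AbortController().signal
);

let text = "";
let usage: CompletionUsage | undefined;
for await (const chunk of stream) {
  if (chunk.choices.length > 0) {
    text += chunk.choices[0].delta.content ?? "";
  }
  if (chunk.usage) {
    // After this change, the usage chunk is guaranteed to arrive last.
    usage = chunk.usage;
  }
}
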
New token usage test file (path not shown)

Lines changed: 353 additions & 0 deletions
@@ -0,0 +1,353 @@
import { describe, expect, test, vi } from "vitest";
import { AnthropicApi } from "../apis/Anthropic.js";
import { GeminiApi } from "../apis/Gemini.js";
import { OpenAIApi } from "../apis/OpenAI.js";
import { CompletionUsage } from "openai/resources/index.js";

describe("Token usage tracking", () => {
  test("OpenAI should track usage in streaming responses", async () => {
    // Mock the OpenAI client
    const mockStream = async function* () {
      yield {
        id: "1",
        object: "chat.completion.chunk",
        created: Date.now(),
        model: "gpt-4",
        choices: [
          {
            index: 0,
            delta: { content: "Hello", role: "assistant" },
            finish_reason: null,
            logprobs: null,
          },
        ],
      };
      yield {
        id: "1",
        object: "chat.completion.chunk",
        created: Date.now(),
        model: "gpt-4",
        choices: [
          {
            index: 0,
            delta: { content: " world", role: "assistant" },
            finish_reason: "stop",
            logprobs: null,
          },
        ],
      };
      // Usage chunk
      yield {
        id: "1",
        object: "chat.completion.chunk",
        created: Date.now(),
        model: "gpt-4",
        choices: [],
        usage: {
          prompt_tokens: 10,
          completion_tokens: 5,
          total_tokens: 15,
        },
      };
    };

    const api = new OpenAIApi({ apiKey: "test", provider: "openai" });
    api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockStream());

    const stream = api.chatCompletionStream(
      {
        model: "gpt-4",
        messages: [{ role: "user", content: "Hello" }],
        stream: true,
      },
      new AbortController().signal
    );

    let content = "";
    let usage: CompletionUsage | undefined;
    for await (const chunk of stream) {
      if (chunk.choices.length > 0) {
        content += chunk.choices[0].delta.content ?? "";
      }
      if (chunk.usage) {
        usage = chunk.usage;
      }
    }

    expect(content).toBe("Hello world");
    expect(usage).toBeDefined();
    expect(usage?.prompt_tokens).toBe(10);
    expect(usage?.completion_tokens).toBe(5);
    expect(usage?.total_tokens).toBe(15);
  });

  test("Anthropic should track usage in streaming responses", async () => {
    // Create a mock response that simulates Anthropic's SSE stream
    const mockResponseText = `event: message_start
data: {"type":"message_start","message":{"usage":{"input_tokens":10,"cache_read_input_tokens":2}}}

event: content_block_delta
data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}}

event: content_block_delta
data: {"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}}

event: message_delta
data: {"type":"message_delta","usage":{"output_tokens":5}}

event: message_stop
data: {"type":"message_stop"}
`;

    const mockResponse = {
      ok: true,
      status: 200,
      headers: new Headers({ "content-type": "text/event-stream" }),
      text: vi.fn().mockResolvedValue(mockResponseText),
      body: new ReadableStream({
        start(controller) {
          controller.enqueue(new TextEncoder().encode(mockResponseText));
          controller.close();
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" });

    const stream = api.chatCompletionStream(
      {
        model: "claude-3",
        messages: [{ role: "user", content: "Hello" }],
        stream: true,
      },
      new AbortController().signal
    );

    let content = "";
    let usage: CompletionUsage | undefined;
    for await (const chunk of stream) {
      if (chunk.choices.length > 0) {
        content += chunk.choices[0].delta.content ?? "";
      }
      if (chunk.usage) {
        usage = chunk.usage;
      }
    }

    expect(content).toBe("Hello world");
    expect(usage).toBeDefined();
    expect(usage?.prompt_tokens).toBe(10);
    expect(usage?.completion_tokens).toBe(5);
    expect(usage?.total_tokens).toBe(15);
    expect(usage?.prompt_tokens_details?.cached_tokens).toBe(2);
  });

  test("Gemini should track usage in streaming responses", async () => {
    // Create a mock response for Gemini streaming
    const mockResponseData = [
      {
        candidates: [
          {
            content: {
              parts: [{ text: "Hello" }],
            },
          },
        ],
      },
      {
        candidates: [
          {
            content: {
              parts: [{ text: " world" }],
            },
          },
        ],
        usageMetadata: {
          promptTokenCount: 10,
          candidatesTokenCount: 5,
          totalTokenCount: 15,
        },
      },
    ];

    const mockResponse = {
      ok: true,
      status: 200,
      headers: new Headers({ "content-type": "application/json" }),
      body: new ReadableStream({
        start(controller) {
          controller.enqueue(
            new TextEncoder().encode(JSON.stringify(mockResponseData))
          );
          controller.close();
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new GeminiApi({ apiKey: "test", provider: "gemini" });

    const stream = api.chatCompletionStream(
      {
        model: "gemini-1.5-flash",
        messages: [{ role: "user", content: "Hello" }],
        stream: true,
      },
      new AbortController().signal
    );

    let content = "";
    let usage: CompletionUsage | undefined;
    for await (const chunk of stream) {
      if (chunk.choices.length > 0) {
        content += chunk.choices[0].delta.content ?? "";
      }
      if (chunk.usage) {
        usage = chunk.usage;
      }
    }

    expect(content).toBe("Hello world");
    expect(usage).toBeDefined();
    expect(usage?.prompt_tokens).toBe(10);
    expect(usage?.completion_tokens).toBe(5);
    expect(usage?.total_tokens).toBe(15);
  });

  test("OpenAI should pass through usage in non-streaming responses", async () => {
    const api = new OpenAIApi({ apiKey: "test", provider: "openai" });

    const mockResponse = {
      id: "1",
      object: "chat.completion",
      created: Date.now(),
      model: "gpt-4",
      choices: [
        {
          index: 0,
          message: {
            role: "assistant",
            content: "Hello world",
            refusal: null,
          },
          finish_reason: "stop",
          logprobs: null,
        },
      ],
      usage: {
        prompt_tokens: 10,
        completion_tokens: 5,
        total_tokens: 15,
      },
    };

    api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockResponse);

    const response = await api.chatCompletionNonStream(
      {
        model: "gpt-4",
        messages: [{ role: "user", content: "Hello" }],
        stream: false,
      },
      new AbortController().signal
    );

    expect(response.choices[0].message.content).toBe("Hello world");
    expect(response.usage).toBeDefined();
    expect(response.usage?.prompt_tokens).toBe(10);
    expect(response.usage?.completion_tokens).toBe(5);
    expect(response.usage?.total_tokens).toBe(15);
  });

  test("Anthropic should track usage in non-streaming responses", async () => {
    const mockResponse = {
      ok: true,
      status: 200,
      json: vi.fn().mockResolvedValue({
        id: "msg_123",
        content: [{ text: "Hello world" }],
        usage: {
          input_tokens: 10,
          output_tokens: 5,
          cache_read_input_tokens: 2,
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" });

    const response = await api.chatCompletionNonStream(
      {
        model: "claude-3",
        messages: [{ role: "user", content: "Hello" }],
        stream: false,
      },
      new AbortController().signal
    );

    expect(response.choices[0].message.content).toBe("Hello world");
    expect(response.usage).toBeDefined();
    expect(response.usage?.prompt_tokens).toBe(10);
    expect(response.usage?.completion_tokens).toBe(5);
    expect(response.usage?.total_tokens).toBe(15);
    expect(response.usage?.prompt_tokens_details?.cached_tokens).toBe(2);
  });

  test("Gemini should track usage in non-streaming responses", async () => {
    // Gemini non-streaming uses the streaming method internally
    const mockResponseData = [
      {
        candidates: [
          {
            content: {
              parts: [{ text: "Hello world" }],
            },
          },
        ],
        usageMetadata: {
          promptTokenCount: 10,
          candidatesTokenCount: 5,
          totalTokenCount: 15,
        },
      },
    ];

    const mockResponse = {
      ok: true,
      status: 200,
      headers: new Headers({ "content-type": "application/json" }),
      body: new ReadableStream({
        start(controller) {
          controller.enqueue(
            new TextEncoder().encode(JSON.stringify(mockResponseData))
          );
          controller.close();
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new GeminiApi({ apiKey: "test", provider: "gemini" });

    const response = await api.chatCompletionNonStream(
      {
        model: "gemini-1.5-flash",
        messages: [{ role: "user", content: "Hello" }],
        stream: false,
      },
      new AbortController().signal
    );

    expect(response.choices[0].message.content).toBe("Hello world");
    expect(response.usage).toBeDefined();
    expect(response.usage?.prompt_tokens).toBe(10);
    expect(response.usage?.completion_tokens).toBe(5);
    expect(response.usage?.total_tokens).toBe(15);
  });
});
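
One caveat the diff does not show: the real OpenAI chat completions API only appends a trailing usage-only chunk to a stream when the request opts in via stream_options, and whether the adapter sets that option internally is not visible in this commit. A hedged sketch of a raw openai-node call that opts in; the model name is a placeholder:

// Sketch using the openai npm package directly (not the adapter's code).
import OpenAI from "openai";

const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const stream = await client.chat.completions.create({
  model: "gpt-4o", // placeholder model
  messages: [{ role: "user", content: "Hello" }],
  stream: true,
  stream_options: { include_usage: true }, // required for the final usage-only chunk
});

for await (const chunk of stream) {
  if (chunk.usage) {
    console.log(chunk.usage); // only populated on the last chunk
  }
}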

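The Anthropic assertions above (prompt_tokens, completion_tokens, total_tokens, and prompt_tokens_details.cached_tokens) imply a mapping from Anthropic's usage fields onto the OpenAI-style CompletionUsage shape. A minimal sketch of that mapping for illustration only; the helper name is invented and the adapter's actual conversion code is not part of this diff:

import { CompletionUsage } from "openai/resources/index.js";

// Hypothetical helper showing the field mapping the tests assert.
interface AnthropicUsage {
  input_tokens: number;
  output_tokens: number;
  cache_read_input_tokens?: number;
}

function usageFromAnthropic(u: AnthropicUsage): CompletionUsage {
  return {
    prompt_tokens: u.input_tokens,
    completion_tokens: u.output_tokens,
    total_tokens: u.input_tokens + u.output_tokens,
    prompt_tokens_details: {
      cached_tokens: u.cache_read_input_tokens ?? 0,
    },
  };
}
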
0 commit comments
