import { describe, expect, test, vi } from "vitest";
import { AnthropicApi } from "../apis/Anthropic.js";
import { GeminiApi } from "../apis/Gemini.js";
import { OpenAIApi } from "../apis/OpenAI.js";
import type { CompletionUsage } from "openai/resources/index.js";
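
// Each provider adapter is expected to normalize its usage payload into
// OpenAI's CompletionUsage shape (prompt_tokens, completion_tokens,
// total_tokens), for both streaming and non-streaming responses.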
describe("Token usage tracking", () => {
  test("OpenAI should track usage in streaming responses", async () => {
    // Mock the OpenAI client
    const mockStream = async function* () {
      yield {
        id: "1",
        object: "chat.completion.chunk",
        created: Date.now(),
        model: "gpt-4",
        choices: [
          {
            index: 0,
            delta: { content: "Hello", role: "assistant" },
            finish_reason: null,
            logprobs: null,
          },
        ],
      };
      yield {
        id: "1",
        object: "chat.completion.chunk",
        created: Date.now(),
        model: "gpt-4",
        choices: [
          {
            index: 0,
            delta: { content: " world", role: "assistant" },
            finish_reason: "stop",
            logprobs: null,
          },
        ],
      };
      // Usage chunk
      yield {
        id: "1",
        object: "chat.completion.chunk",
        created: Date.now(),
        model: "gpt-4",
        choices: [],
        usage: {
          prompt_tokens: 10,
          completion_tokens: 5,
          total_tokens: 15,
        },
      };
    };

    const api = new OpenAIApi({ apiKey: "test", provider: "openai" });
    api.openai.chat.completions.create = vi
      .fn()
      .mockResolvedValue(mockStream());

    const stream = api.chatCompletionStream(
      {
        model: "gpt-4",
        messages: [{ role: "user", content: "Hello" }],
        stream: true,
      },
      new AbortController().signal
    );

    let content = "";
    let usage: CompletionUsage | undefined;
    for await (const chunk of stream) {
      if (chunk.choices.length > 0) {
        content += chunk.choices[0].delta.content ?? "";
      }
      if (chunk.usage) {
        usage = chunk.usage;
      }
    }

    expect(content).toBe("Hello world");
    expect(usage).toBeDefined();
    expect(usage?.prompt_tokens).toBe(10);
    expect(usage?.completion_tokens).toBe(5);
    expect(usage?.total_tokens).toBe(15);
  });
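
  // Anthropic reports usage under its own field names (input_tokens /
  // output_tokens, plus cache_read_input_tokens for prompt caching). The
  // adapter is expected to map these onto the OpenAI shape and to compute
  // total_tokens itself, since Anthropic never sends a total.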
  test("Anthropic should track usage in streaming responses", async () => {
    // Create a mock response that simulates Anthropic's SSE stream
    const mockResponseText = `event: message_start
data: {"type":"message_start","message":{"usage":{"input_tokens":10,"cache_read_input_tokens":2}}}

event: content_block_delta
data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}}

event: content_block_delta
data: {"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}}

event: message_delta
data: {"type":"message_delta","usage":{"output_tokens":5}}

event: message_stop
data: {"type":"message_stop"}
`;

    const mockResponse = {
      ok: true,
      status: 200,
      headers: new Headers({ "content-type": "text/event-stream" }),
      text: vi.fn().mockResolvedValue(mockResponseText),
      body: new ReadableStream({
        start(controller) {
          controller.enqueue(new TextEncoder().encode(mockResponseText));
          controller.close();
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" });

    const stream = api.chatCompletionStream(
      {
        model: "claude-3",
        messages: [{ role: "user", content: "Hello" }],
        stream: true,
      },
      new AbortController().signal
    );

    let content = "";
    let usage: CompletionUsage | undefined;
    for await (const chunk of stream) {
      if (chunk.choices.length > 0) {
        content += chunk.choices[0].delta.content ?? "";
      }
      if (chunk.usage) {
        usage = chunk.usage;
      }
    }

    expect(content).toBe("Hello world");
    expect(usage).toBeDefined();
    expect(usage?.prompt_tokens).toBe(10);
    expect(usage?.completion_tokens).toBe(5);
    expect(usage?.total_tokens).toBe(15);
    expect(usage?.prompt_tokens_details?.cached_tokens).toBe(2);
  });
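
  // Gemini reports usage as usageMetadata (promptTokenCount /
  // candidatesTokenCount / totalTokenCount), here attached to the final
  // streamed element; the adapter maps these onto the OpenAI field names.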
  test("Gemini should track usage in streaming responses", async () => {
    // Create a mock response for Gemini streaming
    const mockResponseData = [
      {
        candidates: [
          {
            content: {
              parts: [{ text: "Hello" }],
            },
          },
        ],
      },
      {
        candidates: [
          {
            content: {
              parts: [{ text: " world" }],
            },
          },
        ],
        usageMetadata: {
          promptTokenCount: 10,
          candidatesTokenCount: 5,
          totalTokenCount: 15,
        },
      },
    ];

    const mockResponse = {
      ok: true,
      status: 200,
      headers: new Headers({ "content-type": "application/json" }),
      body: new ReadableStream({
        start(controller) {
          controller.enqueue(
            new TextEncoder().encode(JSON.stringify(mockResponseData))
          );
          controller.close();
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new GeminiApi({ apiKey: "test", provider: "gemini" });

    const stream = api.chatCompletionStream(
      {
        model: "gemini-1.5-flash",
        messages: [{ role: "user", content: "Hello" }],
        stream: true,
      },
      new AbortController().signal
    );

    let content = "";
    let usage: CompletionUsage | undefined;
    for await (const chunk of stream) {
      if (chunk.choices.length > 0) {
        content += chunk.choices[0].delta.content ?? "";
      }
      if (chunk.usage) {
        usage = chunk.usage;
      }
    }

    expect(content).toBe("Hello world");
    expect(usage).toBeDefined();
    expect(usage?.prompt_tokens).toBe(10);
    expect(usage?.completion_tokens).toBe(5);
    expect(usage?.total_tokens).toBe(15);
  });
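
  // Non-streaming OpenAI responses already carry usage in the expected
  // shape, so the adapter only needs to pass it through unchanged.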
  test("OpenAI should pass through usage in non-streaming responses", async () => {
    const api = new OpenAIApi({ apiKey: "test", provider: "openai" });

    const mockResponse = {
      id: "1",
      object: "chat.completion",
      created: Date.now(),
      model: "gpt-4",
      choices: [
        {
          index: 0,
          message: {
            role: "assistant",
            content: "Hello world",
            refusal: null,
          },
          finish_reason: "stop",
          logprobs: null,
        },
      ],
      usage: {
        prompt_tokens: 10,
        completion_tokens: 5,
        total_tokens: 15,
      },
    };

    api.openai.chat.completions.create = vi
      .fn()
      .mockResolvedValue(mockResponse);

    const response = await api.chatCompletionNonStream(
      {
        model: "gpt-4",
        messages: [{ role: "user", content: "Hello" }],
        stream: false,
      },
      new AbortController().signal
    );

    expect(response.choices[0].message.content).toBe("Hello world");
    expect(response.usage).toBeDefined();
    expect(response.usage?.prompt_tokens).toBe(10);
    expect(response.usage?.completion_tokens).toBe(5);
    expect(response.usage?.total_tokens).toBe(15);
  });
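
  // Same Anthropic field mapping as the streaming case, but read from a
  // single JSON body rather than from SSE events.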
  test("Anthropic should track usage in non-streaming responses", async () => {
    const mockResponse = {
      ok: true,
      status: 200,
      json: vi.fn().mockResolvedValue({
        id: "msg_123",
        content: [{ text: "Hello world" }],
        usage: {
          input_tokens: 10,
          output_tokens: 5,
          cache_read_input_tokens: 2,
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" });

    const response = await api.chatCompletionNonStream(
      {
        model: "claude-3",
        messages: [{ role: "user", content: "Hello" }],
        stream: false,
      },
      new AbortController().signal
    );

    expect(response.choices[0].message.content).toBe("Hello world");
    expect(response.usage).toBeDefined();
    expect(response.usage?.prompt_tokens).toBe(10);
    expect(response.usage?.completion_tokens).toBe(5);
    expect(response.usage?.total_tokens).toBe(15);
    expect(response.usage?.prompt_tokens_details?.cached_tokens).toBe(2);
  });
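
  // As the comment below notes, the Gemini adapter implements non-streaming
  // calls on top of its streaming path, so usage is still read from
  // usageMetadata on the final element.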
  test("Gemini should track usage in non-streaming responses", async () => {
    // Gemini non-streaming uses the streaming method internally
    const mockResponseData = [
      {
        candidates: [
          {
            content: {
              parts: [{ text: "Hello world" }],
            },
          },
        ],
        usageMetadata: {
          promptTokenCount: 10,
          candidatesTokenCount: 5,
          totalTokenCount: 15,
        },
      },
    ];

    const mockResponse = {
      ok: true,
      status: 200,
      headers: new Headers({ "content-type": "application/json" }),
      body: new ReadableStream({
        start(controller) {
          controller.enqueue(
            new TextEncoder().encode(JSON.stringify(mockResponseData))
          );
          controller.close();
        },
      }),
    };

    global.fetch = vi.fn().mockResolvedValue(mockResponse);

    const api = new GeminiApi({ apiKey: "test", provider: "gemini" });

    const response = await api.chatCompletionNonStream(
      {
        model: "gemini-1.5-flash",
        messages: [{ role: "user", content: "Hello" }],
        stream: false,
      },
      new AbortController().signal
    );

    expect(response.choices[0].message.content).toBe("Hello world");
    expect(response.usage).toBeDefined();
    expect(response.usage?.prompt_tokens).toBe(10);
    expect(response.usage?.completion_tokens).toBe(5);
    expect(response.usage?.total_tokens).toBe(15);
  });
});