Skip to content

Commit 3b9368d

Browse files
habema and seratch authored
feat: #1994 Add a list of per-request usage data to Usage (#1995)
Co-authored-by: Kazuhiro Sera <[email protected]>
1 parent f91b38f commit 3b9368d

File tree

2 files changed

+273
-1
lines changed

2 files changed

+273
-1
lines changed

src/agents/usage.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,26 @@
44
from pydantic.dataclasses import dataclass
55

66

7+
@dataclass
class RequestUsage:
    """Token accounting captured for one individual API request."""

    input_tokens: int
    """Number of input tokens reported for this single request."""

    output_tokens: int
    """Number of output tokens reported for this single request."""

    total_tokens: int
    """Total tokens (input plus output) reported for this single request."""

    input_tokens_details: InputTokensDetails
    """Detailed breakdown of the input tokens for this single request."""

    output_tokens_details: OutputTokensDetails
    """Detailed breakdown of the output tokens for this single request."""
25+
26+
727
@dataclass
828
class Usage:
929
requests: int = 0
@@ -27,7 +47,27 @@ class Usage:
2747
total_tokens: int = 0
2848
"""Total tokens sent and received, across all requests."""
2949

50+
request_usage_entries: list[RequestUsage] = field(default_factory=list)
51+
"""List of RequestUsage entries for accurate per-request cost calculation.
52+
53+
Each call to `add()` automatically creates an entry in this list if the added usage
54+
represents a new request (i.e., has non-zero tokens).
55+
56+
Example:
57+
For a run that makes 3 API calls with 100K, 150K, and 80K input tokens each,
58+
the aggregated `input_tokens` would be 330K, but `request_usage_entries` would
59+
preserve the [100K, 150K, 80K] breakdown, which could be helpful for detailed
60+
cost calculation or context window management.
61+
"""
62+
3063
def add(self, other: "Usage") -> None:
64+
"""Add another Usage object to this one, aggregating all fields.
65+
66+
This method automatically preserves request_usage_entries.
67+
68+
Args:
69+
other: The Usage object to add to this one.
70+
"""
3171
self.requests += other.requests if other.requests else 0
3272
self.input_tokens += other.input_tokens if other.input_tokens else 0
3373
self.output_tokens += other.output_tokens if other.output_tokens else 0
@@ -41,3 +81,18 @@ def add(self, other: "Usage") -> None:
4181
reasoning_tokens=self.output_tokens_details.reasoning_tokens
4282
+ other.output_tokens_details.reasoning_tokens
4383
)
84+
85+
# Automatically preserve request_usage_entries.
86+
# If the other Usage represents a single request with tokens, record it.
87+
if other.requests == 1 and other.total_tokens > 0:
88+
request_usage = RequestUsage(
89+
input_tokens=other.input_tokens,
90+
output_tokens=other.output_tokens,
91+
total_tokens=other.total_tokens,
92+
input_tokens_details=other.input_tokens_details,
93+
output_tokens_details=other.output_tokens_details,
94+
)
95+
self.request_usage_entries.append(request_usage)
96+
elif other.request_usage_entries:
97+
# If the other Usage already has individual request breakdowns, merge them.
98+
self.request_usage_entries.extend(other.request_usage_entries)

tests/test_usage.py

Lines changed: 218 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
22

3-
from agents.usage import Usage
3+
from agents.usage import RequestUsage, Usage
44

55

66
def test_usage_add_aggregates_all_fields():
@@ -50,3 +50,220 @@ def test_usage_add_aggregates_with_none_values():
5050
assert u1.total_tokens == 15
5151
assert u1.input_tokens_details.cached_tokens == 4
5252
assert u1.output_tokens_details.reasoning_tokens == 6
53+
54+
55+
def test_request_usage_creation():
    """Verify that RequestUsage exposes every value it was constructed with."""
    input_details = InputTokensDetails(cached_tokens=10)
    output_details = OutputTokensDetails(reasoning_tokens=20)

    entry = RequestUsage(
        input_tokens=100,
        output_tokens=200,
        total_tokens=300,
        input_tokens_details=input_details,
        output_tokens_details=output_details,
    )

    # Compare the three token counters in one shot, then the nested details.
    token_counts = (entry.input_tokens, entry.output_tokens, entry.total_tokens)
    assert token_counts == (100, 200, 300)
    assert entry.input_tokens_details.cached_tokens == 10
    assert entry.output_tokens_details.reasoning_tokens == 20
70+
71+
72+
def test_usage_add_preserves_single_request():
    """Adding a single-request Usage should record one matching RequestUsage entry."""
    aggregate = Usage()
    single_request = Usage(
        requests=1,
        input_tokens=100,
        input_tokens_details=InputTokensDetails(cached_tokens=10),
        output_tokens=200,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
        total_tokens=300,
    )

    aggregate.add(single_request)

    # Exactly one entry is expected, mirroring the request that was added.
    entries = aggregate.request_usage_entries
    assert len(entries) == 1

    entry = entries[0]
    token_counts = (entry.input_tokens, entry.output_tokens, entry.total_tokens)
    assert token_counts == (100, 200, 300)
    assert entry.input_tokens_details.cached_tokens == 10
    assert entry.output_tokens_details.reasoning_tokens == 20
94+
95+
96+
def test_usage_add_ignores_zero_token_requests():
    """A request that consumed no tokens must not produce a request_usage_entries item."""
    aggregate = Usage()
    empty_request = Usage(
        requests=1,
        input_tokens=0,
        input_tokens_details=InputTokensDetails(cached_tokens=0),
        output_tokens=0,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
        total_tokens=0,
    )

    aggregate.add(empty_request)

    # Zero total tokens means nothing is recorded for this request.
    recorded = len(aggregate.request_usage_entries)
    assert recorded == 0
112+
113+
114+
def test_usage_add_ignores_multi_request_usage():
    """A Usage covering several requests (and carrying no entries) adds no entries."""
    aggregate = Usage()
    # requests=3 with only aggregated counters: there is no per-request breakdown to keep.
    multi_request = Usage(
        requests=3,
        input_tokens=100,
        input_tokens_details=InputTokensDetails(cached_tokens=10),
        output_tokens=200,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
        total_tokens=300,
    )

    aggregate.add(multi_request)

    recorded = len(aggregate.request_usage_entries)
    assert recorded == 0
130+
131+
132+
def test_usage_add_merges_existing_request_usage_entries():
    """Successive single-request adds should accumulate entries in call order."""
    aggregate = Usage()

    # First request: its entry is recorded on the aggregate.
    first_request = Usage(
        requests=1,
        input_tokens=100,
        input_tokens_details=InputTokensDetails(cached_tokens=10),
        output_tokens=200,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
        total_tokens=300,
    )
    aggregate.add(first_request)

    # Second request: added on top of the entry that already exists.
    second_request = Usage(
        requests=1,
        input_tokens=50,
        input_tokens_details=InputTokensDetails(cached_tokens=5),
        output_tokens=75,
        output_tokens_details=OutputTokensDetails(reasoning_tokens=10),
        total_tokens=125,
    )
    aggregate.add(second_request)

    entries = aggregate.request_usage_entries
    assert len(entries) == 2

    # Entries must appear in insertion order with their own token counts.
    expected_counts = [(100, 200, 300), (50, 75, 125)]
    for entry, (expected_in, expected_out, expected_total) in zip(entries, expected_counts):
        assert entry.input_tokens == expected_in
        assert entry.output_tokens == expected_out
        assert entry.total_tokens == expected_total
172+
173+
174+
def test_usage_add_with_pre_existing_request_usage_entries():
    """Test adding a Usage object that already carries request_usage_entries.

    A multi-request Usage is first built from two single-request adds, so it holds two
    entries. Adding that object to a fresh Usage must go through the merge path of
    `add()` (entries are extended from `other`), not the single-request path.
    """
    # Build the source: two single-request adds leave requests == 2 and two entries.
    source = Usage()
    source.add(
        Usage(
            requests=1,
            input_tokens=100,
            input_tokens_details=InputTokensDetails(cached_tokens=10),
            output_tokens=200,
            output_tokens_details=OutputTokensDetails(reasoning_tokens=20),
            total_tokens=300,
        )
    )
    source.add(
        Usage(
            requests=1,
            input_tokens=50,
            input_tokens_details=InputTokensDetails(cached_tokens=5),
            output_tokens=75,
            output_tokens_details=OutputTokensDetails(reasoning_tokens=10),
            total_tokens=125,
        )
    )

    # Sanity-check the precondition: the source really has pre-existing entries.
    assert source.requests == 2
    assert len(source.request_usage_entries) == 2

    # Add the multi-request source to an empty target.
    target = Usage()
    target.add(source)

    # Aggregated counters are carried over.
    assert target.requests == 2
    assert target.input_tokens == 150
    assert target.output_tokens == 275

    # The per-request breakdown is merged in its original order.
    assert len(target.request_usage_entries) == 2
    assert target.request_usage_entries[0].input_tokens == 100
    assert target.request_usage_entries[1].input_tokens == 50
206+
207+
208+
def test_usage_request_usage_entries_default_empty():
    """A freshly constructed Usage starts with an empty request_usage_entries list."""
    fresh_usage = Usage()
    entries = fresh_usage.request_usage_entries
    assert entries == []
212+
213+
214+
def test_anthropic_cost_calculation_scenario():
    """Test a realistic scenario for Sonnet 4.5 cost calculation with 200K token thresholds."""
    # Three simulated API calls as (input_tokens, output_tokens) pairs.
    # No single call exceeds 200K, so each would fall in the lower pricing tier.
    simulated_calls = [
        (100_000, 50_000),
        (150_000, 75_000),
        (80_000, 40_000),
    ]

    usage = Usage()
    for prompt_tokens, completion_tokens in simulated_calls:
        single_call = Usage(
            requests=1,
            input_tokens=prompt_tokens,
            input_tokens_details=InputTokensDetails(cached_tokens=0),
            output_tokens=completion_tokens,
            output_tokens_details=OutputTokensDetails(reasoning_tokens=0),
            total_tokens=prompt_tokens + completion_tokens,
        )
        usage.add(single_call)

    # Aggregated totals across the three calls.
    assert usage.requests == 3
    assert usage.input_tokens == 330_000  # 100K + 150K + 80K
    assert usage.output_tokens == 165_000  # 50K + 75K + 40K
    assert usage.total_tokens == 495_000  # 150K + 225K + 120K

    # The per-request breakdown survives aggregation, in call order.
    entries = usage.request_usage_entries
    assert len(entries) == 3
    assert entries[0].input_tokens == 100_000
    assert entries[1].input_tokens == 150_000
    assert entries[2].input_tokens == 80_000

    # Every individual request stays under the 200K threshold.
    for entry in entries:
        assert entry.input_tokens < 200_000
        assert entry.output_tokens < 200_000

0 commit comments

Comments
 (0)