Skip to content

Commit e594877

Browse files
authored
Fix audio transcription cost tracking (#16478)
1 parent 28cadaa commit e594877

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

litellm/cost_calculator.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,25 @@ def _cost_per_token_custom_pricing_helper(
133133
return None
134134

135135

136+
def _transcription_usage_has_token_details(
    usage_block: Optional[Usage],
) -> bool:
    """Return True when *usage_block* carries token counts suitable for
    token-based transcription pricing, False otherwise.

    Token details count as present when either the nested
    ``prompt_tokens_details`` reports a positive audio/text token count,
    or the coarse top-level prompt/completion counters are positive.
    """
    if usage_block is None:
        return False

    detail_block = getattr(usage_block, "prompt_tokens_details", None)
    if detail_block is not None:
        # A positive audio or text token count in the details wrapper is
        # sufficient on its own.
        has_detail_tokens = any(
            (getattr(detail_block, field, 0) or 0) > 0
            for field in ("audio_tokens", "text_tokens")
        )
        if has_detail_tokens:
            return True

    # Fall back to the coarse top-level counters.
    total_prompt = getattr(usage_block, "prompt_tokens", 0) or 0
    total_completion = getattr(usage_block, "completion_tokens", 0) or 0
    return total_prompt > 0 or total_completion > 0
153+
154+
136155
def cost_per_token( # noqa: PLR0915
137156
model: str = "",
138157
prompt_tokens: int = 0,
@@ -324,19 +343,18 @@ def cost_per_token( # noqa: PLR0915
324343
usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
325344
)
326345
elif call_type == "atranscription" or call_type == "transcription":
327-
328-
if model == "gpt-4o-mini-transcribe":
346+
if _transcription_usage_has_token_details(usage_block):
329347
return openai_cost_per_token(
330-
model=model,
348+
model=model_without_prefix,
331349
usage=usage_block,
332350
service_tier=service_tier,
333351
)
334-
else:
335-
return openai_cost_per_second(
336-
model=model,
337-
custom_llm_provider=custom_llm_provider,
338-
duration=audio_transcription_file_duration,
339-
)
352+
353+
return openai_cost_per_second(
354+
model=model_without_prefix,
355+
custom_llm_provider=custom_llm_provider,
356+
duration=audio_transcription_file_duration,
357+
)
340358
elif call_type == "search" or call_type == "asearch":
341359
# Search providers use per-query pricing
342360
from litellm.search import search_provider_cost_per_query

tests/test_litellm/test_cost_calculator.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
)
2020
from litellm.types.llms.openai import OpenAIRealtimeStreamList
2121
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
22+
from litellm.utils import TranscriptionResponse
2223

2324

2425
def test_cost_calculator_with_response_cost_in_additional_headers():
@@ -77,6 +78,54 @@ def test_cost_calculator_with_usage():
7778
assert result == expected_cost, f"Got {result}, Expected {expected_cost}"
7879

7980

81+
def test_transcription_cost_uses_token_pricing():
    """Transcription responses that carry token usage details must be
    priced per-token (gpt-4o-transcribe rates), not per-second."""
    from litellm import completion_cost

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    token_usage = Usage(
        prompt_tokens=14,
        completion_tokens=45,
        total_tokens=59,
        prompt_tokens_details=PromptTokensDetailsWrapper(
            text_tokens=0, audio_tokens=14
        ),
    )
    transcription = TranscriptionResponse(text="demo text")
    transcription.usage = token_usage

    observed_cost = completion_cost(
        completion_response=transcription,
        model="gpt-4o-transcribe",
        custom_llm_provider="openai",
        call_type="atranscription",
    )

    # 14 audio prompt tokens at 6e-06 each + 45 completion tokens at 1e-05.
    expected_cost = (14 * 6e-06) + (45 * 1e-05)
    assert pytest.approx(observed_cost, rel=1e-6) == expected_cost
107+
108+
109+
def test_transcription_cost_falls_back_to_duration():
    """Without token usage details, transcription cost must fall back to
    per-second (duration-based) pricing."""
    from litellm import completion_cost

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    transcription = TranscriptionResponse(text="demo text")
    transcription.duration = 10.0

    observed_cost = completion_cost(
        completion_response=transcription,
        model="whisper-1",
        custom_llm_provider="openai",
        call_type="atranscription",
    )

    # 10 seconds of audio billed at 0.0001 per second.
    expected_cost = 10.0 * 0.0001
    assert pytest.approx(observed_cost, rel=1e-6) == expected_cost
127+
128+
80129
def test_handle_realtime_stream_cost_calculation():
81130
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor
82131

0 commit comments

Comments (0)