Skip to content

Commit e594877

Browse files
authored
Fix audio transcription cost tracking (#16478)
1 parent 28cadaa commit e594877

File tree

2 files changed

+76
-9
lines changed

2 files changed

+76
-9
lines changed

litellm/cost_calculator.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,25 @@ def _cost_per_token_custom_pricing_helper(
133133
return None
134134

135135

136+
def _transcription_usage_has_token_details(
    usage_block: Optional[Usage],
) -> bool:
    """Return True when *usage_block* carries token counts suitable for
    token-based transcription pricing, False otherwise.

    Token details count as present when either the nested
    ``prompt_tokens_details`` reports a positive audio/text token count,
    or the coarse top-level prompt/completion counters are positive.
    """
    if usage_block is None:
        return False

    detail_block = getattr(usage_block, "prompt_tokens_details", None)
    if detail_block is not None:
        # A positive audio or text token count in the details wrapper is
        # sufficient on its own.
        has_detail_tokens = any(
            (getattr(detail_block, field, 0) or 0) > 0
            for field in ("audio_tokens", "text_tokens")
        )
        if has_detail_tokens:
            return True

    # Fall back to the coarse top-level counters.
    total_prompt = getattr(usage_block, "prompt_tokens", 0) or 0
    total_completion = getattr(usage_block, "completion_tokens", 0) or 0
    return total_prompt > 0 or total_completion > 0
153+
154+
136155
def cost_per_token( # noqa: PLR0915
137156
model: str = "",
138157
prompt_tokens: int = 0,
@@ -324,19 +343,18 @@ def cost_per_token( # noqa: PLR0915
324343
usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
325344
)
326345
elif call_type == "atranscription" or call_type == "transcription":
327-
328-
if model == "gpt-4o-mini-transcribe":
346+
if _transcription_usage_has_token_details(usage_block):
329347
return openai_cost_per_token(
330-
model=model,
348+
model=model_without_prefix,
331349
usage=usage_block,
332350
service_tier=service_tier,
333351
)
334-
else:
335-
return openai_cost_per_second(
336-
model=model,
337-
custom_llm_provider=custom_llm_provider,
338-
duration=audio_transcription_file_duration,
339-
)
352+
353+
return openai_cost_per_second(
354+
model=model_without_prefix,
355+
custom_llm_provider=custom_llm_provider,
356+
duration=audio_transcription_file_duration,
357+
)
340358
elif call_type == "search" or call_type == "asearch":
341359
# Search providers use per-query pricing
342360
from litellm.search import search_provider_cost_per_query

tests/test_litellm/test_cost_calculator.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
)
2020
from litellm.types.llms.openai import OpenAIRealtimeStreamList
2121
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
22+
from litellm.utils import TranscriptionResponse
2223

2324

2425
def test_cost_calculator_with_response_cost_in_additional_headers():
@@ -77,6 +78,54 @@ def test_cost_calculator_with_usage():
7778
assert result == expected_cost, f"Got {result}, Expected {expected_cost}"
7879

7980

81+
def test_transcription_cost_uses_token_pricing():
    """Transcription responses that carry token usage details must be
    priced per-token (gpt-4o-transcribe rates), not per-second."""
    from litellm import completion_cost

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    token_usage = Usage(
        prompt_tokens=14,
        completion_tokens=45,
        total_tokens=59,
        prompt_tokens_details=PromptTokensDetailsWrapper(
            text_tokens=0, audio_tokens=14
        ),
    )
    transcription = TranscriptionResponse(text="demo text")
    transcription.usage = token_usage

    observed_cost = completion_cost(
        completion_response=transcription,
        model="gpt-4o-transcribe",
        custom_llm_provider="openai",
        call_type="atranscription",
    )

    # 14 audio prompt tokens at 6e-06 each + 45 completion tokens at 1e-05.
    expected_cost = (14 * 6e-06) + (45 * 1e-05)
    assert pytest.approx(observed_cost, rel=1e-6) == expected_cost
107+
108+
109+
def test_transcription_cost_falls_back_to_duration():
    """Without token usage details, transcription cost must fall back to
    per-second (duration-based) pricing."""
    from litellm import completion_cost

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    transcription = TranscriptionResponse(text="demo text")
    transcription.duration = 10.0

    observed_cost = completion_cost(
        completion_response=transcription,
        model="whisper-1",
        custom_llm_provider="openai",
        call_type="atranscription",
    )

    # 10 seconds of audio billed at 0.0001 per second.
    expected_cost = 10.0 * 0.0001
    assert pytest.approx(observed_cost, rel=1e-6) == expected_cost
127+
128+
80129
def test_handle_realtime_stream_cost_calculation():
81130
from litellm.cost_calculator import RealtimeAPITokenUsageProcessor
82131

0 commit comments

Comments (0)