From c6e184ae8b51728b038318dcd1afeca6b6c32f4f Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 13:56:08 -0500 Subject: [PATCH 1/9] propagate model id on errors too --- .../proxy/anthropic_endpoints/endpoints.py | 24 ++++ litellm/proxy/common_request_processing.py | 37 ++++++ .../proxy/test_failed_request_headers.py | 118 ++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 tests/test_litellm/proxy/test_failed_request_headers.py diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py index c450b655a2c1..7c4bf43368ec 100644 --- a/litellm/proxy/anthropic_endpoints/endpoints.py +++ b/litellm/proxy/anthropic_endpoints/endpoints.py @@ -85,6 +85,20 @@ async def anthropic_response( # noqa: PLR0915 if data["model"] in litellm.model_alias_map: data["model"] = litellm.model_alias_map[data["model"]] + # Inject model_id into metadata if available + # This ensures model_id is available in logging_obj for failed requests + if llm_router and data.get("model"): + try: + model_ids = llm_router.get_model_ids(data["model"]) + if model_ids: + if "metadata" not in data: + data["metadata"] = {} + if "model_info" not in data["metadata"]: + data["metadata"]["model_info"] = {} + data["metadata"]["model_info"]["id"] = model_ids[0] + except Exception as e: + verbose_proxy_logger.error(f"Error getting model ID from router for model: {data['model']}: {e}") + ### CALL HOOKS ### - modify incoming data before calling the model data = await proxy_logging_obj.pre_call_hook( # type: ignore user_api_key_dict=user_api_key_dict, data=data, call_type=CallTypes.anthropic_messages.value @@ -217,11 +231,21 @@ async def anthropic_response( # noqa: PLR0915 ) ) error_msg = f"{str(e)}" + + # Get headers with model_id if available + headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=user_api_key_dict, + model_id=data.get("metadata", {}).get("model_info", {}).get("id", None), + version=version, + request_data=data + ) + raise ProxyException( message=getattr(e, "message", error_msg), type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), code=getattr(e, "status_code", 500), + headers=headers ) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 5a3e0b334b47..fea569129088 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -340,6 +340,7 @@ async def common_processing_pre_call_logic( user_max_tokens: Optional[int] = None, user_api_base: Optional[str] = None, model: Optional[str] = None, + llm_router: Optional[Router] = None, ) -> Tuple[dict, LiteLLMLoggingObj]: start_time = datetime.now() # start before calling guardrail hooks @@ -378,6 +379,20 @@ async def common_processing_pre_call_logic( ): self.data["model"] = litellm.model_alias_map[self.data["model"]] + # Inject model_id into metadata if available + # This ensures model_id is available in logging_obj for failed requests + if llm_router and self.data.get("model"): + try: + model_ids = llm_router.get_model_ids(self.data["model"]) + if model_ids: + if "metadata" not in self.data: + self.data["metadata"] = {} + if "model_info" not in self.data["metadata"]: + self.data["metadata"]["model_info"] = {} + self.data["metadata"]["model_info"]["id"] = model_ids[0] + except Exception as e: + verbose_proxy_logger.error(f"Error getting model ID from router for model: {self.data['model']}: {e}") + # Check key-specific aliases if ( isinstance(self.data["model"], str) @@ -490,6 +505,7 @@ async def base_process_llm_request( user_api_base=user_api_base, model=model, route_type=route_type, + llm_router=llm_router, ) tasks = [] @@ -748,11 +764,32 @@ async def _handle_llm_api_exception( _litellm_logging_obj: Optional[LiteLLMLoggingObj] = self.data.get( "litellm_logging_obj", None ) + + # Attempt to get model_id from logging object + model_id = None + if _litellm_logging_obj: + # 1. Try getting from litellm_params (updated during call) + if ( + hasattr(_litellm_logging_obj, "litellm_params") + and _litellm_logging_obj.litellm_params + ): + metadata = _litellm_logging_obj.litellm_params.get("metadata") or {} + model_info = metadata.get("model_info") or {} + model_id = model_info.get("id", None) + + # 2. Fallback to kwargs (initial) + if not model_id and _litellm_logging_obj.kwargs: + litellm_params = _litellm_logging_obj.kwargs.get("litellm_params", {}) + metadata = litellm_params.get("metadata") or {} + model_info = metadata.get("model_info") or {} + model_id = model_info.get("id", None) + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( user_api_key_dict=user_api_key_dict, call_id=( _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None ), + model_id=model_id, version=version, response_cost=0, model_region=getattr(user_api_key_dict, "allowed_model_region", ""), diff --git a/tests/test_litellm/proxy/test_failed_request_headers.py b/tests/test_litellm/proxy/test_failed_request_headers.py new file mode 100644 index 000000000000..ce4fef5c102f --- /dev/null +++ b/tests/test_litellm/proxy/test_failed_request_headers.py @@ -0,0 +1,118 @@ +import pytest +from fastapi.testclient import TestClient +from unittest.mock import MagicMock, patch, AsyncMock +from litellm.proxy.proxy_server import app +from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm.proxy._types import UserAPIKeyAuth + +@pytest.fixture +def client(): + return TestClient(app) + +def test_x_litellm_model_id_header_in_exception(): + """ + Directly test the logic in ProxyBaseLLMRequestProcessing._handle_llm_api_exception + to ensure it extracts model_id from the logging object and passes it to get_custom_headers. + """ + # Mock dependencies + mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth) + mock_user_api_key_dict.allowed_model_region = "us-east-1" + mock_user_api_key_dict.tpm_limit = 100 + mock_user_api_key_dict.rpm_limit = 10 + mock_user_api_key_dict.max_budget = 100.0 + mock_user_api_key_dict.spend = 5.0 + + # Use AsyncMock for awaited methods + mock_proxy_logging_obj = MagicMock() + mock_proxy_logging_obj.post_call_failure_hook = AsyncMock() + + # Create a mock exception + exception = Exception("Test exception") + + # Create a mock logging object with model_id in litellm_params + mock_litellm_logging_obj = MagicMock() + mock_litellm_logging_obj.litellm_call_id = "test-call-id" + mock_litellm_logging_obj.litellm_params = { + "metadata": { + "model_info": { + "id": "test-model-id-123" + } + } + } + + # Setup the processor with data containing the logging object + data = { + "litellm_logging_obj": mock_litellm_logging_obj, + "model": "gpt-4" + } + processor = ProxyBaseLLMRequestProcessing(data=data) + + import asyncio + from litellm.proxy._types import ProxyException + + try: + asyncio.run(processor._handle_llm_api_exception( + e=exception, + user_api_key_dict=mock_user_api_key_dict, + proxy_logging_obj=mock_proxy_logging_obj + )) + except ProxyException as pe: + # Verify the headers in the raised exception + assert "x-litellm-model-id" in pe.headers + assert pe.headers["x-litellm-model-id"] == "test-model-id-123" + except Exception as e: + pytest.fail(f"Raised unexpected exception type: {type(e)}") + +def test_x_litellm_model_id_header_in_exception_fallback_kwargs(): + """ + Test fallback to kwargs if litellm_params is missing/empty + """ + # Mock dependencies + mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth) + mock_user_api_key_dict.allowed_model_region = "us-east-1" + # Need to mock tpm_limit/rpm_limit etc as they are accessed by get_custom_headers + mock_user_api_key_dict.tpm_limit = 100 + mock_user_api_key_dict.rpm_limit = 10 + mock_user_api_key_dict.max_budget = 100.0 + mock_user_api_key_dict.spend = 5.0 + + # Use AsyncMock for awaited methods + mock_proxy_logging_obj = MagicMock() + mock_proxy_logging_obj.post_call_failure_hook = AsyncMock() + + exception = Exception("Test exception") + + # Create a mock logging object with model_id in kwargs + mock_litellm_logging_obj = MagicMock() + mock_litellm_logging_obj.litellm_call_id = "test-call-id" + mock_litellm_logging_obj.litellm_params = {} # Empty + mock_litellm_logging_obj.kwargs = { + "litellm_params": { + "metadata": { + "model_info": { + "id": "fallback-model-id-456" + } + } + } + } + + data = { + "litellm_logging_obj": mock_litellm_logging_obj, + "model": "gpt-4" + } + processor = ProxyBaseLLMRequestProcessing(data=data) + + import asyncio + from litellm.proxy._types import ProxyException + + try: + asyncio.run(processor._handle_llm_api_exception( + e=exception, + user_api_key_dict=mock_user_api_key_dict, + proxy_logging_obj=mock_proxy_logging_obj + )) + except ProxyException as pe: + assert "x-litellm-model-id" in pe.headers + assert pe.headers["x-litellm-model-id"] == "fallback-model-id-456" + except Exception as e: + pytest.fail(f"Raised unexpected exception type: {type(e)}") From f6ffa0bbfe3fbffe5d45e62d4b968e0c672c48cb Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 19:45:32 -0500 Subject: [PATCH 2/9] make it work for messages and streaming --- .../adapters/transformation.py | 7 ++ .../proxy/anthropic_endpoints/endpoints.py | 61 +++++---- litellm/proxy/common_request_processing.py | 23 +++- litellm/responses/streaming_iterator.py | 19 +++ litellm/router.py | 1 + .../test_anthropic_messages_error_headers.py | 118 ++++++++++++++++++ .../proxy/test_failed_request_headers.py | 118 ------------------ 7 files changed, 204 insertions(+), 143 deletions(-) create mode 100644 tests/test_litellm/proxy/test_anthropic_messages_error_headers.py delete mode 100644 tests/test_litellm/proxy/test_failed_request_headers.py diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py index 0e905014fe2e..31c314426896 100644 --- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py @@ -622,6 +622,13 @@ def translate_openai_response_to_anthropic( stop_reason=anthropic_finish_reason, ) + # Preserve model_id from the OpenAI response's _hidden_params + # This is needed for load balancing attribution + hidden_params = getattr(response, "_hidden_params", {}) or {} + model_id = hidden_params.get("model_id") + if model_id: + translated_obj["_litellm_model_id"] = model_id # type: ignore + return translated_obj def _translate_streaming_openai_chunk_to_anthropic_content_block( diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py index 7c4bf43368ec..4425a4f0af37 100644 --- a/litellm/proxy/anthropic_endpoints/endpoints.py +++ b/litellm/proxy/anthropic_endpoints/endpoints.py @@ -85,20 +85,6 @@ async def anthropic_response( # noqa: PLR0915 if data["model"] in litellm.model_alias_map: data["model"] = litellm.model_alias_map[data["model"]] - # Inject model_id into metadata if available - # This ensures model_id is available in logging_obj for failed requests - if llm_router and data.get("model"): - try: - model_ids = llm_router.get_model_ids(data["model"]) - if model_ids: - if "metadata" not in data: - data["metadata"] = {} - if "model_info" not in data["metadata"]: - data["metadata"]["model_info"] = {} - data["metadata"]["model_info"]["id"] = model_ids[0] - except Exception as e: - verbose_proxy_logger.error(f"Error getting model ID from router for model: {data['model']}: {e}") - ### CALL HOOKS ### - modify incoming data before calling the model data = await proxy_logging_obj.pre_call_hook( # type: ignore user_api_key_dict=user_api_key_dict, data=data, call_type=CallTypes.anthropic_messages.value @@ -168,8 +154,13 @@ async def anthropic_response( # noqa: PLR0915 response = responses[1] + # Extract model_id from request metadata (set by router during routing) + litellm_metadata = data.get("litellm_metadata", {}) or {} + model_info = litellm_metadata.get("model_info", {}) or {} + model_id = model_info.get("id", "") or "" + + # Get other metadata from hidden_params hidden_params = getattr(response, "_hidden_params", {}) or {} - model_id = hidden_params.get("model_id", None) or "" cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" @@ -230,22 +221,50 @@ async def anthropic_response( # noqa: PLR0915 str(e) ) ) - error_msg = f"{str(e)}" - # Get headers with model_id if available - headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + # Extract model_id from request metadata (same as success path) + litellm_metadata = data.get("litellm_metadata", {}) or {} + model_info = litellm_metadata.get("model_info", {}) or {} + model_id = model_info.get("id", "") or "" + + # Get headers + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( user_api_key_dict=user_api_key_dict, - model_id=data.get("metadata", {}).get("model_info", {}).get("id", None), + call_id=data.get("litellm_call_id", ""), + model_id=model_id, version=version, - request_data=data + response_cost=0, + model_region=getattr(user_api_key_dict, "allowed_model_region", ""), + request_data=data, + timeout=getattr(e, "timeout", None), + litellm_logging_obj=None, ) + headers = getattr(e, "headers", {}) or {} + headers.update(custom_headers) + + # Raise ProxyException with proper headers + from litellm.proxy.proxy_server import ProxyException + + if isinstance(e, HTTPException): + raise ProxyException( + message=getattr(e, "detail", str(e)), + type=getattr(e, "type", "None"), + param=getattr(e, "param", "None"), + code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), + provider_specific_fields=getattr(e, "provider_specific_fields", None), + headers=headers, + ) + + error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), + openai_code=getattr(e, "code", None), code=getattr(e, "status_code", 500), - headers=headers + provider_specific_fields=getattr(e, "provider_specific_fields", None), + headers=headers, ) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index fea569129088..79c941d46a00 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -766,6 +766,9 @@ async def _handle_llm_api_exception( ) # Attempt to get model_id from logging object + # + # Note: We check the direct model_info path first (not nested in metadata) because that's where the router sets it. + # The nested metadata path is only a fallback for cases where model_info wasn't set at the top level. model_id = None if _litellm_logging_obj: # 1. Try getting from litellm_params (updated during call) @@ -773,17 +776,29 @@ async def _handle_llm_api_exception( hasattr(_litellm_logging_obj, "litellm_params") and _litellm_logging_obj.litellm_params ): - metadata = _litellm_logging_obj.litellm_params.get("metadata") or {} - model_info = metadata.get("model_info") or {} + # First check direct model_info path (set by router.py with selected deployment) + model_info = _litellm_logging_obj.litellm_params.get("model_info") or {} model_id = model_info.get("id", None) + # Fallback to nested metadata path + if not model_id: + metadata = _litellm_logging_obj.litellm_params.get("metadata") or {} + model_info = metadata.get("model_info") or {} + model_id = model_info.get("id", None) + # 2. Fallback to kwargs (initial) if not model_id and _litellm_logging_obj.kwargs: litellm_params = _litellm_logging_obj.kwargs.get("litellm_params", {}) - metadata = litellm_params.get("metadata") or {} - model_info = metadata.get("model_info") or {} + # First check direct model_info path + model_info = litellm_params.get("model_info") or {} model_id = model_info.get("id", None) + # Fallback to nested metadata path + if not model_id: + metadata = litellm_params.get("metadata") or {} + model_info = metadata.get("model_info") or {} + model_id = model_info.get("id", None) + custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( user_api_key_dict=user_api_key_dict, call_id=( diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py index 8eecc3e82111..0407776029d3 100644 --- a/litellm/responses/streaming_iterator.py +++ b/litellm/responses/streaming_iterator.py @@ -8,7 +8,9 @@ import litellm from litellm.constants import STREAM_SSE_DONE_STRING from litellm.litellm_core_utils.asyncify import run_async_function +from litellm.litellm_core_utils.core_helpers import process_response_headers from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.litellm_core_utils.llm_response_utils.get_api_base import get_api_base from litellm.litellm_core_utils.thread_pool_executor import executor from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig from litellm.responses.utils import ResponsesAPIRequestUtils @@ -51,6 +53,23 @@ def __init__( self.litellm_metadata = litellm_metadata self.custom_llm_provider = custom_llm_provider + # set hidden params for response headers (e.g., x-litellm-model-id) + # This matches ths stream wrapper in litellm/litellm_core_utils/streaming_handler.py + _api_base = get_api_base( + model=model or "", + optional_params=self.logging_obj.model_call_details.get( + "litellm_params", {} + ), + ) + _model_info: Dict = litellm_metadata.get("model_info", {}) if litellm_metadata else {} + self._hidden_params = { + "model_id": _model_info.get("id", None), + "api_base": _api_base, + } + self._hidden_params["additional_headers"] = process_response_headers( + self.response.headers or {} + ) # GUARANTEE OPENAI HEADERS IN RESPONSE + def _process_chunk(self, chunk) -> Optional[ResponsesAPIStreamingResponse]: """Process a single chunk of data from the stream""" if not chunk: diff --git a/litellm/router.py b/litellm/router.py index 6d38d2fc2bd8..9de34097bde4 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -1639,6 +1639,7 @@ def _update_kwargs_with_deployment( - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging) - Adds default litellm params to kwargs, if set. """ + print("MODEL-INFO: ", deployment.get("model_info", {}), flush=True) model_info = deployment.get("model_info", {}).copy() deployment_litellm_model_name = deployment["litellm_params"]["model"] deployment_api_base = deployment["litellm_params"].get("api_base") diff --git a/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py b/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py new file mode 100644 index 000000000000..2b11b216f095 --- /dev/null +++ b/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py @@ -0,0 +1,118 @@ +""" +Test that x-litellm-model-id header is returned on /v1/messages error responses. + +This test verifies that the model_id header is propagated correctly when +requests fail after router selection (e.g., due to unsupported parameters). +""" + +import pytest +import asyncio +import aiohttp + +LITELLM_MASTER_KEY = "sk-1234" + + +async def anthropic_messages_with_headers(session, key, model="gpt-4", **extra_params): + """ + Make a request to /v1/messages and return response headers. + """ + url = "http://0.0.0.0:4000/v1/messages" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + data = { + "model": model, + "max_tokens": 10, + "messages": [ + {"role": "user", "content": "Hello!"}, + ], + **extra_params, + } + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(f"Status: {status}") + print(f"Response: {response_text}") + print() + + raw_headers = response.raw_headers + raw_headers_json = {} + + for item in response.raw_headers: + raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8") + + return { + "status": status, + "headers": raw_headers_json, + "response_text": response_text, + } + + +@pytest.mark.asyncio +async def test_anthropic_messages_error_with_model_id_header(): + """ + Test that x-litellm-model-id header is returned on error responses. + + This test: + 1. Makes a request to /v1/messages with an unsupported parameter (reasoning_effort) + 2. Verifies that the request fails with a 400 error + 3. Verifies that the x-litellm-model-id header is present in the error response + + The error occurs AFTER router selection, so model_id should be available + and included in the error response headers. + """ + async with aiohttp.ClientSession() as session: + key = LITELLM_MASTER_KEY + result = await anthropic_messages_with_headers( + session=session, + key=key, + model="gpt-4", + reasoning_effort="low", # Unsupported param that triggers error + ) + + # Verify the request failed + assert result["status"] == 400, f"Expected 400, got {result['status']}" + + # Verify model_id header is present + assert "x-litellm-model-id" in result["headers"], ( + f"x-litellm-model-id header missing in error response. " + f"Headers: {result['headers'].keys()}" + ) + + # Verify the header has a non-empty value + model_id = result["headers"]["x-litellm-model-id"] + assert model_id, "x-litellm-model-id header is empty" + print(f"Successfully retrieved model_id on error response: {model_id}") + + +@pytest.mark.asyncio +async def test_anthropic_messages_success_with_model_id_header(): + """ + Test that x-litellm-model-id header is returned on successful responses. + + This is a baseline test to ensure the header is present on success too. + """ + async with aiohttp.ClientSession() as session: + key = LITELLM_MASTER_KEY + result = await anthropic_messages_with_headers( + session=session, + key=key, + model="gpt-4", + ) + + # Verify the request succeeded + assert result["status"] == 200, f"Expected 200, got {result['status']}" + + # Verify model_id header is present + assert "x-litellm-model-id" in result["headers"], ( + f"x-litellm-model-id header missing in success response. " + f"Headers: {result['headers'].keys()}" + ) + + # Verify the header has a non-empty value + model_id = result["headers"]["x-litellm-model-id"] + assert model_id, "x-litellm-model-id header is empty" + print(f"Successfully retrieved model_id on success response: {model_id}") diff --git a/tests/test_litellm/proxy/test_failed_request_headers.py b/tests/test_litellm/proxy/test_failed_request_headers.py deleted file mode 100644 index ce4fef5c102f..000000000000 --- a/tests/test_litellm/proxy/test_failed_request_headers.py +++ /dev/null @@ -1,118 +0,0 @@ -import pytest -from fastapi.testclient import TestClient -from unittest.mock import MagicMock, patch, AsyncMock -from litellm.proxy.proxy_server import app -from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing -from litellm.proxy._types import UserAPIKeyAuth - -@pytest.fixture -def client(): - return TestClient(app) - -def test_x_litellm_model_id_header_in_exception(): - """ - Directly test the logic in ProxyBaseLLMRequestProcessing._handle_llm_api_exception - to ensure it extracts model_id from the logging object and passes it to get_custom_headers. - """ - # Mock dependencies - mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth) - mock_user_api_key_dict.allowed_model_region = "us-east-1" - mock_user_api_key_dict.tpm_limit = 100 - mock_user_api_key_dict.rpm_limit = 10 - mock_user_api_key_dict.max_budget = 100.0 - mock_user_api_key_dict.spend = 5.0 - - # Use AsyncMock for awaited methods - mock_proxy_logging_obj = MagicMock() - mock_proxy_logging_obj.post_call_failure_hook = AsyncMock() - - # Create a mock exception - exception = Exception("Test exception") - - # Create a mock logging object with model_id in litellm_params - mock_litellm_logging_obj = MagicMock() - mock_litellm_logging_obj.litellm_call_id = "test-call-id" - mock_litellm_logging_obj.litellm_params = { - "metadata": { - "model_info": { - "id": "test-model-id-123" - } - } - } - - # Setup the processor with data containing the logging object - data = { - "litellm_logging_obj": mock_litellm_logging_obj, - "model": "gpt-4" - } - processor = ProxyBaseLLMRequestProcessing(data=data) - - import asyncio - from litellm.proxy._types import ProxyException - - try: - asyncio.run(processor._handle_llm_api_exception( - e=exception, - user_api_key_dict=mock_user_api_key_dict, - proxy_logging_obj=mock_proxy_logging_obj - )) - except ProxyException as pe: - # Verify the headers in the raised exception - assert "x-litellm-model-id" in pe.headers - assert pe.headers["x-litellm-model-id"] == "test-model-id-123" - except Exception as e: - pytest.fail(f"Raised unexpected exception type: {type(e)}") - -def test_x_litellm_model_id_header_in_exception_fallback_kwargs(): - """ - Test fallback to kwargs if litellm_params is missing/empty - """ - # Mock dependencies - mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth) - mock_user_api_key_dict.allowed_model_region = "us-east-1" - # Need to mock tpm_limit/rpm_limit etc as they are accessed by get_custom_headers - mock_user_api_key_dict.tpm_limit = 100 - mock_user_api_key_dict.rpm_limit = 10 - mock_user_api_key_dict.max_budget = 100.0 - mock_user_api_key_dict.spend = 5.0 - - # Use AsyncMock for awaited methods - mock_proxy_logging_obj = MagicMock() - mock_proxy_logging_obj.post_call_failure_hook = AsyncMock() - - exception = Exception("Test exception") - - # Create a mock logging object with model_id in kwargs - mock_litellm_logging_obj = MagicMock() - mock_litellm_logging_obj.litellm_call_id = "test-call-id" - mock_litellm_logging_obj.litellm_params = {} # Empty - mock_litellm_logging_obj.kwargs = { - "litellm_params": { - "metadata": { - "model_info": { - "id": "fallback-model-id-456" - } - } - } - } - - data = { - "litellm_logging_obj": mock_litellm_logging_obj, - "model": "gpt-4" - } - processor = ProxyBaseLLMRequestProcessing(data=data) - - import asyncio - from litellm.proxy._types import ProxyException - - try: - asyncio.run(processor._handle_llm_api_exception( - e=exception, - user_api_key_dict=mock_user_api_key_dict, - proxy_logging_obj=mock_proxy_logging_obj - )) - except ProxyException as pe: - assert "x-litellm-model-id" in pe.headers - assert pe.headers["x-litellm-model-id"] == "fallback-model-id-456" - except Exception as e: - pytest.fail(f"Raised unexpected exception type: {type(e)}") From 855ddba522fb9d468c6b861f0b2eefa1c1f700f2 Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 19:47:22 -0500 Subject: [PATCH 3/9] fix --- litellm/proxy/anthropic_endpoints/endpoints.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py index 4425a4f0af37..c954dc2ef048 100644 --- a/litellm/proxy/anthropic_endpoints/endpoints.py +++ b/litellm/proxy/anthropic_endpoints/endpoints.py @@ -243,19 +243,6 @@ async def anthropic_response( # noqa: PLR0915 headers = getattr(e, "headers", {}) or {} headers.update(custom_headers) - # Raise ProxyException with proper headers - from litellm.proxy.proxy_server import ProxyException - - if isinstance(e, HTTPException): - raise ProxyException( - message=getattr(e, "detail", str(e)), - type=getattr(e, "type", "None"), - param=getattr(e, "param", "None"), - code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), - provider_specific_fields=getattr(e, "provider_specific_fields", None), - headers=headers, - ) - error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), From 66fb1c9f500e7a13259ba389ee52f980df60178b Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 20:06:45 -0500 Subject: [PATCH 4/9] cleanup --- .../adapters/transformation.py | 7 -- .../proxy/anthropic_endpoints/endpoints.py | 7 +- litellm/proxy/common_request_processing.py | 65 ++++++++++--------- litellm/router.py | 1 - 4 files changed, 37 insertions(+), 43 deletions(-) diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py index 31c314426896..0e905014fe2e 100644 --- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py +++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py @@ -622,13 +622,6 @@ def translate_openai_response_to_anthropic( stop_reason=anthropic_finish_reason, ) - # Preserve model_id from the OpenAI response's _hidden_params - # This is needed for load balancing attribution - hidden_params = getattr(response, "_hidden_params", {}) or {} - model_id = hidden_params.get("model_id") - if model_id: - translated_obj["_litellm_model_id"] = model_id # type: ignore - return translated_obj def _translate_streaming_openai_chunk_to_anthropic_content_block( diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py index c954dc2ef048..abea9e6fee1a 100644 --- a/litellm/proxy/anthropic_endpoints/endpoints.py +++ b/litellm/proxy/anthropic_endpoints/endpoints.py @@ -228,7 +228,7 @@ async def anthropic_response( # noqa: PLR0915 model_id = model_info.get("id", "") or "" # Get headers - custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + headers = ProxyBaseLLMRequestProcessing.get_custom_headers( user_api_key_dict=user_api_key_dict, call_id=data.get("litellm_call_id", ""), model_id=model_id, @@ -240,17 +240,12 @@ async def anthropic_response( # noqa: PLR0915 litellm_logging_obj=None, ) - headers = getattr(e, "headers", {}) or {} - headers.update(custom_headers) - error_msg = f"{str(e)}" raise ProxyException( message=getattr(e, "message", error_msg), type=getattr(e, "type", "None"), param=getattr(e, "param", "None"), - openai_code=getattr(e, "code", None), code=getattr(e, "status_code", 500), - provider_specific_fields=getattr(e, "provider_specific_fields", None), headers=headers, ) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 79c941d46a00..f6abe6992e29 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -769,35 +769,7 @@ async def _handle_llm_api_exception( # # Note: We check the direct model_info path first (not nested in metadata) because that's where the router sets it. # The nested metadata path is only a fallback for cases where model_info wasn't set at the top level. - model_id = None - if _litellm_logging_obj: - # 1. Try getting from litellm_params (updated during call) - if ( - hasattr(_litellm_logging_obj, "litellm_params") - and _litellm_logging_obj.litellm_params - ): - # First check direct model_info path (set by router.py with selected deployment) - model_info = _litellm_logging_obj.litellm_params.get("model_info") or {} - model_id = model_info.get("id", None) - - # Fallback to nested metadata path - if not model_id: - metadata = _litellm_logging_obj.litellm_params.get("metadata") or {} - model_info = metadata.get("model_info") or {} - model_id = model_info.get("id", None) - - # 2. Fallback to kwargs (initial) - if not model_id and _litellm_logging_obj.kwargs: - litellm_params = _litellm_logging_obj.kwargs.get("litellm_params", {}) - # First check direct model_info path - model_info = litellm_params.get("model_info") or {} - model_id = model_info.get("id", None) - - # Fallback to nested metadata path - if not model_id: - metadata = litellm_params.get("metadata") or {} - model_info = metadata.get("model_info") or {} - model_id = model_info.get("id", None) + model_id = self.maybe_get_model_id_from_logging_obj(_litellm_logging_obj) custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( user_api_key_dict=user_api_key_dict, @@ -1117,3 +1089,38 @@ def _inject_cost_into_usage_dict(obj: dict, model_name: str) -> Optional[dict]: obj.setdefault("usage", {})["cost"] = cost_val return obj return None + + def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]: + model_id = None + if _logging_obj: + # 1. Try getting from litellm_params (updated during call) + if ( + hasattr(_logging_obj, "litellm_params") + and _logging_obj.litellm_params + ): + # First check direct model_info path (set by router.py with selected deployment) + model_info = _logging_obj.litellm_params.get("model_info") or {} + model_id = model_info.get("id", None) + + # Fallback to nested metadata path + if not model_id: + metadata = _logging_obj.litellm_params.get("metadata") or {} + model_info = metadata.get("model_info") or {} + model_id = model_info.get("id", None) + + # 2. Fallback to kwargs (initial) + if not model_id: + _kwargs = getattr(_logging_obj, "kwargs", None) + if _kwargs: + litellm_params = _kwargs.get("litellm_params", {}) + # First check direct model_info path + model_info = litellm_params.get("model_info") or {} + model_id = model_info.get("id", None) + + # Fallback to nested metadata path + if not model_id: + metadata = litellm_params.get("metadata") or {} + model_info = metadata.get("model_info") or {} + model_id = model_info.get("id", None) + + return model_id \ No newline at end of file diff --git a/litellm/router.py b/litellm/router.py index 9de34097bde4..6d38d2fc2bd8 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -1639,7 +1639,6 @@ def _update_kwargs_with_deployment( - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging) - Adds default litellm params to kwargs, if set. """ - print("MODEL-INFO: ", deployment.get("model_info", {}), flush=True) model_info = deployment.get("model_info", {}).copy() deployment_litellm_model_name = deployment["litellm_params"]["model"] deployment_api_base = deployment["litellm_params"].get("api_base") From 4a28b015d134e3e5b82d3c6fba667aec3b1be8b0 Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 20:14:47 -0500 Subject: [PATCH 5/9] cleanup --- litellm/proxy/common_request_processing.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index f6abe6992e29..928752022d1b 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -379,20 +379,6 @@ async def common_processing_pre_call_logic( ): self.data["model"] = litellm.model_alias_map[self.data["model"]] - # Inject model_id into metadata if available - # This ensures model_id is available in logging_obj for failed requests - if llm_router and self.data.get("model"): - try: - model_ids = llm_router.get_model_ids(self.data["model"]) - if model_ids: - if "metadata" not in self.data: - self.data["metadata"] = {} - if "model_info" not in self.data["metadata"]: - self.data["metadata"]["model_info"] = {} - self.data["metadata"]["model_info"]["id"] = model_ids[0] - except Exception as e: - verbose_proxy_logger.error(f"Error getting model ID from router for model: {self.data['model']}: {e}") - # Check key-specific aliases if ( isinstance(self.data["model"], str) From 3e2f693f946d6c4e689db2d9a1a5637ffd7178c9 Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 20:30:31 -0500 Subject: [PATCH 6/9] final --- litellm/proxy/common_request_processing.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 928752022d1b..cfb9bbe613b3 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -1077,6 +1077,12 @@ def _inject_cost_into_usage_dict(obj: dict, model_name: str) -> Optional[dict]: return None def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]: + """ + Get model_id from logging object or request metadata. + + The router sets model_info.id when selecting a deployment. This tries multiple locations + where the ID might be stored depending on the request lifecycle stage. + """ model_id = None if _logging_obj: # 1. Try getting from litellm_params (updated during call) @@ -1109,4 +1115,10 @@ def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLogg model_info = metadata.get("model_info") or {} model_id = model_info.get("id", None) + # 3. Final fallback to self.data["litellm_metadata"] (for routes like /v1/responses that populate data before error) + if not model_id: + litellm_metadata = self.data.get("litellm_metadata", {}) or {} + model_info = litellm_metadata.get("model_info", {}) or {} + model_id = model_info.get("id", None) + return model_id \ No newline at end of file From 7de22eb3ef69c457f2cbea66dd38103d0cea4f8f Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 20:54:08 -0500 Subject: [PATCH 7/9] cleanup --- .../test_anthropic_messages_error_headers.py | 118 --------- .../proxy/test_model_id_header_propagation.py | 250 ++++++++++++++++++ 2 files changed, 250 insertions(+), 118 deletions(-) delete mode 100644 tests/test_litellm/proxy/test_anthropic_messages_error_headers.py create mode 100644 tests/test_litellm/proxy/test_model_id_header_propagation.py diff --git a/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py b/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py deleted file mode 100644 index 2b11b216f095..000000000000 --- a/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -Test that x-litellm-model-id header is returned on /v1/messages error responses. - -This test verifies that the model_id header is propagated correctly when -requests fail after router selection (e.g., due to unsupported parameters). -""" - -import pytest -import asyncio -import aiohttp - -LITELLM_MASTER_KEY = "sk-1234" - - -async def anthropic_messages_with_headers(session, key, model="gpt-4", **extra_params): - """ - Make a request to /v1/messages and return response headers. - """ - url = "http://0.0.0.0:4000/v1/messages" - headers = { - "Authorization": f"Bearer {key}", - "Content-Type": "application/json", - } - data = { - "model": model, - "max_tokens": 10, - "messages": [ - {"role": "user", "content": "Hello!"}, - ], - **extra_params, - } - - async with session.post(url, headers=headers, json=data) as response: - status = response.status - response_text = await response.text() - - print(f"Status: {status}") - print(f"Response: {response_text}") - print() - - raw_headers = response.raw_headers - raw_headers_json = {} - - for item in response.raw_headers: - raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8") - - return { - "status": status, - "headers": raw_headers_json, - "response_text": response_text, - } - - -@pytest.mark.asyncio -async def test_anthropic_messages_error_with_model_id_header(): - """ - Test that x-litellm-model-id header is returned on error responses. - - This test: - 1. Makes a request to /v1/messages with an unsupported parameter (reasoning_effort) - 2. Verifies that the request fails with a 400 error - 3. Verifies that the x-litellm-model-id header is present in the error response - - The error occurs AFTER router selection, so model_id should be available - and included in the error response headers. - """ - async with aiohttp.ClientSession() as session: - key = LITELLM_MASTER_KEY - result = await anthropic_messages_with_headers( - session=session, - key=key, - model="gpt-4", - reasoning_effort="low", # Unsupported param that triggers error - ) - - # Verify the request failed - assert result["status"] == 400, f"Expected 400, got {result['status']}" - - # Verify model_id header is present - assert "x-litellm-model-id" in result["headers"], ( - f"x-litellm-model-id header missing in error response. " - f"Headers: {result['headers'].keys()}" - ) - - # Verify the header has a non-empty value - model_id = result["headers"]["x-litellm-model-id"] - assert model_id, "x-litellm-model-id header is empty" - print(f"Successfully retrieved model_id on error response: {model_id}") - - -@pytest.mark.asyncio -async def test_anthropic_messages_success_with_model_id_header(): - """ - Test that x-litellm-model-id header is returned on successful responses. - - This is a baseline test to ensure the header is present on success too. - """ - async with aiohttp.ClientSession() as session: - key = LITELLM_MASTER_KEY - result = await anthropic_messages_with_headers( - session=session, - key=key, - model="gpt-4", - ) - - # Verify the request succeeded - assert result["status"] == 200, f"Expected 200, got {result['status']}" - - # Verify model_id header is present - assert "x-litellm-model-id" in result["headers"], ( - f"x-litellm-model-id header missing in success response. " - f"Headers: {result['headers'].keys()}" - ) - - # Verify the header has a non-empty value - model_id = result["headers"]["x-litellm-model-id"] - assert model_id, "x-litellm-model-id header is empty" - print(f"Successfully retrieved model_id on success response: {model_id}") diff --git a/tests/test_litellm/proxy/test_model_id_header_propagation.py b/tests/test_litellm/proxy/test_model_id_header_propagation.py new file mode 100644 index 000000000000..c9dfede0d3ef --- /dev/null +++ b/tests/test_litellm/proxy/test_model_id_header_propagation.py @@ -0,0 +1,250 @@ +""" +Test that x-litellm-model-id header is propagated correctly on error responses. + +This test suite verifies the `maybe_get_model_id_from_logging_obj` method +which is responsible for extracting model_id from different locations +depending on the request lifecycle stage. +""" + +import pytest +from unittest.mock import MagicMock + +from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm.proxy._types import UserAPIKeyAuth + + +def test_maybe_get_model_id_from_logging_obj_from_litellm_params(): + """ + Test extraction of model_id from logging_obj.litellm_params (used by /v1/chat/completions). + """ + # Create a ProxyBaseLLMRequestProcessing instance + processor = ProxyBaseLLMRequestProcessing(data={}) + + # Create a mock logging object with model_info in litellm_params + mock_logging_obj = MagicMock() + mock_logging_obj.litellm_params = { + "model_info": { + "id": "test-model-id-from-litellm-params" + } + } + + # Test extraction + model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + + assert model_id == "test-model-id-from-litellm-params" + + +def test_maybe_get_model_id_from_logging_obj_from_litellm_params_nested(): + """ + Test extraction of model_id from nested metadata in logging_obj.litellm_params. + """ + processor = ProxyBaseLLMRequestProcessing(data={}) + + # Create a mock logging object with model_info nested in metadata + mock_logging_obj = MagicMock() + mock_logging_obj.litellm_params = { + "metadata": { + "model_info": { + "id": "test-model-id-nested" + } + } + } + + # Test extraction + model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + + assert model_id == "test-model-id-nested" + + +def test_maybe_get_model_id_from_logging_obj_from_kwargs(): + """ + Test extraction of model_id from logging_obj.kwargs (fallback path). + """ + processor = ProxyBaseLLMRequestProcessing(data={}) + + # Create a mock logging object with model_info in kwargs + mock_logging_obj = MagicMock() + mock_logging_obj.litellm_params = None + mock_logging_obj.kwargs = { + "litellm_params": { + "model_info": { + "id": "test-model-id-from-kwargs" + } + } + } + + # Test extraction + model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + + assert model_id == "test-model-id-from-kwargs" + + +def test_maybe_get_model_id_from_logging_obj_from_data(): + """ + Test extraction of model_id from self.data (used by /v1/messages and /v1/responses). + """ + # Create a processor with model_info in data + processor = ProxyBaseLLMRequestProcessing(data={ + "litellm_metadata": { + "model_info": { + "id": "test-model-id-from-data" + } + } + }) + + # Create a mock logging object without model_info + mock_logging_obj = MagicMock() + mock_logging_obj.litellm_params = {} + mock_logging_obj.kwargs = {} + + # Test extraction - should fall back to self.data + model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + + assert model_id == "test-model-id-from-data" + + +def test_maybe_get_model_id_from_logging_obj_no_logging_obj(): + """ + Test extraction of model_id when logging_obj is None (should use self.data). + """ + # Create a processor with model_info in data + processor = ProxyBaseLLMRequestProcessing(data={ + "litellm_metadata": { + "model_info": { + "id": "test-model-id-no-logging-obj" + } + } + }) + + # Test extraction with None logging_obj + model_id = processor.maybe_get_model_id_from_logging_obj(None) + + assert model_id == "test-model-id-no-logging-obj" + + +def test_maybe_get_model_id_from_logging_obj_not_found(): + """ + Test extraction of model_id when it's not available anywhere (should return None). + """ + processor = ProxyBaseLLMRequestProcessing(data={}) + + # Create a mock logging object without model_info anywhere + mock_logging_obj = MagicMock() + mock_logging_obj.litellm_params = {} + mock_logging_obj.kwargs = {} + + # Test extraction - should return None + model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + + assert model_id is None + + +def test_maybe_get_model_id_priority_litellm_params_over_data(): + """ + Test that model_id from logging_obj.litellm_params takes priority over self.data. + """ + # Create a processor with model_info in both places + processor = ProxyBaseLLMRequestProcessing(data={ + "litellm_metadata": { + "model_info": { + "id": "model-id-from-data" + } + } + }) + + # Create a mock logging object with model_info + mock_logging_obj = MagicMock() + mock_logging_obj.litellm_params = { + "model_info": { + "id": "model-id-from-litellm-params" + } + } + + # Test extraction - should prefer litellm_params + model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + + assert model_id == "model-id-from-litellm-params" + + +def test_get_custom_headers_includes_model_id(): + """ + Test that get_custom_headers includes x-litellm-model-id when model_id is provided. + """ + # Create mock user_api_key_dict with all required attributes + mock_user_api_key_dict = MagicMock() + mock_user_api_key_dict.user_id = "test-user" + mock_user_api_key_dict.team_id = "test-team" + mock_user_api_key_dict.tpm_limit = 1000 + mock_user_api_key_dict.rpm_limit = 100 + + # Call get_custom_headers with a model_id + headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=mock_user_api_key_dict, + model_id="test-model-123", + cache_key="test-cache-key", + api_base="https://api.example.com", + version="1.0.0", + response_cost=0.001, + request_data={}, + hidden_params={} + ) + + # Verify model_id is in headers + assert "x-litellm-model-id" in headers + assert headers["x-litellm-model-id"] == "test-model-123" + + +def test_get_custom_headers_without_model_id(): + """ + Test that get_custom_headers works correctly when model_id is None or empty. + """ + # Create mock user_api_key_dict with all required attributes + mock_user_api_key_dict = MagicMock() + mock_user_api_key_dict.user_id = "test-user" + mock_user_api_key_dict.team_id = "test-team" + mock_user_api_key_dict.tpm_limit = 1000 + mock_user_api_key_dict.rpm_limit = 100 + + # Call get_custom_headers without a model_id + headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=mock_user_api_key_dict, + model_id=None, + cache_key="test-cache-key", + api_base="https://api.example.com", + version="1.0.0", + response_cost=0.001, + request_data={}, + hidden_params={} + ) + + # x-litellm-model-id should not be in headers (or should be empty/None) + if "x-litellm-model-id" in headers: + assert headers["x-litellm-model-id"] in [None, ""] + + +def test_get_custom_headers_with_empty_string_model_id(): + """ + Test that get_custom_headers handles empty string model_id correctly. + """ + # Create mock user_api_key_dict with all required attributes + mock_user_api_key_dict = MagicMock() + mock_user_api_key_dict.user_id = "test-user" + mock_user_api_key_dict.team_id = "test-team" + mock_user_api_key_dict.tpm_limit = 1000 + mock_user_api_key_dict.rpm_limit = 100 + + # Call get_custom_headers with empty string model_id + headers = ProxyBaseLLMRequestProcessing.get_custom_headers( + user_api_key_dict=mock_user_api_key_dict, + model_id="", + cache_key="test-cache-key", + api_base="https://api.example.com", + version="1.0.0", + response_cost=0.001, + request_data={}, + hidden_params={} + ) + + # x-litellm-model-id should not be in headers (or should be empty) + if "x-litellm-model-id" in headers: + assert headers["x-litellm-model-id"] == "" From ff823a276cb4f445d92786ff71854a4fcff757ca Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 21:45:58 -0500 Subject: [PATCH 8/9] clean up method name and fix responses api streaming --- litellm/proxy/common_request_processing.py | 12 ++++++-- .../proxy/test_model_id_header_propagation.py | 28 +++++++++---------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index cfb9bbe613b3..63ddd2d32639 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -530,6 +530,14 @@ async def base_process_llm_request( hidden_params = getattr(response, "_hidden_params", {}) or {} model_id = hidden_params.get("model_id", None) or "" + + # Fallback: extract model_id from litellm_metadata if not in hidden_params + # This is needed for ResponsesAPIStreamingIterator where _hidden_params might not be accessible + if not model_id: + litellm_metadata = self.data.get("litellm_metadata", {}) or {} + model_info = litellm_metadata.get("model_info", {}) or {} + model_id = model_info.get("id", "") or "" + cache_key = hidden_params.get("cache_key", None) or "" api_base = hidden_params.get("api_base", None) or "" response_cost = hidden_params.get("response_cost", None) or "" @@ -755,7 +763,7 @@ async def _handle_llm_api_exception( # # Note: We check the direct model_info path first (not nested in metadata) because that's where the router sets it. # The nested metadata path is only a fallback for cases where model_info wasn't set at the top level. - model_id = self.maybe_get_model_id_from_logging_obj(_litellm_logging_obj) + model_id = self.maybe_get_model_id(_litellm_logging_obj) custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers( user_api_key_dict=user_api_key_dict, @@ -1076,7 +1084,7 @@ def _inject_cost_into_usage_dict(obj: dict, model_name: str) -> Optional[dict]: return obj return None - def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]: + def maybe_get_model_id(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]: """ Get model_id from logging object or request metadata. diff --git a/tests/test_litellm/proxy/test_model_id_header_propagation.py b/tests/test_litellm/proxy/test_model_id_header_propagation.py index c9dfede0d3ef..cc4e7c084d6c 100644 --- a/tests/test_litellm/proxy/test_model_id_header_propagation.py +++ b/tests/test_litellm/proxy/test_model_id_header_propagation.py @@ -1,7 +1,7 @@ """ Test that x-litellm-model-id header is propagated correctly on error responses. -This test suite verifies the `maybe_get_model_id_from_logging_obj` method +This test suite verifies the `maybe_get_model_id` method which is responsible for extracting model_id from different locations depending on the request lifecycle stage. """ @@ -13,7 +13,7 @@ from litellm.proxy._types import UserAPIKeyAuth -def test_maybe_get_model_id_from_logging_obj_from_litellm_params(): +def test_maybe_get_model_id_from_litellm_params(): """ Test extraction of model_id from logging_obj.litellm_params (used by /v1/chat/completions). """ @@ -29,12 +29,12 @@ def test_maybe_get_model_id_from_logging_obj_from_litellm_params(): } # Test extraction - model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + model_id = processor.maybe_get_model_id(mock_logging_obj) assert model_id == "test-model-id-from-litellm-params" -def test_maybe_get_model_id_from_logging_obj_from_litellm_params_nested(): +def test_maybe_get_model_id_from_litellm_params_nested(): """ Test extraction of model_id from nested metadata in logging_obj.litellm_params. """ @@ -51,12 +51,12 @@ def test_maybe_get_model_id_from_logging_obj_from_litellm_params_nested(): } # Test extraction - model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + model_id = processor.maybe_get_model_id(mock_logging_obj) assert model_id == "test-model-id-nested" -def test_maybe_get_model_id_from_logging_obj_from_kwargs(): +def test_maybe_get_model_id_from_kwargs(): """ Test extraction of model_id from logging_obj.kwargs (fallback path). """ @@ -74,12 +74,12 @@ def test_maybe_get_model_id_from_logging_obj_from_kwargs(): } # Test extraction - model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + model_id = processor.maybe_get_model_id(mock_logging_obj) assert model_id == "test-model-id-from-kwargs" -def test_maybe_get_model_id_from_logging_obj_from_data(): +def test_maybe_get_model_id_from_data(): """ Test extraction of model_id from self.data (used by /v1/messages and /v1/responses). """ @@ -98,12 +98,12 @@ def test_maybe_get_model_id_from_logging_obj_from_data(): mock_logging_obj.kwargs = {} # Test extraction - should fall back to self.data - model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + model_id = processor.maybe_get_model_id(mock_logging_obj) assert model_id == "test-model-id-from-data" -def test_maybe_get_model_id_from_logging_obj_no_logging_obj(): +def test_maybe_get_model_id_no_logging_obj(): """ Test extraction of model_id when logging_obj is None (should use self.data). """ @@ -117,12 +117,12 @@ def test_maybe_get_model_id_from_logging_obj_no_logging_obj(): }) # Test extraction with None logging_obj - model_id = processor.maybe_get_model_id_from_logging_obj(None) + model_id = processor.maybe_get_model_id(None) assert model_id == "test-model-id-no-logging-obj" -def test_maybe_get_model_id_from_logging_obj_not_found(): +def test_maybe_get_model_id_not_found(): """ Test extraction of model_id when it's not available anywhere (should return None). """ @@ -134,7 +134,7 @@ def test_maybe_get_model_id_from_logging_obj_not_found(): mock_logging_obj.kwargs = {} # Test extraction - should return None - model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + model_id = processor.maybe_get_model_id(mock_logging_obj) assert model_id is None @@ -161,7 +161,7 @@ def test_maybe_get_model_id_priority_litellm_params_over_data(): } # Test extraction - should prefer litellm_params - model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj) + model_id = processor.maybe_get_model_id(mock_logging_obj) assert model_id == "model-id-from-litellm-params" From a54a9f91633e12aa05a30a02ae7ba559f37901c3 Mon Sep 17 00:00:00 2001 From: Raghav Jhavar Date: Sat, 22 Nov 2025 21:50:43 -0500 Subject: [PATCH 9/9] remove comment --- litellm/proxy/common_request_processing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 63ddd2d32639..a189216ad467 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -532,7 +532,6 @@ async def base_process_llm_request( model_id = hidden_params.get("model_id", None) or "" # Fallback: extract model_id from litellm_metadata if not in hidden_params - # This is needed for ResponsesAPIStreamingIterator where _hidden_params might not be accessible if not model_id: litellm_metadata = self.data.get("litellm_metadata", {}) or {} model_info = litellm_metadata.get("model_info", {}) or {} @@ -1129,4 +1128,4 @@ def maybe_get_model_id(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optio model_info = litellm_metadata.get("model_info", {}) or {} model_id = model_info.get("id", None) - return model_id \ No newline at end of file + return model_id