From c6e184ae8b51728b038318dcd1afeca6b6c32f4f Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 13:56:08 -0500
Subject: [PATCH 1/9] propagate model id on errors too

---
 .../proxy/anthropic_endpoints/endpoints.py    |  24 ++++
 litellm/proxy/common_request_processing.py    |  37 ++++++
 .../proxy/test_failed_request_headers.py      | 118 ++++++++++++++++++
 3 files changed, 179 insertions(+)
 create mode 100644 tests/test_litellm/proxy/test_failed_request_headers.py

diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index c450b655a2c1..7c4bf43368ec 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -85,6 +85,20 @@ async def anthropic_response(  # noqa: PLR0915
         if data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
+        # Inject model_id into metadata if available
+        # This ensures model_id is available in logging_obj for failed requests
+        if llm_router and data.get("model"):
+            try:
+                model_ids = llm_router.get_model_ids(data["model"])
+                if model_ids:
+                    if "metadata" not in data:
+                        data["metadata"] = {}
+                    if "model_info" not in data["metadata"]:
+                        data["metadata"]["model_info"] = {}
+                    data["metadata"]["model_info"]["id"] = model_ids[0]
+            except Exception as e:
+                verbose_proxy_logger.error(f"Error getting model ID from router for model: {data['model']}: {e}")
+
         ### CALL HOOKS ### - modify incoming data before calling the model
         data = await proxy_logging_obj.pre_call_hook(  # type: ignore
             user_api_key_dict=user_api_key_dict, data=data, call_type=CallTypes.anthropic_messages.value
@@ -217,11 +231,21 @@ async def anthropic_response(  # noqa: PLR0915
             )
         )
         error_msg = f"{str(e)}"
+
+        # Get headers with model_id if available
+        headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+            user_api_key_dict=user_api_key_dict,
+            model_id=data.get("metadata", {}).get("model_info", {}).get("id", None),
+            version=version,
+            request_data=data
+        )
+
         raise ProxyException(
             message=getattr(e, "message", error_msg),
             type=getattr(e, "type", "None"),
             param=getattr(e, "param", "None"),
             code=getattr(e, "status_code", 500),
+            headers=headers
         )
 
 
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 5a3e0b334b47..fea569129088 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -340,6 +340,7 @@ async def common_processing_pre_call_logic(
         user_max_tokens: Optional[int] = None,
         user_api_base: Optional[str] = None,
         model: Optional[str] = None,
+        llm_router: Optional[Router] = None,
     ) -> Tuple[dict, LiteLLMLoggingObj]:
         start_time = datetime.now()  # start before calling guardrail hooks
 
@@ -378,6 +379,20 @@ async def common_processing_pre_call_logic(
         ):
             self.data["model"] = litellm.model_alias_map[self.data["model"]]
 
+        # Inject model_id into metadata if available
+        # This ensures model_id is available in logging_obj for failed requests
+        if llm_router and self.data.get("model"):
+            try:
+                model_ids = llm_router.get_model_ids(self.data["model"])
+                if model_ids:
+                    if "metadata" not in self.data:
+                        self.data["metadata"] = {}
+                    if "model_info" not in self.data["metadata"]:
+                        self.data["metadata"]["model_info"] = {}
+                    self.data["metadata"]["model_info"]["id"] = model_ids[0]
+            except Exception as e:
+                verbose_proxy_logger.error(f"Error getting model ID from router for model: {self.data['model']}: {e}")
+
         # Check key-specific aliases
         if (
             isinstance(self.data["model"], str)
@@ -490,6 +505,7 @@ async def base_process_llm_request(
             user_api_base=user_api_base,
             model=model,
             route_type=route_type,
+            llm_router=llm_router,
         )
 
         tasks = []
@@ -748,11 +764,32 @@ async def _handle_llm_api_exception(
         _litellm_logging_obj: Optional[LiteLLMLoggingObj] = self.data.get(
             "litellm_logging_obj", None
         )
+
+        # Attempt to get model_id from logging object
+        model_id = None
+        if _litellm_logging_obj:
+            # 1. Try getting from litellm_params (updated during call)
+            if (
+                hasattr(_litellm_logging_obj, "litellm_params")
+                and _litellm_logging_obj.litellm_params
+            ):
+                metadata = _litellm_logging_obj.litellm_params.get("metadata") or {}
+                model_info = metadata.get("model_info") or {}
+                model_id = model_info.get("id", None)
+
+            # 2. Fallback to kwargs (initial)
+            if not model_id and _litellm_logging_obj.kwargs:
+                litellm_params = _litellm_logging_obj.kwargs.get("litellm_params", {})
+                metadata = litellm_params.get("metadata") or {}
+                model_info = metadata.get("model_info") or {}
+                model_id = model_info.get("id", None)
+
         custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
             call_id=(
                 _litellm_logging_obj.litellm_call_id if _litellm_logging_obj else None
             ),
+            model_id=model_id,
             version=version,
             response_cost=0,
             model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
diff --git a/tests/test_litellm/proxy/test_failed_request_headers.py b/tests/test_litellm/proxy/test_failed_request_headers.py
new file mode 100644
index 000000000000..ce4fef5c102f
--- /dev/null
+++ b/tests/test_litellm/proxy/test_failed_request_headers.py
@@ -0,0 +1,118 @@
+import pytest
+from fastapi.testclient import TestClient
+from unittest.mock import MagicMock, patch, AsyncMock
+from litellm.proxy.proxy_server import app
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+from litellm.proxy._types import UserAPIKeyAuth
+
+@pytest.fixture
+def client():
+    return TestClient(app)
+
+def test_x_litellm_model_id_header_in_exception():
+    """
+    Directly test the logic in ProxyBaseLLMRequestProcessing._handle_llm_api_exception
+    to ensure it extracts model_id from the logging object and passes it to get_custom_headers.
+    """
+    # Mock dependencies
+    mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth)
+    mock_user_api_key_dict.allowed_model_region = "us-east-1"
+    mock_user_api_key_dict.tpm_limit = 100
+    mock_user_api_key_dict.rpm_limit = 10
+    mock_user_api_key_dict.max_budget = 100.0
+    mock_user_api_key_dict.spend = 5.0
+    
+    # Use AsyncMock for awaited methods
+    mock_proxy_logging_obj = MagicMock()
+    mock_proxy_logging_obj.post_call_failure_hook = AsyncMock()
+    
+    # Create a mock exception
+    exception = Exception("Test exception")
+    
+    # Create a mock logging object with model_id in litellm_params
+    mock_litellm_logging_obj = MagicMock()
+    mock_litellm_logging_obj.litellm_call_id = "test-call-id"
+    mock_litellm_logging_obj.litellm_params = {
+        "metadata": {
+            "model_info": {
+                "id": "test-model-id-123"
+            }
+        }
+    }
+    
+    # Setup the processor with data containing the logging object
+    data = {
+        "litellm_logging_obj": mock_litellm_logging_obj,
+        "model": "gpt-4"
+    }
+    processor = ProxyBaseLLMRequestProcessing(data=data)
+    
+    import asyncio
+    from litellm.proxy._types import ProxyException
+    
+    try:
+        asyncio.run(processor._handle_llm_api_exception(
+            e=exception,
+            user_api_key_dict=mock_user_api_key_dict,
+            proxy_logging_obj=mock_proxy_logging_obj
+        ))
+    except ProxyException as pe:
+        # Verify the headers in the raised exception
+        assert "x-litellm-model-id" in pe.headers
+        assert pe.headers["x-litellm-model-id"] == "test-model-id-123"
+    except Exception as e:
+        pytest.fail(f"Raised unexpected exception type: {type(e)}")
+
+def test_x_litellm_model_id_header_in_exception_fallback_kwargs():
+    """
+    Test fallback to kwargs if litellm_params is missing/empty
+    """
+    # Mock dependencies
+    mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth)
+    mock_user_api_key_dict.allowed_model_region = "us-east-1"
+    # Need to mock tpm_limit/rpm_limit etc as they are accessed by get_custom_headers
+    mock_user_api_key_dict.tpm_limit = 100
+    mock_user_api_key_dict.rpm_limit = 10
+    mock_user_api_key_dict.max_budget = 100.0
+    mock_user_api_key_dict.spend = 5.0
+    
+    # Use AsyncMock for awaited methods
+    mock_proxy_logging_obj = MagicMock()
+    mock_proxy_logging_obj.post_call_failure_hook = AsyncMock()
+    
+    exception = Exception("Test exception")
+    
+    # Create a mock logging object with model_id in kwargs
+    mock_litellm_logging_obj = MagicMock()
+    mock_litellm_logging_obj.litellm_call_id = "test-call-id"
+    mock_litellm_logging_obj.litellm_params = {} # Empty
+    mock_litellm_logging_obj.kwargs = {
+        "litellm_params": {
+            "metadata": {
+                "model_info": {
+                    "id": "fallback-model-id-456"
+                }
+            }
+        }
+    }
+    
+    data = {
+        "litellm_logging_obj": mock_litellm_logging_obj,
+        "model": "gpt-4"
+    }
+    processor = ProxyBaseLLMRequestProcessing(data=data)
+    
+    import asyncio
+    from litellm.proxy._types import ProxyException
+    
+    try:
+        asyncio.run(processor._handle_llm_api_exception(
+            e=exception,
+            user_api_key_dict=mock_user_api_key_dict,
+            proxy_logging_obj=mock_proxy_logging_obj
+        ))
+    except ProxyException as pe:
+        assert "x-litellm-model-id" in pe.headers
+        assert pe.headers["x-litellm-model-id"] == "fallback-model-id-456"
+    except Exception as e:
+        pytest.fail(f"Raised unexpected exception type: {type(e)}")

From f6ffa0bbfe3fbffe5d45e62d4b968e0c672c48cb Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 19:45:32 -0500
Subject: [PATCH 2/9] make it work for messages and streaming

---
 .../adapters/transformation.py                |   7 ++
 .../proxy/anthropic_endpoints/endpoints.py    |  61 +++++----
 litellm/proxy/common_request_processing.py    |  23 +++-
 litellm/responses/streaming_iterator.py       |  19 +++
 litellm/router.py                             |   1 +
 .../test_anthropic_messages_error_headers.py  | 118 ++++++++++++++++++
 .../proxy/test_failed_request_headers.py      | 118 ------------------
 7 files changed, 204 insertions(+), 143 deletions(-)
 create mode 100644 tests/test_litellm/proxy/test_anthropic_messages_error_headers.py
 delete mode 100644 tests/test_litellm/proxy/test_failed_request_headers.py

diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
index 0e905014fe2e..31c314426896 100644
--- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
+++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
@@ -622,6 +622,13 @@ def translate_openai_response_to_anthropic(
             stop_reason=anthropic_finish_reason,
         )
 
+        # Preserve model_id from the OpenAI response's _hidden_params
+        # This is needed for load balancing attribution
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id")
+        if model_id:
+            translated_obj["_litellm_model_id"] = model_id  # type: ignore
+
         return translated_obj
 
     def _translate_streaming_openai_chunk_to_anthropic_content_block(
diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index 7c4bf43368ec..4425a4f0af37 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -85,20 +85,6 @@ async def anthropic_response(  # noqa: PLR0915
         if data["model"] in litellm.model_alias_map:
             data["model"] = litellm.model_alias_map[data["model"]]
 
-        # Inject model_id into metadata if available
-        # This ensures model_id is available in logging_obj for failed requests
-        if llm_router and data.get("model"):
-            try:
-                model_ids = llm_router.get_model_ids(data["model"])
-                if model_ids:
-                    if "metadata" not in data:
-                        data["metadata"] = {}
-                    if "model_info" not in data["metadata"]:
-                        data["metadata"]["model_info"] = {}
-                    data["metadata"]["model_info"]["id"] = model_ids[0]
-            except Exception as e:
-                verbose_proxy_logger.error(f"Error getting model ID from router for model: {data['model']}: {e}")
-
         ### CALL HOOKS ### - modify incoming data before calling the model
         data = await proxy_logging_obj.pre_call_hook(  # type: ignore
             user_api_key_dict=user_api_key_dict, data=data, call_type=CallTypes.anthropic_messages.value
@@ -168,8 +154,13 @@ async def anthropic_response(  # noqa: PLR0915
 
         response = responses[1]
 
+        # Extract model_id from request metadata (set by router during routing)
+        litellm_metadata = data.get("litellm_metadata", {}) or {}
+        model_info = litellm_metadata.get("model_info", {}) or {}
+        model_id = model_info.get("id", "") or ""
+
+        # Get other metadata from hidden_params
         hidden_params = getattr(response, "_hidden_params", {}) or {}
-        model_id = hidden_params.get("model_id", None) or ""
         cache_key = hidden_params.get("cache_key", None) or ""
         api_base = hidden_params.get("api_base", None) or ""
         response_cost = hidden_params.get("response_cost", None) or ""
@@ -230,22 +221,50 @@ async def anthropic_response(  # noqa: PLR0915
                 str(e)
             )
         )
-        error_msg = f"{str(e)}"
 
-        # Get headers with model_id if available
-        headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        # Extract model_id from request metadata (same as success path)
+        litellm_metadata = data.get("litellm_metadata", {}) or {}
+        model_info = litellm_metadata.get("model_info", {}) or {}
+        model_id = model_info.get("id", "") or ""
+
+        # Get headers
+        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
-            model_id=data.get("metadata", {}).get("model_info", {}).get("id", None),
+            call_id=data.get("litellm_call_id", ""),
+            model_id=model_id,
             version=version,
-            request_data=data
+            response_cost=0,
+            model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
+            request_data=data,
+            timeout=getattr(e, "timeout", None),
+            litellm_logging_obj=None,
         )
 
+        headers = getattr(e, "headers", {}) or {}
+        headers.update(custom_headers)
+
+        # Raise ProxyException with proper headers
+        from litellm.proxy.proxy_server import ProxyException
+
+        if isinstance(e, HTTPException):
+            raise ProxyException(
+                message=getattr(e, "detail", str(e)),
+                type=getattr(e, "type", "None"),
+                param=getattr(e, "param", "None"),
+                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
+                provider_specific_fields=getattr(e, "provider_specific_fields", None),
+                headers=headers,
+            )
+
+        error_msg = f"{str(e)}"
         raise ProxyException(
             message=getattr(e, "message", error_msg),
             type=getattr(e, "type", "None"),
             param=getattr(e, "param", "None"),
+            openai_code=getattr(e, "code", None),
             code=getattr(e, "status_code", 500),
-            headers=headers
+            provider_specific_fields=getattr(e, "provider_specific_fields", None),
+            headers=headers,
         )
 
 
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index fea569129088..79c941d46a00 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -766,6 +766,9 @@ async def _handle_llm_api_exception(
         )
 
         # Attempt to get model_id from logging object
+        #
+        # Note: We check the direct model_info path first (not nested in metadata) because that's where the router sets it.
+        # The nested metadata path is only a fallback for cases where model_info wasn't set at the top level.
         model_id = None
         if _litellm_logging_obj:
             # 1. Try getting from litellm_params (updated during call)
@@ -773,17 +776,29 @@ async def _handle_llm_api_exception(
                 hasattr(_litellm_logging_obj, "litellm_params")
                 and _litellm_logging_obj.litellm_params
             ):
-                metadata = _litellm_logging_obj.litellm_params.get("metadata") or {}
-                model_info = metadata.get("model_info") or {}
+                # First check direct model_info path (set by router.py with selected deployment)
+                model_info = _litellm_logging_obj.litellm_params.get("model_info") or {}
                 model_id = model_info.get("id", None)
 
+                # Fallback to nested metadata path
+                if not model_id:
+                    metadata = _litellm_logging_obj.litellm_params.get("metadata") or {}
+                    model_info = metadata.get("model_info") or {}
+                    model_id = model_info.get("id", None)
+
             # 2. Fallback to kwargs (initial)
             if not model_id and _litellm_logging_obj.kwargs:
                 litellm_params = _litellm_logging_obj.kwargs.get("litellm_params", {})
-                metadata = litellm_params.get("metadata") or {}
-                model_info = metadata.get("model_info") or {}
+                # First check direct model_info path
+                model_info = litellm_params.get("model_info") or {}
                 model_id = model_info.get("id", None)
 
+                # Fallback to nested metadata path
+                if not model_id:
+                    metadata = litellm_params.get("metadata") or {}
+                    model_info = metadata.get("model_info") or {}
+                    model_id = model_info.get("id", None)
+
         custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
             call_id=(
diff --git a/litellm/responses/streaming_iterator.py b/litellm/responses/streaming_iterator.py
index 8eecc3e82111..0407776029d3 100644
--- a/litellm/responses/streaming_iterator.py
+++ b/litellm/responses/streaming_iterator.py
@@ -8,7 +8,9 @@
 import litellm
 from litellm.constants import STREAM_SSE_DONE_STRING
 from litellm.litellm_core_utils.asyncify import run_async_function
+from litellm.litellm_core_utils.core_helpers import process_response_headers
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.litellm_core_utils.llm_response_utils.get_api_base import get_api_base
 from litellm.litellm_core_utils.thread_pool_executor import executor
 from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
 from litellm.responses.utils import ResponsesAPIRequestUtils
@@ -51,6 +53,23 @@ def __init__(
         self.litellm_metadata = litellm_metadata
         self.custom_llm_provider = custom_llm_provider
 
+        # set hidden params for response headers (e.g., x-litellm-model-id)
+        # This matches the stream wrapper in litellm/litellm_core_utils/streaming_handler.py
+        _api_base = get_api_base(
+            model=model or "",
+            optional_params=self.logging_obj.model_call_details.get(
+                "litellm_params", {}
+            ),
+        )
+        _model_info: Dict = litellm_metadata.get("model_info", {}) if litellm_metadata else {}
+        self._hidden_params = {
+            "model_id": _model_info.get("id", None),
+            "api_base": _api_base,
+        }
+        self._hidden_params["additional_headers"] = process_response_headers(
+            self.response.headers or {}
+        )  # GUARANTEE OPENAI HEADERS IN RESPONSE
+
     def _process_chunk(self, chunk) -> Optional[ResponsesAPIStreamingResponse]:
         """Process a single chunk of data from the stream"""
         if not chunk:
diff --git a/litellm/router.py b/litellm/router.py
index 6d38d2fc2bd8..9de34097bde4 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1639,6 +1639,7 @@ def _update_kwargs_with_deployment(
         - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging)
         - Adds default litellm params to kwargs, if set.
         """
+        print("MODEL-INFO: ", deployment.get("model_info", {}), flush=True)
         model_info = deployment.get("model_info", {}).copy()
         deployment_litellm_model_name = deployment["litellm_params"]["model"]
         deployment_api_base = deployment["litellm_params"].get("api_base")
diff --git a/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py b/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py
new file mode 100644
index 000000000000..2b11b216f095
--- /dev/null
+++ b/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py
@@ -0,0 +1,118 @@
+"""
+Test that x-litellm-model-id header is returned on /v1/messages error responses.
+
+This test verifies that the model_id header is propagated correctly when
+requests fail after router selection (e.g., due to unsupported parameters).
+"""
+
+import pytest
+import asyncio
+import aiohttp
+
+LITELLM_MASTER_KEY = "sk-1234"
+
+
+async def anthropic_messages_with_headers(session, key, model="gpt-4", **extra_params):
+    """
+    Make a request to /v1/messages and return response headers.
+    """
+    url = "http://0.0.0.0:4000/v1/messages"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": model,
+        "max_tokens": 10,
+        "messages": [
+            {"role": "user", "content": "Hello!"},
+        ],
+        **extra_params,
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(f"Status: {status}")
+        print(f"Response: {response_text}")
+        print()
+
+        raw_headers = response.raw_headers
+        raw_headers_json = {}
+
+        for item in response.raw_headers:
+            raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8")
+
+        return {
+            "status": status,
+            "headers": raw_headers_json,
+            "response_text": response_text,
+        }
+
+
+@pytest.mark.asyncio
+async def test_anthropic_messages_error_with_model_id_header():
+    """
+    Test that x-litellm-model-id header is returned on error responses.
+
+    This test:
+    1. Makes a request to /v1/messages with an unsupported parameter (reasoning_effort)
+    2. Verifies that the request fails with a 400 error
+    3. Verifies that the x-litellm-model-id header is present in the error response
+
+    The error occurs AFTER router selection, so model_id should be available
+    and included in the error response headers.
+    """
+    async with aiohttp.ClientSession() as session:
+        key = LITELLM_MASTER_KEY
+        result = await anthropic_messages_with_headers(
+            session=session,
+            key=key,
+            model="gpt-4",
+            reasoning_effort="low",  # Unsupported param that triggers error
+        )
+
+        # Verify the request failed
+        assert result["status"] == 400, f"Expected 400, got {result['status']}"
+
+        # Verify model_id header is present
+        assert "x-litellm-model-id" in result["headers"], (
+            f"x-litellm-model-id header missing in error response. "
+            f"Headers: {result['headers'].keys()}"
+        )
+
+        # Verify the header has a non-empty value
+        model_id = result["headers"]["x-litellm-model-id"]
+        assert model_id, "x-litellm-model-id header is empty"
+        print(f"Successfully retrieved model_id on error response: {model_id}")
+
+
+@pytest.mark.asyncio
+async def test_anthropic_messages_success_with_model_id_header():
+    """
+    Test that x-litellm-model-id header is returned on successful responses.
+
+    This is a baseline test to ensure the header is present on success too.
+    """
+    async with aiohttp.ClientSession() as session:
+        key = LITELLM_MASTER_KEY
+        result = await anthropic_messages_with_headers(
+            session=session,
+            key=key,
+            model="gpt-4",
+        )
+
+        # Verify the request succeeded
+        assert result["status"] == 200, f"Expected 200, got {result['status']}"
+
+        # Verify model_id header is present
+        assert "x-litellm-model-id" in result["headers"], (
+            f"x-litellm-model-id header missing in success response. "
+            f"Headers: {result['headers'].keys()}"
+        )
+
+        # Verify the header has a non-empty value
+        model_id = result["headers"]["x-litellm-model-id"]
+        assert model_id, "x-litellm-model-id header is empty"
+        print(f"Successfully retrieved model_id on success response: {model_id}")
diff --git a/tests/test_litellm/proxy/test_failed_request_headers.py b/tests/test_litellm/proxy/test_failed_request_headers.py
deleted file mode 100644
index ce4fef5c102f..000000000000
--- a/tests/test_litellm/proxy/test_failed_request_headers.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import pytest
-from fastapi.testclient import TestClient
-from unittest.mock import MagicMock, patch, AsyncMock
-from litellm.proxy.proxy_server import app
-from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
-from litellm.proxy._types import UserAPIKeyAuth
-
-@pytest.fixture
-def client():
-    return TestClient(app)
-
-def test_x_litellm_model_id_header_in_exception():
-    """
-    Directly test the logic in ProxyBaseLLMRequestProcessing._handle_llm_api_exception
-    to ensure it extracts model_id from the logging object and passes it to get_custom_headers.
-    """
-    # Mock dependencies
-    mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth)
-    mock_user_api_key_dict.allowed_model_region = "us-east-1"
-    mock_user_api_key_dict.tpm_limit = 100
-    mock_user_api_key_dict.rpm_limit = 10
-    mock_user_api_key_dict.max_budget = 100.0
-    mock_user_api_key_dict.spend = 5.0
-    
-    # Use AsyncMock for awaited methods
-    mock_proxy_logging_obj = MagicMock()
-    mock_proxy_logging_obj.post_call_failure_hook = AsyncMock()
-    
-    # Create a mock exception
-    exception = Exception("Test exception")
-    
-    # Create a mock logging object with model_id in litellm_params
-    mock_litellm_logging_obj = MagicMock()
-    mock_litellm_logging_obj.litellm_call_id = "test-call-id"
-    mock_litellm_logging_obj.litellm_params = {
-        "metadata": {
-            "model_info": {
-                "id": "test-model-id-123"
-            }
-        }
-    }
-    
-    # Setup the processor with data containing the logging object
-    data = {
-        "litellm_logging_obj": mock_litellm_logging_obj,
-        "model": "gpt-4"
-    }
-    processor = ProxyBaseLLMRequestProcessing(data=data)
-    
-    import asyncio
-    from litellm.proxy._types import ProxyException
-    
-    try:
-        asyncio.run(processor._handle_llm_api_exception(
-            e=exception,
-            user_api_key_dict=mock_user_api_key_dict,
-            proxy_logging_obj=mock_proxy_logging_obj
-        ))
-    except ProxyException as pe:
-        # Verify the headers in the raised exception
-        assert "x-litellm-model-id" in pe.headers
-        assert pe.headers["x-litellm-model-id"] == "test-model-id-123"
-    except Exception as e:
-        pytest.fail(f"Raised unexpected exception type: {type(e)}")
-
-def test_x_litellm_model_id_header_in_exception_fallback_kwargs():
-    """
-    Test fallback to kwargs if litellm_params is missing/empty
-    """
-    # Mock dependencies
-    mock_user_api_key_dict = MagicMock(spec=UserAPIKeyAuth)
-    mock_user_api_key_dict.allowed_model_region = "us-east-1"
-    # Need to mock tpm_limit/rpm_limit etc as they are accessed by get_custom_headers
-    mock_user_api_key_dict.tpm_limit = 100
-    mock_user_api_key_dict.rpm_limit = 10
-    mock_user_api_key_dict.max_budget = 100.0
-    mock_user_api_key_dict.spend = 5.0
-    
-    # Use AsyncMock for awaited methods
-    mock_proxy_logging_obj = MagicMock()
-    mock_proxy_logging_obj.post_call_failure_hook = AsyncMock()
-    
-    exception = Exception("Test exception")
-    
-    # Create a mock logging object with model_id in kwargs
-    mock_litellm_logging_obj = MagicMock()
-    mock_litellm_logging_obj.litellm_call_id = "test-call-id"
-    mock_litellm_logging_obj.litellm_params = {} # Empty
-    mock_litellm_logging_obj.kwargs = {
-        "litellm_params": {
-            "metadata": {
-                "model_info": {
-                    "id": "fallback-model-id-456"
-                }
-            }
-        }
-    }
-    
-    data = {
-        "litellm_logging_obj": mock_litellm_logging_obj,
-        "model": "gpt-4"
-    }
-    processor = ProxyBaseLLMRequestProcessing(data=data)
-    
-    import asyncio
-    from litellm.proxy._types import ProxyException
-    
-    try:
-        asyncio.run(processor._handle_llm_api_exception(
-            e=exception,
-            user_api_key_dict=mock_user_api_key_dict,
-            proxy_logging_obj=mock_proxy_logging_obj
-        ))
-    except ProxyException as pe:
-        assert "x-litellm-model-id" in pe.headers
-        assert pe.headers["x-litellm-model-id"] == "fallback-model-id-456"
-    except Exception as e:
-        pytest.fail(f"Raised unexpected exception type: {type(e)}")

From 855ddba522fb9d468c6b861f0b2eefa1c1f700f2 Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 19:47:22 -0500
Subject: [PATCH 3/9] fix

---
 litellm/proxy/anthropic_endpoints/endpoints.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index 4425a4f0af37..c954dc2ef048 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -243,19 +243,6 @@ async def anthropic_response(  # noqa: PLR0915
         headers = getattr(e, "headers", {}) or {}
         headers.update(custom_headers)
 
-        # Raise ProxyException with proper headers
-        from litellm.proxy.proxy_server import ProxyException
-
-        if isinstance(e, HTTPException):
-            raise ProxyException(
-                message=getattr(e, "detail", str(e)),
-                type=getattr(e, "type", "None"),
-                param=getattr(e, "param", "None"),
-                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
-                provider_specific_fields=getattr(e, "provider_specific_fields", None),
-                headers=headers,
-            )
-
         error_msg = f"{str(e)}"
         raise ProxyException(
             message=getattr(e, "message", error_msg),

From 66fb1c9f500e7a13259ba389ee52f980df60178b Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 20:06:45 -0500
Subject: [PATCH 4/9] extract maybe_get_model_id_from_logging_obj helper and remove debug print

---
 .../adapters/transformation.py                |  7 --
 .../proxy/anthropic_endpoints/endpoints.py    |  7 +-
 litellm/proxy/common_request_processing.py    | 65 ++++++++++---------
 litellm/router.py                             |  1 -
 4 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
index 31c314426896..0e905014fe2e 100644
--- a/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
+++ b/litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py
@@ -622,13 +622,6 @@ def translate_openai_response_to_anthropic(
             stop_reason=anthropic_finish_reason,
         )
 
-        # Preserve model_id from the OpenAI response's _hidden_params
-        # This is needed for load balancing attribution
-        hidden_params = getattr(response, "_hidden_params", {}) or {}
-        model_id = hidden_params.get("model_id")
-        if model_id:
-            translated_obj["_litellm_model_id"] = model_id  # type: ignore
-
         return translated_obj
 
     def _translate_streaming_openai_chunk_to_anthropic_content_block(
diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index c954dc2ef048..abea9e6fee1a 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -228,7 +228,7 @@ async def anthropic_response(  # noqa: PLR0915
         model_id = model_info.get("id", "") or ""
 
         # Get headers
-        custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
             call_id=data.get("litellm_call_id", ""),
             model_id=model_id,
@@ -240,17 +240,12 @@ async def anthropic_response(  # noqa: PLR0915
             litellm_logging_obj=None,
         )
 
-        headers = getattr(e, "headers", {}) or {}
-        headers.update(custom_headers)
-
         error_msg = f"{str(e)}"
         raise ProxyException(
             message=getattr(e, "message", error_msg),
             type=getattr(e, "type", "None"),
             param=getattr(e, "param", "None"),
-            openai_code=getattr(e, "code", None),
             code=getattr(e, "status_code", 500),
-            provider_specific_fields=getattr(e, "provider_specific_fields", None),
             headers=headers,
         )
 
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 79c941d46a00..f6abe6992e29 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -769,35 +769,7 @@ async def _handle_llm_api_exception(
         #
         # Note: We check the direct model_info path first (not nested in metadata) because that's where the router sets it.
         # The nested metadata path is only a fallback for cases where model_info wasn't set at the top level.
-        model_id = None
-        if _litellm_logging_obj:
-            # 1. Try getting from litellm_params (updated during call)
-            if (
-                hasattr(_litellm_logging_obj, "litellm_params")
-                and _litellm_logging_obj.litellm_params
-            ):
-                # First check direct model_info path (set by router.py with selected deployment)
-                model_info = _litellm_logging_obj.litellm_params.get("model_info") or {}
-                model_id = model_info.get("id", None)
-
-                # Fallback to nested metadata path
-                if not model_id:
-                    metadata = _litellm_logging_obj.litellm_params.get("metadata") or {}
-                    model_info = metadata.get("model_info") or {}
-                    model_id = model_info.get("id", None)
-
-            # 2. Fallback to kwargs (initial)
-            if not model_id and _litellm_logging_obj.kwargs:
-                litellm_params = _litellm_logging_obj.kwargs.get("litellm_params", {})
-                # First check direct model_info path
-                model_info = litellm_params.get("model_info") or {}
-                model_id = model_info.get("id", None)
-
-                # Fallback to nested metadata path
-                if not model_id:
-                    metadata = litellm_params.get("metadata") or {}
-                    model_info = metadata.get("model_info") or {}
-                    model_id = model_info.get("id", None)
+        model_id = self.maybe_get_model_id_from_logging_obj(_litellm_logging_obj)
 
         custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
@@ -1117,3 +1089,38 @@ def _inject_cost_into_usage_dict(obj: dict, model_name: str) -> Optional[dict]:
                 obj.setdefault("usage", {})["cost"] = cost_val
                 return obj
         return None
+
+    def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]:
+        model_id = None
+        if _logging_obj:
+            # 1. Try getting from litellm_params (updated during call)
+            if (
+                hasattr(_logging_obj, "litellm_params")
+                and _logging_obj.litellm_params
+            ):
+                # First check direct model_info path (set by router.py with selected deployment)
+                model_info = _logging_obj.litellm_params.get("model_info") or {}
+                model_id = model_info.get("id", None)
+
+                # Fallback to nested metadata path
+                if not model_id:
+                    metadata = _logging_obj.litellm_params.get("metadata") or {}
+                    model_info = metadata.get("model_info") or {}
+                    model_id = model_info.get("id", None)
+
+            # 2. Fallback to kwargs (initial)
+            if not model_id:
+                _kwargs = getattr(_logging_obj, "kwargs", None)
+                if _kwargs:
+                    litellm_params = _kwargs.get("litellm_params", {})
+                    # First check direct model_info path
+                    model_info = litellm_params.get("model_info") or {}
+                    model_id = model_info.get("id", None)
+
+                    # Fallback to nested metadata path
+                    if not model_id:
+                        metadata = litellm_params.get("metadata") or {}
+                        model_info = metadata.get("model_info") or {}
+                        model_id = model_info.get("id", None)
+
+        return model_id
\ No newline at end of file
diff --git a/litellm/router.py b/litellm/router.py
index 9de34097bde4..6d38d2fc2bd8 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1639,7 +1639,6 @@ def _update_kwargs_with_deployment(
         - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging)
         - Adds default litellm params to kwargs, if set.
         """
-        print("MODEL-INFO: ", deployment.get("model_info", {}), flush=True)
         model_info = deployment.get("model_info", {}).copy()
         deployment_litellm_model_name = deployment["litellm_params"]["model"]
         deployment_api_base = deployment["litellm_params"].get("api_base")

From 4a28b015d134e3e5b82d3c6fba667aec3b1be8b0 Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 20:14:47 -0500
Subject: [PATCH 5/9] remove pre-call model_id metadata injection from common_processing_pre_call_logic

---
 litellm/proxy/common_request_processing.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index f6abe6992e29..928752022d1b 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -379,20 +379,6 @@ async def common_processing_pre_call_logic(
         ):
             self.data["model"] = litellm.model_alias_map[self.data["model"]]
 
-        # Inject model_id into metadata if available
-        # This ensures model_id is available in logging_obj for failed requests
-        if llm_router and self.data.get("model"):
-            try:
-                model_ids = llm_router.get_model_ids(self.data["model"])
-                if model_ids:
-                    if "metadata" not in self.data:
-                        self.data["metadata"] = {}
-                    if "model_info" not in self.data["metadata"]:
-                        self.data["metadata"]["model_info"] = {}
-                    self.data["metadata"]["model_info"]["id"] = model_ids[0]
-            except Exception as e:
-                verbose_proxy_logger.error(f"Error getting model ID from router for model: {self.data['model']}: {e}")
-
         # Check key-specific aliases
         if (
             isinstance(self.data["model"], str)

From 3e2f693f946d6c4e689db2d9a1a5637ffd7178c9 Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 20:30:31 -0500
Subject: [PATCH 6/9] fall back to litellm_metadata in self.data for model_id and add docstring

---
 litellm/proxy/common_request_processing.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 928752022d1b..cfb9bbe613b3 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -1077,6 +1077,12 @@ def _inject_cost_into_usage_dict(obj: dict, model_name: str) -> Optional[dict]:
         return None
 
     def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]:
+        """
+        Get model_id from logging object or request metadata.
+
+        The router sets model_info.id when selecting a deployment. This tries multiple locations
+        where the ID might be stored depending on the request lifecycle stage.
+        """
         model_id = None
         if _logging_obj:
             # 1. Try getting from litellm_params (updated during call)
@@ -1109,4 +1115,10 @@ def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLogg
                         model_info = metadata.get("model_info") or {}
                         model_id = model_info.get("id", None)
 
+        # 3. Final fallback to self.data["litellm_metadata"] (for routes like /v1/responses that populate data before error)
+        if not model_id:
+            litellm_metadata = self.data.get("litellm_metadata", {}) or {}
+            model_info = litellm_metadata.get("model_info", {}) or {}
+            model_id = model_info.get("id", None)
+
         return model_id
\ No newline at end of file

From 7de22eb3ef69c457f2cbea66dd38103d0cea4f8f Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 20:54:08 -0500
Subject: [PATCH 7/9] replace live-proxy header tests with unit tests for model_id extraction

---
 .../test_anthropic_messages_error_headers.py  | 118 ---------
 .../proxy/test_model_id_header_propagation.py | 250 ++++++++++++++++++
 2 files changed, 250 insertions(+), 118 deletions(-)
 delete mode 100644 tests/test_litellm/proxy/test_anthropic_messages_error_headers.py
 create mode 100644 tests/test_litellm/proxy/test_model_id_header_propagation.py

diff --git a/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py b/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py
deleted file mode 100644
index 2b11b216f095..000000000000
--- a/tests/test_litellm/proxy/test_anthropic_messages_error_headers.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""
-Test that x-litellm-model-id header is returned on /v1/messages error responses.
-
-This test verifies that the model_id header is propagated correctly when
-requests fail after router selection (e.g., due to unsupported parameters).
-"""
-
-import pytest
-import asyncio
-import aiohttp
-
-LITELLM_MASTER_KEY = "sk-1234"
-
-
-async def anthropic_messages_with_headers(session, key, model="gpt-4", **extra_params):
-    """
-    Make a request to /v1/messages and return response headers.
-    """
-    url = "http://0.0.0.0:4000/v1/messages"
-    headers = {
-        "Authorization": f"Bearer {key}",
-        "Content-Type": "application/json",
-    }
-    data = {
-        "model": model,
-        "max_tokens": 10,
-        "messages": [
-            {"role": "user", "content": "Hello!"},
-        ],
-        **extra_params,
-    }
-
-    async with session.post(url, headers=headers, json=data) as response:
-        status = response.status
-        response_text = await response.text()
-
-        print(f"Status: {status}")
-        print(f"Response: {response_text}")
-        print()
-
-        raw_headers = response.raw_headers
-        raw_headers_json = {}
-
-        for item in response.raw_headers:
-            raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8")
-
-        return {
-            "status": status,
-            "headers": raw_headers_json,
-            "response_text": response_text,
-        }
-
-
-@pytest.mark.asyncio
-async def test_anthropic_messages_error_with_model_id_header():
-    """
-    Test that x-litellm-model-id header is returned on error responses.
-
-    This test:
-    1. Makes a request to /v1/messages with an unsupported parameter (reasoning_effort)
-    2. Verifies that the request fails with a 400 error
-    3. Verifies that the x-litellm-model-id header is present in the error response
-
-    The error occurs AFTER router selection, so model_id should be available
-    and included in the error response headers.
-    """
-    async with aiohttp.ClientSession() as session:
-        key = LITELLM_MASTER_KEY
-        result = await anthropic_messages_with_headers(
-            session=session,
-            key=key,
-            model="gpt-4",
-            reasoning_effort="low",  # Unsupported param that triggers error
-        )
-
-        # Verify the request failed
-        assert result["status"] == 400, f"Expected 400, got {result['status']}"
-
-        # Verify model_id header is present
-        assert "x-litellm-model-id" in result["headers"], (
-            f"x-litellm-model-id header missing in error response. "
-            f"Headers: {result['headers'].keys()}"
-        )
-
-        # Verify the header has a non-empty value
-        model_id = result["headers"]["x-litellm-model-id"]
-        assert model_id, "x-litellm-model-id header is empty"
-        print(f"Successfully retrieved model_id on error response: {model_id}")
-
-
-@pytest.mark.asyncio
-async def test_anthropic_messages_success_with_model_id_header():
-    """
-    Test that x-litellm-model-id header is returned on successful responses.
-
-    This is a baseline test to ensure the header is present on success too.
-    """
-    async with aiohttp.ClientSession() as session:
-        key = LITELLM_MASTER_KEY
-        result = await anthropic_messages_with_headers(
-            session=session,
-            key=key,
-            model="gpt-4",
-        )
-
-        # Verify the request succeeded
-        assert result["status"] == 200, f"Expected 200, got {result['status']}"
-
-        # Verify model_id header is present
-        assert "x-litellm-model-id" in result["headers"], (
-            f"x-litellm-model-id header missing in success response. "
-            f"Headers: {result['headers'].keys()}"
-        )
-
-        # Verify the header has a non-empty value
-        model_id = result["headers"]["x-litellm-model-id"]
-        assert model_id, "x-litellm-model-id header is empty"
-        print(f"Successfully retrieved model_id on success response: {model_id}")
diff --git a/tests/test_litellm/proxy/test_model_id_header_propagation.py b/tests/test_litellm/proxy/test_model_id_header_propagation.py
new file mode 100644
index 000000000000..c9dfede0d3ef
--- /dev/null
+++ b/tests/test_litellm/proxy/test_model_id_header_propagation.py
@@ -0,0 +1,250 @@
+"""
+Test that x-litellm-model-id header is propagated correctly on error responses.
+
+This test suite verifies the `maybe_get_model_id_from_logging_obj` method
+which is responsible for extracting model_id from different locations
+depending on the request lifecycle stage.
+"""
+
+import pytest
+from unittest.mock import MagicMock
+
+from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
+from litellm.proxy._types import UserAPIKeyAuth
+
+
+def test_maybe_get_model_id_from_logging_obj_from_litellm_params():
+    """
+    Test extraction of model_id from logging_obj.litellm_params (used by /v1/chat/completions).
+    """
+    # Create a ProxyBaseLLMRequestProcessing instance
+    processor = ProxyBaseLLMRequestProcessing(data={})
+
+    # Create a mock logging object with model_info in litellm_params
+    mock_logging_obj = MagicMock()
+    mock_logging_obj.litellm_params = {
+        "model_info": {
+            "id": "test-model-id-from-litellm-params"
+        }
+    }
+
+    # Test extraction
+    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+
+    assert model_id == "test-model-id-from-litellm-params"
+
+
+def test_maybe_get_model_id_from_logging_obj_from_litellm_params_nested():
+    """
+    Test extraction of model_id from nested metadata in logging_obj.litellm_params.
+    """
+    processor = ProxyBaseLLMRequestProcessing(data={})
+
+    # Create a mock logging object with model_info nested in metadata
+    mock_logging_obj = MagicMock()
+    mock_logging_obj.litellm_params = {
+        "metadata": {
+            "model_info": {
+                "id": "test-model-id-nested"
+            }
+        }
+    }
+
+    # Test extraction
+    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+
+    assert model_id == "test-model-id-nested"
+
+
+def test_maybe_get_model_id_from_logging_obj_from_kwargs():
+    """
+    Test extraction of model_id from logging_obj.kwargs (fallback path).
+    """
+    processor = ProxyBaseLLMRequestProcessing(data={})
+
+    # Create a mock logging object with model_info in kwargs
+    mock_logging_obj = MagicMock()
+    mock_logging_obj.litellm_params = None
+    mock_logging_obj.kwargs = {
+        "litellm_params": {
+            "model_info": {
+                "id": "test-model-id-from-kwargs"
+            }
+        }
+    }
+
+    # Test extraction
+    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+
+    assert model_id == "test-model-id-from-kwargs"
+
+
+def test_maybe_get_model_id_from_logging_obj_from_data():
+    """
+    Test extraction of model_id from self.data (used by /v1/messages and /v1/responses).
+    """
+    # Create a processor with model_info in data
+    processor = ProxyBaseLLMRequestProcessing(data={
+        "litellm_metadata": {
+            "model_info": {
+                "id": "test-model-id-from-data"
+            }
+        }
+    })
+
+    # Create a mock logging object without model_info
+    mock_logging_obj = MagicMock()
+    mock_logging_obj.litellm_params = {}
+    mock_logging_obj.kwargs = {}
+
+    # Test extraction - should fall back to self.data
+    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+
+    assert model_id == "test-model-id-from-data"
+
+
+def test_maybe_get_model_id_from_logging_obj_no_logging_obj():
+    """
+    Test extraction of model_id when logging_obj is None (should use self.data).
+    """
+    # Create a processor with model_info in data
+    processor = ProxyBaseLLMRequestProcessing(data={
+        "litellm_metadata": {
+            "model_info": {
+                "id": "test-model-id-no-logging-obj"
+            }
+        }
+    })
+
+    # Test extraction with None logging_obj
+    model_id = processor.maybe_get_model_id_from_logging_obj(None)
+
+    assert model_id == "test-model-id-no-logging-obj"
+
+
+def test_maybe_get_model_id_from_logging_obj_not_found():
+    """
+    Test extraction of model_id when it's not available anywhere (should return None).
+    """
+    processor = ProxyBaseLLMRequestProcessing(data={})
+
+    # Create a mock logging object without model_info anywhere
+    mock_logging_obj = MagicMock()
+    mock_logging_obj.litellm_params = {}
+    mock_logging_obj.kwargs = {}
+
+    # Test extraction - should return None
+    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+
+    assert model_id is None
+
+
+def test_maybe_get_model_id_priority_litellm_params_over_data():
+    """
+    Test that model_id from logging_obj.litellm_params takes priority over self.data.
+    """
+    # Create a processor with model_info in both places
+    processor = ProxyBaseLLMRequestProcessing(data={
+        "litellm_metadata": {
+            "model_info": {
+                "id": "model-id-from-data"
+            }
+        }
+    })
+
+    # Create a mock logging object with model_info
+    mock_logging_obj = MagicMock()
+    mock_logging_obj.litellm_params = {
+        "model_info": {
+            "id": "model-id-from-litellm-params"
+        }
+    }
+
+    # Test extraction - should prefer litellm_params
+    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+
+    assert model_id == "model-id-from-litellm-params"
+
+
+def test_get_custom_headers_includes_model_id():
+    """
+    Test that get_custom_headers includes x-litellm-model-id when model_id is provided.
+    """
+    # Create mock user_api_key_dict with all required attributes
+    mock_user_api_key_dict = MagicMock()
+    mock_user_api_key_dict.user_id = "test-user"
+    mock_user_api_key_dict.team_id = "test-team"
+    mock_user_api_key_dict.tpm_limit = 1000
+    mock_user_api_key_dict.rpm_limit = 100
+
+    # Call get_custom_headers with a model_id
+    headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        user_api_key_dict=mock_user_api_key_dict,
+        model_id="test-model-123",
+        cache_key="test-cache-key",
+        api_base="https://api.example.com",
+        version="1.0.0",
+        response_cost=0.001,
+        request_data={},
+        hidden_params={}
+    )
+
+    # Verify model_id is in headers
+    assert "x-litellm-model-id" in headers
+    assert headers["x-litellm-model-id"] == "test-model-123"
+
+
+def test_get_custom_headers_without_model_id():
+    """
+    Test that get_custom_headers works correctly when model_id is None or empty.
+    """
+    # Create mock user_api_key_dict with all required attributes
+    mock_user_api_key_dict = MagicMock()
+    mock_user_api_key_dict.user_id = "test-user"
+    mock_user_api_key_dict.team_id = "test-team"
+    mock_user_api_key_dict.tpm_limit = 1000
+    mock_user_api_key_dict.rpm_limit = 100
+
+    # Call get_custom_headers without a model_id
+    headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        user_api_key_dict=mock_user_api_key_dict,
+        model_id=None,
+        cache_key="test-cache-key",
+        api_base="https://api.example.com",
+        version="1.0.0",
+        response_cost=0.001,
+        request_data={},
+        hidden_params={}
+    )
+
+    # x-litellm-model-id should not be in headers (or should be empty/None)
+    if "x-litellm-model-id" in headers:
+        assert headers["x-litellm-model-id"] in [None, ""]
+
+
+def test_get_custom_headers_with_empty_string_model_id():
+    """
+    Test that get_custom_headers handles empty string model_id correctly.
+    """
+    # Create mock user_api_key_dict with all required attributes
+    mock_user_api_key_dict = MagicMock()
+    mock_user_api_key_dict.user_id = "test-user"
+    mock_user_api_key_dict.team_id = "test-team"
+    mock_user_api_key_dict.tpm_limit = 1000
+    mock_user_api_key_dict.rpm_limit = 100
+
+    # Call get_custom_headers with empty string model_id
+    headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
+        user_api_key_dict=mock_user_api_key_dict,
+        model_id="",
+        cache_key="test-cache-key",
+        api_base="https://api.example.com",
+        version="1.0.0",
+        response_cost=0.001,
+        request_data={},
+        hidden_params={}
+    )
+
+    # x-litellm-model-id should not be in headers (or should be empty)
+    if "x-litellm-model-id" in headers:
+        assert headers["x-litellm-model-id"] == ""

From ff823a276cb4f445d92786ff71854a4fcff757ca Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 21:45:58 -0500
Subject: [PATCH 8/9] clean up method name and fix responses api streaming

---
 litellm/proxy/common_request_processing.py    | 12 ++++++--
 .../proxy/test_model_id_header_propagation.py | 28 +++++++++----------
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index cfb9bbe613b3..63ddd2d32639 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -530,6 +530,14 @@ async def base_process_llm_request(
 
         hidden_params = getattr(response, "_hidden_params", {}) or {}
         model_id = hidden_params.get("model_id", None) or ""
+
+        # Fallback: extract model_id from litellm_metadata if not in hidden_params
+        # This is needed for ResponsesAPIStreamingIterator where _hidden_params might not be accessible
+        if not model_id:
+            litellm_metadata = self.data.get("litellm_metadata", {}) or {}
+            model_info = litellm_metadata.get("model_info", {}) or {}
+            model_id = model_info.get("id", "") or ""
+
         cache_key = hidden_params.get("cache_key", None) or ""
         api_base = hidden_params.get("api_base", None) or ""
         response_cost = hidden_params.get("response_cost", None) or ""
@@ -755,7 +763,7 @@ async def _handle_llm_api_exception(
         #
         # Note: We check the direct model_info path first (not nested in metadata) because that's where the router sets it.
         # The nested metadata path is only a fallback for cases where model_info wasn't set at the top level.
-        model_id = self.maybe_get_model_id_from_logging_obj(_litellm_logging_obj)
+        model_id = self.maybe_get_model_id(_litellm_logging_obj)
 
         custom_headers = ProxyBaseLLMRequestProcessing.get_custom_headers(
             user_api_key_dict=user_api_key_dict,
@@ -1076,7 +1084,7 @@ def _inject_cost_into_usage_dict(obj: dict, model_name: str) -> Optional[dict]:
                 return obj
         return None
 
-    def maybe_get_model_id_from_logging_obj(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]:
+    def maybe_get_model_id(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optional[str]:
         """
         Get model_id from logging object or request metadata.
 
diff --git a/tests/test_litellm/proxy/test_model_id_header_propagation.py b/tests/test_litellm/proxy/test_model_id_header_propagation.py
index c9dfede0d3ef..cc4e7c084d6c 100644
--- a/tests/test_litellm/proxy/test_model_id_header_propagation.py
+++ b/tests/test_litellm/proxy/test_model_id_header_propagation.py
@@ -1,7 +1,7 @@
 """
 Test that x-litellm-model-id header is propagated correctly on error responses.
 
-This test suite verifies the `maybe_get_model_id_from_logging_obj` method
+This test suite verifies the `maybe_get_model_id` method
 which is responsible for extracting model_id from different locations
 depending on the request lifecycle stage.
 """
@@ -13,7 +13,7 @@
 from litellm.proxy._types import UserAPIKeyAuth
 
 
-def test_maybe_get_model_id_from_logging_obj_from_litellm_params():
+def test_maybe_get_model_id_from_litellm_params():
     """
     Test extraction of model_id from logging_obj.litellm_params (used by /v1/chat/completions).
     """
@@ -29,12 +29,12 @@ def test_maybe_get_model_id_from_logging_obj_from_litellm_params():
     }
 
     # Test extraction
-    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+    model_id = processor.maybe_get_model_id(mock_logging_obj)
 
     assert model_id == "test-model-id-from-litellm-params"
 
 
-def test_maybe_get_model_id_from_logging_obj_from_litellm_params_nested():
+def test_maybe_get_model_id_from_litellm_params_nested():
     """
     Test extraction of model_id from nested metadata in logging_obj.litellm_params.
     """
@@ -51,12 +51,12 @@ def test_maybe_get_model_id_from_logging_obj_from_litellm_params_nested():
     }
 
     # Test extraction
-    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+    model_id = processor.maybe_get_model_id(mock_logging_obj)
 
     assert model_id == "test-model-id-nested"
 
 
-def test_maybe_get_model_id_from_logging_obj_from_kwargs():
+def test_maybe_get_model_id_from_kwargs():
     """
     Test extraction of model_id from logging_obj.kwargs (fallback path).
     """
@@ -74,12 +74,12 @@ def test_maybe_get_model_id_from_logging_obj_from_kwargs():
     }
 
     # Test extraction
-    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+    model_id = processor.maybe_get_model_id(mock_logging_obj)
 
     assert model_id == "test-model-id-from-kwargs"
 
 
-def test_maybe_get_model_id_from_logging_obj_from_data():
+def test_maybe_get_model_id_from_data():
     """
     Test extraction of model_id from self.data (used by /v1/messages and /v1/responses).
     """
@@ -98,12 +98,12 @@ def test_maybe_get_model_id_from_logging_obj_from_data():
     mock_logging_obj.kwargs = {}
 
     # Test extraction - should fall back to self.data
-    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+    model_id = processor.maybe_get_model_id(mock_logging_obj)
 
     assert model_id == "test-model-id-from-data"
 
 
-def test_maybe_get_model_id_from_logging_obj_no_logging_obj():
+def test_maybe_get_model_id_no_logging_obj():
     """
     Test extraction of model_id when logging_obj is None (should use self.data).
     """
@@ -117,12 +117,12 @@ def test_maybe_get_model_id_from_logging_obj_no_logging_obj():
     })
 
     # Test extraction with None logging_obj
-    model_id = processor.maybe_get_model_id_from_logging_obj(None)
+    model_id = processor.maybe_get_model_id(None)
 
     assert model_id == "test-model-id-no-logging-obj"
 
 
-def test_maybe_get_model_id_from_logging_obj_not_found():
+def test_maybe_get_model_id_not_found():
     """
     Test extraction of model_id when it's not available anywhere (should return None).
     """
@@ -134,7 +134,7 @@ def test_maybe_get_model_id_from_logging_obj_not_found():
     mock_logging_obj.kwargs = {}
 
     # Test extraction - should return None
-    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+    model_id = processor.maybe_get_model_id(mock_logging_obj)
 
     assert model_id is None
 
@@ -161,7 +161,7 @@ def test_maybe_get_model_id_priority_litellm_params_over_data():
     }
 
     # Test extraction - should prefer litellm_params
-    model_id = processor.maybe_get_model_id_from_logging_obj(mock_logging_obj)
+    model_id = processor.maybe_get_model_id(mock_logging_obj)
 
     assert model_id == "model-id-from-litellm-params"
 

From a54a9f91633e12aa05a30a02ae7ba559f37901c3 Mon Sep 17 00:00:00 2001
From: Raghav Jhavar <raghav@stripe.com>
Date: Sat, 22 Nov 2025 21:50:43 -0500
Subject: [PATCH 9/9] remove comment

---
 litellm/proxy/common_request_processing.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 63ddd2d32639..a189216ad467 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -532,7 +532,6 @@ async def base_process_llm_request(
         model_id = hidden_params.get("model_id", None) or ""
 
         # Fallback: extract model_id from litellm_metadata if not in hidden_params
-        # This is needed for ResponsesAPIStreamingIterator where _hidden_params might not be accessible
         if not model_id:
             litellm_metadata = self.data.get("litellm_metadata", {}) or {}
             model_info = litellm_metadata.get("model_info", {}) or {}
@@ -1129,4 +1128,4 @@ def maybe_get_model_id(self, _logging_obj: Optional[LiteLLMLoggingObj]) -> Optio
             model_info = litellm_metadata.get("model_info", {}) or {}
             model_id = model_info.get("id", None)
 
-        return model_id
\ No newline at end of file
+        return model_id