Skip to content

Commit 1aecdc6

Browse files
Copilot authored and towry committed
Apply PR BerriAI#16812: Add thought signature support to v1/messages api
Co-authored-by: towry <[email protected]>
1 parent fc87e30 commit 1aecdc6

File tree

12 files changed

+442
-34
lines changed

12 files changed

+442
-34
lines changed

litellm/litellm_core_utils/prompt_templates/factory.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,16 +1162,58 @@ def _gemini_tool_call_invoke_helper(
11621162
return function_call
11631163

11641164

1165-
def _get_thought_signature_from_tool(tool: dict) -> Optional[str]:
1166-
"""Extract thought signature from tool call's provider_specific_fields"""
1165+
def _get_thought_signature_from_tool(tool: dict, model: Optional[str] = None) -> Optional[str]:
1166+
"""Extract thought signature from tool call's provider_specific_fields.
1167+
1168+
Checks both tool.provider_specific_fields and tool.function.provider_specific_fields.
1169+
If no signature is found and model is gemini-3, returns a dummy signature.
1170+
"""
1171+
# First check tool's provider_specific_fields
11671172
provider_fields = tool.get("provider_specific_fields") or {}
11681173
if isinstance(provider_fields, dict):
1169-
return provider_fields.get("thought_signature")
1174+
signature = provider_fields.get("thought_signature")
1175+
if signature:
1176+
return signature
1177+
1178+
# Then check function's provider_specific_fields
1179+
function = tool.get("function")
1180+
if function:
1181+
if isinstance(function, dict):
1182+
func_provider_fields = function.get("provider_specific_fields") or {}
1183+
if isinstance(func_provider_fields, dict):
1184+
signature = func_provider_fields.get("thought_signature")
1185+
if signature:
1186+
return signature
1187+
elif hasattr(function, "provider_specific_fields") and function.provider_specific_fields:
1188+
if isinstance(function.provider_specific_fields, dict):
1189+
signature = function.provider_specific_fields.get("thought_signature")
1190+
if signature:
1191+
return signature
1192+
1193+
# If no signature found and model is gemini-3, return dummy signature
1194+
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
1195+
if model and VertexGeminiConfig._is_gemini_3_or_newer(model):
1196+
return _get_dummy_thought_signature()
1197+
11701198
return None
11711199

11721200

1201+
def _get_dummy_thought_signature() -> str:
1202+
"""Generate a dummy thought signature for models that require it.
1203+
1204+
This is used when transferring conversation history from older models
1205+
(like gemini-2.5-flash) to gemini-3, which requires thought_signature
1206+
for strict validation.
1207+
"""
1208+
# Return a base64-encoded dummy signature string
1209+
# Below dummy signature is recommended by google - https://ai.google.dev/gemini-api/docs/thought-signatures#faqs
1210+
dummy_data = b"skip_thought_signature_validator"
1211+
return base64.b64encode(dummy_data).decode("utf-8")
1212+
1213+
11731214
def convert_to_gemini_tool_call_invoke(
11741215
message: ChatCompletionAssistantMessage,
1216+
model: Optional[str] = None,
11751217
) -> List[VertexPartType]:
11761218
"""
11771219
OpenAI tool invokes:
@@ -1229,7 +1271,7 @@ def convert_to_gemini_tool_call_invoke(
12291271
part_dict: VertexPartType = {
12301272
"function_call": gemini_function_call
12311273
}
1232-
thought_signature = _get_thought_signature_from_tool(dict(tool))
1274+
thought_signature = _get_thought_signature_from_tool(dict(tool), model=model)
12331275
if thought_signature:
12341276
part_dict["thoughtSignature"] = thought_signature
12351277

@@ -1250,11 +1292,18 @@ def convert_to_gemini_tool_call_invoke(
12501292
}
12511293

12521294
# Extract thought signature from function_call's provider_specific_fields
1295+
thought_signature = None
12531296
provider_fields = function_call.get("provider_specific_fields") if isinstance(function_call, dict) else {}
12541297
if isinstance(provider_fields, dict):
12551298
thought_signature = provider_fields.get("thought_signature")
1256-
if thought_signature:
1257-
part_dict_function["thoughtSignature"] = thought_signature
1299+
1300+
# If no signature found and model is gemini-3, use dummy signature
1301+
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
1302+
if not thought_signature and model and VertexGeminiConfig._is_gemini_3_or_newer(model):
1303+
thought_signature = _get_dummy_thought_signature()
1304+
1305+
if thought_signature:
1306+
part_dict_function["thoughtSignature"] = thought_signature
12581307

12591308
_parts_list.append(part_dict_function)
12601309
else: # don't silently drop params. Make it clear to user what's happening.

litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py

Lines changed: 66 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
TYPE_CHECKING,
44
Any,
55
AsyncIterator,
6+
Dict,
67
List,
78
Literal,
89
Optional,
@@ -129,6 +130,39 @@ def __init__(self):
129130

130131
### FOR [BETA] `/v1/messages` endpoint support
131132

133+
def _extract_signature_from_tool_call(
134+
self, tool_call: Any
135+
) -> Optional[str]:
136+
"""
137+
Extract signature from a tool call's provider_specific_fields.
138+
Only checks provider_specific_fields, not thinking blocks.
139+
"""
140+
signature = None
141+
142+
if hasattr(tool_call, "provider_specific_fields") and tool_call.provider_specific_fields:
143+
if "thought_signature" in tool_call.provider_specific_fields:
144+
signature = tool_call.provider_specific_fields["thought_signature"]
145+
elif (
146+
hasattr(tool_call.function, "provider_specific_fields")
147+
and tool_call.function.provider_specific_fields
148+
):
149+
if "thought_signature" in tool_call.function.provider_specific_fields:
150+
signature = tool_call.function.provider_specific_fields["thought_signature"]
151+
152+
return signature
153+
154+
def _extract_signature_from_tool_use_content(
155+
self, content: dict[str, Any]
156+
) -> Optional[str]:
157+
"""
158+
Extract signature from a tool_use content block's provider_specific_fields.
159+
"""
160+
provider_specific_fields = content.get("provider_specific_fields", {})
161+
if provider_specific_fields:
162+
return provider_specific_fields.get("signature")
163+
return None
164+
165+
132166
def translatable_anthropic_params(self) -> List:
133167
"""
134168
Which anthropic params, we need to translate to the openai format.
@@ -263,10 +297,18 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
263297
else:
264298
assistant_message_str += content.get("text", "")
265299
elif content.get("type") == "tool_use":
266-
function_chunk = ChatCompletionToolCallFunctionChunk(
267-
name=content.get("name", ""),
268-
arguments=json.dumps(content.get("input", {})),
269-
)
300+
function_chunk: ChatCompletionToolCallFunctionChunk = {
301+
"name": content.get("name", ""),
302+
"arguments": json.dumps(content.get("input", {})),
303+
}
304+
signature = self._extract_signature_from_tool_use_content(content)
305+
306+
if signature:
307+
provider_specific_fields: Dict[str, Any] = (
308+
function_chunk.get("provider_specific_fields") or {}
309+
)
310+
provider_specific_fields["thought_signature"] = signature
311+
function_chunk["provider_specific_fields"] = provider_specific_fields
270312

271313
tool_calls.append(
272314
ChatCompletionAssistantToolCall(
@@ -512,18 +554,27 @@ def _translate_openai_content_to_anthropic(self, choices: List[Choices]) -> List
512554
and len(choice.message.tool_calls) > 0
513555
):
514556
for tool_call in choice.message.tool_calls:
515-
new_content.append(
516-
AnthropicResponseContentBlockToolUse(
517-
type="tool_use",
518-
id=tool_call.id,
519-
name=tool_call.function.name or "",
520-
input=(
521-
json.loads(tool_call.function.arguments)
522-
if tool_call.function.arguments
523-
else {}
524-
),
525-
)
557+
# Extract signature from provider_specific_fields only
558+
signature = self._extract_signature_from_tool_call(tool_call)
559+
560+
provider_specific_fields = {}
561+
if signature:
562+
provider_specific_fields["signature"] = signature
563+
564+
tool_use_block = AnthropicResponseContentBlockToolUse(
565+
type="tool_use",
566+
id=tool_call.id,
567+
name=tool_call.function.name or "",
568+
input=(
569+
json.loads(tool_call.function.arguments)
570+
if tool_call.function.arguments
571+
else {}
572+
),
526573
)
574+
# Add provider_specific_fields if signature is present
575+
if provider_specific_fields:
576+
tool_use_block.provider_specific_fields = provider_specific_fields
577+
new_content.append(tool_use_block)
527578
# Handle text content
528579
elif choice.message.content is not None:
529580
new_content.append(

litellm/llms/gemini/chat/transformation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,4 @@ def _transform_messages(
140140
except Exception:
141141
# If conversion fails, leave as is and let the API handle it
142142
pass
143-
return _gemini_convert_messages_with_history(messages=messages)
143+
return _gemini_convert_messages_with_history(messages=messages, model=model)

litellm/llms/vertex_ai/context_caching/transformation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def transform_openai_messages_to_gemini_context_caching(
173173
supports_system_message=supports_system_message, messages=messages
174174
)
175175

176-
transformed_messages = _gemini_convert_messages_with_history(messages=new_messages)
176+
transformed_messages = _gemini_convert_messages_with_history(messages=new_messages, model=model)
177177

178178
model_name = "models/{}".format(model)
179179

litellm/llms/vertex_ai/gemini/transformation.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ def check_if_part_exists_in_parts(
195195

196196
def _gemini_convert_messages_with_history( # noqa: PLR0915
197197
messages: List[AllMessageValues],
198+
model: Optional[str] = None,
198199
) -> List[ContentType]:
199200
"""
200201
Converts given messages from OpenAI format to Gemini format
@@ -379,7 +380,7 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
379380
or assistant_msg.get("function_call") is not None
380381
): # support assistant tool invoke conversion
381382
gemini_tool_call_parts = convert_to_gemini_tool_call_invoke(
382-
assistant_msg
383+
assistant_msg, model=model
383384
)
384385
## check if gemini_tool_call already exists in assistant_content
385386
for gemini_tool_call_part in gemini_tool_call_parts:

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1852,7 +1852,7 @@ def _transform_google_generate_content_to_openai_model_response(
18521852
def _transform_messages(
18531853
self, messages: List[AllMessageValues], model: Optional[str] = None
18541854
) -> List[ContentType]:
1855-
return _gemini_convert_messages_with_history(messages=messages)
1855+
return _gemini_convert_messages_with_history(messages=messages, model=model)
18561856

18571857
def get_error_class(
18581858
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]

litellm/model_prices_and_context_window_backup.json

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10994,10 +10994,12 @@
1099410994
"supports_web_search": true
1099510995
},
1099610996
"gemini-3-pro-preview": {
10997-
"cache_read_input_token_cost": 1.25e-07,
10997+
"cache_read_input_token_cost": 2e-07,
10998+
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
1099810999
"cache_creation_input_token_cost_above_200k_tokens": 2.5e-07,
1099911000
"input_cost_per_token": 2e-06,
1100011001
"input_cost_per_token_above_200k_tokens": 4e-06,
11002+
"input_cost_per_token_batches": 1e-06,
1100111003
"litellm_provider": "vertex_ai-language-models",
1100211004
"max_audio_length_hours": 8.4,
1100311005
"max_audio_per_prompt": 1,
@@ -11011,10 +11013,60 @@
1101111013
"mode": "chat",
1101211014
"output_cost_per_token": 1.2e-05,
1101311015
"output_cost_per_token_above_200k_tokens": 1.8e-05,
11016+
"output_cost_per_token_batches": 6e-06,
1101411017
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
1101511018
"supported_endpoints": [
1101611019
"/v1/chat/completions",
11017-
"/v1/completions"
11020+
"/v1/completions",
11021+
"/v1/batch"
11022+
],
11023+
"supported_modalities": [
11024+
"text",
11025+
"image",
11026+
"audio",
11027+
"video"
11028+
],
11029+
"supported_output_modalities": [
11030+
"text"
11031+
],
11032+
"supports_audio_input": true,
11033+
"supports_function_calling": true,
11034+
"supports_pdf_input": true,
11035+
"supports_prompt_caching": true,
11036+
"supports_reasoning": true,
11037+
"supports_response_schema": true,
11038+
"supports_system_messages": true,
11039+
"supports_tool_choice": true,
11040+
"supports_video_input": true,
11041+
"supports_vision": true,
11042+
"supports_web_search": true
11043+
},
11044+
"vertex_ai/gemini-3-pro-preview": {
11045+
"cache_read_input_token_cost": 2e-07,
11046+
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
11047+
"cache_creation_input_token_cost_above_200k_tokens": 2.5e-07,
11048+
"input_cost_per_token": 2e-06,
11049+
"input_cost_per_token_above_200k_tokens": 4e-06,
11050+
"input_cost_per_token_batches": 1e-06,
11051+
"litellm_provider": "vertex_ai",
11052+
"max_audio_length_hours": 8.4,
11053+
"max_audio_per_prompt": 1,
11054+
"max_images_per_prompt": 3000,
11055+
"max_input_tokens": 1048576,
11056+
"max_output_tokens": 65535,
11057+
"max_pdf_size_mb": 30,
11058+
"max_tokens": 65535,
11059+
"max_video_length": 1,
11060+
"max_videos_per_prompt": 10,
11061+
"mode": "chat",
11062+
"output_cost_per_token": 1.2e-05,
11063+
"output_cost_per_token_above_200k_tokens": 1.8e-05,
11064+
"output_cost_per_token_batches": 6e-06,
11065+
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
11066+
"supported_endpoints": [
11067+
"/v1/chat/completions",
11068+
"/v1/completions",
11069+
"/v1/batch"
1101811070
],
1101911071
"supported_modalities": [
1102011072
"text",
@@ -12697,9 +12749,11 @@
1269712749
"tpm": 800000
1269812750
},
1269912751
"gemini/gemini-3-pro-preview": {
12700-
"cache_read_input_token_cost": 3.125e-07,
12752+
"cache_read_input_token_cost": 2e-07,
12753+
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
1270112754
"input_cost_per_token": 2e-06,
1270212755
"input_cost_per_token_above_200k_tokens": 4e-06,
12756+
"input_cost_per_token_batches": 1e-06,
1270312757
"litellm_provider": "gemini",
1270412758
"max_audio_length_hours": 8.4,
1270512759
"max_audio_per_prompt": 1,
@@ -12713,11 +12767,13 @@
1271312767
"mode": "chat",
1271412768
"output_cost_per_token": 1.2e-05,
1271512769
"output_cost_per_token_above_200k_tokens": 1.8e-05,
12770+
"output_cost_per_token_batches": 6e-06,
1271612771
"rpm": 2000,
1271712772
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
1271812773
"supported_endpoints": [
1271912774
"/v1/chat/completions",
12720-
"/v1/completions"
12775+
"/v1/completions",
12776+
"/v1/batch"
1272112777
],
1272212778
"supported_modalities": [
1272312779
"text",

litellm/types/llms/anthropic.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,10 @@ class AnthropicResponseContentBlockToolUse(BaseModel):
458458
id: str
459459
name: str
460460
input: dict
461+
provider_specific_fields: Optional[Dict[str, Any]] = None
462+
463+
class Config:
464+
extra = "allow" # Allow provider_specific_fields
461465

462466

463467
class AnthropicResponseContentBlockThinking(BaseModel):

litellm/types/llms/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from enum import Enum
22
from os import PathLike
3-
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
3+
from typing import IO, Any, Dict, Iterable, List, Literal, Mapping, Optional, Tuple, Union
44

55
import httpx
66
from openai._legacy_response import (
@@ -453,6 +453,7 @@ class ChatCompletionAudioDelta(TypedDict, total=False):
453453
class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
454454
name: Optional[str]
455455
arguments: str
456+
provider_specific_fields: Optional[Dict[str, Any]]
456457

457458

458459
class ChatCompletionAssistantToolCall(TypedDict):

0 commit comments

Comments (0)