Skip to content

Commit 1aecdc6

Browse files
Copilot authored and towry committed
Apply PR BerriAI#16812: Add thought signature support to v1/messages api
Co-authored-by: towry <[email protected]>
1 parent fc87e30 commit 1aecdc6

File tree

12 files changed

+442
-34
lines changed

12 files changed

+442
-34
lines changed

litellm/litellm_core_utils/prompt_templates/factory.py

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,16 +1162,58 @@ def _gemini_tool_call_invoke_helper(
11621162
return function_call
11631163

11641164

1165-
def _get_thought_signature_from_tool(tool: dict) -> Optional[str]:
1166-
"""Extract thought signature from tool call's provider_specific_fields"""
1165+
def _get_thought_signature_from_tool(tool: dict, model: Optional[str] = None) -> Optional[str]:
1166+
"""Extract thought signature from tool call's provider_specific_fields.
1167+
1168+
Checks both tool.provider_specific_fields and tool.function.provider_specific_fields.
1169+
If no signature is found and model is gemini-3, returns a dummy signature.
1170+
"""
1171+
# First check tool's provider_specific_fields
11671172
provider_fields = tool.get("provider_specific_fields") or {}
11681173
if isinstance(provider_fields, dict):
1169-
return provider_fields.get("thought_signature")
1174+
signature = provider_fields.get("thought_signature")
1175+
if signature:
1176+
return signature
1177+
1178+
# Then check function's provider_specific_fields
1179+
function = tool.get("function")
1180+
if function:
1181+
if isinstance(function, dict):
1182+
func_provider_fields = function.get("provider_specific_fields") or {}
1183+
if isinstance(func_provider_fields, dict):
1184+
signature = func_provider_fields.get("thought_signature")
1185+
if signature:
1186+
return signature
1187+
elif hasattr(function, "provider_specific_fields") and function.provider_specific_fields:
1188+
if isinstance(function.provider_specific_fields, dict):
1189+
signature = function.provider_specific_fields.get("thought_signature")
1190+
if signature:
1191+
return signature
1192+
1193+
# If no signature found and model is gemini-3, return dummy signature
1194+
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
1195+
if model and VertexGeminiConfig._is_gemini_3_or_newer(model):
1196+
return _get_dummy_thought_signature()
1197+
11701198
return None
11711199

11721200

1201+
def _get_dummy_thought_signature() -> str:
1202+
"""Generate a dummy thought signature for models that require it.
1203+
1204+
This is used when transferring conversation history from older models
1205+
(like gemini-2.5-flash) to gemini-3, which requires thought_signature
1206+
for strict validation.
1207+
"""
1208+
# Return a base64-encoded dummy signature string
1209+
# Below dummy signature is recommended by google - https://ai.google.dev/gemini-api/docs/thought-signatures#faqs
1210+
dummy_data = b"skip_thought_signature_validator"
1211+
return base64.b64encode(dummy_data).decode("utf-8")
1212+
1213+
11731214
def convert_to_gemini_tool_call_invoke(
11741215
message: ChatCompletionAssistantMessage,
1216+
model: Optional[str] = None,
11751217
) -> List[VertexPartType]:
11761218
"""
11771219
OpenAI tool invokes:
@@ -1229,7 +1271,7 @@ def convert_to_gemini_tool_call_invoke(
12291271
part_dict: VertexPartType = {
12301272
"function_call": gemini_function_call
12311273
}
1232-
thought_signature = _get_thought_signature_from_tool(dict(tool))
1274+
thought_signature = _get_thought_signature_from_tool(dict(tool), model=model)
12331275
if thought_signature:
12341276
part_dict["thoughtSignature"] = thought_signature
12351277

@@ -1250,11 +1292,18 @@ def convert_to_gemini_tool_call_invoke(
12501292
}
12511293

12521294
# Extract thought signature from function_call's provider_specific_fields
1295+
thought_signature = None
12531296
provider_fields = function_call.get("provider_specific_fields") if isinstance(function_call, dict) else {}
12541297
if isinstance(provider_fields, dict):
12551298
thought_signature = provider_fields.get("thought_signature")
1256-
if thought_signature:
1257-
part_dict_function["thoughtSignature"] = thought_signature
1299+
1300+
# If no signature found and model is gemini-3, use dummy signature
1301+
from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig
1302+
if not thought_signature and model and VertexGeminiConfig._is_gemini_3_or_newer(model):
1303+
thought_signature = _get_dummy_thought_signature()
1304+
1305+
if thought_signature:
1306+
part_dict_function["thoughtSignature"] = thought_signature
12581307

12591308
_parts_list.append(part_dict_function)
12601309
else: # don't silently drop params. Make it clear to user what's happening.

litellm/llms/anthropic/experimental_pass_through/adapters/transformation.py

Lines changed: 66 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
TYPE_CHECKING,
44
Any,
55
AsyncIterator,
6+
Dict,
67
List,
78
Literal,
89
Optional,
@@ -129,6 +130,39 @@ def __init__(self):
129130

130131
### FOR [BETA] `/v1/messages` endpoint support
131132

133+
def _extract_signature_from_tool_call(
134+
self, tool_call: Any
135+
) -> Optional[str]:
136+
"""
137+
Extract signature from a tool call's provider_specific_fields.
138+
Only checks provider_specific_fields, not thinking blocks.
139+
"""
140+
signature = None
141+
142+
if hasattr(tool_call, "provider_specific_fields") and tool_call.provider_specific_fields:
143+
if "thought_signature" in tool_call.provider_specific_fields:
144+
signature = tool_call.provider_specific_fields["thought_signature"]
145+
elif (
146+
hasattr(tool_call.function, "provider_specific_fields")
147+
and tool_call.function.provider_specific_fields
148+
):
149+
if "thought_signature" in tool_call.function.provider_specific_fields:
150+
signature = tool_call.function.provider_specific_fields["thought_signature"]
151+
152+
return signature
153+
154+
def _extract_signature_from_tool_use_content(
155+
self, content: dict[str, Any]
156+
) -> Optional[str]:
157+
"""
158+
Extract signature from a tool_use content block's provider_specific_fields.
159+
"""
160+
provider_specific_fields = content.get("provider_specific_fields", {})
161+
if provider_specific_fields:
162+
return provider_specific_fields.get("signature")
163+
return None
164+
165+
132166
def translatable_anthropic_params(self) -> List:
133167
"""
134168
Which anthropic params, we need to translate to the openai format.
@@ -263,10 +297,18 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
263297
else:
264298
assistant_message_str += content.get("text", "")
265299
elif content.get("type") == "tool_use":
266-
function_chunk = ChatCompletionToolCallFunctionChunk(
267-
name=content.get("name", ""),
268-
arguments=json.dumps(content.get("input", {})),
269-
)
300+
function_chunk: ChatCompletionToolCallFunctionChunk = {
301+
"name": content.get("name", ""),
302+
"arguments": json.dumps(content.get("input", {})),
303+
}
304+
signature = self._extract_signature_from_tool_use_content(content)
305+
306+
if signature:
307+
provider_specific_fields: Dict[str, Any] = (
308+
function_chunk.get("provider_specific_fields") or {}
309+
)
310+
provider_specific_fields["thought_signature"] = signature
311+
function_chunk["provider_specific_fields"] = provider_specific_fields
270312

271313
tool_calls.append(
272314
ChatCompletionAssistantToolCall(
@@ -512,18 +554,27 @@ def _translate_openai_content_to_anthropic(self, choices: List[Choices]) -> List
512554
and len(choice.message.tool_calls) > 0
513555
):
514556
for tool_call in choice.message.tool_calls:
515-
new_content.append(
516-
AnthropicResponseContentBlockToolUse(
517-
type="tool_use",
518-
id=tool_call.id,
519-
name=tool_call.function.name or "",
520-
input=(
521-
json.loads(tool_call.function.arguments)
522-
if tool_call.function.arguments
523-
else {}
524-
),
525-
)
557+
# Extract signature from provider_specific_fields only
558+
signature = self._extract_signature_from_tool_call(tool_call)
559+
560+
provider_specific_fields = {}
561+
if signature:
562+
provider_specific_fields["signature"] = signature
563+
564+
tool_use_block = AnthropicResponseContentBlockToolUse(
565+
type="tool_use",
566+
id=tool_call.id,
567+
name=tool_call.function.name or "",
568+
input=(
569+
json.loads(tool_call.function.arguments)
570+
if tool_call.function.arguments
571+
else {}
572+
),
526573
)
574+
# Add provider_specific_fields if signature is present
575+
if provider_specific_fields:
576+
tool_use_block.provider_specific_fields = provider_specific_fields
577+
new_content.append(tool_use_block)
527578
# Handle text content
528579
elif choice.message.content is not None:
529580
new_content.append(

litellm/llms/gemini/chat/transformation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,4 @@ def _transform_messages(
140140
except Exception:
141141
# If conversion fails, leave as is and let the API handle it
142142
pass
143-
return _gemini_convert_messages_with_history(messages=messages)
143+
return _gemini_convert_messages_with_history(messages=messages, model=model)

litellm/llms/vertex_ai/context_caching/transformation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def transform_openai_messages_to_gemini_context_caching(
173173
supports_system_message=supports_system_message, messages=messages
174174
)
175175

176-
transformed_messages = _gemini_convert_messages_with_history(messages=new_messages)
176+
transformed_messages = _gemini_convert_messages_with_history(messages=new_messages, model=model)
177177

178178
model_name = "models/{}".format(model)
179179

litellm/llms/vertex_ai/gemini/transformation.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ def check_if_part_exists_in_parts(
195195

196196
def _gemini_convert_messages_with_history( # noqa: PLR0915
197197
messages: List[AllMessageValues],
198+
model: Optional[str] = None,
198199
) -> List[ContentType]:
199200
"""
200201
Converts given messages from OpenAI format to Gemini format
@@ -379,7 +380,7 @@ def _gemini_convert_messages_with_history( # noqa: PLR0915
379380
or assistant_msg.get("function_call") is not None
380381
): # support assistant tool invoke conversion
381382
gemini_tool_call_parts = convert_to_gemini_tool_call_invoke(
382-
assistant_msg
383+
assistant_msg, model=model
383384
)
384385
## check if gemini_tool_call already exists in assistant_content
385386
for gemini_tool_call_part in gemini_tool_call_parts:

litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1852,7 +1852,7 @@ def _transform_google_generate_content_to_openai_model_response(
18521852
def _transform_messages(
18531853
self, messages: List[AllMessageValues], model: Optional[str] = None
18541854
) -> List[ContentType]:
1855-
return _gemini_convert_messages_with_history(messages=messages)
1855+
return _gemini_convert_messages_with_history(messages=messages, model=model)
18561856

18571857
def get_error_class(
18581858
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]

litellm/model_prices_and_context_window_backup.json

Lines changed: 60 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10994,10 +10994,12 @@
1099410994
"supports_web_search": true
1099510995
},
1099610996
"gemini-3-pro-preview": {
10997-
"cache_read_input_token_cost": 1.25e-07,
10997+
"cache_read_input_token_cost": 2e-07,
10998+
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
1099810999
"cache_creation_input_token_cost_above_200k_tokens": 2.5e-07,
1099911000
"input_cost_per_token": 2e-06,
1100011001
"input_cost_per_token_above_200k_tokens": 4e-06,
11002+
"input_cost_per_token_batches": 1e-06,
1100111003
"litellm_provider": "vertex_ai-language-models",
1100211004
"max_audio_length_hours": 8.4,
1100311005
"max_audio_per_prompt": 1,
@@ -11011,10 +11013,60 @@
1101111013
"mode": "chat",
1101211014
"output_cost_per_token": 1.2e-05,
1101311015
"output_cost_per_token_above_200k_tokens": 1.8e-05,
11016+
"output_cost_per_token_batches": 6e-06,
1101411017
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
1101511018
"supported_endpoints": [
1101611019
"/v1/chat/completions",
11017-
"/v1/completions"
11020+
"/v1/completions",
11021+
"/v1/batch"
11022+
],
11023+
"supported_modalities": [
11024+
"text",
11025+
"image",
11026+
"audio",
11027+
"video"
11028+
],
11029+
"supported_output_modalities": [
11030+
"text"
11031+
],
11032+
"supports_audio_input": true,
11033+
"supports_function_calling": true,
11034+
"supports_pdf_input": true,
11035+
"supports_prompt_caching": true,
11036+
"supports_reasoning": true,
11037+
"supports_response_schema": true,
11038+
"supports_system_messages": true,
11039+
"supports_tool_choice": true,
11040+
"supports_video_input": true,
11041+
"supports_vision": true,
11042+
"supports_web_search": true
11043+
},
11044+
"vertex_ai/gemini-3-pro-preview": {
11045+
"cache_read_input_token_cost": 2e-07,
11046+
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
11047+
"cache_creation_input_token_cost_above_200k_tokens": 2.5e-07,
11048+
"input_cost_per_token": 2e-06,
11049+
"input_cost_per_token_above_200k_tokens": 4e-06,
11050+
"input_cost_per_token_batches": 1e-06,
11051+
"litellm_provider": "vertex_ai",
11052+
"max_audio_length_hours": 8.4,
11053+
"max_audio_per_prompt": 1,
11054+
"max_images_per_prompt": 3000,
11055+
"max_input_tokens": 1048576,
11056+
"max_output_tokens": 65535,
11057+
"max_pdf_size_mb": 30,
11058+
"max_tokens": 65535,
11059+
"max_video_length": 1,
11060+
"max_videos_per_prompt": 10,
11061+
"mode": "chat",
11062+
"output_cost_per_token": 1.2e-05,
11063+
"output_cost_per_token_above_200k_tokens": 1.8e-05,
11064+
"output_cost_per_token_batches": 6e-06,
11065+
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
11066+
"supported_endpoints": [
11067+
"/v1/chat/completions",
11068+
"/v1/completions",
11069+
"/v1/batch"
1101811070
],
1101911071
"supported_modalities": [
1102011072
"text",
@@ -12697,9 +12749,11 @@
1269712749
"tpm": 800000
1269812750
},
1269912751
"gemini/gemini-3-pro-preview": {
12700-
"cache_read_input_token_cost": 3.125e-07,
12752+
"cache_read_input_token_cost": 2e-07,
12753+
"cache_read_input_token_cost_above_200k_tokens": 4e-07,
1270112754
"input_cost_per_token": 2e-06,
1270212755
"input_cost_per_token_above_200k_tokens": 4e-06,
12756+
"input_cost_per_token_batches": 1e-06,
1270312757
"litellm_provider": "gemini",
1270412758
"max_audio_length_hours": 8.4,
1270512759
"max_audio_per_prompt": 1,
@@ -12713,11 +12767,13 @@
1271312767
"mode": "chat",
1271412768
"output_cost_per_token": 1.2e-05,
1271512769
"output_cost_per_token_above_200k_tokens": 1.8e-05,
12770+
"output_cost_per_token_batches": 6e-06,
1271612771
"rpm": 2000,
1271712772
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
1271812773
"supported_endpoints": [
1271912774
"/v1/chat/completions",
12720-
"/v1/completions"
12775+
"/v1/completions",
12776+
"/v1/batch"
1272112777
],
1272212778
"supported_modalities": [
1272312779
"text",

litellm/types/llms/anthropic.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,10 @@ class AnthropicResponseContentBlockToolUse(BaseModel):
458458
id: str
459459
name: str
460460
input: dict
461+
provider_specific_fields: Optional[Dict[str, Any]] = None
462+
463+
class Config:
464+
extra = "allow" # Allow provider_specific_fields
461465

462466

463467
class AnthropicResponseContentBlockThinking(BaseModel):

litellm/types/llms/openai.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from enum import Enum
22
from os import PathLike
3-
from typing import IO, Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
3+
from typing import IO, Any, Dict, Iterable, List, Literal, Mapping, Optional, Tuple, Union
44

55
import httpx
66
from openai._legacy_response import (
@@ -453,6 +453,7 @@ class ChatCompletionAudioDelta(TypedDict, total=False):
453453
class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
454454
name: Optional[str]
455455
arguments: str
456+
provider_specific_fields: Optional[Dict[str, Any]]
456457

457458

458459
class ChatCompletionAssistantToolCall(TypedDict):

0 commit comments

Comments (0)