From 41b395ce847a744c2cdcf89cc014f97f2afc66f3 Mon Sep 17 00:00:00 2001
From: Sebastian
Date: Wed, 19 Nov 2025 13:38:13 -0500
Subject: [PATCH] fix(vertex_ai): add includeThoughts=True for Gemini 3
 reasoning_effort

Gemini 3 models require 'includeThoughts: True' in thinkingConfig before
they return the actual thought text. Previously, using reasoning_effort set
'thinkingLevel' but omitted the boolean flag, so reasoning_content came back
empty.

This fix:
1. Updates `_map_reasoning_effort_to_thinking_level` to set
   `includeThoughts: True` for minimal/low/medium/high, and
   `includeThoughts: False` for disable/none.
2. Adds unit tests to verify the config mapping.
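
For illustration, the new mapping behaves as follows (a sketch; the helper
is internal to VertexGeminiConfig and the model id is just an example):

    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )

    VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
        "low", "gemini-3-pro-preview"
    )
    # -> {"thinkingLevel": "low", "includeThoughts": True}

    VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
        "disable", "gemini-3-pro-preview"
    )
    # -> {"thinkingLevel": "low", "includeThoughts": False}

End to end (assumed usage), reasoning_content should now be populated
instead of empty:

    import litellm

    response = litellm.completion(
        model="vertex_ai/gemini-3-pro-preview",  # example model id
        messages=[{"role": "user", "content": "Why is the sky blue?"}],
        reasoning_effort="low",  # now also sets includeThoughts=True
    )
    print(response.choices[0].message.reasoning_content)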
---
 .../vertex_and_google_ai_studio_gemini.py     | 171 +++++++++++-------
 ...test_vertex_and_google_ai_studio_gemini.py |  20 +-
 2 files changed, 116 insertions(+), 75 deletions(-)

diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
index d83096b26b0e..3f243951caaa 100644
--- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -217,12 +217,12 @@ def __init__(
     @classmethod
     def get_config(cls):
         return super().get_config()
-    
+
     @staticmethod
     def _is_gemini_3_or_newer(model: str) -> bool:
         """
         Check if the model is Gemini 3 Pro or newer.
-        
+
         Gemini 3 models include:
         - gemini-3-pro-preview
         - Any future Gemini 3.x models
@@ -230,7 +230,7 @@ def _is_gemini_3_or_newer(model: str) -> bool:
         # Check for Gemini 3 models
         if "gemini-3" in model:
             return True
-        
+
         return False
 
     def _supports_penalty_parameters(self, model: str) -> bool:
@@ -260,11 +260,11 @@ def get_supported_openai_params(self, model: str) -> List[str]:
             "parallel_tool_calls",
             "web_search_options",
         ]
-        
+
         # Add penalty parameters only for non-preview models
         if self._supports_penalty_parameters(model):
             supported_params.extend(["frequency_penalty", "presence_penalty"])
-        
+
         if supports_reasoning(model):
             supported_params.append("reasoning_effort")
             supported_params.append("thinking")
@@ -308,14 +308,14 @@ def _extract_google_maps_retrieval_config(
     ) -> Tuple[dict, Optional[dict]]:
         """
         Extract location configuration from googleMaps tool for Vertex AI toolConfig.
-        
+
         Supports two interface styles:
         1. Nested (recommended): {"enableWidget": "...", "retrievalConfig": {"latitude": ..., "longitude": ...}}
         2. Flat (backward compat): {"enableWidget": "...", "latitude": ..., "longitude": ...}
-        
+
         Args:
             google_maps_config: The googleMaps tool configuration from LiteLLM
-        
+
         Returns:
             Tuple of (cleaned_google_maps_config, retrieval_config):
             - cleaned_google_maps_config: googleMaps config without location fields
@@ -325,7 +325,7 @@ def _extract_google_maps_retrieval_config(
         latitude = google_maps_config.get("latitude")
         longitude = google_maps_config.get("longitude")
         language_code = google_maps_config.get("languageCode")
-        
+
         if latitude is not None and longitude is not None:
             retrieval_config = {
                 "latLng": {
@@ -335,21 +335,17 @@ def _extract_google_maps_retrieval_config(
             }
             if language_code is not None:
                 retrieval_config["languageCode"] = language_code
-        
+
         # Remove location fields from tool definition
         cleaned_config = {
             k: v
             for k, v in google_maps_config.items()
             if k not in ["latitude", "longitude", "languageCode"]
         }
-        
+
         return cleaned_config, retrieval_config
-    
-    def get_tool_value(
-        self,
-        tool: dict,
-        tool_name: str
-    ) -> Optional[dict]:
+
+    def get_tool_value(self, tool: dict, tool_name: str) -> Optional[dict]:
         """
         Helper function to get tool value handling both camelCase and underscore_case
         variants
@@ -373,19 +369,19 @@ def get_tool_value(
         else:
             return None
 
-    def _map_function( # noqa: PLR0915
+    def _map_function(  # noqa: PLR0915
         self, value: List[dict], optional_params: dict
     ) -> List[Tools]:
         """
         Map OpenAI-style tools/functions to Vertex AI format.
-        
+
         Args:
             value: List of tool definitions
             optional_params: Request-scoped parameters to store retrieval config
-        
+
         Returns:
             List of mapped tools in Vertex AI format
-        
+
         Side effects:
             May add 'toolConfig' with 'retrievalConfig' to optional_params
             if googleMaps tools contain location data
@@ -432,25 +428,43 @@ def _map_function( # noqa: PLR0915
             tool_name = list(tool.keys())[0] if len(tool.keys()) == 1 else None
 
             if tool_name and (
-                tool_name == "codeExecution" or tool_name == VertexToolName.CODE_EXECUTION.value
+                tool_name == "codeExecution"
+                or tool_name == VertexToolName.CODE_EXECUTION.value
             ):  # code_execution maintained for backwards compatibility
                 code_execution = self.get_tool_value(tool, "codeExecution")
             elif tool_name and tool_name == VertexToolName.GOOGLE_SEARCH.value:
-                googleSearch = self.get_tool_value(tool, VertexToolName.GOOGLE_SEARCH.value)
-            elif tool_name and tool_name == VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value:
-                googleSearchRetrieval = self.get_tool_value(tool, VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value)
+                googleSearch = self.get_tool_value(
+                    tool, VertexToolName.GOOGLE_SEARCH.value
+                )
+            elif (
+                tool_name and tool_name == VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value
+            ):
+                googleSearchRetrieval = self.get_tool_value(
+                    tool, VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value
+                )
             elif tool_name and tool_name == VertexToolName.ENTERPRISE_WEB_SEARCH.value:
-                enterpriseWebSearch = self.get_tool_value(tool, VertexToolName.ENTERPRISE_WEB_SEARCH.value)
-            elif tool_name and (tool_name == VertexToolName.URL_CONTEXT.value or tool_name == "urlContext"):
+                enterpriseWebSearch = self.get_tool_value(
+                    tool, VertexToolName.ENTERPRISE_WEB_SEARCH.value
+                )
+            elif tool_name and (
+                tool_name == VertexToolName.URL_CONTEXT.value
+                or tool_name == "urlContext"
+            ):
                 urlContext = self.get_tool_value(tool, tool_name)
             elif tool_name and (
-                tool_name == VertexToolName.GOOGLE_MAPS.value or tool_name == "google_maps"
+                tool_name == VertexToolName.GOOGLE_MAPS.value
+                or tool_name == "google_maps"
             ):
-                google_maps_value = self.get_tool_value(tool, VertexToolName.GOOGLE_MAPS.value)
-                
+                google_maps_value = self.get_tool_value(
+                    tool, VertexToolName.GOOGLE_MAPS.value
+                )
+
                 # Extract and transform location configuration for toolConfig
                 if google_maps_value is not None:
-                    googleMaps, google_maps_retrieval_config = self._extract_google_maps_retrieval_config(
+                    (
+                        googleMaps,
+                        google_maps_retrieval_config,
+                    ) = self._extract_google_maps_retrieval_config(
                         google_maps_config=google_maps_value
                     )
             elif openai_function_object is not None:
@@ -490,13 +504,15 @@ def _map_function( # noqa: PLR0915
             _tools[VertexToolName.URL_CONTEXT.value] = urlContext
         if googleMaps is not None:
             _tools[VertexToolName.GOOGLE_MAPS.value] = googleMaps
-            
+
             # Add retrieval config to toolConfig if googleMaps has location data
             if google_maps_retrieval_config is not None:
                 if "toolConfig" not in optional_params:
                     optional_params["toolConfig"] = {}
-                optional_params["toolConfig"]["retrievalConfig"] = google_maps_retrieval_config
-        
+                optional_params["toolConfig"][
+                    "retrievalConfig"
+                ] = google_maps_retrieval_config
+
         return [_tools]
 
     def _map_response_schema(self, value: dict) -> dict:
@@ -599,23 +615,27 @@ def _map_reasoning_effort_to_thinking_level(
         Map reasoning_effort to thinking_level for Gemini 3+ models.
 
         Args:
             reasoning_effort: The reasoning effort value
-            model: The model name (for validation, currently unused but kept for consistency)
-        
+            model: The model name
+
         Returns:
-            GeminiThinkingConfig with thinkingLevel set
+            GeminiThinkingConfig with thinkingLevel and includeThoughts
         """
         if reasoning_effort == "minimal":
-            return {"thinkingLevel": "low"}
+            return {"thinkingLevel": "low", "includeThoughts": True}
         elif reasoning_effort == "low":
-            return {"thinkingLevel": "low"}
+            return {"thinkingLevel": "low", "includeThoughts": True}
         elif reasoning_effort == "medium":
-            return {"thinkingLevel": "high"}  # medium is not out yet
+            return {
+                "thinkingLevel": "high",
+                "includeThoughts": True,
+            }  # medium is not out yet
         elif reasoning_effort == "high":
-            return {"thinkingLevel": "high"}
+            return {"thinkingLevel": "high", "includeThoughts": True}
        elif reasoning_effort == "disable":
-            return {"thinkingLevel": "low"}  # gemini 3 cannot fully disable thinking, so we use "low"
+            # Gemini 3 cannot fully disable thinking, so we use "low" but hide thoughts
+            return {"thinkingLevel": "low", "includeThoughts": False}
         elif reasoning_effort == "none":
-            return {"thinkingLevel": "low"}  # gemini 3 cannot fully disable thinking, so we use "low"
+            return {"thinkingLevel": "low", "includeThoughts": False}
         else:
             raise ValueError(f"Invalid reasoning effort: {reasoning_effort}")
@@ -663,7 +683,6 @@ def _validate_thinking_level_conflicts(
                 status_code=400,
             )
 
-
     @staticmethod
     def _map_thinking_param(
         thinking_param: AnthropicThinkingParam,
@@ -835,9 +854,9 @@ def map_openai_params( # noqa: PLR0915
             if VertexGeminiConfig._is_gemini_3_or_newer(model):
                 optional_params[
                     "thinkingConfig"
-                    ] = VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
-                        value, model
-                    )
+                ] = VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
+                    value, model
+                )
             else:
                 optional_params[
                     "thinkingConfig"
@@ -879,7 +898,10 @@ def map_openai_params( # noqa: PLR0915
         if VertexGeminiConfig._is_gemini_3_or_newer(model):
             if "temperature" not in optional_params:
                 optional_params["temperature"] = 1.0
-            if "thinkingConfig" not in optional_params or "thinkingLevel" not in optional_params.get("thinkingConfig", {}):
+            if (
+                "thinkingConfig" not in optional_params
+                or "thinkingLevel" not in optional_params.get("thinkingConfig", {})
+            ):
                 thinking_config = optional_params.get("thinkingConfig", {})
                 thinking_config["thinkingLevel"] = "low"
                 optional_params["thinkingConfig"] = thinking_config
@@ -1147,17 +1169,21 @@ def _transform_parts(
         if "functionCall" in part:
             _function_chunk: ChatCompletionToolCallFunctionChunk = {
                 "name": part["functionCall"]["name"],
-                "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                "arguments": json.dumps(
+                    part["functionCall"]["args"], ensure_ascii=False
+                ),
             }
             # Extract thought signature if present
             thought_signature = part.get("thoughtSignature")
-            
+
             if is_function_call is True:
                 function_dict: Dict[str, Any] = dict(_function_chunk)
                 if thought_signature:
                     if "provider_specific_fields" not in function_dict:
                         function_dict["provider_specific_fields"] = {}
-                    function_dict["provider_specific_fields"]["thought_signature"] = thought_signature
+                    function_dict["provider_specific_fields"][
+                        "thought_signature"
+                    ] = thought_signature
                 function = cast(ChatCompletionToolCallFunctionChunk, function_dict)
             else:
                 _tool_response_chunk: ChatCompletionToolCallChunk = {
@@ -1506,7 +1532,6 @@ def _convert_grounding_metadata_to_annotations(
 
         annotations: List[ChatCompletionAnnotation] = []
 
-
         for metadata in grounding_metadata:
             # Extract groundingSupports - these map text segments to sources
             grounding_supports = metadata.get("groundingSupports", [])
@@ -1527,23 +1552,23 @@ def _convert_grounding_metadata_to_annotations(
                 segment = support.get("segment", {})
                 start_index = segment.get("startIndex")
                 end_index = segment.get("endIndex")
-                
+
                 # Get the chunk indices for this support
                 chunk_indices = support.get("groundingChunkIndices", [])
-                
+
                 if start_index is not None and end_index is not None and chunk_indices:
                     # Use the first chunk's URL for the annotation
                     first_chunk_idx = chunk_indices[0]
                     if first_chunk_idx in chunk_to_uri_map:
                         uri_info = chunk_to_uri_map[first_chunk_idx]
-                        
+
                         url_citation: ChatCompletionAnnotationURLCitation = {
                             "start_index": start_index,
                             "end_index": end_index,
                             "url": uri_info["url"],
                             "title": uri_info["title"],
                         }
-                        
+
                         annotation: ChatCompletionAnnotation = {
                             "type": "url_citation",
                             "url_citation": url_citation,
@@ -1643,9 +1668,11 @@ def _process_candidates( # noqa: PLR0915
                 chat_completion_message["reasoning_content"] = reasoning_content
 
             if candidate_grounding_metadata:
-                annotations = VertexGeminiConfig._convert_grounding_metadata_to_annotations(
-                    grounding_metadata=candidate_grounding_metadata,
-                    content_text=content,
+                annotations = (
+                    VertexGeminiConfig._convert_grounding_metadata_to_annotations(
+                        grounding_metadata=candidate_grounding_metadata,
+                        content_text=content,
+                    )
                 )
                 if annotations:
                     chat_completion_message["annotations"] = annotations  # type: ignore
@@ -1911,7 +1938,9 @@ async def make_call(
     )
 
     try:
-        response = await client.post(api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj)
+        response = await client.post(
+            api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj
+        )
         response.raise_for_status()
     except httpx.HTTPStatusError as e:
         exception_string = str(await e.response.aread())
@@ -1958,7 +1987,9 @@ def make_sync_call(
    if client is None:
         client = HTTPHandler()  # Create a new client if none provided
 
-    response = client.post(api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj)
+    response = client.post(
+        api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj
+    )
 
     if response.status_code != 200 and response.status_code != 201:
         raise VertexAIError(
@@ -2013,7 +2044,6 @@ async def async_streaming(
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> CustomStreamWrapper:
-
         should_use_v1beta1_features = self.is_using_v1beta1_features(
             optional_params=optional_params
         )
@@ -2050,8 +2080,8 @@ async def async_streaming(
             **data,
             vertex_project=vertex_project,
             vertex_location=vertex_location,
-            vertex_auth_header=auth_header)  # type: ignore
-        
+            vertex_auth_header=auth_header,
+        )  # type: ignore
 
         ## LOGGING
         logging_obj.pre_call(
@@ -2144,7 +2174,8 @@ async def async_completion(
             **data,
             vertex_project=vertex_project,
            vertex_location=vertex_location,
-            vertex_auth_header=auth_header)  # type: ignore
+            vertex_auth_header=auth_header,
+        )  # type: ignore
 
         _async_client_params = {}
         if timeout:
@@ -2168,7 +2199,10 @@ async def async_completion(
 
         try:
             response = await client.post(
-                api_base, headers=headers, json=cast(dict, request_body), logging_obj=logging_obj
+                api_base,
+                headers=headers,
+                json=cast(dict, request_body),
+                logging_obj=logging_obj,
             )  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
@@ -2322,9 +2356,10 @@ def completion(
         ## TRANSFORMATION ##
         data = sync_transform_request_body(
             **transform_request_params,
-            vertex_project=vertex_project, 
+            vertex_project=vertex_project,
             vertex_location=vertex_location,
-            vertex_auth_header=auth_header)
+            vertex_auth_header=auth_header,
+        )
 
         ## LOGGING
         logging_obj.pre_call(

diff --git a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
index 2e0e9b4d0122..554bd8e05386 100644
--- a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
+++ b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
@@ -1455,7 +1455,7 @@ def test_is_gemini_3_or_newer():
 
 
 def test_reasoning_effort_maps_to_thinking_level_gemini_3():
-    """Test that reasoning_effort maps to thinking_level for Gemini 3+ models"""
+    """Test that reasoning_effort maps to thinking_level AND includeThoughts for Gemini 3+ models"""
     from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
         VertexGeminiConfig,
     )
@@ -1464,7 +1464,7 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
     model = "gemini-3-pro-preview"
     optional_params = {}
 
-    # Test minimal -> low
+    # Test minimal -> low + includeThoughts=True
     non_default_params = {"reasoning_effort": "minimal"}
     result = v.map_openai_params(
         non_default_params=non_default_params,
@@ -1473,8 +1473,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test low -> low
+    # Test low -> low + includeThoughts=True
     optional_params = {}
     non_default_params = {"reasoning_effort": "low"}
     result = v.map_openai_params(
@@ -1484,8 +1485,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test medium -> high (medium not available yet)
+    # Test medium -> high + includeThoughts=True (medium not available yet)
     optional_params = {}
     non_default_params = {"reasoning_effort": "medium"}
     result = v.map_openai_params(
@@ -1495,8 +1497,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "high"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test high -> high
+    # Test high -> high + includeThoughts=True
     optional_params = {}
     non_default_params = {"reasoning_effort": "high"}
     result = v.map_openai_params(
@@ -1506,8 +1509,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "high"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test disable -> low (cannot fully disable in Gemini 3)
+    # Test disable -> low + includeThoughts=False (cannot fully disable in Gemini 3)
     optional_params = {}
     non_default_params = {"reasoning_effort": "disable"}
     result = v.map_openai_params(
@@ -1517,8 +1521,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is False
 
-    # Test none -> low (cannot fully disable in Gemini 3)
+    # Test none -> low + includeThoughts=False (cannot fully disable in Gemini 3)
     optional_params = {}
     non_default_params = {"reasoning_effort": "none"}
     result = v.map_openai_params(
@@ -1528,6 +1533,7 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is False
 
 
 def test_temperature_default_for_gemini_3():
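
To exercise the new assertions locally, one option is (assumed pytest
invocation; adjust the path to your checkout):

    pytest tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py \
        -k test_reasoning_effort_maps_to_thinking_level_gemini_3 -q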