From 41b395ce847a744c2cdcf89cc014f97f2afc66f3 Mon Sep 17 00:00:00 2001
From: Sebastian
Date: Wed, 19 Nov 2025 13:38:13 -0500
Subject: [PATCH] fix(vertex_ai): add includeThoughts=True for Gemini 3
 reasoning_effort

Gemini 3 models require 'includeThoughts: True' in thinkingConfig before
they return the actual thought text. Previously, using reasoning_effort set
'thinkingLevel' but omitted the boolean flag, so reasoning_content came back
empty.

This fix:
1. Updates `_map_reasoning_effort_to_thinking_level` to set
   `includeThoughts: True` for minimal/low/medium/high, and
   `includeThoughts: False` for disable/none.
2. Adds unit tests to verify the config mapping.
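
For illustration, the new mapping behaves as follows (a sketch; the helper
is internal to VertexGeminiConfig and the model id is just an example):

    from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
        VertexGeminiConfig,
    )

    VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
        "low", "gemini-3-pro-preview"
    )
    # -> {"thinkingLevel": "low", "includeThoughts": True}

    VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
        "disable", "gemini-3-pro-preview"
    )
    # -> {"thinkingLevel": "low", "includeThoughts": False}

End to end (assumed usage), reasoning_content should now be populated
instead of empty:

    import litellm

    response = litellm.completion(
        model="vertex_ai/gemini-3-pro-preview",  # example model id
        messages=[{"role": "user", "content": "Why is the sky blue?"}],
        reasoning_effort="low",  # now also sets includeThoughts=True
    )
    print(response.choices[0].message.reasoning_content)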
---
 .../vertex_and_google_ai_studio_gemini.py     | 171 +++++++++++-------
 ...test_vertex_and_google_ai_studio_gemini.py |  20 +-
 2 files changed, 116 insertions(+), 75 deletions(-)

diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
index d83096b26b0e..3f243951caaa 100644
--- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
+++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py
@@ -217,12 +217,12 @@ def __init__(
     @classmethod
     def get_config(cls):
         return super().get_config()
-    
+
     @staticmethod
     def _is_gemini_3_or_newer(model: str) -> bool:
         """
         Check if the model is Gemini 3 Pro or newer.
-        
+
         Gemini 3 models include:
         - gemini-3-pro-preview
         - Any future Gemini 3.x models
@@ -230,7 +230,7 @@ def _is_gemini_3_or_newer(model: str) -> bool:
         # Check for Gemini 3 models
         if "gemini-3" in model:
             return True
-        
+
         return False
 
     def _supports_penalty_parameters(self, model: str) -> bool:
@@ -260,11 +260,11 @@ def get_supported_openai_params(self, model: str) -> List[str]:
             "parallel_tool_calls",
             "web_search_options",
         ]
-        
+
         # Add penalty parameters only for non-preview models
         if self._supports_penalty_parameters(model):
             supported_params.extend(["frequency_penalty", "presence_penalty"])
-        
+
         if supports_reasoning(model):
             supported_params.append("reasoning_effort")
             supported_params.append("thinking")
@@ -308,14 +308,14 @@ def _extract_google_maps_retrieval_config(
     ) -> Tuple[dict, Optional[dict]]:
         """
         Extract location configuration from googleMaps tool for Vertex AI toolConfig.
-        
+
         Supports two interface styles:
         1. Nested (recommended): {"enableWidget": "...", "retrievalConfig": {"latitude": ..., "longitude": ...}}
         2. Flat (backward compat): {"enableWidget": "...", "latitude": ..., "longitude": ...}
-        
+
         Args:
             google_maps_config: The googleMaps tool configuration from LiteLLM
-        
+
         Returns:
             Tuple of (cleaned_google_maps_config, retrieval_config):
             - cleaned_google_maps_config: googleMaps config without location fields
@@ -325,7 +325,7 @@ def _extract_google_maps_retrieval_config(
         latitude = google_maps_config.get("latitude")
         longitude = google_maps_config.get("longitude")
         language_code = google_maps_config.get("languageCode")
-        
+
         if latitude is not None and longitude is not None:
             retrieval_config = {
                 "latLng": {
@@ -335,21 +335,17 @@ def _extract_google_maps_retrieval_config(
             }
             if language_code is not None:
                 retrieval_config["languageCode"] = language_code
-        
+
         # Remove location fields from tool definition
         cleaned_config = {
             k: v
             for k, v in google_maps_config.items()
             if k not in ["latitude", "longitude", "languageCode"]
         }
-        
+
         return cleaned_config, retrieval_config
-    
-    def get_tool_value(
-        self,
-        tool: dict,
-        tool_name: str
-    ) -> Optional[dict]:
+
+    def get_tool_value(self, tool: dict, tool_name: str) -> Optional[dict]:
         """
         Helper function to get tool value handling both camelCase and underscore_case
         variants
@@ -373,19 +369,19 @@ def get_tool_value(
         else:
             return None
 
-    def _map_function( # noqa: PLR0915
+    def _map_function(  # noqa: PLR0915
         self, value: List[dict], optional_params: dict
     ) -> List[Tools]:
         """
         Map OpenAI-style tools/functions to Vertex AI format.
-        
+
         Args:
             value: List of tool definitions
             optional_params: Request-scoped parameters to store retrieval config
-        
+
         Returns:
             List of mapped tools in Vertex AI format
-        
+
         Side effects:
             May add 'toolConfig' with 'retrievalConfig' to optional_params
             if googleMaps tools contain location data
@@ -432,25 +428,43 @@ def _map_function( # noqa: PLR0915
             tool_name = list(tool.keys())[0] if len(tool.keys()) == 1 else None
 
             if tool_name and (
-                tool_name == "codeExecution" or tool_name == VertexToolName.CODE_EXECUTION.value
+                tool_name == "codeExecution"
+                or tool_name == VertexToolName.CODE_EXECUTION.value
             ):  # code_execution maintained for backwards compatibility
                 code_execution = self.get_tool_value(tool, "codeExecution")
             elif tool_name and tool_name == VertexToolName.GOOGLE_SEARCH.value:
-                googleSearch = self.get_tool_value(tool, VertexToolName.GOOGLE_SEARCH.value)
-            elif tool_name and tool_name == VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value:
-                googleSearchRetrieval = self.get_tool_value(tool, VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value)
+                googleSearch = self.get_tool_value(
+                    tool, VertexToolName.GOOGLE_SEARCH.value
+                )
+            elif (
+                tool_name and tool_name == VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value
+            ):
+                googleSearchRetrieval = self.get_tool_value(
+                    tool, VertexToolName.GOOGLE_SEARCH_RETRIEVAL.value
+                )
             elif tool_name and tool_name == VertexToolName.ENTERPRISE_WEB_SEARCH.value:
-                enterpriseWebSearch = self.get_tool_value(tool, VertexToolName.ENTERPRISE_WEB_SEARCH.value)
-            elif tool_name and (tool_name == VertexToolName.URL_CONTEXT.value or tool_name == "urlContext"):
+                enterpriseWebSearch = self.get_tool_value(
+                    tool, VertexToolName.ENTERPRISE_WEB_SEARCH.value
+                )
+            elif tool_name and (
+                tool_name == VertexToolName.URL_CONTEXT.value
+                or tool_name == "urlContext"
+            ):
                 urlContext = self.get_tool_value(tool, tool_name)
             elif tool_name and (
-                tool_name == VertexToolName.GOOGLE_MAPS.value or tool_name == "google_maps"
+                tool_name == VertexToolName.GOOGLE_MAPS.value
+                or tool_name == "google_maps"
             ):
-                google_maps_value = self.get_tool_value(tool, VertexToolName.GOOGLE_MAPS.value)
-                
+                google_maps_value = self.get_tool_value(
+                    tool, VertexToolName.GOOGLE_MAPS.value
+                )
+
                 # Extract and transform location configuration for toolConfig
                 if google_maps_value is not None:
-                    googleMaps, google_maps_retrieval_config = self._extract_google_maps_retrieval_config(
+                    (
+                        googleMaps,
+                        google_maps_retrieval_config,
+                    ) = self._extract_google_maps_retrieval_config(
                         google_maps_config=google_maps_value
                     )
             elif openai_function_object is not None:
@@ -490,13 +504,15 @@ def _map_function( # noqa: PLR0915
             _tools[VertexToolName.URL_CONTEXT.value] = urlContext
         if googleMaps is not None:
             _tools[VertexToolName.GOOGLE_MAPS.value] = googleMaps
-            
+
             # Add retrieval config to toolConfig if googleMaps has location data
             if google_maps_retrieval_config is not None:
                 if "toolConfig" not in optional_params:
                     optional_params["toolConfig"] = {}
-                optional_params["toolConfig"]["retrievalConfig"] = google_maps_retrieval_config
-        
+                optional_params["toolConfig"][
+                    "retrievalConfig"
+                ] = google_maps_retrieval_config
+
         return [_tools]
 
     def _map_response_schema(self, value: dict) -> dict:
@@ -599,23 +615,27 @@ def _map_reasoning_effort_to_thinking_level(
         Map reasoning_effort to thinking_level for Gemini 3+ models.
 
         Args:
             reasoning_effort: The reasoning effort value
-            model: The model name (for validation, currently unused but kept for consistency)
-        
+            model: The model name
+
         Returns:
-            GeminiThinkingConfig with thinkingLevel set
+            GeminiThinkingConfig with thinkingLevel and includeThoughts
         """
         if reasoning_effort == "minimal":
-            return {"thinkingLevel": "low"}
+            return {"thinkingLevel": "low", "includeThoughts": True}
         elif reasoning_effort == "low":
-            return {"thinkingLevel": "low"}
+            return {"thinkingLevel": "low", "includeThoughts": True}
         elif reasoning_effort == "medium":
-            return {"thinkingLevel": "high"}  # medium is not out yet
+            return {
+                "thinkingLevel": "high",
+                "includeThoughts": True,
+            }  # medium is not out yet
         elif reasoning_effort == "high":
-            return {"thinkingLevel": "high"}
+            return {"thinkingLevel": "high", "includeThoughts": True}
        elif reasoning_effort == "disable":
-            return {"thinkingLevel": "low"}  # gemini 3 cannot fully disable thinking, so we use "low"
+            # Gemini 3 cannot fully disable thinking, so we use "low" but hide thoughts
+            return {"thinkingLevel": "low", "includeThoughts": False}
         elif reasoning_effort == "none":
-            return {"thinkingLevel": "low"}  # gemini 3 cannot fully disable thinking, so we use "low"
+            return {"thinkingLevel": "low", "includeThoughts": False}
         else:
             raise ValueError(f"Invalid reasoning effort: {reasoning_effort}")
@@ -663,7 +683,6 @@ def _validate_thinking_level_conflicts(
                 status_code=400,
             )
 
-
     @staticmethod
     def _map_thinking_param(
         thinking_param: AnthropicThinkingParam,
@@ -835,9 +854,9 @@ def map_openai_params( # noqa: PLR0915
             if VertexGeminiConfig._is_gemini_3_or_newer(model):
                 optional_params[
                     "thinkingConfig"
-                    ] = VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
-                        value, model
-                    )
+                ] = VertexGeminiConfig._map_reasoning_effort_to_thinking_level(
+                    value, model
+                )
             else:
                 optional_params[
                     "thinkingConfig"
@@ -879,7 +898,10 @@ def map_openai_params( # noqa: PLR0915
         if VertexGeminiConfig._is_gemini_3_or_newer(model):
             if "temperature" not in optional_params:
                 optional_params["temperature"] = 1.0
-            if "thinkingConfig" not in optional_params or "thinkingLevel" not in optional_params.get("thinkingConfig", {}):
+            if (
+                "thinkingConfig" not in optional_params
+                or "thinkingLevel" not in optional_params.get("thinkingConfig", {})
+            ):
                 thinking_config = optional_params.get("thinkingConfig", {})
                 thinking_config["thinkingLevel"] = "low"
                 optional_params["thinkingConfig"] = thinking_config
@@ -1147,17 +1169,21 @@ def _transform_parts(
         if "functionCall" in part:
             _function_chunk: ChatCompletionToolCallFunctionChunk = {
                 "name": part["functionCall"]["name"],
-                "arguments": json.dumps(part["functionCall"]["args"], ensure_ascii=False),
+                "arguments": json.dumps(
+                    part["functionCall"]["args"], ensure_ascii=False
+                ),
             }
             # Extract thought signature if present
             thought_signature = part.get("thoughtSignature")
-            
+
             if is_function_call is True:
                 function_dict: Dict[str, Any] = dict(_function_chunk)
                 if thought_signature:
                     if "provider_specific_fields" not in function_dict:
                         function_dict["provider_specific_fields"] = {}
-                    function_dict["provider_specific_fields"]["thought_signature"] = thought_signature
+                    function_dict["provider_specific_fields"][
+                        "thought_signature"
+                    ] = thought_signature
                 function = cast(ChatCompletionToolCallFunctionChunk, function_dict)
             else:
                 _tool_response_chunk: ChatCompletionToolCallChunk = {
@@ -1506,7 +1532,6 @@ def _convert_grounding_metadata_to_annotations(
 
         annotations: List[ChatCompletionAnnotation] = []
 
-
         for metadata in grounding_metadata:
             # Extract groundingSupports - these map text segments to sources
             grounding_supports = metadata.get("groundingSupports", [])
@@ -1527,23 +1552,23 @@ def _convert_grounding_metadata_to_annotations(
                 segment = support.get("segment", {})
                 start_index = segment.get("startIndex")
                 end_index = segment.get("endIndex")
-                
+
                 # Get the chunk indices for this support
                 chunk_indices = support.get("groundingChunkIndices", [])
-                
+
                 if start_index is not None and end_index is not None and chunk_indices:
                     # Use the first chunk's URL for the annotation
                     first_chunk_idx = chunk_indices[0]
                     if first_chunk_idx in chunk_to_uri_map:
                         uri_info = chunk_to_uri_map[first_chunk_idx]
-                        
+
                         url_citation: ChatCompletionAnnotationURLCitation = {
                             "start_index": start_index,
                             "end_index": end_index,
                             "url": uri_info["url"],
                             "title": uri_info["title"],
                         }
-                        
+
                         annotation: ChatCompletionAnnotation = {
                             "type": "url_citation",
                             "url_citation": url_citation,
@@ -1643,9 +1668,11 @@ def _process_candidates( # noqa: PLR0915
                 chat_completion_message["reasoning_content"] = reasoning_content
 
             if candidate_grounding_metadata:
-                annotations = VertexGeminiConfig._convert_grounding_metadata_to_annotations(
-                    grounding_metadata=candidate_grounding_metadata,
-                    content_text=content,
+                annotations = (
+                    VertexGeminiConfig._convert_grounding_metadata_to_annotations(
+                        grounding_metadata=candidate_grounding_metadata,
+                        content_text=content,
+                    )
                 )
                 if annotations:
                     chat_completion_message["annotations"] = annotations  # type: ignore
@@ -1911,7 +1938,9 @@ async def make_call(
     )
 
     try:
-        response = await client.post(api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj)
+        response = await client.post(
+            api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj
+        )
         response.raise_for_status()
     except httpx.HTTPStatusError as e:
         exception_string = str(await e.response.aread())
@@ -1958,7 +1987,9 @@ def make_sync_call(
    if client is None:
         client = HTTPHandler()  # Create a new client if none provided
 
-    response = client.post(api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj)
+    response = client.post(
+        api_base, headers=headers, data=data, stream=True, logging_obj=logging_obj
+    )
 
     if response.status_code != 200 and response.status_code != 201:
         raise VertexAIError(
@@ -2013,7 +2044,6 @@ async def async_streaming(
         gemini_api_key: Optional[str] = None,
         extra_headers: Optional[dict] = None,
     ) -> CustomStreamWrapper:
-
         should_use_v1beta1_features = self.is_using_v1beta1_features(
             optional_params=optional_params
         )
@@ -2050,8 +2080,8 @@ async def async_streaming(
             **data,
             vertex_project=vertex_project,
             vertex_location=vertex_location,
-            vertex_auth_header=auth_header)  # type: ignore
-        
+            vertex_auth_header=auth_header,
+        )  # type: ignore
 
         ## LOGGING
         logging_obj.pre_call(
@@ -2144,7 +2174,8 @@ async def async_completion(
             **data,
             vertex_project=vertex_project,
            vertex_location=vertex_location,
-            vertex_auth_header=auth_header)  # type: ignore
+            vertex_auth_header=auth_header,
+        )  # type: ignore
 
         _async_client_params = {}
         if timeout:
@@ -2168,7 +2199,10 @@ async def async_completion(
 
         try:
             response = await client.post(
-                api_base, headers=headers, json=cast(dict, request_body), logging_obj=logging_obj
+                api_base,
+                headers=headers,
+                json=cast(dict, request_body),
+                logging_obj=logging_obj,
             )  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
@@ -2322,9 +2356,10 @@ def completion(
         ## TRANSFORMATION ##
         data = sync_transform_request_body(
             **transform_request_params,
-            vertex_project=vertex_project, 
+            vertex_project=vertex_project,
             vertex_location=vertex_location,
-            vertex_auth_header=auth_header)
+            vertex_auth_header=auth_header,
+        )
 
         ## LOGGING
         logging_obj.pre_call(

diff --git a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
index 2e0e9b4d0122..554bd8e05386 100644
--- a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
+++ b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py
@@ -1455,7 +1455,7 @@ def test_is_gemini_3_or_newer():
 
 
 def test_reasoning_effort_maps_to_thinking_level_gemini_3():
-    """Test that reasoning_effort maps to thinking_level for Gemini 3+ models"""
+    """Test that reasoning_effort maps to thinking_level AND includeThoughts for Gemini 3+ models"""
     from litellm.llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
         VertexGeminiConfig,
     )
@@ -1464,7 +1464,7 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
     model = "gemini-3-pro-preview"
     optional_params = {}
 
-    # Test minimal -> low
+    # Test minimal -> low + includeThoughts=True
     non_default_params = {"reasoning_effort": "minimal"}
     result = v.map_openai_params(
         non_default_params=non_default_params,
@@ -1473,8 +1473,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test low -> low
+    # Test low -> low + includeThoughts=True
     optional_params = {}
     non_default_params = {"reasoning_effort": "low"}
     result = v.map_openai_params(
@@ -1484,8 +1485,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test medium -> high (medium not available yet)
+    # Test medium -> high + includeThoughts=True (medium not available yet)
     optional_params = {}
     non_default_params = {"reasoning_effort": "medium"}
     result = v.map_openai_params(
@@ -1495,8 +1497,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "high"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test high -> high
+    # Test high -> high + includeThoughts=True
     optional_params = {}
     non_default_params = {"reasoning_effort": "high"}
     result = v.map_openai_params(
@@ -1506,8 +1509,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "high"
+    assert result["thinkingConfig"]["includeThoughts"] is True
 
-    # Test disable -> low (cannot fully disable in Gemini 3)
+    # Test disable -> low + includeThoughts=False (cannot fully disable in Gemini 3)
     optional_params = {}
     non_default_params = {"reasoning_effort": "disable"}
     result = v.map_openai_params(
@@ -1517,8 +1521,9 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is False
 
-    # Test none -> low (cannot fully disable in Gemini 3)
+    # Test none -> low + includeThoughts=False (cannot fully disable in Gemini 3)
     optional_params = {}
     non_default_params = {"reasoning_effort": "none"}
     result = v.map_openai_params(
@@ -1528,6 +1533,7 @@ def test_reasoning_effort_maps_to_thinking_level_gemini_3():
         drop_params=False,
     )
     assert result["thinkingConfig"]["thinkingLevel"] == "low"
+    assert result["thinkingConfig"]["includeThoughts"] is False
 
 
 def test_temperature_default_for_gemini_3():
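
To exercise the new assertions locally, one option is (assumed pytest
invocation; adjust the path to your checkout):

    pytest tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py \
        -k test_reasoning_effort_maps_to_thinking_level_gemini_3 -q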