From da259ee89088ef173da3f5402f50bc3dc8863bc7 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:37:19 -0700 Subject: [PATCH 01/12] fix: propagate finish_reason from LiteLLM responses Fixes #3109 This change ensures that the finish_reason field from LiteLLM responses is properly propagated to LlmResponse objects, enabling callbacks to detect completion conditions like max_tokens truncation. Changes: - Extract finish_reason from LiteLLM response in lite_llm.py - Update tracing.py to handle both enum (Gemini) and string (LiteLLM) finish_reason values - Add comprehensive unit tests for finish_reason propagation The fix allows after_model_callback functions to properly detect: - "length": max_tokens limit reached - "stop": natural completion - "tool_calls": tool invocations - "content_filter": filtered content --- src/google/adk/models/lite_llm.py | 4 + src/google/adk/telemetry/tracing.py | 6 +- tests/unittests/models/test_litellm.py | 140 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index d8b4d7ce81..1ab30d7591 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -483,13 +483,17 @@ def _model_response_to_generate_content_response( """ message = None + finish_reason = None if response.get("choices", None): message = response["choices"][0].get("message", None) + finish_reason = response["choices"][0].get("finish_reason", None) if not message: raise ValueError("No message in response") llm_response = _message_to_generate_content_response(message) + if finish_reason: + llm_response.finish_reason = finish_reason if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 12fbca1d1a..8118d83dc1 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,9 +281,13 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: + if hasattr(llm_response.finish_reason, 'value'): + finish_reason_str = llm_response.finish_reason.value.lower() + else: + finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', - [llm_response.finish_reason.value.lower()], + [finish_reason_str], ) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 84fd7f26d0..d67e364884 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1849,3 +1849,143 @@ def test_non_gemini_litellm_no_warning(): # Test with non-Gemini model LiteLlm(model="openai/gpt-4o") assert len(w) == 0 + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_non_streaming( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="length", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in 
lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + assert response.content.parts[0].text == "Test response" + assert response.finish_reason == "length" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_stop( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='stop' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Complete response", + ), + finish_reason="stop", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "stop" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_tool_calls( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='tool_calls' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + tool_calls=[ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ], + ), + finish_reason="tool_calls", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "tool_calls" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_content_filter( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='content_filter' is properly propagated.""" + mock_response_with_content_filter = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + ), + finish_reason="content_filter", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_content_filter + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "content_filter" + + mock_acompletion.assert_called_once() From 0e9b280325d0d622fe5f5ac88a6a7ed7b0f743f9 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:45:32 -0700 Subject: [PATCH 02/12] refactor: address code review feedback - Use .name instead of .value for enum finish_reason (more robust for IntEnum) - Extract first choice using walrus operator for better readability - Consolidate tests using @pytest.mark.parametrize to reduce duplication - Strengthen test assertions to verify response content All 53 tests pass. 
--- src/google/adk/models/lite_llm.py | 8 +- src/google/adk/telemetry/tracing.py | 4 +- tests/unittests/models/test_litellm.py | 162 +++++++------------------ 3 files changed, 54 insertions(+), 120 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 1ab30d7591..d67f5d01d0 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -484,9 +484,11 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None): - message = response["choices"][0].get("message", None) - finish_reason = response["choices"][0].get("finish_reason", None) + if response.get("choices", None) and ( + first_choice := response["choices"][0] + ): + message = first_choice.get("message", None) + finish_reason = first_choice.get("finish_reason", None) if not message: raise ValueError("No message in response") diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 8118d83dc1..1f3c6fdde9 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,8 +281,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'value'): - finish_reason_str = llm_response.finish_reason.value.lower() + if hasattr(llm_response.finish_reason, 'name'): + finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index d67e364884..cdff4881d4 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1851,131 +1851,57 @@ def test_non_gemini_litellm_no_warning(): assert len(w) == 0 +@pytest.mark.parametrize( + "finish_reason,response_content,expected_content,has_tool_calls", + [ + ("length", "Test response", "Test response", False), + ("stop", "Complete response", "Complete response", False), + ( + "tool_calls", + "", + "", + True, + ), + ("content_filter", "", "", False), + ], + ids=["length", "stop", "tool_calls", "content_filter"], +) @pytest.mark.asyncio -async def test_finish_reason_propagation_non_streaming( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Test response", - ), - finish_reason="length", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.content.role == "model" - assert response.content.parts[0].text == "Test response" - assert response.finish_reason == "length" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_stop( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='stop' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Complete response", - ), - finish_reason="stop", - ) - ] - ) - 
mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "stop" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_tool_calls( - mock_acompletion, lite_llm_instance +async def test_finish_reason_propagation( + mock_acompletion, + lite_llm_instance, + finish_reason, + response_content, + expected_content, + has_tool_calls, ): - """Test that finish_reason='tool_calls' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="", - tool_calls=[ - ChatCompletionMessageToolCall( - type="function", - id="test_id", - function=Function( - name="test_function", - arguments='{"arg": "value"}', - ), - ) - ], - ), - finish_reason="tool_calls", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "tool_calls" - - mock_acompletion.assert_called_once() - + """Test that finish_reason is properly propagated from LiteLLM response.""" + tool_calls = None + if has_tool_calls: + tool_calls = [ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ] -@pytest.mark.asyncio -async def test_finish_reason_content_filter( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='content_filter' is properly propagated.""" - mock_response_with_content_filter = ModelResponse( + mock_response = ModelResponse( choices=[ Choices( message=ChatCompletionAssistantMessage( role="assistant", - content="", + content=response_content, + tool_calls=tool_calls, ), - finish_reason="content_filter", + finish_reason=finish_reason, ) ] ) - mock_acompletion.return_value = mock_response_with_content_filter + mock_acompletion.return_value = mock_response llm_request = LlmRequest( contents=[ @@ -1986,6 +1912,12 @@ async def test_finish_reason_content_filter( ) async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "content_filter" + assert response.content.role == "model" + assert response.finish_reason == finish_reason + if expected_content: + assert response.content.parts[0].text == expected_content + if has_tool_calls: + assert len(response.content.parts) > 0 + assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() From 931fd01113e7d191716ed43d8fe598a30850469a Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:42 -0700 Subject: [PATCH 03/12] Update src/google/adk/models/lite_llm.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index d67f5d01d0..f2f0647751 100644 --- a/src/google/adk/models/lite_llm.py +++ 
b/src/google/adk/models/lite_llm.py @@ -484,9 +484,8 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None) and ( - first_choice := response["choices"][0] - ): + if choices := response.get("choices"): + first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From ad838721a4ff8341c786440b380aaa0a1184072f Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:53 -0700 Subject: [PATCH 04/12] Update src/google/adk/telemetry/tracing.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/telemetry/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 1f3c6fdde9..90a9a1140b 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,7 +281,7 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'name'): + if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() From 323a0944c2334746d54bc165f1e8bf0af78c8847 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:55:13 -0700 Subject: [PATCH 05/12] fix: update finish_reason type hint to support both enum and string Address type safety issue where finish_reason can be either: - types.FinishReason enum (from Gemini responses) - str (from LiteLLM responses) Updated LlmResponse.finish_reason type hint to: Optional[Union[types.FinishReason, str]] This ensures type checkers correctly validate the dual nature of this field across different model providers. All 53 tests pass. --- src/google/adk/models/llm_response.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 56eb6318c1..c0ab7f7b16 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,6 +16,7 @@ from typing import Any from typing import Optional +from typing import Union from google.genai import types from pydantic import alias_generators @@ -77,8 +78,11 @@ class LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[types.FinishReason] = None - """The finish reason of the response.""" + finish_reason: Optional[Union[types.FinishReason, str]] = None + """The finish reason of the response. + + Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + """ error_code: Optional[str] = None """Error code if the response is an error. 
Code varies by model.""" From 9009b80b21b9e520397294a9fb0813267ca1f8c0 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:11:53 -0700 Subject: [PATCH 06/12] feat: map LiteLLM finish_reason strings to FinishReason enum - Map finish_reason strings to proper FinishReason enum values in lite_llm.py - 'length' -> FinishReason.MAX_TOKENS - 'stop' -> FinishReason.STOP - 'tool_calls'/'function_call' -> FinishReason.STOP - 'content_filter' -> FinishReason.SAFETY - unknown values -> FinishReason.OTHER - Add clarifying comment in tracing.py for string fallback path - Update test_litellm.py to verify enum mapping: - Assert finish_reason is FinishReason enum instance - Verify correct enum values for each finish_reason string - Add test for unknown finish_reason mapping to OTHER Benefits: - Type consistency with Gemini native responses - Avoids runtime warnings from string finish_reason - Enables proper instanceof checks in callbacks - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 16 ++++++++- src/google/adk/telemetry/tracing.py | 1 + tests/unittests/models/test_litellm.py | 49 +++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index f2f0647751..bb59b1a3b4 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -494,7 +494,21 @@ def _model_response_to_generate_content_response( llm_response = _message_to_generate_content_response(message) if finish_reason: - llm_response.finish_reason = finish_reason + # Map LiteLLM finish_reason strings to FinishReason enum + # This provides type consistency with Gemini native responses and avoids warnings + finish_reason_str = str(finish_reason).lower() + if finish_reason_str == "length": + llm_response.finish_reason = types.FinishReason.MAX_TOKENS + elif finish_reason_str == "stop": + llm_response.finish_reason = types.FinishReason.STOP + elif "tool" in finish_reason_str or "function" in finish_reason_str: + # Handle tool_calls, function_call variants + llm_response.finish_reason = types.FinishReason.STOP + elif finish_reason_str == "content_filter": + llm_response.finish_reason = types.FinishReason.SAFETY + else: + # For unknown reasons, use OTHER + llm_response.finish_reason = types.FinishReason.OTHER if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 90a9a1140b..b5aa34bab8 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -284,6 +284,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: + # Fallback for string values (should not occur with LiteLLM after enum mapping) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index cdff4881d4..325b225740 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1913,7 +1913,17 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - assert response.finish_reason == finish_reason + # 
Verify finish_reason is mapped to FinishReason enum, not raw string + assert isinstance(response.finish_reason, types.FinishReason) + # Verify correct enum mapping + if finish_reason == "length": + assert response.finish_reason == types.FinishReason.MAX_TOKENS + elif finish_reason == "stop": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "tool_calls": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "content_filter": + assert response.finish_reason == types.FinishReason.SAFETY if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: @@ -1921,3 +1931,40 @@ async def test_finish_reason_propagation( assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() + + + +@pytest.mark.asyncio +async def test_finish_reason_unknown_maps_to_other( + mock_acompletion, lite_llm_instance +): + """Test that unknown finish_reason values map to FinishReason.OTHER.""" + mock_response = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="unknown_reason_type", + ) + ] + ) + mock_acompletion.return_value = mock_response + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + # Unknown finish_reason should map to OTHER + assert isinstance(response.finish_reason, types.FinishReason) + assert response.finish_reason == types.FinishReason.OTHER + + mock_acompletion.assert_called_once() + From beadc63526c49781f624f6335dc51b3b11a17497 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:40:00 -0700 Subject: [PATCH 07/12] feat: map LiteLLM finish_reason strings to FinishReason enum Maps LiteLLM finish_reason string values to proper FinishReason enum for type consistency with Gemini native responses. 
Changes: - Add _FINISH_REASON_MAPPING dictionary for string->enum conversion - "length" -> FinishReason.MAX_TOKENS - "stop" -> FinishReason.STOP - "tool_calls"/"function_call" -> FinishReason.STOP - "content_filter" -> FinishReason.SAFETY - Unknown values -> FinishReason.OTHER (fallback) - Update finish_reason type hint to Optional[FinishReason] (no Union needed) - Update telemetry tracing to use .name for enum serialization - Add explanatory comments: - Why tool_calls maps to STOP (no TOOL_CALL enum exists) - Docstring clarifies mapping applies to all model providers Tests: - test_finish_reason_propagation: verifies enum mapping for all values - test_finish_reason_unknown_maps_to_other: verifies fallback behavior Benefits: - Type consistency: finish_reason is always FinishReason enum - No runtime warnings from mixed types - Enables proper isinstance() checks in callbacks - Dictionary mapping improves maintainability - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 28 +++++++++++++++------------ src/google/adk/models/llm_response.py | 5 +++-- src/google/adk/telemetry/tracing.py | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 95c1369245..b1121acba2 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -64,6 +64,19 @@ _NEW_LINE = "\n" _EXCLUDED_PART_FIELD = {"inline_data": {"data"}} +# Mapping of LiteLLM finish_reason strings to FinishReason enum values +# Note: tool_calls/function_call map to STOP because: +# 1. FinishReason.TOOL_CALL enum does not exist (as of google-genai 0.8.0) +# 2. Tool calls represent normal completion (model stopped to invoke tools) +# 3. Gemini native responses use STOP for tool calls (see lite_llm.py:910) +_FINISH_REASON_MAPPING = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "function_call": types.FinishReason.STOP, # Legacy function call variant + "content_filter": types.FinishReason.SAFETY, +} + class ChatCompletionFileUrlObject(TypedDict, total=False): file_data: str @@ -508,18 +521,9 @@ def _model_response_to_generate_content_response( # Map LiteLLM finish_reason strings to FinishReason enum # This provides type consistency with Gemini native responses and avoids warnings finish_reason_str = str(finish_reason).lower() - if finish_reason_str == "length": - llm_response.finish_reason = types.FinishReason.MAX_TOKENS - elif finish_reason_str == "stop": - llm_response.finish_reason = types.FinishReason.STOP - elif "tool" in finish_reason_str or "function" in finish_reason_str: - # Handle tool_calls, function_call variants - llm_response.finish_reason = types.FinishReason.STOP - elif finish_reason_str == "content_filter": - llm_response.finish_reason = types.FinishReason.SAFETY - else: - # For unknown reasons, use OTHER - llm_response.finish_reason = types.FinishReason.OTHER + llm_response.finish_reason = _FINISH_REASON_MAPPING.get( + finish_reason_str, types.FinishReason.OTHER + ) if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index c0ab7f7b16..982127bb1d 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -78,10 +78,11 @@ class 
LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[Union[types.FinishReason, str]] = None + finish_reason: Optional[types.FinishReason] = None """The finish reason of the response. - Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + Always a types.FinishReason enum. String values from underlying model providers + are mapped to corresponding enum values (with fallback to OTHER for unknown values). """ error_code: Optional[str] = None diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index b5aa34bab8..6968f300ba 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -284,7 +284,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: - # Fallback for string values (should not occur with LiteLLM after enum mapping) + # Defensive fallback for string values (should never occur - all values mapped to enum) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', From 44cba3c02d3bd9134e9431040eaf8b4c6319cc60 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:49:45 -0700 Subject: [PATCH 08/12] refactor: address bot review suggestions - Simplify tracing.py by removing isinstance check (always enum now) - Refactor test assertions to use dictionary mapping instead of if/elif - Reduce code duplication and improve readability Addresses Gemini Code Assist bot suggestions: - tracing.py: Direct .name access since finish_reason is always enum - test_litellm.py: Dictionary mapping for cleaner test assertions --- src/google/adk/telemetry/tracing.py | 7 ++----- tests/unittests/models/test_litellm.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 6968f300ba..38af06f93d 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,11 +281,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if isinstance(llm_response.finish_reason, types.FinishReason): - finish_reason_str = llm_response.finish_reason.name.lower() - else: - # Defensive fallback for string values (should never occur - all values mapped to enum) - finish_reason_str = str(llm_response.finish_reason).lower() + # finish_reason is always FinishReason enum + finish_reason_str = llm_response.finish_reason.name.lower() span.set_attribute( 'gen_ai.response.finish_reasons', [finish_reason_str], diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 14eb97f617..85e6b72fdd 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1967,17 +1967,16 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - # Verify finish_reason is mapped to FinishReason enum, not raw string + # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping - if finish_reason == "length": - assert response.finish_reason == types.FinishReason.MAX_TOKENS - elif finish_reason == "stop": - assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "tool_calls": - 
assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "content_filter": - assert response.finish_reason == types.FinishReason.SAFETY + # Verify correct enum mapping using dictionary + expected_mapping = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, + "content_filter": types.FinishReason.SAFETY, + } + assert response.finish_reason == expected_mapping[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From 8f3e94df34591d20090d24d1b6a07c23d625ad15 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:56:55 -0700 Subject: [PATCH 09/12] refactor: use _FINISH_REASON_MAPPING directly in tests Import and use the actual _FINISH_REASON_MAPPING from lite_llm instead of duplicating it in tests. This ensures tests stay in sync with implementation changes automatically. Benefits: - Single source of truth for finish_reason mappings - Tests automatically reflect any future mapping changes - Reduced code duplication - Better maintainability Addresses review comment: https://github.com/google/adk-python/pull/3114#pullrequestreview-3338249498 --- tests/unittests/models/test_litellm.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 85e6b72fdd..f15bebfe0a 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -19,6 +19,7 @@ import warnings from google.adk.models.lite_llm import _content_to_message_param +from google.adk.models.lite_llm import _FINISH_REASON_MAPPING from google.adk.models.lite_llm import _function_declaration_to_tool_param from google.adk.models.lite_llm import _get_content from google.adk.models.lite_llm import _message_to_generate_content_response @@ -1969,14 +1970,8 @@ async def test_finish_reason_propagation( assert response.content.role == "model" # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping using dictionary - expected_mapping = { - "length": types.FinishReason.MAX_TOKENS, - "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, - "content_filter": types.FinishReason.SAFETY, - } - assert response.finish_reason == expected_mapping[finish_reason] + # Verify correct enum mapping using the actual mapping from lite_llm + assert response.finish_reason == _FINISH_REASON_MAPPING[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From 695352c013129b0288b2b72580fcca03a61f4717 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:06:39 -0700 Subject: [PATCH 10/12] refactor: remove unused Union import from llm_response.py The Union type is no longer needed since finish_reason is always a FinishReason enum (never a string after our mapping). 
Addresses review comment: https://github.com/google/adk-python/pull/3114#discussion_r2431044481 --- src/google/adk/models/llm_response.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 982127bb1d..fc5190a277 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,7 +16,6 @@ from typing import Any from typing import Optional -from typing import Union from google.genai import types from pydantic import alias_generators From d9aaead0f50b0fefdbcba932575b66864da10fcf Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:09:45 -0700 Subject: [PATCH 11/12] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index b1121acba2..f6fcaee279 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -508,7 +508,7 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if choices := response.get("choices"): + if (choices := response.get("choices")) and choices: first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From fa03b4dbf71cb138ad49f60893813215ac11a670 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 28 Oct 2025 11:14:40 -0700 Subject: [PATCH 12/12] Pyink reformatting --- src/google/adk/models/lite_llm.py | 4 +++- tests/unittests/models/test_litellm.py | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index f6fcaee279..1964995b5c 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -72,7 +72,9 @@ _FINISH_REASON_MAPPING = { "length": types.FinishReason.MAX_TOKENS, "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "tool_calls": ( + types.FinishReason.STOP + ), # Normal completion with tool invocation "function_call": types.FinishReason.STOP, # Legacy function call variant "content_filter": types.FinishReason.SAFETY, } diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index f15bebfe0a..9d7e9494e6 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1981,7 +1981,6 @@ async def test_finish_reason_propagation( mock_acompletion.assert_called_once() - @pytest.mark.asyncio async def test_finish_reason_unknown_maps_to_other( mock_acompletion, lite_llm_instance @@ -2015,4 +2014,3 @@ async def test_finish_reason_unknown_maps_to_other( assert response.finish_reason == types.FinishReason.OTHER mock_acompletion.assert_called_once() -
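
Usage sketch (not part of the patches above): with finish_reason now always a types.FinishReason enum, an after_model_callback can detect max_tokens truncation the same way for Gemini and LiteLLM models. The callback signature and import paths below follow common ADK patterns and are assumptions; only the LlmResponse.finish_reason semantics come from the patch series itself.

# Hedged sketch: an after_model_callback reacting to finish_reason.
# Assumes the standard ADK callback signature (CallbackContext, LlmResponse);
# the enum mapping ("length" -> MAX_TOKENS, etc.) is what these patches add.
from typing import Optional

from google.adk.agents.callback_context import CallbackContext
from google.adk.models.llm_response import LlmResponse
from google.genai import types


def warn_on_truncation(
    callback_context: CallbackContext, llm_response: LlmResponse
) -> Optional[LlmResponse]:
  # With the mapping in lite_llm.py, LiteLLM's "length" arrives here as
  # FinishReason.MAX_TOKENS, so one enum check covers both providers.
  if llm_response.finish_reason == types.FinishReason.MAX_TOKENS:
    print("Response truncated by max_tokens; consider raising the limit.")
  # Returning None keeps the original response unchanged.
  return None

Note that because "tool_calls" and "function_call" both map to FinishReason.STOP (no dedicated enum value exists), a callback that needs to distinguish tool invocations from plain completions should inspect the response content for function calls rather than rely on finish_reason alone.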