From da259ee89088ef173da3f5402f50bc3dc8863bc7 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:37:19 -0700 Subject: [PATCH 01/12] fix: propagate finish_reason from LiteLLM responses Fixes #3109 This change ensures that the finish_reason field from LiteLLM responses is properly propagated to LlmResponse objects, enabling callbacks to detect completion conditions like max_tokens truncation. Changes: - Extract finish_reason from LiteLLM response in lite_llm.py - Update tracing.py to handle both enum (Gemini) and string (LiteLLM) finish_reason values - Add comprehensive unit tests for finish_reason propagation The fix allows after_model_callback functions to properly detect: - "length": max_tokens limit reached - "stop": natural completion - "tool_calls": tool invocations - "content_filter": filtered content --- src/google/adk/models/lite_llm.py | 4 + src/google/adk/telemetry/tracing.py | 6 +- tests/unittests/models/test_litellm.py | 140 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index d8b4d7ce81..1ab30d7591 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -483,13 +483,17 @@ def _model_response_to_generate_content_response( """ message = None + finish_reason = None if response.get("choices", None): message = response["choices"][0].get("message", None) + finish_reason = response["choices"][0].get("finish_reason", None) if not message: raise ValueError("No message in response") llm_response = _message_to_generate_content_response(message) + if finish_reason: + llm_response.finish_reason = finish_reason if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 12fbca1d1a..8118d83dc1 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,9 +281,13 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: + if hasattr(llm_response.finish_reason, 'value'): + finish_reason_str = llm_response.finish_reason.value.lower() + else: + finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', - [llm_response.finish_reason.value.lower()], + [finish_reason_str], ) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 84fd7f26d0..d67e364884 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1849,3 +1849,143 @@ def test_non_gemini_litellm_no_warning(): # Test with non-Gemini model LiteLlm(model="openai/gpt-4o") assert len(w) == 0 + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_non_streaming( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="length", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in 
lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + assert response.content.parts[0].text == "Test response" + assert response.finish_reason == "length" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_stop( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='stop' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Complete response", + ), + finish_reason="stop", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "stop" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_propagation_tool_calls( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='tool_calls' is properly propagated.""" + mock_response_with_finish_reason = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + tool_calls=[ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ], + ), + finish_reason="tool_calls", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_finish_reason + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "tool_calls" + + mock_acompletion.assert_called_once() + + +@pytest.mark.asyncio +async def test_finish_reason_content_filter( + mock_acompletion, lite_llm_instance +): + """Test that finish_reason='content_filter' is properly propagated.""" + mock_response_with_content_filter = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="", + ), + finish_reason="content_filter", + ) + ] + ) + mock_acompletion.return_value = mock_response_with_content_filter + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.finish_reason == "content_filter" + + mock_acompletion.assert_called_once() From 0e9b280325d0d622fe5f5ac88a6a7ed7b0f743f9 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:45:32 -0700 Subject: [PATCH 02/12] refactor: address code review feedback - Use .name instead of .value for enum finish_reason (more robust for IntEnum) - Extract first choice using walrus operator for better readability - Consolidate tests using @pytest.mark.parametrize to reduce duplication - Strengthen test assertions to verify response content All 53 tests pass. 
--- src/google/adk/models/lite_llm.py | 8 +- src/google/adk/telemetry/tracing.py | 4 +- tests/unittests/models/test_litellm.py | 162 +++++++------------------ 3 files changed, 54 insertions(+), 120 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 1ab30d7591..d67f5d01d0 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -484,9 +484,11 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None): - message = response["choices"][0].get("message", None) - finish_reason = response["choices"][0].get("finish_reason", None) + if response.get("choices", None) and ( + first_choice := response["choices"][0] + ): + message = first_choice.get("message", None) + finish_reason = first_choice.get("finish_reason", None) if not message: raise ValueError("No message in response") diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 8118d83dc1..1f3c6fdde9 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,8 +281,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'value'): - finish_reason_str = llm_response.finish_reason.value.lower() + if hasattr(llm_response.finish_reason, 'name'): + finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index d67e364884..cdff4881d4 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1851,131 +1851,57 @@ def test_non_gemini_litellm_no_warning(): assert len(w) == 0 +@pytest.mark.parametrize( + "finish_reason,response_content,expected_content,has_tool_calls", + [ + ("length", "Test response", "Test response", False), + ("stop", "Complete response", "Complete response", False), + ( + "tool_calls", + "", + "", + True, + ), + ("content_filter", "", "", False), + ], + ids=["length", "stop", "tool_calls", "content_filter"], +) @pytest.mark.asyncio -async def test_finish_reason_propagation_non_streaming( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason is properly propagated from LiteLLM response in non-streaming mode.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Test response", - ), - finish_reason="length", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.content.role == "model" - assert response.content.parts[0].text == "Test response" - assert response.finish_reason == "length" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_stop( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='stop' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="Complete response", - ), - finish_reason="stop", - ) - ] - ) - 
mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "stop" - - mock_acompletion.assert_called_once() - - -@pytest.mark.asyncio -async def test_finish_reason_propagation_tool_calls( - mock_acompletion, lite_llm_instance +async def test_finish_reason_propagation( + mock_acompletion, + lite_llm_instance, + finish_reason, + response_content, + expected_content, + has_tool_calls, ): - """Test that finish_reason='tool_calls' is properly propagated.""" - mock_response_with_finish_reason = ModelResponse( - choices=[ - Choices( - message=ChatCompletionAssistantMessage( - role="assistant", - content="", - tool_calls=[ - ChatCompletionMessageToolCall( - type="function", - id="test_id", - function=Function( - name="test_function", - arguments='{"arg": "value"}', - ), - ) - ], - ), - finish_reason="tool_calls", - ) - ] - ) - mock_acompletion.return_value = mock_response_with_finish_reason - - llm_request = LlmRequest( - contents=[ - types.Content( - role="user", parts=[types.Part.from_text(text="Test prompt")] - ) - ], - ) - - async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "tool_calls" - - mock_acompletion.assert_called_once() - + """Test that finish_reason is properly propagated from LiteLLM response.""" + tool_calls = None + if has_tool_calls: + tool_calls = [ + ChatCompletionMessageToolCall( + type="function", + id="test_id", + function=Function( + name="test_function", + arguments='{"arg": "value"}', + ), + ) + ] -@pytest.mark.asyncio -async def test_finish_reason_content_filter( - mock_acompletion, lite_llm_instance -): - """Test that finish_reason='content_filter' is properly propagated.""" - mock_response_with_content_filter = ModelResponse( + mock_response = ModelResponse( choices=[ Choices( message=ChatCompletionAssistantMessage( role="assistant", - content="", + content=response_content, + tool_calls=tool_calls, ), - finish_reason="content_filter", + finish_reason=finish_reason, ) ] ) - mock_acompletion.return_value = mock_response_with_content_filter + mock_acompletion.return_value = mock_response llm_request = LlmRequest( contents=[ @@ -1986,6 +1912,12 @@ async def test_finish_reason_content_filter( ) async for response in lite_llm_instance.generate_content_async(llm_request): - assert response.finish_reason == "content_filter" + assert response.content.role == "model" + assert response.finish_reason == finish_reason + if expected_content: + assert response.content.parts[0].text == expected_content + if has_tool_calls: + assert len(response.content.parts) > 0 + assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() From 931fd01113e7d191716ed43d8fe598a30850469a Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:42 -0700 Subject: [PATCH 03/12] Update src/google/adk/models/lite_llm.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index d67f5d01d0..f2f0647751 100644 --- a/src/google/adk/models/lite_llm.py +++ 
b/src/google/adk/models/lite_llm.py @@ -484,9 +484,8 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if response.get("choices", None) and ( - first_choice := response["choices"][0] - ): + if choices := response.get("choices"): + first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From ad838721a4ff8341c786440b380aaa0a1184072f Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:51:53 -0700 Subject: [PATCH 04/12] Update src/google/adk/telemetry/tracing.py Addressing review comments Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/telemetry/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 1f3c6fdde9..90a9a1140b 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,7 +281,7 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if hasattr(llm_response.finish_reason, 'name'): + if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: finish_reason_str = str(llm_response.finish_reason).lower() From 323a0944c2334746d54bc165f1e8bf0af78c8847 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Wed, 8 Oct 2025 10:55:13 -0700 Subject: [PATCH 05/12] fix: update finish_reason type hint to support both enum and string Address type safety issue where finish_reason can be either: - types.FinishReason enum (from Gemini responses) - str (from LiteLLM responses) Updated LlmResponse.finish_reason type hint to: Optional[Union[types.FinishReason, str]] This ensures type checkers correctly validate the dual nature of this field across different model providers. All 53 tests pass. --- src/google/adk/models/llm_response.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 56eb6318c1..c0ab7f7b16 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,6 +16,7 @@ from typing import Any from typing import Optional +from typing import Union from google.genai import types from pydantic import alias_generators @@ -77,8 +78,11 @@ class LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[types.FinishReason] = None - """The finish reason of the response.""" + finish_reason: Optional[Union[types.FinishReason, str]] = None + """The finish reason of the response. + + Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + """ error_code: Optional[str] = None """Error code if the response is an error. 
Code varies by model.""" From 9009b80b21b9e520397294a9fb0813267ca1f8c0 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:11:53 -0700 Subject: [PATCH 06/12] feat: map LiteLLM finish_reason strings to FinishReason enum - Map finish_reason strings to proper FinishReason enum values in lite_llm.py - 'length' -> FinishReason.MAX_TOKENS - 'stop' -> FinishReason.STOP - 'tool_calls'/'function_call' -> FinishReason.STOP - 'content_filter' -> FinishReason.SAFETY - unknown values -> FinishReason.OTHER - Add clarifying comment in tracing.py for string fallback path - Update test_litellm.py to verify enum mapping: - Assert finish_reason is FinishReason enum instance - Verify correct enum values for each finish_reason string - Add test for unknown finish_reason mapping to OTHER Benefits: - Type consistency with Gemini native responses - Avoids runtime warnings from string finish_reason - Enables proper instanceof checks in callbacks - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 16 ++++++++- src/google/adk/telemetry/tracing.py | 1 + tests/unittests/models/test_litellm.py | 49 +++++++++++++++++++++++++- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index f2f0647751..bb59b1a3b4 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -494,7 +494,21 @@ def _model_response_to_generate_content_response( llm_response = _message_to_generate_content_response(message) if finish_reason: - llm_response.finish_reason = finish_reason + # Map LiteLLM finish_reason strings to FinishReason enum + # This provides type consistency with Gemini native responses and avoids warnings + finish_reason_str = str(finish_reason).lower() + if finish_reason_str == "length": + llm_response.finish_reason = types.FinishReason.MAX_TOKENS + elif finish_reason_str == "stop": + llm_response.finish_reason = types.FinishReason.STOP + elif "tool" in finish_reason_str or "function" in finish_reason_str: + # Handle tool_calls, function_call variants + llm_response.finish_reason = types.FinishReason.STOP + elif finish_reason_str == "content_filter": + llm_response.finish_reason = types.FinishReason.SAFETY + else: + # For unknown reasons, use OTHER + llm_response.finish_reason = types.FinishReason.OTHER if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 90a9a1140b..b5aa34bab8 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -284,6 +284,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: + # Fallback for string values (should not occur with LiteLLM after enum mapping) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index cdff4881d4..325b225740 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1913,7 +1913,17 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - assert response.finish_reason == finish_reason + # 
Verify finish_reason is mapped to FinishReason enum, not raw string + assert isinstance(response.finish_reason, types.FinishReason) + # Verify correct enum mapping + if finish_reason == "length": + assert response.finish_reason == types.FinishReason.MAX_TOKENS + elif finish_reason == "stop": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "tool_calls": + assert response.finish_reason == types.FinishReason.STOP + elif finish_reason == "content_filter": + assert response.finish_reason == types.FinishReason.SAFETY if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: @@ -1921,3 +1931,40 @@ async def test_finish_reason_propagation( assert response.content.parts[-1].function_call.name == "test_function" mock_acompletion.assert_called_once() + + + +@pytest.mark.asyncio +async def test_finish_reason_unknown_maps_to_other( + mock_acompletion, lite_llm_instance +): + """Test that unknown finish_reason values map to FinishReason.OTHER.""" + mock_response = ModelResponse( + choices=[ + Choices( + message=ChatCompletionAssistantMessage( + role="assistant", + content="Test response", + ), + finish_reason="unknown_reason_type", + ) + ] + ) + mock_acompletion.return_value = mock_response + + llm_request = LlmRequest( + contents=[ + types.Content( + role="user", parts=[types.Part.from_text(text="Test prompt")] + ) + ], + ) + + async for response in lite_llm_instance.generate_content_async(llm_request): + assert response.content.role == "model" + # Unknown finish_reason should map to OTHER + assert isinstance(response.finish_reason, types.FinishReason) + assert response.finish_reason == types.FinishReason.OTHER + + mock_acompletion.assert_called_once() + From beadc63526c49781f624f6335dc51b3b11a17497 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:40:00 -0700 Subject: [PATCH 07/12] feat: map LiteLLM finish_reason strings to FinishReason enum Maps LiteLLM finish_reason string values to proper FinishReason enum for type consistency with Gemini native responses. 
Changes: - Add _FINISH_REASON_MAPPING dictionary for string->enum conversion - "length" -> FinishReason.MAX_TOKENS - "stop" -> FinishReason.STOP - "tool_calls"/"function_call" -> FinishReason.STOP - "content_filter" -> FinishReason.SAFETY - Unknown values -> FinishReason.OTHER (fallback) - Update finish_reason type hint to Optional[FinishReason] (no Union needed) - Update telemetry tracing to use .name for enum serialization - Add explanatory comments: - Why tool_calls maps to STOP (no TOOL_CALL enum exists) - Docstring clarifies mapping applies to all model providers Tests: - test_finish_reason_propagation: verifies enum mapping for all values - test_finish_reason_unknown_maps_to_other: verifies fallback behavior Benefits: - Type consistency: finish_reason is always FinishReason enum - No runtime warnings from mixed types - Enables proper isinstance() checks in callbacks - Dictionary mapping improves maintainability - Better integration with ADK telemetry --- src/google/adk/models/lite_llm.py | 28 +++++++++++++++------------ src/google/adk/models/llm_response.py | 5 +++-- src/google/adk/telemetry/tracing.py | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index 95c1369245..b1121acba2 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -64,6 +64,19 @@ _NEW_LINE = "\n" _EXCLUDED_PART_FIELD = {"inline_data": {"data"}} +# Mapping of LiteLLM finish_reason strings to FinishReason enum values +# Note: tool_calls/function_call map to STOP because: +# 1. FinishReason.TOOL_CALL enum does not exist (as of google-genai 0.8.0) +# 2. Tool calls represent normal completion (model stopped to invoke tools) +# 3. Gemini native responses use STOP for tool calls (see lite_llm.py:910) +_FINISH_REASON_MAPPING = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "function_call": types.FinishReason.STOP, # Legacy function call variant + "content_filter": types.FinishReason.SAFETY, +} + class ChatCompletionFileUrlObject(TypedDict, total=False): file_data: str @@ -508,18 +521,9 @@ def _model_response_to_generate_content_response( # Map LiteLLM finish_reason strings to FinishReason enum # This provides type consistency with Gemini native responses and avoids warnings finish_reason_str = str(finish_reason).lower() - if finish_reason_str == "length": - llm_response.finish_reason = types.FinishReason.MAX_TOKENS - elif finish_reason_str == "stop": - llm_response.finish_reason = types.FinishReason.STOP - elif "tool" in finish_reason_str or "function" in finish_reason_str: - # Handle tool_calls, function_call variants - llm_response.finish_reason = types.FinishReason.STOP - elif finish_reason_str == "content_filter": - llm_response.finish_reason = types.FinishReason.SAFETY - else: - # For unknown reasons, use OTHER - llm_response.finish_reason = types.FinishReason.OTHER + llm_response.finish_reason = _FINISH_REASON_MAPPING.get( + finish_reason_str, types.FinishReason.OTHER + ) if response.get("usage", None): llm_response.usage_metadata = types.GenerateContentResponseUsageMetadata( prompt_token_count=response["usage"].get("prompt_tokens", 0), diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index c0ab7f7b16..982127bb1d 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -78,10 +78,11 @@ class 
LlmResponse(BaseModel): Only used for streaming mode. """ - finish_reason: Optional[Union[types.FinishReason, str]] = None + finish_reason: Optional[types.FinishReason] = None """The finish reason of the response. - Can be either a types.FinishReason enum (from Gemini) or a string (from LiteLLM). + Always a types.FinishReason enum. String values from underlying model providers + are mapped to corresponding enum values (with fallback to OTHER for unknown values). """ error_code: Optional[str] = None diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index b5aa34bab8..6968f300ba 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -284,7 +284,7 @@ def trace_call_llm( if isinstance(llm_response.finish_reason, types.FinishReason): finish_reason_str = llm_response.finish_reason.name.lower() else: - # Fallback for string values (should not occur with LiteLLM after enum mapping) + # Defensive fallback for string values (should never occur - all values mapped to enum) finish_reason_str = str(llm_response.finish_reason).lower() span.set_attribute( 'gen_ai.response.finish_reasons', From 44cba3c02d3bd9134e9431040eaf8b4c6319cc60 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:49:45 -0700 Subject: [PATCH 08/12] refactor: address bot review suggestions - Simplify tracing.py by removing isinstance check (always enum now) - Refactor test assertions to use dictionary mapping instead of if/elif - Reduce code duplication and improve readability Addresses Gemini Code Assist bot suggestions: - tracing.py: Direct .name access since finish_reason is always enum - test_litellm.py: Dictionary mapping for cleaner test assertions --- src/google/adk/telemetry/tracing.py | 7 ++----- tests/unittests/models/test_litellm.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/google/adk/telemetry/tracing.py b/src/google/adk/telemetry/tracing.py index 6968f300ba..38af06f93d 100644 --- a/src/google/adk/telemetry/tracing.py +++ b/src/google/adk/telemetry/tracing.py @@ -281,11 +281,8 @@ def trace_call_llm( llm_response.usage_metadata.candidates_token_count, ) if llm_response.finish_reason: - if isinstance(llm_response.finish_reason, types.FinishReason): - finish_reason_str = llm_response.finish_reason.name.lower() - else: - # Defensive fallback for string values (should never occur - all values mapped to enum) - finish_reason_str = str(llm_response.finish_reason).lower() + # finish_reason is always FinishReason enum + finish_reason_str = llm_response.finish_reason.name.lower() span.set_attribute( 'gen_ai.response.finish_reasons', [finish_reason_str], diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 14eb97f617..85e6b72fdd 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1967,17 +1967,16 @@ async def test_finish_reason_propagation( async for response in lite_llm_instance.generate_content_async(llm_request): assert response.content.role == "model" - # Verify finish_reason is mapped to FinishReason enum, not raw string + # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping - if finish_reason == "length": - assert response.finish_reason == types.FinishReason.MAX_TOKENS - elif finish_reason == "stop": - assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "tool_calls": - 
assert response.finish_reason == types.FinishReason.STOP - elif finish_reason == "content_filter": - assert response.finish_reason == types.FinishReason.SAFETY + # Verify correct enum mapping using dictionary + expected_mapping = { + "length": types.FinishReason.MAX_TOKENS, + "stop": types.FinishReason.STOP, + "tool_calls": types.FinishReason.STOP, + "content_filter": types.FinishReason.SAFETY, + } + assert response.finish_reason == expected_mapping[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From 8f3e94df34591d20090d24d1b6a07c23d625ad15 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 20:56:55 -0700 Subject: [PATCH 09/12] refactor: use _FINISH_REASON_MAPPING directly in tests Import and use the actual _FINISH_REASON_MAPPING from lite_llm instead of duplicating it in tests. This ensures tests stay in sync with implementation changes automatically. Benefits: - Single source of truth for finish_reason mappings - Tests automatically reflect any future mapping changes - Reduced code duplication - Better maintainability Addresses review comment: https://github.com/google/adk-python/pull/3114#pullrequestreview-3338249498 --- tests/unittests/models/test_litellm.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index 85e6b72fdd..f15bebfe0a 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -19,6 +19,7 @@ import warnings from google.adk.models.lite_llm import _content_to_message_param +from google.adk.models.lite_llm import _FINISH_REASON_MAPPING from google.adk.models.lite_llm import _function_declaration_to_tool_param from google.adk.models.lite_llm import _get_content from google.adk.models.lite_llm import _message_to_generate_content_response @@ -1969,14 +1970,8 @@ async def test_finish_reason_propagation( assert response.content.role == "model" # Verify finish_reason is mapped to FinishReason enum assert isinstance(response.finish_reason, types.FinishReason) - # Verify correct enum mapping using dictionary - expected_mapping = { - "length": types.FinishReason.MAX_TOKENS, - "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, - "content_filter": types.FinishReason.SAFETY, - } - assert response.finish_reason == expected_mapping[finish_reason] + # Verify correct enum mapping using the actual mapping from lite_llm + assert response.finish_reason == _FINISH_REASON_MAPPING[finish_reason] if expected_content: assert response.content.parts[0].text == expected_content if has_tool_calls: From 695352c013129b0288b2b72580fcca03a61f4717 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:06:39 -0700 Subject: [PATCH 10/12] refactor: remove unused Union import from llm_response.py The Union type is no longer needed since finish_reason is always a FinishReason enum (never a string after our mapping). 
Addresses review comment: https://github.com/google/adk-python/pull/3114#discussion_r2431044481 --- src/google/adk/models/llm_response.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py index 982127bb1d..fc5190a277 100644 --- a/src/google/adk/models/llm_response.py +++ b/src/google/adk/models/llm_response.py @@ -16,7 +16,6 @@ from typing import Any from typing import Optional -from typing import Union from google.genai import types from pydantic import alias_generators From d9aaead0f50b0fefdbcba932575b66864da10fcf Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 14 Oct 2025 21:09:45 -0700 Subject: [PATCH 11/12] Apply suggestion from @gemini-code-assist[bot] Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/models/lite_llm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index b1121acba2..f6fcaee279 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -508,7 +508,7 @@ def _model_response_to_generate_content_response( message = None finish_reason = None - if choices := response.get("choices"): + if (choices := response.get("choices")) and choices: first_choice = choices[0] message = first_choice.get("message", None) finish_reason = first_choice.get("finish_reason", None) From fa03b4dbf71cb138ad49f60893813215ac11a670 Mon Sep 17 00:00:00 2001 From: Andrew Grande Date: Tue, 28 Oct 2025 11:14:40 -0700 Subject: [PATCH 12/12] Pyink reformatting --- src/google/adk/models/lite_llm.py | 4 +++- tests/unittests/models/test_litellm.py | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py index f6fcaee279..1964995b5c 100644 --- a/src/google/adk/models/lite_llm.py +++ b/src/google/adk/models/lite_llm.py @@ -72,7 +72,9 @@ _FINISH_REASON_MAPPING = { "length": types.FinishReason.MAX_TOKENS, "stop": types.FinishReason.STOP, - "tool_calls": types.FinishReason.STOP, # Normal completion with tool invocation + "tool_calls": ( + types.FinishReason.STOP + ), # Normal completion with tool invocation "function_call": types.FinishReason.STOP, # Legacy function call variant "content_filter": types.FinishReason.SAFETY, } diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py index f15bebfe0a..9d7e9494e6 100644 --- a/tests/unittests/models/test_litellm.py +++ b/tests/unittests/models/test_litellm.py @@ -1981,7 +1981,6 @@ async def test_finish_reason_propagation( mock_acompletion.assert_called_once() - @pytest.mark.asyncio async def test_finish_reason_unknown_maps_to_other( mock_acompletion, lite_llm_instance @@ -2015,4 +2014,3 @@ async def test_finish_reason_unknown_maps_to_other( assert response.finish_reason == types.FinishReason.OTHER mock_acompletion.assert_called_once() -
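
Usage sketch (not part of the patches above): with finish_reason now always a types.FinishReason enum, an after_model_callback can detect max_tokens truncation the same way for Gemini and LiteLLM models. The callback signature and import paths below follow common ADK patterns and are assumptions; only the LlmResponse.finish_reason semantics come from the patch series itself.

# Hedged sketch: an after_model_callback reacting to finish_reason.
# Assumes the standard ADK callback signature (CallbackContext, LlmResponse);
# the enum mapping ("length" -> MAX_TOKENS, etc.) is what these patches add.
from typing import Optional

from google.adk.agents.callback_context import CallbackContext
from google.adk.models.llm_response import LlmResponse
from google.genai import types


def warn_on_truncation(
    callback_context: CallbackContext, llm_response: LlmResponse
) -> Optional[LlmResponse]:
  # With the mapping in lite_llm.py, LiteLLM's "length" arrives here as
  # FinishReason.MAX_TOKENS, so one enum check covers both providers.
  if llm_response.finish_reason == types.FinishReason.MAX_TOKENS:
    print("Response truncated by max_tokens; consider raising the limit.")
  # Returning None keeps the original response unchanged.
  return None

Note that because "tool_calls" and "function_call" both map to FinishReason.STOP (no dedicated enum value exists), a callback that needs to distinguish tool invocations from plain completions should inspect the response content for function calls rather than rely on finish_reason alone.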