From d144cbae744d7a3f88bca96d08f581ff5055f69e Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Tue, 31 Mar 2026 09:59:03 +0200 Subject: [PATCH 01/13] fix: add LiteLLM custom provider override for hosted OpenAI-compatible endpoints --- .../algo/ai_handlers/litellm_ai_handler.py | 4 + pr_agent/settings/configuration.toml | 1 + .../unittest/test_litellm_custom_provider.py | 77 +++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 tests/unittest/test_litellm_custom_provider.py diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index de9993284d..26b30e42a6 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -395,6 +395,10 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: # Support for custom OpenAI body fields (e.g., Flex Processing) kwargs = _process_litellm_extra_body(kwargs) + custom_llm_provider = get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", None) + if custom_llm_provider: + kwargs["custom_llm_provider"] = custom_llm_provider + # Support for Bedrock custom inference profile via model_id model_id = get_settings().get("litellm.model_id") if model_id and 'bedrock/' in model: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 16ffbcae2a..1f361cb9a5 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -323,6 +323,7 @@ enable_callbacks = false success_callback = [] failure_callback = [] service_callback = [] +# custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock [pr_similar_issue] diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py new file mode 100644 index 0000000000..7c383f4c0a --- /dev/null +++ b/tests/unittest/test_litellm_custom_provider.py @@ -0,0 +1,77 @@ +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +import pr_agent.algo.ai_handlers.litellm_ai_handler as litellm_handler +from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler + + +def create_mock_settings(custom_llm_provider=None): + litellm_settings = type("", (), {"get": lambda self, key, default=None: default})() + if custom_llm_provider is not None: + litellm_settings.custom_llm_provider = custom_llm_provider + + def get_value(key, default=None): + values = { + "LITELLM.CUSTOM_LLM_PROVIDER": custom_llm_provider, + } + return values.get(key, default) + + return type("", (), { + "config": type("", (), { + "ai_timeout": 120, + "custom_reasoning_model": False, + "verbosity_level": 0, + "get": lambda self, key, default=None: default, + })(), + "litellm": litellm_settings, + "get": staticmethod(get_value), + })() + + +def create_mock_acompletion_response(): + mock_response = MagicMock() + mock_response.__getitem__ = lambda self, key: { + "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}] + }[key] + mock_response.dict.return_value = {"choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]} + return mock_response + + +@pytest.mark.asyncio +async def test_custom_llm_provider_is_forwarded_without_rewriting_model(monkeypatch): + fake_settings = create_mock_settings("openai") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler.chat_completion( + model="claude-sonnet-4-5", + system="test system", + user="test user", + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["model"] == "claude-sonnet-4-5" + assert call_kwargs["custom_llm_provider"] == "openai" + + +@pytest.mark.asyncio +async def test_custom_llm_provider_is_omitted_when_unset(monkeypatch): + fake_settings = create_mock_settings() + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler.chat_completion( + model="claude-sonnet-4-5", + system="test system", + user="test user", + ) + + call_kwargs = mock_completion.call_args[1] + assert "custom_llm_provider" not in call_kwargs From fa0e7f4991446e6063c1a16641ce696fe61f46f7 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Tue, 21 Apr 2026 09:34:57 +0200 Subject: [PATCH 02/13] :bug: Fix: Force streaming for OpenAI-compatible LiteLLM responses --- .gitignore | 1 + .../algo/ai_handlers/litellm_ai_handler.py | 17 ++++- .../unittest/test_litellm_custom_provider.py | 71 +++++++++++++++---- 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 8da05107fd..d13345bd17 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ .venv/ venv/ pr_agent/settings/.secrets.toml +pr_agent/settings_prod/.secrets.toml __pycache__ dist/ *.egg-info/ diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 26b30e42a6..5ca7957bae 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -446,9 +446,22 @@ async def _get_completion(self, **kwargs): Wrapper that automatically handles streaming for required models. """ model = kwargs["model"] - if model in self.streaming_required_models: + custom_llm_provider = kwargs.get("custom_llm_provider") + api_base = (kwargs.get("api_base") or "").lower() + force_streaming = ( + custom_llm_provider == "openai" + and "snowflakecomputing.com" in api_base + ) + + # Some OpenAI-compatible endpoints can return an empty-string + # finish_reason on non-streaming responses, which LiteLLM rejects during + # response normalization. Streaming avoids that conversion path. + if model in self.streaming_required_models or force_streaming: kwargs["stream"] = True - get_logger().info(f"Using streaming mode for model {model}") + if force_streaming and model not in self.streaming_required_models: + get_logger().info(f"Using streaming mode for model {model} due to OpenAI-compatible endpoint compatibility") + else: + get_logger().info(f"Using streaming mode for model {model}") response = await acompletion(**kwargs) resp, finish_reason = await _handle_streaming_response(response) # Create MockResponse for streaming since we don't have the full response object diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index 7c383f4c0a..488d04f17f 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -17,16 +17,24 @@ def get_value(key, default=None): } return values.get(key, default) - return type("", (), { - "config": type("", (), { - "ai_timeout": 120, - "custom_reasoning_model": False, - "verbosity_level": 0, - "get": lambda self, key, default=None: default, - })(), - "litellm": litellm_settings, - "get": staticmethod(get_value), - })() + return type( + "", + (), + { + "config": type( + "", + (), + { + "ai_timeout": 120, + "custom_reasoning_model": False, + "verbosity_level": 0, + "get": lambda self, key, default=None: default, + }, + )(), + "litellm": litellm_settings, + "get": staticmethod(get_value), + }, + )() def create_mock_acompletion_response(): @@ -34,7 +42,9 @@ def create_mock_acompletion_response(): mock_response.__getitem__ = lambda self, key: { "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}] }[key] - mock_response.dict.return_value = {"choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]} + mock_response.dict.return_value = { + "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}] + } return mock_response @@ -43,7 +53,10 @@ async def test_custom_llm_provider_is_forwarded_without_rewriting_model(monkeypa fake_settings = create_mock_settings("openai") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) - with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion: + with patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion: mock_completion.return_value = create_mock_acompletion_response() handler = LiteLLMAIHandler() @@ -63,7 +76,10 @@ async def test_custom_llm_provider_is_omitted_when_unset(monkeypatch): fake_settings = create_mock_settings() monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) - with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion: + with patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion: mock_completion.return_value = create_mock_acompletion_response() handler = LiteLLMAIHandler() @@ -75,3 +91,32 @@ async def test_custom_llm_provider_is_omitted_when_unset(monkeypatch): call_kwargs = mock_completion.call_args[1] assert "custom_llm_provider" not in call_kwargs + + +@pytest.mark.asyncio +async def test_openai_compatible_endpoint_calls_force_streaming(monkeypatch): + fake_settings = create_mock_settings("openai") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with ( + patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion, + patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler._handle_streaming_response", + new_callable=AsyncMock, + ) as mock_stream_handler, + ): + mock_stream_handler.return_value = ("test", "stop") + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1", + custom_llm_provider="openai", + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["stream"] is True From 38e827004ede8b8cf439a26eb779a98344cc553c Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Tue, 21 Apr 2026 10:05:58 +0200 Subject: [PATCH 03/13] :bug: Fix: Normalize LiteLLM provider and api_base checks --- .../algo/ai_handlers/litellm_ai_handler.py | 5 +- .../unittest/test_litellm_custom_provider.py | 53 +++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 5ca7957bae..6b93e78f7f 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -446,8 +446,9 @@ async def _get_completion(self, **kwargs): Wrapper that automatically handles streaming for required models. """ model = kwargs["model"] - custom_llm_provider = kwargs.get("custom_llm_provider") - api_base = (kwargs.get("api_base") or "").lower() + custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower() + api_base_value = kwargs.get("api_base") + api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" force_streaming = ( custom_llm_provider == "openai" and "snowflakecomputing.com" in api_base diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index 488d04f17f..93408537b9 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -120,3 +120,56 @@ async def test_openai_compatible_endpoint_calls_force_streaming(monkeypatch): call_kwargs = mock_completion.call_args[1] assert call_kwargs["stream"] is True + + +@pytest.mark.asyncio +async def test_openai_compatible_endpoint_normalizes_custom_provider_for_streaming(monkeypatch): + fake_settings = create_mock_settings(" OpenAI ") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with ( + patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion, + patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler._handle_streaming_response", + new_callable=AsyncMock, + ) as mock_stream_handler, + ): + mock_stream_handler.return_value = ("test", "stop") + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1", + custom_llm_provider=" OpenAI ", + ) + + call_kwargs = mock_completion.call_args[1] + assert call_kwargs["stream"] is True + + +@pytest.mark.asyncio +async def test_openai_compatible_endpoint_ignores_non_string_api_base(monkeypatch): + fake_settings = create_mock_settings("openai") + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base=123, + custom_llm_provider="openai", + ) + + call_kwargs = mock_completion.call_args[1] + assert "stream" not in call_kwargs From 3f6646d8fc4f700e0de8a1fd547b82fcb153a564 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Tue, 21 Apr 2026 10:25:31 +0200 Subject: [PATCH 04/13] :bug: Fix: Make Snowflake streaming workaround configurable --- .../algo/ai_handlers/litellm_ai_handler.py | 10 ++++- pr_agent/settings/configuration.toml | 2 + .../unittest/test_litellm_custom_provider.py | 41 ++++++++++++++++++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 6b93e78f7f..5da434d20f 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -449,9 +449,15 @@ async def _get_completion(self, **kwargs): custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower() api_base_value = kwargs.get("api_base") api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" + force_streaming_provider = str(get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or "").strip().lower() + force_streaming_api_base_substrings = [ + str(value).strip().lower() + for value in (get_settings().get("LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", []) or []) + if str(value).strip() + ] force_streaming = ( - custom_llm_provider == "openai" - and "snowflakecomputing.com" in api_base + custom_llm_provider == force_streaming_provider + and any(substring in api_base for substring in force_streaming_api_base_substrings) ) # Some OpenAI-compatible endpoints can return an empty-string diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 1f361cb9a5..f2527b1014 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -324,6 +324,8 @@ success_callback = [] failure_callback = [] service_callback = [] # custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints +force_streaming_custom_llm_provider = "openai" +force_streaming_api_base_substrings = ["snowflakecomputing.com"] # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock [pr_similar_issue] diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index 93408537b9..bc24ccccef 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -6,14 +6,25 @@ from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler -def create_mock_settings(custom_llm_provider=None): +def create_mock_settings( + custom_llm_provider=None, + force_streaming_custom_llm_provider="openai", + force_streaming_api_base_substrings=None, +): + if force_streaming_api_base_substrings is None: + force_streaming_api_base_substrings = ["snowflakecomputing.com"] + litellm_settings = type("", (), {"get": lambda self, key, default=None: default})() if custom_llm_provider is not None: litellm_settings.custom_llm_provider = custom_llm_provider + litellm_settings.force_streaming_custom_llm_provider = force_streaming_custom_llm_provider + litellm_settings.force_streaming_api_base_substrings = force_streaming_api_base_substrings def get_value(key, default=None): values = { "LITELLM.CUSTOM_LLM_PROVIDER": custom_llm_provider, + "LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER": force_streaming_custom_llm_provider, + "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS": force_streaming_api_base_substrings, } return values.get(key, default) @@ -173,3 +184,31 @@ async def test_openai_compatible_endpoint_ignores_non_string_api_base(monkeypatc call_kwargs = mock_completion.call_args[1] assert "stream" not in call_kwargs + + +@pytest.mark.asyncio +async def test_force_streaming_is_settings_driven(monkeypatch): + fake_settings = create_mock_settings( + "openai", + force_streaming_custom_llm_provider="openai", + force_streaming_api_base_substrings=["example-gateway.local"], + ) + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1", + custom_llm_provider="openai", + ) + + call_kwargs = mock_completion.call_args[1] + assert "stream" not in call_kwargs From 846d82e79ef01052cb09bd2287bf6e9f86392dde Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 09:16:26 +0200 Subject: [PATCH 05/13] :bug: Fix: Comment out configs --- pr_agent/settings/configuration.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index f2527b1014..cec9212b0f 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -324,8 +324,8 @@ success_callback = [] failure_callback = [] service_callback = [] # custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints -force_streaming_custom_llm_provider = "openai" -force_streaming_api_base_substrings = ["snowflakecomputing.com"] +# force_streaming_custom_llm_provider = "openai" # Optional: provider value that enables forced streaming workaround +# force_streaming_api_base_substrings = ["snowflakecomputing.com"] # Optional: api_base substrings that trigger forced streaming workaround # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock [pr_similar_issue] From 5861932e93ade1fbbf23ea5a93c43d63f41256a9 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 09:43:35 +0200 Subject: [PATCH 06/13] :bug: Fix: Harden configurable forced streaming checks --- .../algo/ai_handlers/litellm_ai_handler.py | 24 +++++--- pr_agent/settings/configuration.toml | 3 - .../unittest/test_litellm_custom_provider.py | 56 +++++++++++++++++++ 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 5da434d20f..7d4670cf4e 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -449,14 +449,24 @@ async def _get_completion(self, **kwargs): custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower() api_base_value = kwargs.get("api_base") api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" - force_streaming_provider = str(get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or "").strip().lower() - force_streaming_api_base_substrings = [ - str(value).strip().lower() - for value in (get_settings().get("LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", []) or []) - if str(value).strip() - ] + force_streaming_provider = str( + get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or "" + ).strip().lower() + raw_force_streaming_api_base_substrings = get_settings().get( + "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", [] + ) + if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)): + force_streaming_api_base_substrings = [ + str(value).strip().lower() + for value in raw_force_streaming_api_base_substrings + if str(value).strip() + ] + else: + force_streaming_api_base_substrings = [] force_streaming = ( - custom_llm_provider == force_streaming_provider + bool(force_streaming_provider) + and custom_llm_provider == force_streaming_provider + and bool(force_streaming_api_base_substrings) and any(substring in api_base for substring in force_streaming_api_base_substrings) ) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index cec9212b0f..16ffbcae2a 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -323,9 +323,6 @@ enable_callbacks = false success_callback = [] failure_callback = [] service_callback = [] -# custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints -# force_streaming_custom_llm_provider = "openai" # Optional: provider value that enables forced streaming workaround -# force_streaming_api_base_substrings = ["snowflakecomputing.com"] # Optional: api_base substrings that trigger forced streaming workaround # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock [pr_similar_issue] diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index bc24ccccef..e90a586daf 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -212,3 +212,59 @@ async def test_force_streaming_is_settings_driven(monkeypatch): call_kwargs = mock_completion.call_args[1] assert "stream" not in call_kwargs + + +@pytest.mark.asyncio +async def test_force_streaming_requires_non_empty_provider_setting(monkeypatch): + fake_settings = create_mock_settings( + "openai", + force_streaming_custom_llm_provider="", + force_streaming_api_base_substrings=["snowflakecomputing.com"], + ) + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1", + custom_llm_provider="", + ) + + call_kwargs = mock_completion.call_args[1] + assert "stream" not in call_kwargs + + +@pytest.mark.asyncio +async def test_force_streaming_ignores_non_collection_substring_setting(monkeypatch): + fake_settings = create_mock_settings( + "openai", + force_streaming_custom_llm_provider="openai", + force_streaming_api_base_substrings="snowflakecomputing.com", + ) + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion: + mock_completion.return_value = create_mock_acompletion_response() + + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1", + custom_llm_provider="openai", + ) + + call_kwargs = mock_completion.call_args[1] + assert "stream" not in call_kwargs From 8ade68c6dc9388514207ec52fb79cab4d083269f Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 09:54:18 +0200 Subject: [PATCH 07/13] :bug: Fix: Normalize and document LiteLLM streaming overrides --- pr_agent/algo/ai_handlers/litellm_ai_handler.py | 9 +++++++-- pr_agent/settings/configuration.toml | 3 +++ tests/unittest/test_litellm_custom_provider.py | 15 ++++++++------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 7d4670cf4e..600da7ef68 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -395,7 +395,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: # Support for custom OpenAI body fields (e.g., Flex Processing) kwargs = _process_litellm_extra_body(kwargs) - custom_llm_provider = get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", None) + custom_llm_provider = str( + get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", "") or "" + ).strip().lower() if custom_llm_provider: kwargs["custom_llm_provider"] = custom_llm_provider @@ -476,7 +478,10 @@ async def _get_completion(self, **kwargs): if model in self.streaming_required_models or force_streaming: kwargs["stream"] = True if force_streaming and model not in self.streaming_required_models: - get_logger().info(f"Using streaming mode for model {model} due to OpenAI-compatible endpoint compatibility") + get_logger().info( + f"Using streaming mode for model {model} " + "due to OpenAI-compatible endpoint compatibility" + ) else: get_logger().info(f"Using streaming mode for model {model}") response = await acompletion(**kwargs) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 16ffbcae2a..10695f2605 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -323,6 +323,9 @@ enable_callbacks = false success_callback = [] failure_callback = [] service_callback = [] +custom_llm_provider = "" +force_streaming_custom_llm_provider = "" +force_streaming_api_base_substrings = [] # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock [pr_similar_issue] diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index e90a586daf..99f835b5a1 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -49,19 +49,20 @@ def get_value(key, default=None): def create_mock_acompletion_response(): - mock_response = MagicMock() - mock_response.__getitem__ = lambda self, key: { - "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}] - }[key] - mock_response.dict.return_value = { + response_payload = { "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}] } - return mock_response + + class MockCompletionResponse(dict): + def dict(self): + return dict(self) + + return MockCompletionResponse(response_payload) @pytest.mark.asyncio async def test_custom_llm_provider_is_forwarded_without_rewriting_model(monkeypatch): - fake_settings = create_mock_settings("openai") + fake_settings = create_mock_settings(" OpenAI ") monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) with patch( From e8a81508ddf9e1ca27e18d0e08f4556773999ed9 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 10:05:39 +0200 Subject: [PATCH 08/13] :bug: Fix: Clean up LiteLLM custom provider tests --- tests/unittest/test_litellm_custom_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index 99f835b5a1..d8a096f18e 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -1,4 +1,4 @@ -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock, patch import pytest From 34af597c4109550dc34e4406ef0cf45a0f8f5fb9 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 10:15:11 +0200 Subject: [PATCH 09/13] :bug: Fix: Warn on invalid forced streaming substring config --- .../algo/ai_handlers/litellm_ai_handler.py | 5 +++ .../unittest/test_litellm_custom_provider.py | 32 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 600da7ef68..ed6692135f 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -464,6 +464,11 @@ async def _get_completion(self, **kwargs): if str(value).strip() ] else: + if raw_force_streaming_api_base_substrings: + get_logger().warning( + "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set." + "Ignoring invalid value." + ) force_streaming_api_base_substrings = [] force_streaming = ( bool(force_streaming_provider) diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index d8a096f18e..f739bb2e51 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -269,3 +269,35 @@ async def test_force_streaming_ignores_non_collection_substring_setting(monkeypa call_kwargs = mock_completion.call_args[1] assert "stream" not in call_kwargs + + +@pytest.mark.asyncio +async def test_force_streaming_warns_on_invalid_substring_setting(monkeypatch): + fake_settings = create_mock_settings( + "openai", + force_streaming_custom_llm_provider="openai", + force_streaming_api_base_substrings="snowflakecomputing.com", + ) + monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings) + + with ( + patch( + "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", + new_callable=AsyncMock, + ) as mock_completion, + patch("pr_agent.algo.ai_handlers.litellm_ai_handler.get_logger") as mock_logger, + ): + mock_completion.return_value = create_mock_acompletion_response() + handler = LiteLLMAIHandler() + await handler._get_completion( + model="claude-sonnet-4-5", + messages=[], + timeout=120, + api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1", + custom_llm_provider="openai", + ) + + mock_logger.return_value.warning.assert_called_once_with( + "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. " + "Ignoring invalid value." + ) From de933c5db77421785d667f4136e6b54290594e60 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 10:27:03 +0200 Subject: [PATCH 10/13] :bug: Fix: Align LiteLLM settings lookups and warning text --- pr_agent/algo/ai_handlers/litellm_ai_handler.py | 8 ++++---- tests/unittest/test_litellm_custom_provider.py | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index ed6692135f..f4887806f9 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -396,7 +396,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: kwargs = _process_litellm_extra_body(kwargs) custom_llm_provider = str( - get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", "") or "" + get_settings().get("litellm.custom_llm_provider", "") or "" ).strip().lower() if custom_llm_provider: kwargs["custom_llm_provider"] = custom_llm_provider @@ -452,10 +452,10 @@ async def _get_completion(self, **kwargs): api_base_value = kwargs.get("api_base") api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" force_streaming_provider = str( - get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or "" + get_settings().get("litellm.force_streaming_custom_llm_provider", "") or "" ).strip().lower() raw_force_streaming_api_base_substrings = get_settings().get( - "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", [] + "litellm.force_streaming_api_base_substrings", [] ) if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)): force_streaming_api_base_substrings = [ @@ -466,7 +466,7 @@ async def _get_completion(self, **kwargs): else: if raw_force_streaming_api_base_substrings: get_logger().warning( - "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set." + "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. " "Ignoring invalid value." ) force_streaming_api_base_substrings = [] diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py index f739bb2e51..b953de69d8 100644 --- a/tests/unittest/test_litellm_custom_provider.py +++ b/tests/unittest/test_litellm_custom_provider.py @@ -23,8 +23,11 @@ def create_mock_settings( def get_value(key, default=None): values = { "LITELLM.CUSTOM_LLM_PROVIDER": custom_llm_provider, + "litellm.custom_llm_provider": custom_llm_provider, "LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER": force_streaming_custom_llm_provider, + "litellm.force_streaming_custom_llm_provider": force_streaming_custom_llm_provider, "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS": force_streaming_api_base_substrings, + "litellm.force_streaming_api_base_substrings": force_streaming_api_base_substrings, } return values.get(key, default) From 5a9ebe22bd01e1fca7b6746a73ce3a72aad4d4a2 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 11:08:20 +0200 Subject: [PATCH 11/13] :bug: Fix: Use consistent litellm settings access --- pr_agent/algo/ai_handlers/litellm_ai_handler.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index f4887806f9..1788791a79 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -396,7 +396,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature: kwargs = _process_litellm_extra_body(kwargs) custom_llm_provider = str( - get_settings().get("litellm.custom_llm_provider", "") or "" + getattr(get_settings().litellm, "custom_llm_provider", "") or "" ).strip().lower() if custom_llm_provider: kwargs["custom_llm_provider"] = custom_llm_provider @@ -452,10 +452,10 @@ async def _get_completion(self, **kwargs): api_base_value = kwargs.get("api_base") api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" force_streaming_provider = str( - get_settings().get("litellm.force_streaming_custom_llm_provider", "") or "" + getattr(get_settings().litellm, "force_streaming_custom_llm_provider", "") or "" ).strip().lower() - raw_force_streaming_api_base_substrings = get_settings().get( - "litellm.force_streaming_api_base_substrings", [] + raw_force_streaming_api_base_substrings = getattr( + get_settings().litellm, "force_streaming_api_base_substrings", [] ) if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)): force_streaming_api_base_substrings = [ From 60d52db69b888db116214c94ad961ef09ae21ffa Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 11:15:50 +0200 Subject: [PATCH 12/13] :bug: Fix: Move forced streaming config validation out of request path --- .../algo/ai_handlers/litellm_ai_handler.py | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 1788791a79..0b660d6c7d 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -152,6 +152,25 @@ def __init__(self): # Models that require streaming self.streaming_required_models = STREAMING_REQUIRED_MODELS + self.force_streaming_provider = str( + getattr(get_settings().litellm, "force_streaming_custom_llm_provider", "") or "" + ).strip().lower() + raw_force_streaming_api_base_substrings = getattr( + get_settings().litellm, "force_streaming_api_base_substrings", [] + ) + if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)): + self.force_streaming_api_base_substrings = [ + str(value).strip().lower() + for value in raw_force_streaming_api_base_substrings + if str(value).strip() + ] + else: + if raw_force_streaming_api_base_substrings: + get_logger().warning( + "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. " + "Ignoring invalid value." + ) + self.force_streaming_api_base_substrings = [] def prepare_logs(self, response, system, user, resp, finish_reason): response_log = response.dict().copy() @@ -451,30 +470,11 @@ async def _get_completion(self, **kwargs): custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower() api_base_value = kwargs.get("api_base") api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" - force_streaming_provider = str( - getattr(get_settings().litellm, "force_streaming_custom_llm_provider", "") or "" - ).strip().lower() - raw_force_streaming_api_base_substrings = getattr( - get_settings().litellm, "force_streaming_api_base_substrings", [] - ) - if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)): - force_streaming_api_base_substrings = [ - str(value).strip().lower() - for value in raw_force_streaming_api_base_substrings - if str(value).strip() - ] - else: - if raw_force_streaming_api_base_substrings: - get_logger().warning( - "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. " - "Ignoring invalid value." - ) - force_streaming_api_base_substrings = [] force_streaming = ( - bool(force_streaming_provider) - and custom_llm_provider == force_streaming_provider - and bool(force_streaming_api_base_substrings) - and any(substring in api_base for substring in force_streaming_api_base_substrings) + bool(self.force_streaming_provider) + and custom_llm_provider == self.force_streaming_provider + and bool(self.force_streaming_api_base_substrings) + and any(substring in api_base for substring in self.force_streaming_api_base_substrings) ) # Some OpenAI-compatible endpoints can return an empty-string From 9b9cb644d4dc9d487b053a3eff06c7c95b27b9b6 Mon Sep 17 00:00:00 2001 From: Taooo-habitus Date: Wed, 22 Apr 2026 11:23:27 +0200 Subject: [PATCH 13/13] :bug: Fix: Tighten forced streaming config normalization --- pr_agent/algo/ai_handlers/litellm_ai_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 0b660d6c7d..e2992f313c 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -162,7 +162,7 @@ def __init__(self): self.force_streaming_api_base_substrings = [ str(value).strip().lower() for value in raw_force_streaming_api_base_substrings - if str(value).strip() + if value is not None and str(value).strip() ] else: if raw_force_streaming_api_base_substrings: @@ -469,7 +469,7 @@ async def _get_completion(self, **kwargs): model = kwargs["model"] custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower() api_base_value = kwargs.get("api_base") - api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else "" + api_base = api_base_value.strip().lower() if isinstance(api_base_value, str) else "" force_streaming = ( bool(self.force_streaming_provider) and custom_llm_provider == self.force_streaming_provider