From d144cbae744d7a3f88bca96d08f581ff5055f69e Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Tue, 31 Mar 2026 09:59:03 +0200
Subject: [PATCH 01/13] fix: add LiteLLM custom provider override for hosted
 OpenAI-compatible endpoints

---
 .../algo/ai_handlers/litellm_ai_handler.py    |  4 +
 pr_agent/settings/configuration.toml          |  1 +
 .../unittest/test_litellm_custom_provider.py  | 77 +++++++++++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 tests/unittest/test_litellm_custom_provider.py

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index de9993284d..26b30e42a6 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -395,6 +395,10 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             # Support for custom OpenAI body fields (e.g., Flex Processing)
             kwargs = _process_litellm_extra_body(kwargs)
 
+            custom_llm_provider = get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", None)
+            if custom_llm_provider:
+                kwargs["custom_llm_provider"] = custom_llm_provider
+
             # Support for Bedrock custom inference profile via model_id
             model_id = get_settings().get("litellm.model_id")
             if model_id and 'bedrock/' in model:
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 16ffbcae2a..1f361cb9a5 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -323,6 +323,7 @@ enable_callbacks = false
 success_callback = []
 failure_callback = []
 service_callback = []
+# custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
 
 [pr_similar_issue]
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
new file mode 100644
index 0000000000..7c383f4c0a
--- /dev/null
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -0,0 +1,77 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+import pr_agent.algo.ai_handlers.litellm_ai_handler as litellm_handler
+from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
+
+
+def create_mock_settings(custom_llm_provider=None):
+    litellm_settings = type("", (), {"get": lambda self, key, default=None: default})()
+    if custom_llm_provider is not None:
+        litellm_settings.custom_llm_provider = custom_llm_provider
+
+    def get_value(key, default=None):
+        values = {
+            "LITELLM.CUSTOM_LLM_PROVIDER": custom_llm_provider,
+        }
+        return values.get(key, default)
+
+    return type("", (), {
+        "config": type("", (), {
+            "ai_timeout": 120,
+            "custom_reasoning_model": False,
+            "verbosity_level": 0,
+            "get": lambda self, key, default=None: default,
+        })(),
+        "litellm": litellm_settings,
+        "get": staticmethod(get_value),
+    })()
+
+
+def create_mock_acompletion_response():
+    mock_response = MagicMock()
+    mock_response.__getitem__ = lambda self, key: {
+        "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]
+    }[key]
+    mock_response.dict.return_value = {"choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]}
+    return mock_response
+
+
+@pytest.mark.asyncio
+async def test_custom_llm_provider_is_forwarded_without_rewriting_model(monkeypatch):
+    fake_settings = create_mock_settings("openai")
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion:
+        mock_completion.return_value = create_mock_acompletion_response()
+
+        handler = LiteLLMAIHandler()
+        await handler.chat_completion(
+            model="claude-sonnet-4-5",
+            system="test system",
+            user="test user",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert call_kwargs["model"] == "claude-sonnet-4-5"
+        assert call_kwargs["custom_llm_provider"] == "openai"
+
+
+@pytest.mark.asyncio
+async def test_custom_llm_provider_is_omitted_when_unset(monkeypatch):
+    fake_settings = create_mock_settings()
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion:
+        mock_completion.return_value = create_mock_acompletion_response()
+
+        handler = LiteLLMAIHandler()
+        await handler.chat_completion(
+            model="claude-sonnet-4-5",
+            system="test system",
+            user="test user",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert "custom_llm_provider" not in call_kwargs

From fa0e7f4991446e6063c1a16641ce696fe61f46f7 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Tue, 21 Apr 2026 09:34:57 +0200
Subject: [PATCH 02/13] :bug: Fix: Force streaming for OpenAI-compatible
 LiteLLM responses

---
 .gitignore                                    |  1 +
 .../algo/ai_handlers/litellm_ai_handler.py    | 17 ++++-
 .../unittest/test_litellm_custom_provider.py  | 71 +++++++++++++++----
 3 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8da05107fd..d13345bd17 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 .venv/
 venv/
 pr_agent/settings/.secrets.toml
+pr_agent/settings_prod/.secrets.toml
 __pycache__
 dist/
 *.egg-info/
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 26b30e42a6..5ca7957bae 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -446,9 +446,22 @@ async def _get_completion(self, **kwargs):
         Wrapper that automatically handles streaming for required models.
         """
         model = kwargs["model"]
-        if model in self.streaming_required_models:
+        custom_llm_provider = kwargs.get("custom_llm_provider")
+        api_base = (kwargs.get("api_base") or "").lower()
+        force_streaming = (
+            custom_llm_provider == "openai"
+            and "snowflakecomputing.com" in api_base
+        )
+
+        # Some OpenAI-compatible endpoints can return an empty-string
+        # finish_reason on non-streaming responses, which LiteLLM rejects during
+        # response normalization. Streaming avoids that conversion path.
+        if model in self.streaming_required_models or force_streaming:
             kwargs["stream"] = True
-            get_logger().info(f"Using streaming mode for model {model}")
+            if force_streaming and model not in self.streaming_required_models:
+                get_logger().info(f"Using streaming mode for model {model} due to OpenAI-compatible endpoint compatibility")
+            else:
+                get_logger().info(f"Using streaming mode for model {model}")
             response = await acompletion(**kwargs)
             resp, finish_reason = await _handle_streaming_response(response)
             # Create MockResponse for streaming since we don't have the full response object
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index 7c383f4c0a..488d04f17f 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -17,16 +17,24 @@ def get_value(key, default=None):
         }
         return values.get(key, default)
 
-    return type("", (), {
-        "config": type("", (), {
-            "ai_timeout": 120,
-            "custom_reasoning_model": False,
-            "verbosity_level": 0,
-            "get": lambda self, key, default=None: default,
-        })(),
-        "litellm": litellm_settings,
-        "get": staticmethod(get_value),
-    })()
+    return type(
+        "",
+        (),
+        {
+            "config": type(
+                "",
+                (),
+                {
+                    "ai_timeout": 120,
+                    "custom_reasoning_model": False,
+                    "verbosity_level": 0,
+                    "get": lambda self, key, default=None: default,
+                },
+            )(),
+            "litellm": litellm_settings,
+            "get": staticmethod(get_value),
+        },
+    )()
 
 
 def create_mock_acompletion_response():
@@ -34,7 +42,9 @@ def create_mock_acompletion_response():
     mock_response.__getitem__ = lambda self, key: {
         "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]
     }[key]
-    mock_response.dict.return_value = {"choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]}
+    mock_response.dict.return_value = {
+        "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]
+    }
     return mock_response
 
 
@@ -43,7 +53,10 @@ async def test_custom_llm_provider_is_forwarded_without_rewriting_model(monkeypa
     fake_settings = create_mock_settings("openai")
     monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
 
-    with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion:
+    with patch(
+        "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+        new_callable=AsyncMock,
+    ) as mock_completion:
         mock_completion.return_value = create_mock_acompletion_response()
 
         handler = LiteLLMAIHandler()
@@ -63,7 +76,10 @@ async def test_custom_llm_provider_is_omitted_when_unset(monkeypatch):
     fake_settings = create_mock_settings()
     monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
 
-    with patch("pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion", new_callable=AsyncMock) as mock_completion:
+    with patch(
+        "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+        new_callable=AsyncMock,
+    ) as mock_completion:
         mock_completion.return_value = create_mock_acompletion_response()
 
         handler = LiteLLMAIHandler()
@@ -75,3 +91,32 @@ async def test_custom_llm_provider_is_omitted_when_unset(monkeypatch):
 
         call_kwargs = mock_completion.call_args[1]
         assert "custom_llm_provider" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_openai_compatible_endpoint_calls_force_streaming(monkeypatch):
+    fake_settings = create_mock_settings("openai")
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with (
+        patch(
+            "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+            new_callable=AsyncMock,
+        ) as mock_completion,
+        patch(
+            "pr_agent.algo.ai_handlers.litellm_ai_handler._handle_streaming_response",
+            new_callable=AsyncMock,
+        ) as mock_stream_handler,
+    ):
+        mock_stream_handler.return_value = ("test", "stop")
+        handler = LiteLLMAIHandler()
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1",
+            custom_llm_provider="openai",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert call_kwargs["stream"] is True

From 38e827004ede8b8cf439a26eb779a98344cc553c Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Tue, 21 Apr 2026 10:05:58 +0200
Subject: [PATCH 03/13] :bug: Fix: Normalize LiteLLM provider and api_base
 checks

---
 .../algo/ai_handlers/litellm_ai_handler.py    |  5 +-
 .../unittest/test_litellm_custom_provider.py  | 53 +++++++++++++++++++
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 5ca7957bae..6b93e78f7f 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -446,8 +446,9 @@ async def _get_completion(self, **kwargs):
         Wrapper that automatically handles streaming for required models.
         """
         model = kwargs["model"]
-        custom_llm_provider = kwargs.get("custom_llm_provider")
-        api_base = (kwargs.get("api_base") or "").lower()
+        custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower()
+        api_base_value = kwargs.get("api_base")
+        api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
         force_streaming = (
             custom_llm_provider == "openai"
             and "snowflakecomputing.com" in api_base
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index 488d04f17f..93408537b9 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -120,3 +120,56 @@ async def test_openai_compatible_endpoint_calls_force_streaming(monkeypatch):
 
         call_kwargs = mock_completion.call_args[1]
         assert call_kwargs["stream"] is True
+
+
+@pytest.mark.asyncio
+async def test_openai_compatible_endpoint_normalizes_custom_provider_for_streaming(monkeypatch):
+    fake_settings = create_mock_settings(" OpenAI ")
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with (
+        patch(
+            "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+            new_callable=AsyncMock,
+        ) as mock_completion,
+        patch(
+            "pr_agent.algo.ai_handlers.litellm_ai_handler._handle_streaming_response",
+            new_callable=AsyncMock,
+        ) as mock_stream_handler,
+    ):
+        mock_stream_handler.return_value = ("test", "stop")
+        handler = LiteLLMAIHandler()
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1",
+            custom_llm_provider=" OpenAI ",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert call_kwargs["stream"] is True
+
+
+@pytest.mark.asyncio
+async def test_openai_compatible_endpoint_ignores_non_string_api_base(monkeypatch):
+    fake_settings = create_mock_settings("openai")
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with patch(
+        "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+        new_callable=AsyncMock,
+    ) as mock_completion:
+        mock_completion.return_value = create_mock_acompletion_response()
+
+        handler = LiteLLMAIHandler()
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base=123,
+            custom_llm_provider="openai",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert "stream" not in call_kwargs

From 3f6646d8fc4f700e0de8a1fd547b82fcb153a564 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Tue, 21 Apr 2026 10:25:31 +0200
Subject: [PATCH 04/13] :bug: Fix: Make Snowflake streaming workaround
 configurable

---
 .../algo/ai_handlers/litellm_ai_handler.py    | 10 ++++-
 pr_agent/settings/configuration.toml          |  2 +
 .../unittest/test_litellm_custom_provider.py  | 41 ++++++++++++++++++-
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 6b93e78f7f..5da434d20f 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -449,9 +449,15 @@ async def _get_completion(self, **kwargs):
         custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower()
         api_base_value = kwargs.get("api_base")
         api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
+        force_streaming_provider = str(get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or "").strip().lower()
+        force_streaming_api_base_substrings = [
+            str(value).strip().lower()
+            for value in (get_settings().get("LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", []) or [])
+            if str(value).strip()
+        ]
         force_streaming = (
-            custom_llm_provider == "openai"
-            and "snowflakecomputing.com" in api_base
+            custom_llm_provider == force_streaming_provider
+            and any(substring in api_base for substring in force_streaming_api_base_substrings)
         )
 
         # Some OpenAI-compatible endpoints can return an empty-string
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 1f361cb9a5..f2527b1014 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -324,6 +324,8 @@ success_callback = []
 failure_callback = []
 service_callback = []
 # custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints
+force_streaming_custom_llm_provider = "openai"
+force_streaming_api_base_substrings = ["snowflakecomputing.com"]
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
 
 [pr_similar_issue]
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index 93408537b9..bc24ccccef 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -6,14 +6,25 @@
 from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
 
 
-def create_mock_settings(custom_llm_provider=None):
+def create_mock_settings(
+    custom_llm_provider=None,
+    force_streaming_custom_llm_provider="openai",
+    force_streaming_api_base_substrings=None,
+):
+    if force_streaming_api_base_substrings is None:
+        force_streaming_api_base_substrings = ["snowflakecomputing.com"]
+
     litellm_settings = type("", (), {"get": lambda self, key, default=None: default})()
     if custom_llm_provider is not None:
         litellm_settings.custom_llm_provider = custom_llm_provider
+    litellm_settings.force_streaming_custom_llm_provider = force_streaming_custom_llm_provider
+    litellm_settings.force_streaming_api_base_substrings = force_streaming_api_base_substrings
 
     def get_value(key, default=None):
         values = {
             "LITELLM.CUSTOM_LLM_PROVIDER": custom_llm_provider,
+            "LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER": force_streaming_custom_llm_provider,
+            "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS": force_streaming_api_base_substrings,
         }
         return values.get(key, default)
 
@@ -173,3 +184,31 @@ async def test_openai_compatible_endpoint_ignores_non_string_api_base(monkeypatc
 
         call_kwargs = mock_completion.call_args[1]
         assert "stream" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_force_streaming_is_settings_driven(monkeypatch):
+    fake_settings = create_mock_settings(
+        "openai",
+        force_streaming_custom_llm_provider="openai",
+        force_streaming_api_base_substrings=["example-gateway.local"],
+    )
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with patch(
+        "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+        new_callable=AsyncMock,
+    ) as mock_completion:
+        mock_completion.return_value = create_mock_acompletion_response()
+
+        handler = LiteLLMAIHandler()
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1",
+            custom_llm_provider="openai",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert "stream" not in call_kwargs

From 846d82e79ef01052cb09bd2287bf6e9f86392dde Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 09:16:26 +0200
Subject: [PATCH 05/13] :bug: Fix: Comment out forced-streaming config defaults

---
 pr_agent/settings/configuration.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index f2527b1014..cec9212b0f 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -324,8 +324,8 @@ success_callback = []
 failure_callback = []
 service_callback = []
 # custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints
-force_streaming_custom_llm_provider = "openai"
-force_streaming_api_base_substrings = ["snowflakecomputing.com"]
+# force_streaming_custom_llm_provider = "openai" # Optional: provider value that enables forced streaming workaround
+# force_streaming_api_base_substrings = ["snowflakecomputing.com"] # Optional: api_base substrings that trigger forced streaming workaround
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
 
 [pr_similar_issue]

From 5861932e93ade1fbbf23ea5a93c43d63f41256a9 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 09:43:35 +0200
Subject: [PATCH 06/13] :bug: Fix: Harden configurable forced streaming checks

---
 .../algo/ai_handlers/litellm_ai_handler.py    | 24 +++++---
 pr_agent/settings/configuration.toml          |  3 -
 .../unittest/test_litellm_custom_provider.py  | 56 +++++++++++++++++++
 3 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 5da434d20f..7d4670cf4e 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -449,14 +449,24 @@ async def _get_completion(self, **kwargs):
         custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower()
         api_base_value = kwargs.get("api_base")
         api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
-        force_streaming_provider = str(get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or "").strip().lower()
-        force_streaming_api_base_substrings = [
-            str(value).strip().lower()
-            for value in (get_settings().get("LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", []) or [])
-            if str(value).strip()
-        ]
+        force_streaming_provider = str(
+            get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or ""
+        ).strip().lower()
+        raw_force_streaming_api_base_substrings = get_settings().get(
+            "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", []
+        )
+        if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)):
+            force_streaming_api_base_substrings = [
+                str(value).strip().lower()
+                for value in raw_force_streaming_api_base_substrings
+                if str(value).strip()
+            ]
+        else:
+            force_streaming_api_base_substrings = []
         force_streaming = (
-            custom_llm_provider == force_streaming_provider
+            bool(force_streaming_provider)
+            and custom_llm_provider == force_streaming_provider
+            and bool(force_streaming_api_base_substrings)
             and any(substring in api_base for substring in force_streaming_api_base_substrings)
         )
 
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index cec9212b0f..16ffbcae2a 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -323,9 +323,6 @@ enable_callbacks = false
 success_callback = []
 failure_callback = []
 service_callback = []
-# custom_llm_provider = "" # Optional: Override LiteLLM provider inference for hosted OpenAI-compatible endpoints
-# force_streaming_custom_llm_provider = "openai" # Optional: provider value that enables forced streaming workaround
-# force_streaming_api_base_substrings = ["snowflakecomputing.com"] # Optional: api_base substrings that trigger forced streaming workaround
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
 
 [pr_similar_issue]
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index bc24ccccef..e90a586daf 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -212,3 +212,59 @@ async def test_force_streaming_is_settings_driven(monkeypatch):
 
         call_kwargs = mock_completion.call_args[1]
         assert "stream" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_force_streaming_requires_non_empty_provider_setting(monkeypatch):
+    fake_settings = create_mock_settings(
+        "openai",
+        force_streaming_custom_llm_provider="",
+        force_streaming_api_base_substrings=["snowflakecomputing.com"],
+    )
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with patch(
+        "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+        new_callable=AsyncMock,
+    ) as mock_completion:
+        mock_completion.return_value = create_mock_acompletion_response()
+
+        handler = LiteLLMAIHandler()
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1",
+            custom_llm_provider="",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert "stream" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_force_streaming_ignores_non_collection_substring_setting(monkeypatch):
+    fake_settings = create_mock_settings(
+        "openai",
+        force_streaming_custom_llm_provider="openai",
+        force_streaming_api_base_substrings="snowflakecomputing.com",
+    )
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with patch(
+        "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+        new_callable=AsyncMock,
+    ) as mock_completion:
+        mock_completion.return_value = create_mock_acompletion_response()
+
+        handler = LiteLLMAIHandler()
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1",
+            custom_llm_provider="openai",
+        )
+
+        call_kwargs = mock_completion.call_args[1]
+        assert "stream" not in call_kwargs

From 8ade68c6dc9388514207ec52fb79cab4d083269f Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 09:54:18 +0200
Subject: [PATCH 07/13] :bug: Fix: Normalize and document LiteLLM streaming
 overrides

---
 pr_agent/algo/ai_handlers/litellm_ai_handler.py |  9 +++++++--
 pr_agent/settings/configuration.toml            |  3 +++
 tests/unittest/test_litellm_custom_provider.py  | 15 ++++++++-------
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 7d4670cf4e..600da7ef68 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -395,7 +395,9 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             # Support for custom OpenAI body fields (e.g., Flex Processing)
             kwargs = _process_litellm_extra_body(kwargs)
 
-            custom_llm_provider = get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", None)
+            custom_llm_provider = str(
+                get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", "") or ""
+            ).strip().lower()
             if custom_llm_provider:
                 kwargs["custom_llm_provider"] = custom_llm_provider
 
@@ -476,7 +478,10 @@ async def _get_completion(self, **kwargs):
         if model in self.streaming_required_models or force_streaming:
             kwargs["stream"] = True
             if force_streaming and model not in self.streaming_required_models:
-                get_logger().info(f"Using streaming mode for model {model} due to OpenAI-compatible endpoint compatibility")
+                get_logger().info(
+                    f"Using streaming mode for model {model} "
+                    "due to OpenAI-compatible endpoint compatibility"
+                )
             else:
                 get_logger().info(f"Using streaming mode for model {model}")
             response = await acompletion(**kwargs)
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 16ffbcae2a..10695f2605 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -323,6 +323,9 @@ enable_callbacks = false
 success_callback = []
 failure_callback = []
 service_callback = []
+custom_llm_provider = ""
+force_streaming_custom_llm_provider = ""
+force_streaming_api_base_substrings = []
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
 
 [pr_similar_issue]
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index e90a586daf..99f835b5a1 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -49,19 +49,20 @@ def get_value(key, default=None):
 
 
 def create_mock_acompletion_response():
-    mock_response = MagicMock()
-    mock_response.__getitem__ = lambda self, key: {
-        "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]
-    }[key]
-    mock_response.dict.return_value = {
+    response_payload = {
         "choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]
     }
-    return mock_response
+
+    class MockCompletionResponse(dict):
+        def dict(self):
+            return dict(self)
+
+    return MockCompletionResponse(response_payload)
 
 
 @pytest.mark.asyncio
 async def test_custom_llm_provider_is_forwarded_without_rewriting_model(monkeypatch):
-    fake_settings = create_mock_settings("openai")
+    fake_settings = create_mock_settings(" OpenAI ")
     monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
 
     with patch(

From e8a81508ddf9e1ca27e18d0e08f4556773999ed9 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 10:05:39 +0200
Subject: [PATCH 08/13] :bug: Fix: Remove unused MagicMock import from tests

---
 tests/unittest/test_litellm_custom_provider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index 99f835b5a1..d8a096f18e 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -1,4 +1,4 @@
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, patch
 
 import pytest
 

From 34af597c4109550dc34e4406ef0cf45a0f8f5fb9 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 10:15:11 +0200
Subject: [PATCH 09/13] :bug: Fix: Warn on invalid forced streaming substring
 config

---
 .../algo/ai_handlers/litellm_ai_handler.py    |  5 +++
 .../unittest/test_litellm_custom_provider.py  | 32 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 600da7ef68..ed6692135f 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -464,6 +464,11 @@ async def _get_completion(self, **kwargs):
                 if str(value).strip()
             ]
         else:
+            if raw_force_streaming_api_base_substrings:
+                get_logger().warning(
+                    "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set."
+                    "Ignoring invalid value."
+                )
             force_streaming_api_base_substrings = []
         force_streaming = (
             bool(force_streaming_provider)
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index d8a096f18e..f739bb2e51 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -269,3 +269,35 @@ async def test_force_streaming_ignores_non_collection_substring_setting(monkeypa
 
         call_kwargs = mock_completion.call_args[1]
         assert "stream" not in call_kwargs
+
+
+@pytest.mark.asyncio
+async def test_force_streaming_warns_on_invalid_substring_setting(monkeypatch):  # non-collection setting must log a warning
+    fake_settings = create_mock_settings(
+        "openai",
+        force_streaming_custom_llm_provider="openai",
+        force_streaming_api_base_substrings="snowflakecomputing.com",  # invalid on purpose: bare str, not list/tuple/set
+    )
+    monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
+
+    with (
+        patch(
+            "pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion",
+            new_callable=AsyncMock,
+        ) as mock_completion,
+        patch("pr_agent.algo.ai_handlers.litellm_ai_handler.get_logger") as mock_logger,  # capture logger calls
+    ):
+        mock_completion.return_value = create_mock_acompletion_response()
+        handler = LiteLLMAIHandler()  # constructed while get_logger is patched
+        await handler._get_completion(
+            model="claude-sonnet-4-5",
+            messages=[],
+            timeout=120,
+            api_base="https://example-account.snowflakecomputing.com/api/v2/cortex/v1",
+            custom_llm_provider="openai",
+        )
+
+        mock_logger.return_value.warning.assert_called_once_with(  # exactly one warning, with this exact text
+            "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. "
+            "Ignoring invalid value."
+        )

From de933c5db77421785d667f4136e6b54290594e60 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 10:27:03 +0200
Subject: [PATCH 10/13] :bug: Fix: Align LiteLLM settings lookups and warning
 text

---
 pr_agent/algo/ai_handlers/litellm_ai_handler.py | 8 ++++----
 tests/unittest/test_litellm_custom_provider.py  | 3 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index ed6692135f..f4887806f9 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -396,7 +396,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             kwargs = _process_litellm_extra_body(kwargs)
 
             custom_llm_provider = str(
-                get_settings().get("LITELLM.CUSTOM_LLM_PROVIDER", "") or ""
+                get_settings().get("litellm.custom_llm_provider", "") or ""
             ).strip().lower()
             if custom_llm_provider:
                 kwargs["custom_llm_provider"] = custom_llm_provider
@@ -452,10 +452,10 @@ async def _get_completion(self, **kwargs):
         api_base_value = kwargs.get("api_base")
         api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
         force_streaming_provider = str(
-            get_settings().get("LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER", "") or ""
+            get_settings().get("litellm.force_streaming_custom_llm_provider", "") or ""
         ).strip().lower()
         raw_force_streaming_api_base_substrings = get_settings().get(
-            "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS", []
+            "litellm.force_streaming_api_base_substrings", []
         )
         if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)):
             force_streaming_api_base_substrings = [
@@ -466,7 +466,7 @@ async def _get_completion(self, **kwargs):
         else:
             if raw_force_streaming_api_base_substrings:
                 get_logger().warning(
-                    "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set."
+                    "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. "
                     "Ignoring invalid value."
                 )
             force_streaming_api_base_substrings = []
diff --git a/tests/unittest/test_litellm_custom_provider.py b/tests/unittest/test_litellm_custom_provider.py
index f739bb2e51..b953de69d8 100644
--- a/tests/unittest/test_litellm_custom_provider.py
+++ b/tests/unittest/test_litellm_custom_provider.py
@@ -23,8 +23,11 @@ def create_mock_settings(
     def get_value(key, default=None):
         values = {
             "LITELLM.CUSTOM_LLM_PROVIDER": custom_llm_provider,
+            "litellm.custom_llm_provider": custom_llm_provider,
             "LITELLM.FORCE_STREAMING_CUSTOM_LLM_PROVIDER": force_streaming_custom_llm_provider,
+            "litellm.force_streaming_custom_llm_provider": force_streaming_custom_llm_provider,
             "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS": force_streaming_api_base_substrings,
+            "litellm.force_streaming_api_base_substrings": force_streaming_api_base_substrings,
         }
         return values.get(key, default)
 

From 5a9ebe22bd01e1fca7b6746a73ce3a72aad4d4a2 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 11:08:20 +0200
Subject: [PATCH 11/13] :bug: Fix: Use consistent litellm settings access

---
 pr_agent/algo/ai_handlers/litellm_ai_handler.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index f4887806f9..1788791a79 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -396,7 +396,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             kwargs = _process_litellm_extra_body(kwargs)
 
             custom_llm_provider = str(
-                get_settings().get("litellm.custom_llm_provider", "") or ""
+                getattr(get_settings().litellm, "custom_llm_provider", "") or ""
             ).strip().lower()
             if custom_llm_provider:
                 kwargs["custom_llm_provider"] = custom_llm_provider
@@ -452,10 +452,10 @@ async def _get_completion(self, **kwargs):
         api_base_value = kwargs.get("api_base")
         api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
         force_streaming_provider = str(
-            get_settings().get("litellm.force_streaming_custom_llm_provider", "") or ""
+            getattr(get_settings().litellm, "force_streaming_custom_llm_provider", "") or ""
         ).strip().lower()
-        raw_force_streaming_api_base_substrings = get_settings().get(
-            "litellm.force_streaming_api_base_substrings", []
+        raw_force_streaming_api_base_substrings = getattr(
+            get_settings().litellm, "force_streaming_api_base_substrings", []
         )
         if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)):
             force_streaming_api_base_substrings = [

From 60d52db69b888db116214c94ad961ef09ae21ffa Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 11:15:50 +0200
Subject: [PATCH 12/13] :bug: Fix: Move forced streaming config validation out
 of request path

---
 .../algo/ai_handlers/litellm_ai_handler.py    | 46 +++++++++----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 1788791a79..0b660d6c7d 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -152,6 +152,25 @@ def __init__(self):
 
         # Models that require streaming
         self.streaming_required_models = STREAMING_REQUIRED_MODELS
+        self.force_streaming_provider = str(
+            getattr(get_settings().litellm, "force_streaming_custom_llm_provider", "") or ""
+        ).strip().lower()
+        raw_force_streaming_api_base_substrings = getattr(
+            get_settings().litellm, "force_streaming_api_base_substrings", []
+        )
+        if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)):
+            self.force_streaming_api_base_substrings = [
+                str(value).strip().lower()
+                for value in raw_force_streaming_api_base_substrings
+                if str(value).strip()
+            ]
+        else:
+            if raw_force_streaming_api_base_substrings:
+                get_logger().warning(
+                    "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. "
+                    "Ignoring invalid value."
+                )
+            self.force_streaming_api_base_substrings = []
 
     def prepare_logs(self, response, system, user, resp, finish_reason):
         response_log = response.dict().copy()
@@ -451,30 +470,11 @@ async def _get_completion(self, **kwargs):
         custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower()
         api_base_value = kwargs.get("api_base")
         api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
-        force_streaming_provider = str(
-            getattr(get_settings().litellm, "force_streaming_custom_llm_provider", "") or ""
-        ).strip().lower()
-        raw_force_streaming_api_base_substrings = getattr(
-            get_settings().litellm, "force_streaming_api_base_substrings", []
-        )
-        if isinstance(raw_force_streaming_api_base_substrings, (list, tuple, set)):
-            force_streaming_api_base_substrings = [
-                str(value).strip().lower()
-                for value in raw_force_streaming_api_base_substrings
-                if str(value).strip()
-            ]
-        else:
-            if raw_force_streaming_api_base_substrings:
-                get_logger().warning(
-                    "LITELLM.FORCE_STREAMING_API_BASE_SUBSTRINGS must be a list, tuple, or set. "
-                    "Ignoring invalid value."
-                )
-            force_streaming_api_base_substrings = []
         force_streaming = (
-            bool(force_streaming_provider)
-            and custom_llm_provider == force_streaming_provider
-            and bool(force_streaming_api_base_substrings)
-            and any(substring in api_base for substring in force_streaming_api_base_substrings)
+            bool(self.force_streaming_provider)
+            and custom_llm_provider == self.force_streaming_provider
+            and bool(self.force_streaming_api_base_substrings)
+            and any(substring in api_base for substring in self.force_streaming_api_base_substrings)
         )
 
         # Some OpenAI-compatible endpoints can return an empty-string

From 9b9cb644d4dc9d487b053a3eff06c7c95b27b9b6 Mon Sep 17 00:00:00 2001
From: Taooo-habitus <ttan@habitus.dk>
Date: Wed, 22 Apr 2026 11:23:27 +0200
Subject: [PATCH 13/13] :bug: Fix: Tighten forced streaming config
 normalization

---
 pr_agent/algo/ai_handlers/litellm_ai_handler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index 0b660d6c7d..e2992f313c 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -162,7 +162,7 @@ def __init__(self):
             self.force_streaming_api_base_substrings = [
                 str(value).strip().lower()
                 for value in raw_force_streaming_api_base_substrings
-                if str(value).strip()
+                if value is not None and str(value).strip()
             ]
         else:
             if raw_force_streaming_api_base_substrings:
@@ -469,7 +469,7 @@ async def _get_completion(self, **kwargs):
         model = kwargs["model"]
         custom_llm_provider = str(kwargs.get("custom_llm_provider") or "").strip().lower()
         api_base_value = kwargs.get("api_base")
-        api_base = kwargs.get("api_base").strip().lower() if isinstance(api_base_value, str) else ""
+        api_base = api_base_value.strip().lower() if isinstance(api_base_value, str) else ""
         force_streaming = (
             bool(self.force_streaming_provider)
             and custom_llm_provider == self.force_streaming_provider