Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,10 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
try:
resp, finish_reason = None, None
deployment_id = self.deployment_id
# Capture the original model string so an explicit provider prefix in the
# user config (e.g. "azure/gpt-5...") can be preserved when the GPT-5 branch
# rebuilds the routed model name below.
user_model = model
if self.azure:
model = 'azure/' + model
if 'claude' in model and not system:
Expand All @@ -431,7 +435,15 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
{"type": "image_url", "image_url": {"url": img_path}}]

thinking_kwargs_gpt5 = None
if model.startswith('gpt-5'):
# Detect GPT-5 family regardless of provider prefix(es) on the model name.
# Users sometimes put a provider prefix in config (e.g. "openai/gpt-5.1-codex-max"),
# and Azure mode auto-prepends "azure/", which together can produce stacked prefixes
# like "azure/openai/gpt-5...". Without normalization the GPT-5 path is skipped and
# litellm rejects the request with UnsupportedParamsError for temperature=0.2.
model_base = model
while model_base.startswith(('openai/', 'azure/')):
model_base = model_base.removeprefix('openai/').removeprefix('azure/')
if model_base.startswith('gpt-5'):
# Use configured reasoning_effort or default to MEDIUM
config_effort = get_settings().config.reasoning_effort
try:
Expand All @@ -450,7 +462,18 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
"allowed_openai_params": ["reasoning_effort"],
}
get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model")
model = 'openai/'+model.replace('_thinking', '') # remove _thinking suffix
# Routing priority: Azure mode > explicit provider prefix in user config > openai/
# default. This preserves an explicit "azure/" the user wrote in config even when
# self.azure is false, and avoids stacking when self.azure already added "azure/".
if self.azure:
provider_prefix = 'azure/'
elif user_model.startswith('azure/'):
provider_prefix = 'azure/'
elif user_model.startswith('openai/'):
provider_prefix = 'openai/'
else:
provider_prefix = 'openai/'
model = provider_prefix + model_base.replace('_thinking', '') # remove _thinking suffix


# Currently, some models do not support a separate system and user prompts
Expand Down
144 changes: 144 additions & 0 deletions tests/unittest/test_litellm_reasoning_effort.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,147 @@ async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger):
# Should have reasoning_effort
call_kwargs = mock_completion.call_args[1]
assert call_kwargs["reasoning_effort"] == "medium"

# ========== Group 8: Provider Prefix Handling ==========

@pytest.mark.asyncio
async def test_gpt5_with_openai_prefix_triggers_reasoning_effort(self, monkeypatch, mock_logger):
"""Regression: model="openai/gpt-5*" must enter the GPT-5 reasoning_effort path.

Before the fix, startswith('gpt-5') was False for prefixed names, so the handler
sent temperature=0.2 to litellm and the request failed with UnsupportedParamsError
for gpt-5 codex models.
"""
fake_settings = create_mock_settings("medium")
monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
# Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
monkeypatch.delenv(_var, raising=False)

prefixed_models = [
"openai/gpt-5",
"openai/gpt-5.1-codex",
"openai/gpt-5.1-codex-max",
"openai/gpt-5.4-mini",
]

for model in prefixed_models:
with patch(
'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
new_callable=AsyncMock,
) as mock_completion:
mock_completion.return_value = create_mock_acompletion_response()

handler = LiteLLMAIHandler()
await handler.chat_completion(
model=model,
system="test system",
user="test user"
)

call_kwargs = mock_completion.call_args[1]
# GPT-5 path must trigger and drop temperature in favor of reasoning_effort
assert call_kwargs["reasoning_effort"] == "medium", f"failed for {model}"
assert "reasoning_effort" in call_kwargs["allowed_openai_params"], f"failed for {model}"
assert "temperature" not in call_kwargs, f"temperature leaked for {model}"
# Model name passed to litellm must keep the openai/ prefix exactly once
assert call_kwargs["model"] == model, f"model double-prefixed: {call_kwargs['model']}"

@pytest.mark.asyncio
async def test_gpt5_with_openai_prefix_strips_thinking_suffix(self, monkeypatch, mock_logger):
"""Prefixed _thinking models must have the suffix removed without double-prefixing."""
fake_settings = create_mock_settings("low")
monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
# Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
monkeypatch.delenv(_var, raising=False)

with patch(
'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
new_callable=AsyncMock,
) as mock_completion:
mock_completion.return_value = create_mock_acompletion_response()

handler = LiteLLMAIHandler()
await handler.chat_completion(
model="openai/gpt-5_thinking",
system="test system",
user="test user"
)

call_kwargs = mock_completion.call_args[1]
assert call_kwargs["model"] == "openai/gpt-5"
assert call_kwargs["reasoning_effort"] == "low"

@pytest.mark.asyncio
async def test_gpt5_with_explicit_azure_prefix_preserves_routing(self, monkeypatch, mock_logger):
"""Explicit `azure/` prefix in user config must be preserved (not silently rewritten to openai/)."""
fake_settings = create_mock_settings("medium")
monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
# Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
monkeypatch.delenv(_var, raising=False)

with patch(
'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
new_callable=AsyncMock,
) as mock_completion:
mock_completion.return_value = create_mock_acompletion_response()

handler = LiteLLMAIHandler()
# self.azure is False by default (no Azure AD creds in test env)
await handler.chat_completion(
model="azure/gpt-5.1-codex-max",
system="test system",
user="test user"
)

call_kwargs = mock_completion.call_args[1]
assert call_kwargs["reasoning_effort"] == "medium"
assert "temperature" not in call_kwargs
# Provider prefix from user config must be preserved verbatim
assert call_kwargs["model"] == "azure/gpt-5.1-codex-max"

@pytest.mark.asyncio
async def test_gpt5_in_azure_mode_does_not_stack_prefixes(self, monkeypatch, mock_logger):
"""Azure mode must produce exactly one `azure/` prefix, even if user config also has a prefix."""
fake_settings = create_mock_settings("medium")
monkeypatch.setattr(litellm_handler, "get_settings", lambda: fake_settings)
# Isolate from runner env: LiteLLMAIHandler.__init__ branches on these vars.
for _var in ("AWS_USE_IMDS", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY",
"AWS_SESSION_TOKEN", "AWS_REGION_NAME", "OPENAI_API_KEY"):
monkeypatch.delenv(_var, raising=False)

# Cases: bare name, openai/-prefixed config, azure/-prefixed config — all in azure mode
cases = [
("gpt-5.1-codex", "azure/gpt-5.1-codex"),
("openai/gpt-5.1-codex", "azure/gpt-5.1-codex"),
("azure/gpt-5.1-codex", "azure/gpt-5.1-codex"),
]

for input_model, expected in cases:
with patch(
'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion',
new_callable=AsyncMock,
) as mock_completion:
mock_completion.return_value = create_mock_acompletion_response()

handler = LiteLLMAIHandler()
handler.azure = True # simulate Azure-mode handler
await handler.chat_completion(
model=input_model,
system="test system",
user="test user"
)

call_kwargs = mock_completion.call_args[1]
# GPT-5 path must trigger
assert call_kwargs["reasoning_effort"] == "medium", f"failed for {input_model}"
assert "temperature" not in call_kwargs, f"temperature leaked for {input_model}"
# Exactly one azure/ prefix, no stacked/duplicated provider segments
assert call_kwargs["model"] == expected, (
f"wrong routing for {input_model}: got {call_kwargs['model']}, expected {expected}"
)
Loading