From 2c12923d7f4b268565f19899a9db7f0fc6b09ad4 Mon Sep 17 00:00:00 2001
From: enyst
Date: Fri, 31 Oct 2025 18:02:56 +0000
Subject: [PATCH 1/4] tests: ensure all LLM attributes are forwarded to litellm
 (detects missing custom_llm_provider)

Adds a unit test that patches litellm.completion and asserts that both
sampling options (temperature/top_p/top_k, max tokens) and
transport-level options (base_url, api_version, timeout, drop_params,
seed, custom_llm_provider) are forwarded from LLM to LiteLLM.

The test currently fails on main due to custom_llm_provider not being
passed through.

Co-authored-by: openhands
---
 tests/sdk/llm/test_llm_attr_forwarding.py | 71 +++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 tests/sdk/llm/test_llm_attr_forwarding.py

diff --git a/tests/sdk/llm/test_llm_attr_forwarding.py b/tests/sdk/llm/test_llm_attr_forwarding.py
new file mode 100644
index 0000000000..64eabc3718
--- /dev/null
+++ b/tests/sdk/llm/test_llm_attr_forwarding.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from pydantic import SecretStr
+
+from openhands.sdk.llm import LLM, Message, TextContent
+from tests.conftest import create_mock_litellm_response
+
+
+@patch("openhands.sdk.llm.llm.litellm_completion")
+def test_all_config_attrs_are_forwarded_to_litellm_completion(mock_completion):
+    """Verify LLM forwards key attributes to litellm.completion.
+
+    This covers both sampling options managed by select_chat_options
+    (temperature/top_p/top_k, max tokens) and transport-level options
+    not handled there (base_url, api_version, timeout, drop_params,
+    seed, custom_llm_provider). If any are not forwarded, the test fails.
+    """
+    mock_completion.return_value = create_mock_litellm_response("ok")
+
+    llm = LLM(
+        model="gpt-4o",  # non-reasoning path keeps sampling params present
+        api_key=SecretStr("sk-test-123"),
+        base_url="https://example.com/v1",
+        api_version="2024-01-01",
+        custom_llm_provider="my-provider",
+        timeout=42,
+        drop_params=False,
+        seed=1234,
+        temperature=0.5,
+        top_p=0.8,
+        top_k=5,
+        max_output_tokens=99,
+        usage_id="forwarding-test",
+    )
+
+    _ = llm.completion(
+        messages=[Message(role="user", content=[TextContent(text="hi")])]
+    )
+
+    # Collect the actual kwargs forwarded to litellm.completion
+    assert mock_completion.called, "Expected litellm.completion to be called"
+    called_kwargs = mock_completion.call_args.kwargs
+
+    # Expectations: direct passthrough (not via select_chat_options)
+    assert called_kwargs.get("model") == "gpt-4o"
+    assert called_kwargs.get("api_key") == "sk-test-123"
+    assert called_kwargs.get("base_url") == "https://example.com/v1"
+    assert called_kwargs.get("api_version") == "2024-01-01"
+    # Known gap on main: ensure we detect missing provider forwarding
+    assert called_kwargs.get("custom_llm_provider") == "my-provider", (
+        "custom_llm_provider was not forwarded"
+    )
+    assert called_kwargs.get("timeout") == 42
+    assert called_kwargs.get("drop_params") is False
+    assert called_kwargs.get("seed") == 1234
+
+    # Expectations: values that flow through select_chat_options
+    # - top_k/top_p/temperature should be present as-is for non-reasoning models
+    assert called_kwargs.get("top_k") == 5, "Expected top_k to be forwarded"
+    assert called_kwargs.get("top_p") == 0.8, "Expected top_p to be forwarded"
+    assert called_kwargs.get("temperature") == 0.5, (
+        "Expected temperature to be forwarded"
+    )
+
+    # - max_output_tokens is normalized to `max_completion_tokens` for OpenAI chat
+    #   (Azure uses `max_tokens`, but this test uses a non-Azure model id)
+    assert called_kwargs.get("max_completion_tokens") == 99, (
+        "Expected max_output_tokens -> max_completion_tokens forwarding"
+    )

From 5ac801ca6b7cc887569c4ce62d6a2201adef363d Mon Sep 17 00:00:00 2001
From: enyst
Date: Fri, 31 Oct 2025 18:20:04 +0000
Subject: [PATCH 2/4] tests: add Responses API attribute-forwarding test;
 assert known gap last

Adds tests/sdk/llm/test_llm_responses_attr_forwarding.py, which patches
litellm.responses and verifies forwarding of transport-level params and
responses options. The check for custom_llm_provider (known missing) is
placed at the end so earlier regressions are visible first.

Co-authored-by: openhands
---
 .../llm/test_llm_responses_attr_forwarding.py | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 tests/sdk/llm/test_llm_responses_attr_forwarding.py

diff --git a/tests/sdk/llm/test_llm_responses_attr_forwarding.py b/tests/sdk/llm/test_llm_responses_attr_forwarding.py
new file mode 100644
index 0000000000..624b3159ba
--- /dev/null
+++ b/tests/sdk/llm/test_llm_responses_attr_forwarding.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from unittest.mock import patch
+
+from pydantic import SecretStr
+
+from openhands.sdk.llm import LLM, Message, TextContent
+
+
+@patch("openhands.sdk.llm.llm.litellm_responses")
+def test_responses_api_forwards_all_relevant_attrs(mock_responses):
+    """Ensure LLM.responses forwards key attributes to LiteLLM Responses API.
+
+    This covers both:
+    - transport-level options not handled by select_responses_options
+      (model, api_key, api_base/base_url, api_version, timeout, drop_params, seed)
+    - responses options normalized by select_responses_options
+      (temperature, tool_choice, include, store, reasoning, max_output_tokens)
+
+    The known gap custom_llm_provider is asserted at the end to avoid masking
+    any other potential failures.
+    """
+
+    # Minimal valid fake response object expected by code path is not needed because
+    # we do not reach parsing if assertions fail; return a simple dict-like with
+    # fields that won't be accessed by this test. A plain object is fine as long
+    # as the call is made.
+    class DummyResponses:
+        pass
+
+    mock_responses.return_value = DummyResponses()
+
+    llm = LLM(
+        model="gpt-4o",  # non-reasoning path; Responses still enforces temperature=1.0
+        api_key=SecretStr("sk-test-456"),
+        base_url="https://example.com/v1",
+        api_version="2024-01-01",
+        custom_llm_provider="my-provider",
+        timeout=7,
+        drop_params=False,
+        seed=4321,
+        max_output_tokens=123,
+        enable_encrypted_reasoning=True,  # ensures include carries encrypted content
+        usage_id="responses-forwarding",
+    )
+
+    _ = llm.responses(messages=[Message(role="user", content=[TextContent(text="hi")])])
+
+    assert mock_responses.called, "Expected litellm.responses to be called"
+    called_kwargs = mock_responses.call_args.kwargs
+
+    # Transport-level passthrough
+    assert called_kwargs.get("model") == "gpt-4o"
+    assert called_kwargs.get("api_key") == "sk-test-456"
+    # responses() uses api_base for base URL
+    assert called_kwargs.get("api_base") == "https://example.com/v1"
+    assert called_kwargs.get("api_version") == "2024-01-01"
+    assert called_kwargs.get("timeout") == 7
+    assert called_kwargs.get("drop_params") is False
+    assert called_kwargs.get("seed") == 4321
+
+    # Responses path options
+    assert called_kwargs.get("temperature") == 1.0
+    assert called_kwargs.get("tool_choice") == "auto"
+    assert called_kwargs.get("input") is not None
+    assert called_kwargs.get("instructions") is not None
+    # store defaults to False unless provided
+    assert called_kwargs.get("store") is False
+    # include should contain encrypted reasoning content when store=False
+    include = called_kwargs.get("include") or []
+    assert "reasoning.encrypted_content" in include
+    # reasoning payload present with effort derived from llm
+    reasoning = called_kwargs.get("reasoning") or {}
+    assert reasoning.get("effort") in {"low", "medium", "high", "none"}
+    assert "summary" in reasoning
+
+    # max_output_tokens should be forwarded directly on Responses path
+    assert called_kwargs.get("max_output_tokens") == 123
+
+    # Known bug: custom_llm_provider not forwarded; check this last so it doesn't
+    # hide earlier failures
+    assert called_kwargs.get("custom_llm_provider") == "my-provider", (
+        "custom_llm_provider was not forwarded in Responses API call"
+    )

From a950799663e7d854f7ae68d11d955bae14ae6516 Mon Sep 17 00:00:00 2001
From: enyst
Date: Fri, 31 Oct 2025 18:31:56 +0000
Subject: [PATCH 3/4] tests: move custom_llm_provider assertion to the end of
 completions test; make Responses API patch return minimal typed response

This reorders the known failing assertion in the chat-completions test
and fixes the Responses test to return a minimal ResponsesAPIResponse
so other kwargs assertions can execute.
Co-authored-by: openhands
---
 tests/sdk/llm/test_llm_attr_forwarding.py     | 10 ++++++----
 .../llm/test_llm_responses_attr_forwarding.py | 17 +++++++++--------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/tests/sdk/llm/test_llm_attr_forwarding.py b/tests/sdk/llm/test_llm_attr_forwarding.py
index 64eabc3718..2dea79457c 100644
--- a/tests/sdk/llm/test_llm_attr_forwarding.py
+++ b/tests/sdk/llm/test_llm_attr_forwarding.py
@@ -48,10 +48,6 @@ def test_all_config_attrs_are_forwarded_to_litellm_completion(mock_completion):
     assert called_kwargs.get("api_key") == "sk-test-123"
     assert called_kwargs.get("base_url") == "https://example.com/v1"
     assert called_kwargs.get("api_version") == "2024-01-01"
-    # Known gap on main: ensure we detect missing provider forwarding
-    assert called_kwargs.get("custom_llm_provider") == "my-provider", (
-        "custom_llm_provider was not forwarded"
-    )
     assert called_kwargs.get("timeout") == 42
     assert called_kwargs.get("drop_params") is False
     assert called_kwargs.get("seed") == 1234
@@ -69,3 +65,9 @@ def test_all_config_attrs_are_forwarded_to_litellm_completion(mock_completion):
     assert called_kwargs.get("max_completion_tokens") == 99, (
         "Expected max_output_tokens -> max_completion_tokens forwarding"
     )
+
+    # Known bug: custom_llm_provider not forwarded; check this last so it doesn't
+    # hide earlier failures
+    assert called_kwargs.get("custom_llm_provider") == "my-provider", (
+        "custom_llm_provider was not forwarded"
+    )
diff --git a/tests/sdk/llm/test_llm_responses_attr_forwarding.py b/tests/sdk/llm/test_llm_responses_attr_forwarding.py
index 624b3159ba..2a7a073184 100644
--- a/tests/sdk/llm/test_llm_responses_attr_forwarding.py
+++ b/tests/sdk/llm/test_llm_responses_attr_forwarding.py
@@ -21,14 +21,15 @@ def test_responses_api_forwards_all_relevant_attrs(mock_responses):
     any other potential failures.
     """
 
-    # Minimal valid fake response object expected by code path is not needed because
-    # we do not reach parsing if assertions fail; return a simple dict-like with
-    # fields that won't be accessed by this test. A plain object is fine as long
-    # as the call is made.
-    class DummyResponses:
-        pass
-
-    mock_responses.return_value = DummyResponses()
+    # Return a minimal valid ResponsesAPIResponse to let the call path complete
+    # and avoid exceptions before our assertions.
+    from litellm.types.llms.openai import ResponsesAPIResponse
+
+    mock_responses.return_value = ResponsesAPIResponse(
+        id="resp_test_1",
+        created_at=1234567890,
+        output=[],
+    )
 
     llm = LLM(
         model="gpt-4o",  # non-reasoning path; Responses still enforces temperature=1.0

From 99cea8650fd8bbf6865d1fd0d1f84aaa138c325c Mon Sep 17 00:00:00 2001
From: enyst
Date: Fri, 31 Oct 2025 18:50:18 +0000
Subject: [PATCH 4/4] tests(responses): choose model that uses Responses API
 per model_features; keep custom_llm_provider assertion last

- Use gpt-5-mini to ensure uses_responses_api=True
- Include a system message so instructions is non-empty
- Leave the known failing custom_llm_provider assertion last

Co-authored-by: openhands
---
 tests/sdk/llm/test_llm_responses_attr_forwarding.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tests/sdk/llm/test_llm_responses_attr_forwarding.py b/tests/sdk/llm/test_llm_responses_attr_forwarding.py
index 2a7a073184..ecaeff6f67 100644
--- a/tests/sdk/llm/test_llm_responses_attr_forwarding.py
+++ b/tests/sdk/llm/test_llm_responses_attr_forwarding.py
@@ -32,7 +32,9 @@ def test_responses_api_forwards_all_relevant_attrs(mock_responses):
     )
 
     llm = LLM(
-        model="gpt-4o",  # non-reasoning path; Responses still enforces temperature=1.0
+        # Choose a model that triggers the Responses API path per model_features
+        # See RESPONSES_API_PATTERNS in model_features.py (e.g., 'gpt-5*')
+        model="gpt-5-mini",
         api_key=SecretStr("sk-test-456"),
         base_url="https://example.com/v1",
         api_version="2024-01-01",
@@ -45,13 +47,18 @@
         usage_id="responses-forwarding",
     )
 
-    _ = llm.responses(messages=[Message(role="user", content=[TextContent(text="hi")])])
+    _ = llm.responses(
+        messages=[
+            Message(role="system", content=[TextContent(text="You are helpful")]),
+            Message(role="user", content=[TextContent(text="hi")]),
+        ]
+    )
 
     assert mock_responses.called, "Expected litellm.responses to be called"
     called_kwargs = mock_responses.call_args.kwargs
 
     # Transport-level passthrough
-    assert called_kwargs.get("model") == "gpt-4o"
+    assert called_kwargs.get("model") == "gpt-5-mini"
     assert called_kwargs.get("api_key") == "sk-test-456"
     # responses() uses api_base for base URL
     assert called_kwargs.get("api_base") == "https://example.com/v1"