diff --git a/wren-ai-service/src/pipelines/generation/sql_diagnosis.py b/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
index 3f22b9d512..60f3285d29 100644
--- a/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
+++ b/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
@@ -91,8 +91,11 @@ async def generate_sql_diagnosis(
 @observe(capture_input=False)
 async def post_process(
     generate_sql_diagnosis: dict,
-) -> str:
-    return orjson.loads(generate_sql_diagnosis.get("replies")[0])
+) -> dict:
+    reply = generate_sql_diagnosis.get("replies", [""])[0]
+    if not reply or not reply.strip():
+        return {"reasoning": "LLM did not return any response."}
+    return orjson.loads(reply)
 
 
 ## End of Pipeline
diff --git a/wren-ai-service/src/providers/llm/litellm.py b/wren-ai-service/src/providers/llm/litellm.py
index e94918b8c5..539d34440a 100644
--- a/wren-ai-service/src/providers/llm/litellm.py
+++ b/wren-ai-service/src/providers/llm/litellm.py
@@ -101,8 +101,16 @@ async def _run(
     generation_kwargs = {
         **combined_generation_kwargs,
-        **(generation_kwargs or {}),
+        **(generation_kwargs or {}),
     }
+    # Strip response_format with type=json_schema, which only OpenAI-native
+    # models support. Models served from a custom api_base (e.g. Ollama, or a
+    # LiteLLM proxy with non-OpenAI backends) return empty responses when it
+    # is forwarded. System prompts already include explicit JSON instructions.
+    if self._api_base and isinstance(
+        generation_kwargs.get("response_format"), dict
+    ) and generation_kwargs["response_format"].get("type") == "json_schema":
+        generation_kwargs.pop("response_format")
 
     allowed_openai_params = generation_kwargs.get(
         "allowed_openai_params", []