Fix ollama support for Kodu when muxing (#1022)
Muxing was failing for two reasons:

1. Sometimes the ollama provider returns chunks in OpenAI format. Previously we
   assumed that everything returned from the ollama provider was in ollama format.
2. The OpenAI format returned from the ollama provider had an invalid
   `created` field (see the sketch below).
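For context on point 2: OpenAI-style streaming chunks carry `created` as an integer Unix timestamp, while Ollama's `created_at` is an ISO 8601 string, and the handler code removed below had passed that value through unchanged. A minimal sketch of the kind of conversion required, assuming a typical Ollama timestamp; the helper is illustrative, not the actual OLlamaToModel code:

```python
from datetime import datetime, timezone


def created_from_ollama_timestamp(created_at: str) -> int:
    """Illustrative helper: map Ollama's ISO 8601 `created_at` (often with
    nanosecond precision and a trailing "Z") to the integer Unix timestamp
    that an OpenAI-style chunk expects in `created`."""
    ts = created_at.rstrip("Z")
    if "." in ts:
        head, frac = ts.split(".", 1)
        ts = f"{head}.{frac[:6].ljust(6, '0')}"  # fromisoformat handles at most microseconds
    return int(datetime.fromisoformat(ts).replace(tzinfo=timezone.utc).timestamp())


# created_from_ollama_timestamp("2025-02-12T18:00:00.123456789Z") -> 1739383200
```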
aponcedeleonch authored Feb 12, 2025
1 parent 58512fd commit e6600f6
Showing 2 changed files with 9 additions and 23 deletions.
7 changes: 6 additions & 1 deletion src/codegate/muxing/adapter.py
@@ -158,7 +158,12 @@ def _format_ollama(self, chunk: str) -> str:
            ollama_chunk = ChatResponse(**chunk_dict)
            open_ai_chunk = OLlamaToModel.normalize_chat_chunk(ollama_chunk)
            return open_ai_chunk.model_dump_json(exclude_none=True, exclude_unset=True)
        except Exception:
        except Exception as e:
            # Sometimes we receive an OpenAI formatted chunk from ollama. Specifically when
            # talking to Cline or Kodu. If that's the case we use the format_openai function.
            if "data:" in chunk:
                return self._format_openai(chunk)
            logger.warning(f"Error formatting Ollama chunk: {chunk}. Error: {e}")
            return chunk

    def _format_antropic(self, chunk: str) -> str:
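The `"data:" in chunk` check above works because OpenAI-compatible providers stream server-sent events, where every payload line is prefixed with `data: `, while Ollama's native stream is one bare JSON object per line. Roughly, the two shapes the formatter has to tell apart look like this; the payload values are illustrative:

```python
# Ollama-native chunk: a bare JSON object, parseable into ollama's ChatResponse
ollama_chunk = (
    '{"model": "llama3", "created_at": "2025-02-12T18:00:00Z", '
    '"message": {"role": "assistant", "content": "Hi"}, "done": false}'
)

# OpenAI-style chunk: an SSE line, which is what the "data:" fallback detects
openai_chunk = (
    'data: {"id": "chatcmpl-123", "object": "chat.completion.chunk", '
    '"created": 1739383200, "model": "llama3", '
    '"choices": [{"index": 0, "delta": {"content": "Hi"}, "finish_reason": null}]}'
)
```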
25 changes: 3 additions & 22 deletions src/codegate/providers/ollama/completion_handler.py
@@ -8,6 +8,7 @@

from codegate.clients.clients import ClientType
from codegate.providers.base import BaseCompletionHandler
from codegate.providers.ollama.adapter import OLlamaToModel

logger = structlog.get_logger("codegate")

@@ -24,29 +25,9 @@ async def ollama_stream_generator( # noqa: C901
                # the correct format and start to handle multiple clients
                # in a more robust way.
                if client_type in [ClientType.CLINE, ClientType.KODU]:
                    # First get the raw dict from the chunk
                    chunk_dict = chunk.model_dump()
                    # Create response dictionary in OpenAI-like format
                    response = {
                        "id": f"chatcmpl-{chunk_dict.get('created_at', '')}",
                        "object": "chat.completion.chunk",
                        "created": chunk_dict.get("created_at"),
                        "model": chunk_dict.get("model"),
                        "choices": [
                            {
                                "index": 0,
                                "delta": {
                                    "content": chunk_dict.get("message", {}).get("content", ""),
                                    "role": chunk_dict.get("message", {}).get("role", "assistant"),
                                },
                                "finish_reason": (
                                    chunk_dict.get("done_reason")
                                    if chunk_dict.get("done", False)
                                    else None
                                ),
                            }
                        ],
                    }
                    model_response = OLlamaToModel.normalize_chat_chunk(chunk)
                    response = model_response.model_dump()
                    # Preserve existing type or add default if missing
                    response["type"] = chunk_dict.get("type", "stream")

