Replace Azure Inference URL with GitHub Models API URL (#236)

Copilot · pamelafox · web-flow · commit 6fae6389d81b · 2025-07-24T10:11:26.000-07:00
* Initial plan

* Replace Azure Inference URL with GitHub Models API URL and update model names

Co-authored-by: pamelafox <297042+pamelafox@users.noreply.github.com>

* Remove GITHUB_BASE_URL env var and hard-code GitHub Models URL

Co-authored-by: pamelafox <297042+pamelafox@users.noreply.github.com>

* Format test file with ruff to fix CI formatting check

Co-authored-by: pamelafox <297042+pamelafox@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pamelafox <297042+pamelafox@users.noreply.github.com>
diff --git a/.env.sample b/.env.sample
@@ -37,8 +37,7 @@ OLLAMA_EMBED_MODEL=nomic-embed-text
 OLLAMA_EMBEDDING_COLUMN=embedding_nomic
 # Needed for GitHub Models:
 GITHUB_TOKEN=YOUR-GITHUB-TOKEN
-GITHUB_BASE_URL=https://models.inference.ai.azure.com
-GITHUB_MODEL=gpt-4o
-GITHUB_EMBED_MODEL=text-embedding-3-large
+GITHUB_MODEL=openai/gpt-4o
+GITHUB_EMBED_MODEL=openai/text-embedding-3-large
 GITHUB_EMBED_DIMENSIONS=1024
 GITHUB_EMBEDDING_COLUMN=embedding_3l
diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py
@@ -53,7 +53,7 @@ async def common_parameters():
         embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic"
     elif OPENAI_EMBED_HOST == "github":
         openai_embed_deployment = None
-        openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large"
+        openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "openai/text-embedding-3-large"
         openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024))
         embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l"
     else:
@@ -70,7 +70,7 @@ async def common_parameters():
         openai_embed_model = os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text"
     elif OPENAI_CHAT_HOST == "github":
         openai_chat_deployment = None
-        openai_chat_model = os.getenv("GITHUB_MODEL") or "gpt-4o"
+        openai_chat_model = os.getenv("GITHUB_MODEL") or "openai/gpt-4o"
     else:
         openai_chat_deployment = None
         openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo"
diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py
@@ -54,11 +54,10 @@ async def create_openai_chat_client(
         )
     elif OPENAI_CHAT_HOST == "github":
         logger.info("Setting up OpenAI client for chat completions using GitHub Models")
-        github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com")
-        github_model = os.getenv("GITHUB_MODEL", "gpt-4o")
-        logger.info(f"Using GitHub Models with base URL: {github_base_url}, model: {github_model}")
+        github_model = os.getenv("GITHUB_MODEL", "openai/gpt-4o")
+        logger.info(f"Using GitHub Models with model: {github_model}")
         openai_chat_client = openai.AsyncOpenAI(
-            base_url=github_base_url,
+            base_url="https://models.github.ai/inference",
             api_key=os.getenv("GITHUB_TOKEN"),
         )
     else:
@@ -114,11 +113,10 @@ async def create_openai_embed_client(
         )
     elif OPENAI_EMBED_HOST == "github":
         logger.info("Setting up OpenAI client for embeddings using GitHub Models")
-        github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com")
-        github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-small")
-        logger.info(f"Using GitHub Models with base URL: {github_base_url}, embedding model: {github_embed_model}")
+        github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "openai/text-embedding-3-small")
+        logger.info(f"Using GitHub Models with embedding model: {github_embed_model}")
         openai_embed_client = openai.AsyncOpenAI(
-            base_url=github_base_url,
+            base_url="https://models.github.ai/inference",
             api_key=os.getenv("GITHUB_TOKEN"),
         )
     else:
diff --git a/tests/test_openai_clients.py b/tests/test_openai_clients.py
@@ -1,5 +1,6 @@
 import pytest
 
+from fastapi_app.dependencies import common_parameters
 from fastapi_app.openai_clients import create_openai_chat_client, create_openai_embed_client
 from tests.data import test_data
 
@@ -22,3 +23,44 @@ async def test_create_openai_chat_client(mock_azure_credential, mock_openai_chat
         model="gpt-4o-mini", messages=[{"content": "test", "role": "user"}]
     )
     assert response.choices[0].message.content == "The capital of France is Paris. [Benefit_Options-2.pdf]."
+
+
+@pytest.mark.asyncio
+async def test_github_models_configuration(monkeypatch):
+    """Test that GitHub Models uses the correct URLs and model names."""
+    # Set up environment for GitHub Models
+    monkeypatch.setenv("OPENAI_CHAT_HOST", "github")
+    monkeypatch.setenv("OPENAI_EMBED_HOST", "github")
+    monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
+    # Don't set GITHUB_MODEL to test defaults
+
+    # Test chat client configuration
+    chat_client = await create_openai_chat_client(None)
+    assert str(chat_client.base_url).rstrip("/") == "https://models.github.ai/inference"
+    assert chat_client.api_key == "fake-token"
+
+    # Test embed client configuration
+    embed_client = await create_openai_embed_client(None)
+    assert str(embed_client.base_url).rstrip("/") == "https://models.github.ai/inference"
+    assert embed_client.api_key == "fake-token"
+
+    # Test that dependencies use correct defaults
+    context = await common_parameters()
+    assert context.openai_chat_model == "openai/gpt-4o"
+    assert context.openai_embed_model == "openai/text-embedding-3-large"
+
+
+@pytest.mark.asyncio
+async def test_github_models_with_custom_values(monkeypatch):
+    """Test that GitHub Models respects custom environment values."""
+    # Set up environment for GitHub Models with custom values
+    monkeypatch.setenv("OPENAI_CHAT_HOST", "github")
+    monkeypatch.setenv("OPENAI_EMBED_HOST", "github")
+    monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
+    monkeypatch.setenv("GITHUB_MODEL", "openai/gpt-4")
+    monkeypatch.setenv("GITHUB_EMBED_MODEL", "openai/text-embedding-ada-002")
+
+    # Test that dependencies use custom values
+    context = await common_parameters()
+    assert context.openai_chat_model == "openai/gpt-4"
+    assert context.openai_embed_model == "openai/text-embedding-ada-002"