From 302867e5553a6fce7fd118776628a150cd574d9f Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:00:51 +0100
Subject: [PATCH 1/6] allowing prefilling for deepseek models

---
 safetytooling/apis/inference/openai/chat.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index a87557c..320c50a 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -117,8 +117,14 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
             )
         else:
             api_func = self.aclient.chat.completions.create
+        if model_id in {"deepseek-chat", "deepseek-reasoner"}:
+            if prompt.is_last_message_assistant():
+                self.aclient.base_url = "https://api.deepseek.com/beta"
+            messages = prompt.deepseek_format()
+        else:
+            messages = prompt.openai_format()
         api_response: openai.types.chat.ChatCompletion = await api_func(
-            messages=prompt.openai_format(),
+            messages=messages,
             model=model_id,
             **kwargs,
         )

From e9843f78a937edd54b2b3a40357b84ad566f421d Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:01:09 +0100
Subject: [PATCH 2/6] OAI model return reasoning content

---
 safetytooling/apis/inference/openai/batch_api.py | 1 +
 safetytooling/apis/inference/openai/chat.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/safetytooling/apis/inference/openai/batch_api.py b/safetytooling/apis/inference/openai/batch_api.py
index 658e191..4386663 100644
--- a/safetytooling/apis/inference/openai/batch_api.py
+++ b/safetytooling/apis/inference/openai/batch_api.py
@@ -123,6 +123,7 @@ async def __call__(
                     api_duration=None,
                     cost=0,
                     batch_custom_id=result["custom_id"],
+                    reasoning_content=choice["message"].get("reasoning_content", None),
                 )

         responses = []

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index 320c50a..cc4fedd 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -166,6 +166,7 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
                     duration=duration,
                     cost=context_cost + count_tokens(choice.message.content, model_id) * completion_token_cost,
                     logprobs=(self.convert_top_logprobs(choice.logprobs) if choice.logprobs is not None else None),
+                    reasoning_content=choice.message.reasoning_content,
                 )
             )
         self.add_response_to_prompt_file(prompt_file, responses)

From 0b6541aa44484de0b87bfd42bc2a320e0bf74d9f Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:01:23 +0100
Subject: [PATCH 3/6] developer role for o1-like models

---
 safetytooling/data_models/messages.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/safetytooling/data_models/messages.py b/safetytooling/data_models/messages.py
index 655f898..9569aa8 100644
--- a/safetytooling/data_models/messages.py
+++ b/safetytooling/data_models/messages.py
@@ -26,6 +26,7 @@
 PRINT_COLORS = {
     "user": "cyan",
     "system": "magenta",
+    "developer": "magenta",
     "assistant": "light_green",
     "audio": "yellow",
     "image": "yellow",
@@ -36,6 +37,7 @@
 class MessageRole(str, Enum):
     user = "user"
     system = "system"
+    developer = "developer"  # A new system message for OpenAI o1 models
     assistant = "assistant"
     audio = "audio"
     image = "image"

From 7d80a38937b2508391dc70d0cfb07b3a3ef1a71d Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:11:04 +0100
Subject: [PATCH 4/6] gracefully handle cases when there is no reasoning
 content

---
 safetytooling/apis/inference/openai/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index cc4fedd..190210e 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -166,7 +166,7 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
                     duration=duration,
                     cost=context_cost + count_tokens(choice.message.content, model_id) * completion_token_cost,
                     logprobs=(self.convert_top_logprobs(choice.logprobs) if choice.logprobs is not None else None),
-                    reasoning_content=choice.message.reasoning_content,
+                    reasoning_content=getattr(choice.message, "reasoning_content", None),
                 )
             )
         self.add_response_to_prompt_file(prompt_file, responses)

From 71fe0d461cffd3e9a15c8f495a2a73e85d67e1b2 Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 11:13:58 +0100
Subject: [PATCH 5/6] handle non-prefilled prompt, and revert back to the
 original base url when successful

---
 safetytooling/apis/inference/openai/chat.py | 34 ++++++++++++++-------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index 190210e..cac3a87 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -117,17 +117,29 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
             )
         else:
             api_func = self.aclient.chat.completions.create
-        if model_id in {"deepseek-chat", "deepseek-reasoner"}:
-            if prompt.is_last_message_assistant():
-                self.aclient.base_url = "https://api.deepseek.com/beta"
-            messages = prompt.deepseek_format()
-        else:
-            messages = prompt.openai_format()
-        api_response: openai.types.chat.ChatCompletion = await api_func(
-            messages=messages,
-            model=model_id,
-            **kwargs,
-        )
+
+        original_base_url = self.aclient.base_url
+        try:
+            if model_id in {"deepseek-chat", "deepseek-reasoner"}:
+                if prompt.is_last_message_assistant():
+                    # Use the beta endpoint for assistant prefilled prompts with DeepSeek
+                    self.aclient.base_url = "https://api.deepseek.com/beta"
+                else:
+                    # Use the standard v1 endpoint otherwise
+                    self.aclient.base_url = "https://api.deepseek.com/v1"
+                messages = prompt.deepseek_format()
+            else:
+                messages = prompt.openai_format()
+
+            api_response: openai.types.chat.ChatCompletion = await api_func(
+                messages=messages,
+                model=model_id,
+                **kwargs,
+            )
+        finally:
+            # Always revert the base_url after the call
+            self.aclient.base_url = original_base_url
+
         if hasattr(api_response, "error") and (
             "Rate limit exceeded" in api_response.error["message"] or api_response.error["code"] == 429
         ):  # OpenRouter routes through the error messages from the different providers, so we catch them here

From cd3190ff0bd868a0a5c929d51fca31129b6740ed Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Tue, 6 May 2025 23:21:48 +0100
Subject: [PATCH 6/6] remove outdated comment

---
 safetytooling/apis/inference/anthropic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/safetytooling/apis/inference/anthropic.py b/safetytooling/apis/inference/anthropic.py
index 73154f2..db33d62 100644
--- a/safetytooling/apis/inference/anthropic.py
+++ b/safetytooling/apis/inference/anthropic.py
@@ -305,7 +305,7 @@ async def __call__(

         # Safely extract text and thinking content
         text_content = None
-        reasoning_content = None  # We can extract this even if not used by LLMResponse yet
+        reasoning_content = None
         if content:
             for block in content:
                 if block.type == "text" and hasattr(block, "text"):
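
For context, a minimal usage sketch of the behaviour this series adds. `Prompt` and `MessageRole` come from safetytooling.data_models (patch 3 touches them); the `InferenceAPI` entry point, the `ChatMessage` constructor, and the call signature below are assumptions for illustration, not confirmed by these patches.

    import asyncio

    from safetytooling.apis import InferenceAPI  # assumed import path
    from safetytooling.data_models import ChatMessage, MessageRole, Prompt

    async def main():
        api = InferenceAPI()
        # A trailing assistant message makes prompt.is_last_message_assistant()
        # return True, which routes deepseek-chat / deepseek-reasoner calls
        # through the https://api.deepseek.com/beta prefill endpoint
        # (patches 1 and 5).
        prompt = Prompt(
            messages=[
                ChatMessage(role=MessageRole.user, content="Name three primes."),
                ChatMessage(role=MessageRole.assistant, content="Sure: 2,"),
            ]
        )
        responses = await api(model_id="deepseek-reasoner", prompt=prompt)
        # Patches 2 and 4 surface reasoning_content on the response when the
        # API returns it, falling back to None otherwise.
        print(responses[0].reasoning_content)

    asyncio.run(main())

Note that setting self.aclient.base_url mutates shared client state, which is why patch 5 wraps the call in try/finally to restore the original URL even when the request fails.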