From 302867e5553a6fce7fd118776628a150cd574d9f Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:00:51 +0100
Subject: [PATCH 1/6] allowing prefilling for deepseek models

---
 safetytooling/apis/inference/openai/chat.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index a87557c..320c50a 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -117,8 +117,14 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
             )
         else:
             api_func = self.aclient.chat.completions.create
+        if model_id in {"deepseek-chat", "deepseek-reasoner"}:
+            if prompt.is_last_message_assistant():
+                self.aclient.base_url = "https://api.deepseek.com/beta"
+            messages = prompt.deepseek_format()
+        else:
+            messages = prompt.openai_format()
         api_response: openai.types.chat.ChatCompletion = await api_func(
-            messages=prompt.openai_format(),
+            messages=messages,
             model=model_id,
             **kwargs,
         )

From e9843f78a937edd54b2b3a40357b84ad566f421d Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:01:09 +0100
Subject: [PATCH 2/6] OAI model return reasoning content

---
 safetytooling/apis/inference/openai/batch_api.py | 1 +
 safetytooling/apis/inference/openai/chat.py      | 1 +
 2 files changed, 2 insertions(+)

diff --git a/safetytooling/apis/inference/openai/batch_api.py b/safetytooling/apis/inference/openai/batch_api.py
index 658e191..4386663 100644
--- a/safetytooling/apis/inference/openai/batch_api.py
+++ b/safetytooling/apis/inference/openai/batch_api.py
@@ -123,6 +123,7 @@ async def __call__(
                     api_duration=None,
                     cost=0,
                     batch_custom_id=result["custom_id"],
+                    reasoning_content=choice["message"].get("reasoning_content", None),
                 )

         responses = []

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index 320c50a..cc4fedd 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -166,6 +166,7 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
                     duration=duration,
                     cost=context_cost + count_tokens(choice.message.content, model_id) * completion_token_cost,
                     logprobs=(self.convert_top_logprobs(choice.logprobs) if choice.logprobs is not None else None),
+                    reasoning_content=choice.message.reasoning_content,
                 )
             )
         self.add_response_to_prompt_file(prompt_file, responses)

From 0b6541aa44484de0b87bfd42bc2a320e0bf74d9f Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:01:23 +0100
Subject: [PATCH 3/6] developer role for o1-like models

---
 safetytooling/data_models/messages.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/safetytooling/data_models/messages.py b/safetytooling/data_models/messages.py
index 655f898..9569aa8 100644
--- a/safetytooling/data_models/messages.py
+++ b/safetytooling/data_models/messages.py
@@ -26,6 +26,7 @@
 PRINT_COLORS = {
     "user": "cyan",
     "system": "magenta",
+    "developer": "magenta",
     "assistant": "light_green",
     "audio": "yellow",
     "image": "yellow",
@@ -36,6 +37,7 @@
 class MessageRole(str, Enum):
     user = "user"
     system = "system"
+    developer = "developer"  # A new system message for OpenAI o1 models
     assistant = "assistant"
     audio = "audio"
     image = "image"

From 7d80a38937b2508391dc70d0cfb07b3a3ef1a71d Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 00:11:04 +0100
Subject: [PATCH 4/6] gracefully handle cases when there is no reasoning
 content

---
 safetytooling/apis/inference/openai/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index cc4fedd..190210e 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -166,7 +166,7 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
                     duration=duration,
                     cost=context_cost + count_tokens(choice.message.content, model_id) * completion_token_cost,
                     logprobs=(self.convert_top_logprobs(choice.logprobs) if choice.logprobs is not None else None),
-                    reasoning_content=choice.message.reasoning_content,
+                    reasoning_content=getattr(choice.message, "reasoning_content", None),
                 )
             )
         self.add_response_to_prompt_file(prompt_file, responses)

From 71fe0d461cffd3e9a15c8f495a2a73e85d67e1b2 Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Fri, 25 Apr 2025 11:13:58 +0100
Subject: [PATCH 5/6] handle non-prefilled prompt, and revert back to the
 original base url when successful

---
 safetytooling/apis/inference/openai/chat.py | 34 ++++++++++++++-------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/safetytooling/apis/inference/openai/chat.py b/safetytooling/apis/inference/openai/chat.py
index 190210e..cac3a87 100644
--- a/safetytooling/apis/inference/openai/chat.py
+++ b/safetytooling/apis/inference/openai/chat.py
@@ -117,17 +117,29 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
             )
         else:
             api_func = self.aclient.chat.completions.create
-        if model_id in {"deepseek-chat", "deepseek-reasoner"}:
-            if prompt.is_last_message_assistant():
-                self.aclient.base_url = "https://api.deepseek.com/beta"
-            messages = prompt.deepseek_format()
-        else:
-            messages = prompt.openai_format()
-        api_response: openai.types.chat.ChatCompletion = await api_func(
-            messages=messages,
-            model=model_id,
-            **kwargs,
-        )
+
+        original_base_url = self.aclient.base_url
+        try:
+            if model_id in {"deepseek-chat", "deepseek-reasoner"}:
+                if prompt.is_last_message_assistant():
+                    # Use the beta endpoint for assistant prefilled prompts with DeepSeek
+                    self.aclient.base_url = "https://api.deepseek.com/beta"
+                else:
+                    # Use the standard v1 endpoint otherwise
+                    self.aclient.base_url = "https://api.deepseek.com/v1"
+                messages = prompt.deepseek_format()
+            else:
+                messages = prompt.openai_format()
+
+            api_response: openai.types.chat.ChatCompletion = await api_func(
+                messages=messages,
+                model=model_id,
+                **kwargs,
+            )
+        finally:
+            # Always revert the base_url after the call
+            self.aclient.base_url = original_base_url
+
         if hasattr(api_response, "error") and (
             "Rate limit exceeded" in api_response.error["message"] or api_response.error["code"] == 429
         ):  # OpenRouter routes through the error messages from the different providers, so we catch them here

From cd3190ff0bd868a0a5c929d51fca31129b6740ed Mon Sep 17 00:00:00 2001
From: "aryopg@gmail.com" <aryopg@gmail.com>
Date: Tue, 6 May 2025 23:21:48 +0100
Subject: [PATCH 6/6] remove outdated comment

---
 safetytooling/apis/inference/anthropic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/safetytooling/apis/inference/anthropic.py b/safetytooling/apis/inference/anthropic.py
index 73154f2..db33d62 100644
--- a/safetytooling/apis/inference/anthropic.py
+++ b/safetytooling/apis/inference/anthropic.py
@@ -305,7 +305,7 @@ async def __call__(

         # Safely extract text and thinking content
         text_content = None
-        reasoning_content = None  # We can extract this even if not used by LLMResponse yet
+        reasoning_content = None
         if content:
             for block in content:
                 if block.type == "text" and hasattr(block, "text"):
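
For context, a minimal usage sketch of the behaviour this series adds. `Prompt` and `MessageRole` come from safetytooling.data_models (patch 3 touches them); the `InferenceAPI` entry point, the `ChatMessage` constructor, and the call signature below are assumptions for illustration, not confirmed by these patches.

    import asyncio

    from safetytooling.apis import InferenceAPI  # assumed import path
    from safetytooling.data_models import ChatMessage, MessageRole, Prompt

    async def main():
        api = InferenceAPI()
        # A trailing assistant message makes prompt.is_last_message_assistant()
        # return True, which routes deepseek-chat / deepseek-reasoner calls
        # through the https://api.deepseek.com/beta prefill endpoint
        # (patches 1 and 5).
        prompt = Prompt(
            messages=[
                ChatMessage(role=MessageRole.user, content="Name three primes."),
                ChatMessage(role=MessageRole.assistant, content="Sure: 2,"),
            ]
        )
        responses = await api(model_id="deepseek-reasoner", prompt=prompt)
        # Patches 2 and 4 surface reasoning_content on the response when the
        # API returns it, falling back to None otherwise.
        print(responses[0].reasoning_content)

    asyncio.run(main())

Note that setting self.aclient.base_url mutates shared client state, which is why patch 5 wraps the call in try/finally to restore the original URL even when the request fails.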