Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion safetytooling/apis/inference/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ async def __call__(

# Safely extract text and thinking content
text_content = None
reasoning_content = None # We can extract this even if not used by LLMResponse yet
reasoning_content = None
if content:
for block in content:
if block.type == "text" and hasattr(block, "text"):
Expand Down
1 change: 1 addition & 0 deletions safetytooling/apis/inference/openai/batch_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ async def __call__(
api_duration=None,
cost=0,
batch_custom_id=result["custom_id"],
reasoning_content=choice["message"].get("reasoning_content", None),
)

responses = []
Expand Down
29 changes: 24 additions & 5 deletions safetytooling/apis/inference/openai/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,29 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
)
else:
api_func = self.aclient.chat.completions.create
api_response: openai.types.chat.ChatCompletion = await api_func(
messages=prompt.openai_format(),
model=model_id,
**kwargs,
)

original_base_url = self.aclient.base_url
try:
if model_id in {"deepseek-chat", "deepseek-reasoner"}:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a DEEPSEEK_MODELS dict somewhere?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(or list)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have one in safety-tooling/safetytooling/apis/inference/api.py, but that would result in a circular import. We can create a constants file somewhere maybe?

if prompt.is_last_message_assistant():
# Use the beta endpoint for assistant prefilled prompts with DeepSeek
self.aclient.base_url = "https://api.deepseek.com/beta"
else:
# Use the standard v1 endpoint otherwise
self.aclient.base_url = "https://api.deepseek.com/v1"
messages = prompt.deepseek_format()
else:
messages = prompt.openai_format()

api_response: openai.types.chat.ChatCompletion = await api_func(
messages=messages,
model=model_id,
**kwargs,
)
finally:
# Always revert the base_url after the call
self.aclient.base_url = original_base_url
Comment on lines +139 to +141
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one thought - could this have strange async race conditions :/

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, maybe the base url should be passed directly to the api_func on a call-wise basis (rather than setting it as an attribute of the entire class). Since the class itself could be handling many requests with different models (and even different providers if it was set up differently).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that's true. Should we use an asyncio lock, maybe?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

base url should be passed directly to the api_func on a call-wise basis

The api_func doesn't accept a base URL, unfortunately. And I guess locking would harm concurrency…
Another (naive) approach is to instantiate the api_func again and again (instantiate openai.AsyncClient).

Would you be against locking?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think locking would mess things up in terms of throughput since it would lock until the async call is complete which would be bad. Perhaps you can override the URL via "extra_headers"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still a little worried about this. Can we just use "https://api.deepseek.com/beta" always? Then we can set it in api.py and remove all of this internal logic for swapping between endpoints.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what ended up happening here?


if hasattr(api_response, "error") and (
"Rate limit exceeded" in api_response.error["message"] or api_response.error["code"] == 429
): # OpenRouter routes through the error messages from the different providers, so we catch them here
Expand Down Expand Up @@ -160,6 +178,7 @@ async def _make_api_call(self, prompt: Prompt, model_id, start_time, **kwargs) -
duration=duration,
cost=context_cost + count_tokens(choice.message.content, model_id) * completion_token_cost,
logprobs=(self.convert_top_logprobs(choice.logprobs) if choice.logprobs is not None else None),
reasoning_content=getattr(choice.message, "reasoning_content", None),
)
)
self.add_response_to_prompt_file(prompt_file, responses)
Expand Down
2 changes: 2 additions & 0 deletions safetytooling/data_models/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
PRINT_COLORS = {
"user": "cyan",
"system": "magenta",
"developer": "magenta",
"assistant": "light_green",
"audio": "yellow",
"image": "yellow",
Expand All @@ -36,6 +37,7 @@
class MessageRole(str, Enum):
user = "user"
system = "system"
developer = "developer" # A new system message for OpenAI o1 models
assistant = "assistant"
audio = "audio"
image = "image"
Expand Down
Loading