o3-mini and reasoning_effort option, refs #728

simonw · simonw · commit eb0e1e761bfd · 2025-01-31T12:14:02.000-08:00
diff --git a/docs/openai-models.md b/docs/openai-models.md
@@ -50,6 +50,7 @@ OpenAI Chat: o1
 OpenAI Chat: o1-2024-12-17
 OpenAI Chat: o1-preview
 OpenAI Chat: o1-mini
+OpenAI Chat: o3-mini
 OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
 ```
 <!-- [[[end]]] -->
diff --git a/docs/usage.md b/docs/usage.md
@@ -484,6 +484,7 @@ OpenAI Chat: o1
     logit_bias: dict, str
     seed: int
     json_object: boolean
+    reasoning_effort: str
   Attachment types:
     image/gif, image/jpeg, image/png, image/webp
 OpenAI Chat: o1-2024-12-17
@@ -497,6 +498,7 @@ OpenAI Chat: o1-2024-12-17
     logit_bias: dict, str
     seed: int
     json_object: boolean
+    reasoning_effort: str
   Attachment types:
     image/gif, image/jpeg, image/png, image/webp
 OpenAI Chat: o1-preview
@@ -521,6 +523,18 @@ OpenAI Chat: o1-mini
     logit_bias: dict, str
     seed: int
     json_object: boolean
+OpenAI Chat: o3-mini
+  Options:
+    temperature: float
+    max_tokens: int
+    top_p: float
+    frequency_penalty: float
+    presence_penalty: float
+    stop: str
+    logit_bias: dict, str
+    seed: int
+    json_object: boolean
+    reasoning_effort: str
 OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
   Options:
     temperature: float
diff --git a/llm/cli.py b/llm/cli.py
@@ -1139,9 +1139,9 @@ def models_list(options, async_, query):
                     any_of = [{"type": field["type"]}]
                 types = ", ".join(
                     [
-                        _type_lookup.get(item["type"], item["type"])
+                        _type_lookup.get(item.get("type"), item.get("type", "str"))
                         for item in any_of
-                        if item["type"] != "null"
+                        if item.get("type") != "null"
                     ]
                 )
                 bits = ["\n    ", name, ": ", types]
diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py
@@ -8,6 +8,7 @@
 )
 import click
 import datetime
+from enum import Enum
 import httpx
 import openai
 import os
@@ -71,8 +72,8 @@ def register_models(register):
     # o1
     for model_id in ("o1", "o1-2024-12-17"):
         register(
-            Chat(model_id, vision=True, can_stream=False),
-            AsyncChat(model_id, vision=True, can_stream=False),
+            Chat(model_id, vision=True, can_stream=False, reasoning=True),
+            AsyncChat(model_id, vision=True, can_stream=False, reasoning=True),
         )
 
     register(
@@ -83,6 +84,10 @@ def register_models(register):
         Chat("o1-mini", allows_system_prompt=False),
         AsyncChat("o1-mini", allows_system_prompt=False),
     )
+    register(
+        Chat("o3-mini", reasoning=True),
+        AsyncChat("o3-mini", reasoning=True),
+    )
     # The -instruct completion model
     register(
         Completion("gpt-3.5-turbo-instruct", default_max_tokens=256),
@@ -322,6 +327,27 @@ def validate_logit_bias(cls, logit_bias):
         return validated_logit_bias
 
 
+class ReasoningEffortEnum(str, Enum):
+    low = "low"
+    medium = "medium"
+    high = "high"
+
+
+class OptionsForReasoning(SharedOptions):
+    json_object: Optional[bool] = Field(
+        description="Output a valid JSON object {...}. Prompt must mention JSON.",
+        default=None,
+    )
+    reasoning_effort: Optional[ReasoningEffortEnum] = Field(
+        description=(
+            "Constrains effort on reasoning for reasoning models. Currently supported "
+            "values are low, medium, and high. Reducing reasoning effort can result in "
+            "faster responses and fewer tokens used on reasoning in a response."
+        ),
+        default=None,
+    )
+
+
 def _attachment(attachment):
     url = attachment.url
     base64_content = ""
@@ -355,6 +381,7 @@ def __init__(
         can_stream=True,
         vision=False,
         audio=False,
+        reasoning=False,
         allows_system_prompt=True,
     ):
         self.model_id = model_id
@@ -371,6 +398,9 @@ def __init__(
 
         self.attachment_types = set()
 
+        if reasoning:
+            self.Options = OptionsForReasoning
+
         if vision:
             self.attachment_types.update(
                 {

Original file line number	Diff line number	Diff line change
`@@ -1139,9 +1139,9 @@ def models_list(options, async_, query):`
`1139`	`1139`	`any_of = [{"type": field["type"]}]`
`1140`	`1140`	`types = ", ".join(`
`1141`	`1141`	`[`
`1142`		`- _type_lookup.get(item["type"], item["type"])`
	`1142`	`+ _type_lookup.get(item.get("type"), item.get("type", "str"))`
`1143`	`1143`	`for item in any_of`
`1144`		`- if item["type"] != "null"`
	`1144`	`+ if item.get("type") != "null"`
`1145`	`1145`	`]`
`1146`	`1146`	`)`
`1147`	`1147`	`bits = ["\n ", name, ": ", types]`