Skip to content

Commit d49d5ca

Browse files
committed
Fix an error and add model info for the o1 model
1 parent 7339570 commit d49d5ca

File tree

2 files changed

+5
-4
lines changed

shell/agents/AIShell.OpenAI.Agent/ModelInfo.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@ namespace AIShell.OpenAI.Agent;
55
internal class ModelInfo
66
{
77
// Models gpt4, gpt3.5, and the variants of them all use the 'cl100k_base' token encoding.
8-
// But the gpt-4o model uses the 'o200k_base' token encoding. For reference:
9-
// https://github.com/openai/tiktoken/blob/5d970c1100d3210b42497203d6b5c1e30cfda6cb/tiktoken/model.py#L7
10-
// https://github.com/dmitry-brazhenko/SharpToken/blob/main/SharpToken/Lib/Model.cs#L8
8+
// But gpt-4o and o1 models use the 'o200k_base' token encoding. For reference:
9+
// https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/tiktoken/model.py
1110
private const string Gpt4oEncoding = "o200k_base";
1211
private const string Gpt34Encoding = "cl100k_base";
1312

@@ -21,6 +20,7 @@ static ModelInfo()
2120
// https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
2221
s_modelMap = new(StringComparer.OrdinalIgnoreCase)
2322
{
23+
["o1"] = new(tokenLimit: 200_000, encoding: Gpt4oEncoding),
2424
["gpt-4o"] = new(tokenLimit: 128_000, encoding: Gpt4oEncoding),
2525
["gpt-4"] = new(tokenLimit: 8_192),
2626
["gpt-4-32k"] = new(tokenLimit: 32_768),

shell/agents/AIShell.OpenAI.Agent/Service.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ internal void CalibrateChatHistory(ChatTokenUsage usage, AssistantChatMessage re
7878
// Every reply is primed with <|start|>assistant<|message|>, so we subtract 3 from the 'InputTokenCount'.
7979
int promptTokenCount = usage.InputTokenCount - 3;
8080
// 'ReasoningTokenCount' should be 0 for non-o1 models.
81-
int responseTokenCount = usage.OutputTokenCount - usage.OutputTokenDetails.ReasoningTokenCount;
81+
int reasoningTokenCount = usage.OutputTokenDetails is null ? 0 : usage.OutputTokenDetails.ReasoningTokenCount;
82+
int responseTokenCount = usage.OutputTokenCount - reasoningTokenCount;
8283

8384
if (_totalInputToken is 0)
8485
{

0 commit comments

Comments (0)