diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 1c713070..00000000 --- a/AGENTS.md +++ /dev/null @@ -1,29 +0,0 @@ -# AGENTS.md - -These rules apply to all agent-made changes in this repository. - -## PR Gate - -- Before opening or updating a PR, run the same local gates as `.github/workflows/quality-gates.yml`. -- Required commands: - - `./scripts/lint.sh` - - `./tests/scripts/check-refactor-line-gate.sh` - - `./tests/scripts/run-unit-all.sh` - - `npm run build --prefix webui` - -## Go Lint Rules - -- Run `gofmt -w` on every changed Go file before commit or push. -- Do not ignore error returns from I/O-style cleanup calls such as `Close`, `Flush`, `Sync`, or similar methods. -- If a cleanup error cannot be returned, log it explicitly. - -## Change Scope - -- Keep changes additive and tightly scoped to the requested feature or bugfix. -- Do not mix unrelated refactors into feature PRs unless they are required to make the change pass gates. - -## Documentation Sync - -- When business logic or user-visible behavior changes, update the corresponding documentation in the same change. -- `docs/prompt-compatibility.md` is the source-of-truth document for the “API -> pure-text web-chat context” compatibility flow. -- If a change affects message normalization, tool prompt injection, prompt-visible tool history, file/reference handling, history split, or completion payload assembly, update `docs/prompt-compatibility.md` in the same change. 
diff --git a/internal/deepseek/client/client_completion.go b/internal/deepseek/client/client_completion.go index 1b91ce2f..dbb76261 100644 --- a/internal/deepseek/client/client_completion.go +++ b/internal/deepseek/client/client_completion.go @@ -14,6 +14,12 @@ import ( trans "ds2api/internal/deepseek/transport" ) +// ThinkingCacheInjector is a function that injects thinking content into request payload +var ThinkingCacheInjector func(payload map[string]any, model string) map[string]any + +// ThinkingCacheExtractor is a function that extracts and stores thinking from response +var ThinkingCacheExtractor func(payload map[string]any, model string, thinking string) + func (c *Client) CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error) { if maxAttempts <= 0 { maxAttempts = c.maxRetries diff --git a/internal/httpapi/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go index 8478dc7b..901b4a64 100644 --- a/internal/httpapi/claude/handler_messages.go +++ b/internal/httpapi/claude/handler_messages.go @@ -20,6 +20,7 @@ import ( "ds2api/internal/httpapi/requestbody" "ds2api/internal/promptcompat" "ds2api/internal/responsehistory" streamengine "ds2api/internal/stream" + "ds2api/internal/thinkingcache" "ds2api/internal/translatorcliproxy" "ds2api/internal/util" @@ -70,12 +71,32 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo writeClaudeError(w, http.StatusBadRequest, "invalid json") return true } + // Save original messages for thinking cache (before any modifications) + var originalMessages []any + if msgs, ok := req["messages"].([]any); ok { + originalMessages = msgs + } + norm, err := normalizeClaudeRequest(h.Store, req) if err != nil { writeClaudeError(w, http.StatusBadRequest, err.Error()) return true } exposeThinking := norm.Standard.Thinking + + // Entry point: Apply thinking cache to restore assistant reasoning from previous 
turns + cacheModel := norm.Standard.ResolvedModel + if cacheModel == "" { + cacheModel = norm.Standard.RequestedModel + } + if len(originalMessages) > 0 { + if injectedMessages, changed := thinkingcache.Apply(originalMessages, cacheModel); changed { + req["messages"] = injectedMessages + norm, _ = normalizeClaudeRequest(h.Store, req) + originalMessages = injectedMessages + } + } + a, err := h.Auth.Determine(r) if err != nil { writeClaudeError(w, http.StatusUnauthorized, err.Error()) @@ -96,7 +117,7 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo Standard: stdReq, }) if stdReq.Stream { - h.handleClaudeDirectStream(w, r, a, stdReq, historySession) + h.handleClaudeDirectStream(w, r, a, stdReq, historySession, originalMessages, cacheModel) return true } result, outErr := completionruntime.ExecuteNonStreamWithRetry(r.Context(), h.DS, a, stdReq, completionruntime.Options{ @@ -113,6 +134,12 @@ func (h *Handler) handleClaudeDirect(w http.ResponseWriter, r *http.Request) boo if historySession != nil { historySession.SuccessTurn(http.StatusOK, result.Turn, responsehistory.GenericUsage(result.Turn)) } + + // Exit point: Store thinking content for future turns (non-stream) + if thinking := result.Turn.Thinking; thinking != "" { + thinkingcache.Store(originalMessages, cacheModel, thinking) + } + writeJSON(w, http.StatusOK, claudefmt.BuildMessageResponseFromTurn( fmt.Sprintf("msg_%d", time.Now().UnixNano()), stdReq.ResponseModel, @@ -133,7 +160,7 @@ func mapCurrentInputFileError(err error) (int, string) { return history.MapError(err) } -func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, historySession *responsehistory.Session) { +func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, historySession *responsehistory.Session, originalMessages []any, cacheModel string) { 
start, outErr := completionruntime.StartCompletion(r.Context(), h.DS, a, stdReq, completionruntime.Options{ CurrentInputFile: h.Store, }) @@ -145,7 +172,7 @@ func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Reques return } streamReq := start.Request - h.handleClaudeStreamRealtime(w, r, start.Response, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession) + h.handleClaudeStreamRealtimeWithCache(w, r, start.Response, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, originalMessages, cacheModel, historySession) } func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool { @@ -300,6 +327,10 @@ func stripClaudeThinkingBlocks(raw []byte) []byte { } func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySessions ...*responsehistory.Session) { + h.handleClaudeStreamRealtimeWithCache(w, r, resp, model, messages, thinkingEnabled, searchEnabled, toolNames, toolsRaw, nil, "", historySessions...) 
+} + +func (h *Handler) handleClaudeStreamRealtimeWithCache(w http.ResponseWriter, r *http.Request, resp *http.Response, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, originalMessages []any, cacheModel string, historySessions ...*responsehistory.Session) { var historySession *responsehistory.Session if len(historySessions) > 0 { historySession = historySessions[0] @@ -324,7 +355,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ config.Logger.Warn("[claude_stream] response writer does not support flush; streaming may be buffered") } - streamRuntime := newClaudeStreamRuntime( + streamRuntime := newClaudeStreamRuntimeWithCache( w, rc, canFlush, @@ -337,6 +368,8 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ toolsRaw, buildClaudePromptTokenText(messages, thinkingEnabled), historySession, + originalMessages, + cacheModel, ) streamRuntime.sendMessageStart() diff --git a/internal/httpapi/claude/stream_runtime_core.go b/internal/httpapi/claude/stream_runtime_core.go index 9c9e656e..bdfd17c6 100644 --- a/internal/httpapi/claude/stream_runtime_core.go +++ b/internal/httpapi/claude/stream_runtime_core.go @@ -48,6 +48,10 @@ type claudeStreamRuntime struct { ended bool upstreamErr string history *responsehistory.Session + + // For thinking cache + originalMessages []any + cacheModel string } func newClaudeStreamRuntime( @@ -84,6 +88,28 @@ func newClaudeStreamRuntime( } } +func newClaudeStreamRuntimeWithCache( + w http.ResponseWriter, + rc *http.ResponseController, + canFlush bool, + model string, + messages []any, + thinkingEnabled bool, + searchEnabled bool, + stripReferenceMarkers bool, + toolNames []string, + toolsRaw any, + promptTokenText string, + history *responsehistory.Session, + originalMessages []any, + cacheModel string, +) *claudeStreamRuntime { + s := newClaudeStreamRuntime(w, rc, canFlush, model, messages, thinkingEnabled, searchEnabled, 
stripReferenceMarkers, toolNames, toolsRaw, promptTokenText, history) + s.originalMessages = originalMessages + s.cacheModel = cacheModel + return s +} + func (s *claudeStreamRuntime) onParsed(parsed sse.LineResult) streamengine.ParsedDecision { if !parsed.Parsed { return streamengine.ParsedDecision{} diff --git a/internal/httpapi/claude/stream_runtime_finalize.go b/internal/httpapi/claude/stream_runtime_finalize.go index f63b1253..ae18da87 100644 --- a/internal/httpapi/claude/stream_runtime_finalize.go +++ b/internal/httpapi/claude/stream_runtime_finalize.go @@ -4,6 +4,7 @@ import ( "ds2api/internal/assistantturn" "ds2api/internal/responsehistory" "ds2api/internal/sse" + "ds2api/internal/thinkingcache" "ds2api/internal/toolcall" "ds2api/internal/toolstream" "encoding/json" @@ -186,6 +187,11 @@ func (s *claudeStreamRuntime) finalize(stopReason string) { ) } + // Exit point: Store thinking content for future turns (stream) + if thinking := turn.Thinking; thinking != "" { + thinkingcache.Store(s.originalMessages, s.cacheModel, thinking) + } + s.send("message_delta", map[string]any{ "type": "message_delta", "delta": map[string]any{ diff --git a/internal/thinkingcache/cache.go b/internal/thinkingcache/cache.go new file mode 100644 index 00000000..65992d3e --- /dev/null +++ b/internal/thinkingcache/cache.go @@ -0,0 +1,446 @@ +package thinkingcache + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "sync" + "time" + + "ds2api/internal/config" +) + +const ( + defaultDirRel = "data/thinking_cache" + defaultMaxEntries = 500 + ttl = 120 * time.Minute +) + +var mu sync.Mutex + +type entry struct { + Thinking string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` + Size int `json:"size,omitempty"` + Created time.Time `json:"created"` + Expires time.Time `json:"expires"` +} + +type indexedEntry struct { + Key string + Meta entry + Order time.Time +} + +// 
Apply restores assistant reasoning into historical messages before prompt +// rendering. Cached reasoning takes priority over existing reasoning fields; +// on a cache miss, any client-supplied reasoning (alternate field names or +// content blocks) is normalized into reasoning_content. +func Apply(messages []any, model string) ([]any, bool) { + if len(messages) == 0 { + return messages, false + } + mu.Lock() + defer mu.Unlock() + + index, _ := loadIndexLocked() + index = pruneExpiredLocked(index, time.Now()) + + var out []any + changed := false + cacheRestored := 0 + + for i, item := range messages { + msg, ok := item.(map[string]any) + if !ok { + continue + } + role := strings.ToLower(strings.TrimSpace(asString(msg["role"]))) + if role != "assistant" { + continue + } + + // Cache takes priority: use cached thinking regardless of existing content + fromCache := false + text := "" + if key := keyFor(messages[:i], model); key != "" { + text = strings.TrimSpace(readThinkingLocked(index, key)) + fromCache = text != "" + } + if text == "" { + // Cache miss: fall back to normalizing client-provided reasoning + text, _ = firstReasoning(msg) + } + if text == "" { + continue + } + + existing := strings.TrimSpace(asString(msg["reasoning_content"])) + if existing == text { + continue + } + + if out == nil { + out = append([]any(nil), messages...) + } + cloned := cloneMap(msg) + cloned["reasoning_content"] = text + out[i] = cloned + if fromCache { + cacheRestored++ + } + changed = true + } + + if !changed { + return messages, false + } + + config.Logger.Info( + "[thinking_cache] injected assistant reasoning", + "model", strings.TrimSpace(model), + "cache_restored", cacheRestored, + ) + return out, true +} + +// Store saves the current assistant turn's reasoning under the fingerprint of +// the prompt-visible messages that preceded it. 
+func Store(messages []any, model, thinking string) { + thinking = strings.TrimSpace(thinking) + if len(messages) == 0 || thinking == "" { + return + } + + key := keyFor(messages, model) + if key == "" { + return + } + + mu.Lock() + defer mu.Unlock() + + now := time.Now() + index, _ := loadIndexLocked() + index = pruneExpiredLocked(index, now) + + if err := os.MkdirAll(dir(), 0o755); err != nil { + config.Logger.Warn("[thinking_cache] mkdir failed", "error", err) + return + } + + if err := os.WriteFile(dataPath(key), []byte(thinking), 0o600); err != nil { + config.Logger.Warn("[thinking_cache] write data failed", "key", key, "error", err) + return + } + + index[key] = entry{ + Thinking: "", + Signature: "", + Size: len(thinking), + Created: now, + Expires: now.Add(ttl), + } + + index = pruneOverflowLocked(index, maxEntries()) + + if err := saveIndexLocked(index); err != nil { + config.Logger.Warn("[thinking_cache] write index failed", "error", err) + return + } + + config.Logger.Info( + "[thinking_cache] stored assistant reasoning", + "model", strings.TrimSpace(model), + "key", key, + "size", len(thinking), + ) +} + +func hasThinkingBlock(msg map[string]any) bool { + content, ok := msg["content"].([]any) + if !ok { + return false + } + for _, item := range content { + block, ok := item.(map[string]any) + if !ok { + continue + } + blockType := strings.ToLower(strings.TrimSpace(asString(block["type"]))) + if blockType == "thinking" || blockType == "redacted_thinking" { + return true + } + } + return false +} + +func readThinkingLocked(index map[string]entry, key string) string { + meta, ok := index[key] + if !ok || time.Now().After(meta.Expires) { + return "" + } + if text := strings.TrimSpace(meta.Thinking); text != "" { + return text + } + b, err := os.ReadFile(dataPath(key)) + if err != nil { + return "" + } + return strings.TrimSpace(string(b)) +} + +func firstReasoning(msg map[string]any) (string, string) { + for _, key := range 
[]string{"reasoning_content", "reasoning", "thinking", "thinking_content"} { + if text := strings.TrimSpace(reasoningString(msg[key])); text != "" { + return text, key + } + } + if text := strings.TrimSpace(contentReasoning(msg["content"])); text != "" { + return text, "content_reasoning" + } + return "", "" +} + +func reasoningString(v any) string { + switch x := v.(type) { + case string: + return x + case map[string]any: + for _, key := range []string{"text", "content", "thinking", "reasoning"} { + if text := strings.TrimSpace(asString(x[key])); text != "" { + return text + } + } + case []any: + parts := make([]string, 0, len(x)) + for _, item := range x { + if text := strings.TrimSpace(reasoningString(item)); text != "" { + parts = append(parts, text) + } + } + return strings.Join(parts, "\n") + } + return "" +} + +func contentReasoning(v any) string { + items, ok := v.([]any) + if !ok { + return "" + } + parts := make([]string, 0, len(items)) + for _, item := range items { + m, ok := item.(map[string]any) + if !ok { + continue + } + switch strings.ToLower(strings.TrimSpace(asString(m["type"]))) { + case "reasoning", "thinking": + if text := strings.TrimSpace(reasoningString(m)); text != "" { + parts = append(parts, text) + } + } + } + return strings.Join(parts, "\n") +} + +func keyFor(messages []any, model string) string { + normalized := map[string]any{ + "model": strings.TrimSpace(model), + "messages": normalizeForFingerprint(messages), + } + b, err := json.Marshal(normalized) + if err != nil { + return "" + } + sum := sha256.Sum256(b) + return hex.EncodeToString(sum[:])[:16] +} + +func normalizeForFingerprint(messages []any) []any { + out := make([]any, 0, len(messages)) + for _, item := range messages { + msg, ok := item.(map[string]any) + if !ok { + continue + } + + role := strings.ToLower(strings.TrimSpace(asString(msg["role"]))) + if role == "system" { + continue + } + + normalized := make(map[string]any) + keys := make([]string, 0, len(msg)) + + for k 
:= range msg { + lower := strings.ToLower(strings.TrimSpace(k)) + switch lower { + case "reasoning", "reasoning_content", "thinking", "thinking_content": + continue + } + keys = append(keys, k) + } + + sort.Strings(keys) + for _, k := range keys { + if k == "content" { + normalized[k] = normalizeContentForFingerprint(msg[k]) + continue + } + normalized[k] = msg[k] + } + out = append(out, normalized) + } + return out +} + +func normalizeContentForFingerprint(v any) any { + items, ok := v.([]any) + if !ok { + return v + } + out := make([]any, 0, len(items)) + for _, item := range items { + m, ok := item.(map[string]any) + if !ok { + out = append(out, item) + continue + } + + blockType := strings.ToLower(strings.TrimSpace(asString(m["type"]))) + if blockType == "reasoning" || blockType == "thinking" { + continue + } + + // Normalize block structure + normalized := make(map[string]any) + switch blockType { + case "text": + normalized["type"] = "text" + normalized["text"] = m["text"] + case "tool_use": + normalized["type"] = "tool_use" + normalized["id"] = m["id"] + normalized["name"] = m["name"] + normalized["input"] = m["input"] + case "tool_result": + normalized["type"] = "tool_result" + normalized["tool_use_id"] = m["tool_use_id"] + normalized["content"] = m["content"] + default: + normalized["type"] = blockType + } + out = append(out, normalized) + } + return out +} + +func loadIndexLocked() (map[string]entry, error) { + b, err := os.ReadFile(indexPath()) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return map[string]entry{}, nil + } + return map[string]entry{}, err + } + var index map[string]entry + if err := json.Unmarshal(b, &index); err != nil { + return map[string]entry{}, err + } + if index == nil { + index = map[string]entry{} + } + return index, nil +} + +func saveIndexLocked(index map[string]entry) error { + if err := os.MkdirAll(dir(), 0o755); err != nil { + return err + } + b, err := json.MarshalIndent(index, "", " ") + if err != nil { + 
return err + } + return os.WriteFile(indexPath(), append(b, '\n'), 0o600) +} + +func pruneExpiredLocked(index map[string]entry, now time.Time) map[string]entry { + for key, meta := range index { + if meta.Expires.IsZero() || now.Before(meta.Expires) { + continue + } + delete(index, key) + removeDataLocked(key) + } + return index +} + +func pruneOverflowLocked(index map[string]entry, limit int) map[string]entry { + if limit <= 0 || len(index) <= limit { + return index + } + items := make([]indexedEntry, 0, len(index)) + for key, meta := range index { + order := meta.Created + if order.IsZero() { + order = meta.Expires + } + items = append(items, indexedEntry{Key: key, Meta: meta, Order: order}) + } + sort.Slice(items, func(i, j int) bool { + return items[i].Order.Before(items[j].Order) + }) + for len(items) > limit { + victim := items[0] + items = items[1:] + delete(index, victim.Key) + removeDataLocked(victim.Key) + } + return index +} + +func removeDataLocked(key string) { + if err := os.Remove(dataPath(key)); err != nil && !errors.Is(err, os.ErrNotExist) { + config.Logger.Warn("[thinking_cache] remove data failed", "key", key, "error", err) + } +} + +func dir() string { + return config.ResolvePath("DS2API_THINKING_CACHE_DIR", defaultDirRel) +} + +func indexPath() string { + return filepath.Join(dir(), "index.json") +} + +func dataPath(key string) string { + return filepath.Join(dir(), fmt.Sprintf("%s.dat", key)) +} + +func maxEntries() int { + raw := strings.TrimSpace(os.Getenv("DS2API_THINKING_CACHE_MAX_ENTRIES")) + if raw == "" { + return defaultMaxEntries + } + n, err := strconv.Atoi(raw) + if err != nil || n <= 0 { + return defaultMaxEntries + } + return n +} + +func cloneMap(in map[string]any) map[string]any { + out := make(map[string]any, len(in)) + for k, v := range in { + out[k] = v + } + return out +} + +func asString(v any) string { + if s, ok := v.(string); ok { + return s + } + return "" +} diff --git a/internal/thinkingcache/cache_test.go 
b/internal/thinkingcache/cache_test.go new file mode 100644 index 00000000..4437d7c5 --- /dev/null +++ b/internal/thinkingcache/cache_test.go @@ -0,0 +1,164 @@ +package thinkingcache + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + "time" +) + +func TestStoreAndApplyRestoresMissingAssistantReasoning(t *testing.T) { + t.Setenv("DS2API_THINKING_CACHE_DIR", t.TempDir()) + + prefix := []any{ + map[string]any{"role": "user", "content": "solve it"}, + } + Store(prefix, "deepseek-v4-pro", "cached reasoning") + + messages := []any{ + map[string]any{"role": "user", "content": "solve it"}, + map[string]any{"role": "assistant", "content": "answer"}, + map[string]any{"role": "user", "content": "continue"}, + } + out, changed := Apply(messages, "deepseek-v4-pro") + if !changed { + t.Fatal("expected cached reasoning to be injected") + } + assistant := out[1].(map[string]any) + if assistant["reasoning_content"] != "cached reasoning" { + t.Fatalf("expected cached reasoning_content, got %#v", assistant["reasoning_content"]) + } +} + +func TestApplyPrefersCachedReasoningOverExistingReasoningContent(t *testing.T) { + t.Setenv("DS2API_THINKING_CACHE_DIR", t.TempDir()) + + prefix := []any{ + map[string]any{"role": "user", "content": "solve it"}, + } + Store(prefix, "deepseek-v4-pro", "cached reasoning") + + messages := []any{ + map[string]any{"role": "user", "content": "solve it"}, + map[string]any{"role": "assistant", "content": "answer", "reasoning_content": "client reasoning"}, + } + out, changed := Apply(messages, "deepseek-v4-pro") + if !changed { + t.Fatal("expected cached reasoning to replace existing reasoning_content") + } + assistant := out[1].(map[string]any) + if assistant["reasoning_content"] != "cached reasoning" { + t.Fatalf("expected cached reasoning_content, got %#v", assistant["reasoning_content"]) + } +} + +func TestApplyNormalizesUnsupportedReasoningField(t *testing.T) { + t.Setenv("DS2API_THINKING_CACHE_DIR", t.TempDir()) + + messages := []any{ 
+ map[string]any{"role": "user", "content": "question"}, + map[string]any{"role": "assistant", "content": "answer", "reasoning": "client reasoning"}, + } + out, changed := Apply(messages, "deepseek-v4-pro") + if !changed { + t.Fatal("expected unsupported reasoning field to be normalized") + } + assistant := out[1].(map[string]any) + if assistant["reasoning_content"] != "client reasoning" { + t.Fatalf("expected reasoning_content from reasoning, got %#v", assistant["reasoning_content"]) + } +} + +func TestApplyNormalizesContentReasoningBlock(t *testing.T) { + t.Setenv("DS2API_THINKING_CACHE_DIR", t.TempDir()) + + messages := []any{ + map[string]any{"role": "user", "content": "question"}, + map[string]any{ + "role": "assistant", + "content": []any{ + map[string]any{"type": "reasoning", "text": "content reasoning"}, + map[string]any{"type": "text", "text": "answer"}, + }, + }, + } + out, changed := Apply(messages, "deepseek-v4-pro") + if !changed { + t.Fatal("expected content reasoning block to be normalized") + } + assistant := out[1].(map[string]any) + if assistant["reasoning_content"] != "content reasoning" { + t.Fatalf("expected reasoning_content from content block, got %#v", assistant["reasoning_content"]) + } +} + +func TestApplyDoesNotTreatUnknownContentBlockAsReasoning(t *testing.T) { + t.Setenv("DS2API_THINKING_CACHE_DIR", t.TempDir()) + + messages := []any{ + map[string]any{"role": "user", "content": "question"}, + map[string]any{ + "role": "assistant", + "content": []any{ + map[string]any{"type": "unsupported_reasoning", "text": "do not inject"}, + map[string]any{"type": "text", "text": "answer"}, + }, + }, + } + if _, changed := Apply(messages, "deepseek-v4-pro"); changed { + t.Fatal("did not expect unknown content block to be normalized as reasoning") + } +} + +func TestApplyIgnoresExpiredEntries(t *testing.T) { + cacheDir := t.TempDir() + t.Setenv("DS2API_THINKING_CACHE_DIR", cacheDir) + + prefix := []any{map[string]any{"role": "user", "content": "old"}} 
+ key := keyFor(prefix, "deepseek-v4-pro") + if err := os.WriteFile(filepath.Join(cacheDir, key+".dat"), []byte("expired reasoning"), 0o600); err != nil { + t.Fatalf("write data: %v", err) + } + index := map[string]entry{ + key: { + Size: len("expired reasoning"), + Created: time.Now().Add(-3 * time.Hour), + Expires: time.Now().Add(-time.Minute), + }, + } + b, err := json.Marshal(index) + if err != nil { + t.Fatalf("marshal index: %v", err) + } + if err := os.WriteFile(filepath.Join(cacheDir, "index.json"), b, 0o600); err != nil { + t.Fatalf("write index: %v", err) + } + + messages := []any{ + map[string]any{"role": "user", "content": "old"}, + map[string]any{"role": "assistant", "content": "answer"}, + } + if _, changed := Apply(messages, "deepseek-v4-pro"); changed { + t.Fatal("did not expect expired cache entry to be injected") + } + if _, err := os.Stat(filepath.Join(cacheDir, key+".dat")); !os.IsNotExist(err) { + t.Fatalf("expected expired data file removed, err=%v", err) + } +} + +func TestStorePrunesOldestEntriesOverThreshold(t *testing.T) { + t.Setenv("DS2API_THINKING_CACHE_DIR", t.TempDir()) + t.Setenv("DS2API_THINKING_CACHE_MAX_ENTRIES", "1") + + Store([]any{map[string]any{"role": "user", "content": "one"}}, "deepseek-v4-pro", "first") + Store([]any{map[string]any{"role": "user", "content": "two"}}, "deepseek-v4-pro", "second") + + index, err := loadIndexLocked() + if err != nil { + t.Fatalf("load index: %v", err) + } + if len(index) != 1 { + t.Fatalf("expected one retained entry, got %d", len(index)) + } +} diff --git a/third_party/opencode-deepseek-thinking-fix b/third_party/opencode-deepseek-thinking-fix new file mode 160000 index 00000000..21766758 --- /dev/null +++ b/third_party/opencode-deepseek-thinking-fix @@ -0,0 +1 @@ +Subproject commit 21766758c397574774eda6cd49b54df89ec13719