From 665a51a2eb245fc05b0fef64cdacc7c144c13fb1 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 26 May 2026 01:49:57 +0000 Subject: [PATCH 1/2] Accept source_mode on /memory/ingest to preserve chat/tool partition PR #877 partitioned the memory store by execution mode (chat, chat_sandbox, work_host, work_sandbox) and made pure-chat recall filter out tool-using contributions to prevent phantom-tool priming. The HTTP ingest endpoint, however, was hardcoded to tag every turn as .chat, regardless of where the source turns actually came from. That means anyone seeding memory from existing logs, migrating from another system, or running offline batch ingestion (LoCoMo benchmark runs are exactly this) ends up writing tool-flavoured turns under the chat partition. When the agent later runs in pure-chat mode, the chatOnly filter happily surfaces those rows -- the very leak the partition was designed to prevent. Fix by accepting an optional source_mode at both the request level (batch default) and per-turn (override). Both fields default to .chat so existing callers keep working byte-for-byte. MemorySourceMode is already Codable with the right string raw values, so callers send 'chat' / 'chat_sandbox' / 'work_host' / 'work_sandbox' as JSON strings. Docs/MEMORY.md updated with the new fields and a short note on why tagging matters. Co-authored-by: Michael Meding --- .../OsaurusCore/Networking/HTTPHandler.swift | 18 +++++++++++++++--- docs/MEMORY.md | 5 ++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Packages/OsaurusCore/Networking/HTTPHandler.swift b/Packages/OsaurusCore/Networking/HTTPHandler.swift index 0f8877549..48bf1f0fe 100644 --- a/Packages/OsaurusCore/Networking/HTTPHandler.swift +++ b/Packages/OsaurusCore/Networking/HTTPHandler.swift @@ -1335,18 +1335,28 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { // MARK: - Memory Ingestion /// Request body for the `/memory/ingest` endpoint. 
+ /// + /// `source_mode` (request-level default) and `MemoryIngestTurn.source_mode` + /// (per-turn override) let callers tag ingested turns with the execution + /// context they originated in (`chat`, `chat_sandbox`, `work_host`, + /// `work_sandbox`). The memory partition machinery added in PR #877 uses + /// this to keep tool-using turns out of pure-chat recall. Both fields are + /// optional and default to `.chat` to preserve the previous behaviour for + /// existing API clients. private struct MemoryIngestRequest: Codable { let agent_id: String let conversation_id: String let turns: [MemoryIngestTurn] let session_date: String? let skip_extraction: Bool? + let source_mode: MemorySourceMode? } private struct MemoryIngestTurn: Codable { let user: String let assistant: String let date: String? + let source_mode: MemorySourceMode? } /// Bulk-ingest conversation turns into the memory system. @@ -1374,7 +1384,7 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { version: head.version, status: .badRequest, headers: [("Content-Type", "text/plain; charset=utf-8")], - body: "Invalid request format. Expected {agent_id, conversation_id, turns: [{user, assistant}]}" + body: "Invalid request format. Expected {agent_id, conversation_id, turns: [{user, assistant, date?, source_mode?}], session_date?, skip_extraction?, source_mode?}" ) logRequest( method: "POST", @@ -1406,11 +1416,13 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { ) let skipExtraction = req.skip_extraction ?? false + let defaultSourceMode = req.source_mode ?? .chat try? db.deleteChunksForConversation(req.conversation_id) for (i, turn) in req.turns.enumerated() { let turnDate = turn.date ?? req.session_date + let turnSourceMode = turn.source_mode ?? 
defaultSourceMode let pairs: [(role: String, content: String, index: Int)] = [ ("user", turn.user, i * 2), @@ -1433,7 +1445,7 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { content: content, tokenCount: tokens, createdAt: turnDate, - sourceMode: .chat + sourceMode: turnSourceMode ) await MemorySearchService.shared.indexConversationChunk(chunk) } @@ -1444,7 +1456,7 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { assistantMessage: turn.assistant, agentId: req.agent_id, conversationId: req.conversation_id, - sourceMode: .chat, + sourceMode: turnSourceMode, sessionDate: turnDate ) } diff --git a/docs/MEMORY.md b/docs/MEMORY.md index da53fffa9..4b5df4631 100644 --- a/docs/MEMORY.md +++ b/docs/MEMORY.md @@ -384,7 +384,10 @@ curl http://127.0.0.1:1337/memory/ingest \ |-----------|------|-------------| | `agent_id` | string | Identifier for the agent whose memory is being populated | | `conversation_id` | string | Identifier for the conversation session | -| `turns` | array | Array of turn objects, each with `user` and `assistant` fields | +| `turns` | array | Array of turn objects, each with `user` and `assistant` fields (optional `date`, optional `source_mode` per turn) | +| `session_date` | string (optional) | ISO date applied to any turn without its own `date` | +| `skip_extraction` | bool (optional) | When `true`, store chunks only and skip background memory extraction | +| `source_mode` | string (optional) | Default execution context for all turns in this batch. One of `chat` (default), `chat_sandbox`, `work_host`, `work_sandbox`. Pure-chat recall filters out tool-using modes, so tagging ingested logs correctly prevents phantom tool affordances from leaking back into chat-only sessions. | Memory extraction runs asynchronously in the background — ingested turns are processed without blocking the API response. 
From af55291827d81b3a151201bfea662883b0df2e05 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 27 May 2026 05:08:34 +0000 Subject: [PATCH 2/2] Fix flake: skip ModelManager launch-time HF fetch under xctest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ModelManager.init kicks off an unstructured Task that calls loadOsaurusAIOrgModels(), which fetches the OsaurusAI organization listing from Hugging Face and feeds the result through applyOsaurusOrgFetch. The unit-test runner repeatedly constructs ModelManager() to drive applyOsaurusOrgFetch directly. The background launch-time fetch races with those test calls — whichever finishes last wins, and the merge result is non-deterministic. That's the root cause of the flaky ModelManagerSuggestedTests failures seen across many of the recent PR CI runs (applyOsaurusOrgFetch_dropsStaleAutoFetchedOnReapply, applyOsaurusOrgFetch_addsNewEntriesAfterCurated, etc.). Gate the launch-time fetch on a small isRunningInTestEnvironment helper that checks for any of XCTestConfigurationFilePath, XCTestBundlePath, or XCTestSessionIdentifier in the process environment. Those variables are only present inside an xctest host process; production app launches still get the HF fetch exactly as before. This is a network call, so removing it under tests also has the side benefit of making the test suite work offline / on hermetic CI runners. 
Co-authored-by: Michael Meding --- .../Managers/Model/ModelManager.swift | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift index c87d515f6..dc6595695 100644 --- a/Packages/OsaurusCore/Managers/Model/ModelManager.swift +++ b/Packages/OsaurusCore/Managers/Model/ModelManager.swift @@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject { // Pull the OsaurusAI HF org listing once on launch so newly published // models surface in the Recommended tab without requiring a code push. - Task { [weak self] in await self?.loadOsaurusAIOrgModels() } + // + // The unit-test runner constructs `ModelManager()` repeatedly to drive + // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races + // with those test calls, whichever finishes last wins and the merge + // result is non-deterministic — that's the regression class behind + // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI. + // Skip the background fetch under XCTest; production launches still + // get it because `XCTestConfigurationFilePath` is only set inside + // a test host. + if !Self.isRunningInTestEnvironment { + Task { [weak self] in await self?.loadOsaurusAIOrgModels() } + } + } + + /// True when the current process was launched by xctest. Used to gate + /// network-touching launch-time side effects so tests can drive the + /// affected code paths deterministically. + nonisolated private static var isRunningInTestEnvironment: Bool { + ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] != nil + || ProcessInfo.processInfo.environment["XCTestBundlePath"] != nil + || ProcessInfo.processInfo.environment["XCTestSessionIdentifier"] != nil } // MARK: - Public Methods