mimeding · mimeding · May 26, 2026 · May 27, 2026
diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
@@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject {
 
         // Pull the OsaurusAI HF org listing once on launch so newly published
         // models surface in the Recommended tab without requiring a code push.
-        Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        //
+        // The unit-test runner constructs `ModelManager()` repeatedly to drive
+        // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races
+        // with those test calls, whichever finishes last wins and the merge
+        // result is non-deterministic — that's the regression class behind
+        // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI.
+        // Skip the background fetch under XCTest; production launches still
+        // get it because the XCTest environment variables checked by
+        // `isRunningInTestEnvironment` are only set inside a test host.
+        if !Self.isRunningInTestEnvironment {
+            Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        }
+    }
+
+    /// Whether this process is hosted by an XCTest runner: true when any of
+    /// the XCTest environment variables below is present. Gates launch-time
+    /// network side effects so tests can drive those paths deterministically.
+    nonisolated private static var isRunningInTestEnvironment: Bool {
+        let environment = ProcessInfo.processInfo.environment
+        let markers = ["XCTestConfigurationFilePath", "XCTestBundlePath", "XCTestSessionIdentifier"]
+        return markers.contains { environment[$0] != nil }
     }
 
     // MARK: - Public Methods

diff --git a/Packages/OsaurusCore/Networking/HTTPHandler.swift b/Packages/OsaurusCore/Networking/HTTPHandler.swift
@@ -1335,18 +1335,28 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
     // MARK: - Memory Ingestion
 
     /// Request body for the `/memory/ingest` endpoint.
+    ///
+    /// `source_mode` (request-level default) and `MemoryIngestTurn.source_mode`
+    /// (per-turn override) let callers tag ingested turns with the execution
+    /// context they originated in (`chat`, `chat_sandbox`, `work_host`,
+    /// `work_sandbox`). The memory partition machinery added in PR #877 uses
+    /// this to keep tool-using turns out of pure-chat recall. Both fields are
+    /// optional: a turn without its own value inherits the request-level one,
+    /// which falls back to `.chat` so existing API clients behave as before.
     private struct MemoryIngestRequest: Codable {
         let agent_id: String
         let conversation_id: String
         let turns: [MemoryIngestTurn]
         let session_date: String?
         let skip_extraction: Bool?
+        let source_mode: MemorySourceMode?  // Request-level default; nil means `.chat`.
     }
 
     private struct MemoryIngestTurn: Codable {
         let user: String
         let assistant: String
         let date: String?
+        let source_mode: MemorySourceMode?  // Per-turn override; nil inherits the request-level mode.
     }
 
     /// Bulk-ingest conversation turns into the memory system.
@@ -1374,7 +1384,7 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
                 version: head.version,
                 status: .badRequest,
                 headers: [("Content-Type", "text/plain; charset=utf-8")],
-                body: "Invalid request format. Expected {agent_id, conversation_id, turns: [{user, assistant}]}"
+                body: "Invalid request format. Expected {agent_id, conversation_id, turns: [{user, assistant, date?, source_mode?}], session_date?, skip_extraction?, source_mode?}"
             )
             logRequest(
                 method: "POST",
@@ -1406,11 +1416,13 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
             )
 
             let skipExtraction = req.skip_extraction ?? false
+            let defaultSourceMode = req.source_mode ?? .chat
 
             try? db.deleteChunksForConversation(req.conversation_id)
 
             for (i, turn) in req.turns.enumerated() {
                 let turnDate = turn.date ?? req.session_date
+                let turnSourceMode = turn.source_mode ?? defaultSourceMode
 
                 let pairs: [(role: String, content: String, index: Int)] = [
                     ("user", turn.user, i * 2),
@@ -1433,7 +1445,7 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
                         content: content,
                         tokenCount: tokens,
                         createdAt: turnDate,
-                        sourceMode: .chat
+                        sourceMode: turnSourceMode
                     )
                     await MemorySearchService.shared.indexConversationChunk(chunk)
                 }
@@ -1444,7 +1456,7 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
                         assistantMessage: turn.assistant,
                         agentId: req.agent_id,
                         conversationId: req.conversation_id,
-                        sourceMode: .chat,
+                        sourceMode: turnSourceMode,
                         sessionDate: turnDate
                     )
                 }

diff --git a/docs/MEMORY.md b/docs/MEMORY.md
@@ -384,7 +384,10 @@ curl http://127.0.0.1:1337/memory/ingest \
 |-----------|------|-------------|
 | `agent_id` | string | Identifier for the agent whose memory is being populated |
 | `conversation_id` | string | Identifier for the conversation session |
-| `turns` | array | Array of turn objects, each with `user` and `assistant` fields |
+| `turns` | array | Array of turn objects, each with `user` and `assistant` fields (optional `date`, optional `source_mode` per turn) |
+| `session_date` | string (optional) | ISO date applied to any turn without its own `date` |
+| `skip_extraction` | bool (optional) | When `true`, store chunks only and skip background memory extraction |
+| `source_mode` | string (optional) | Default execution context for every turn in this batch; a turn's own `source_mode` overrides it. One of `chat` (default), `chat_sandbox`, `work_host`, `work_sandbox`. Pure-chat recall filters out tool-using modes, so tagging ingested logs correctly prevents phantom tool affordances from leaking back into chat-only sessions. |
 
 Memory extraction runs asynchronously in the background — ingested turns are processed without blocking the API response.