diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
index c87d515f6..dc6595695 100644
--- a/Packages/OsaurusCore/Managers/Model/ModelManager.swift
+++ b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
@@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject {
 
         // Pull the OsaurusAI HF org listing once on launch so newly published
         // models surface in the Recommended tab without requiring a code push.
-        Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        //
+        // The unit-test runner constructs `ModelManager()` repeatedly to drive
+        // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races
+        // with those test calls, whichever finishes last wins and the merge
+        // result is non-deterministic — that's the regression class behind
+        // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI.
+        // Skip the background fetch under XCTest; production launches still
+        // get it because the `XCTest*` environment variables that gate it are
+        // only set inside a test host.
+        if !Self.isRunningInTestEnvironment {
+            Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        }
+    }
+
+    /// True when any of the `XCTest*` environment variables checked below is
+    /// set for this process. Used to gate network-touching launch-time side
+    /// effects so tests can drive the affected code paths deterministically.
+    nonisolated private static var isRunningInTestEnvironment: Bool {
+        let environment = ProcessInfo.processInfo.environment  // snapshot once, not per key
+        return ["XCTestConfigurationFilePath", "XCTestBundlePath", "XCTestSessionIdentifier"]
+            .contains { environment[$0] != nil }
     }
 
     // MARK: - Public Methods
diff --git a/Packages/OsaurusCore/Networking/HTTPHandler.swift b/Packages/OsaurusCore/Networking/HTTPHandler.swift
index 0f8877549..f86096539 100644
--- a/Packages/OsaurusCore/Networking/HTTPHandler.swift
+++ b/Packages/OsaurusCore/Networking/HTTPHandler.swift
@@ -2010,14 +2010,22 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
                     session_id: req.session_id
                 )
 
-                var responseContent = ""
+                // Accumulate deltas in an array and join once at the end.
+                // The previous `responseContent += delta` formed the assistant
+                // turn via repeated `String` concatenation, which re-copies the
+                // accumulated text every time an append outgrows the current
+                // storage. Long agent turns (especially on /run SSE) made this
+                // measurable on top of inference latency.
+                var deltaBuffer: [String] = []
+                var accumulatedLength = 0
                 var toolInvoked: ServiceToolInvocation?
 
                 do {
                     let stream = try await chatEngine.streamChat(request: iterationReq)
                     for try await delta in stream {
                         if StreamingToolHint.isSentinel(delta) { continue }
-                        responseContent += delta
+                        deltaBuffer.append(delta)
+                        accumulatedLength += delta.utf8.count
                         hop {
                             writerBound.value.writeContent(
                                 delta,
@@ -2047,8 +2055,18 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
                     return
                 }
 
+                // Materialize the full assistant text exactly once. Single
+                // allocation: `String.reserveCapacity` sized to the UTF-8 byte
+                // total, then chunk-by-chunk append, so no append has to
+                // regrow the storage. The tool-invocation branch below
+                // reads this too (as the assistant's pre-tool-call text),
+                // so it has to live outside the `guard`.
+                var responseContent = String()
+                responseContent.reserveCapacity(accumulatedLength)
+                for chunk in deltaBuffer { responseContent.append(chunk) }
+
                 guard let invocation = toolInvoked else {
-                    // Final text response — done
+                    // Final text response — done.
                     messages.append(ChatMessage(role: "assistant", content: responseContent))
                     break
                 }