diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
index c87d515f6..dc6595695 100644
--- a/Packages/OsaurusCore/Managers/Model/ModelManager.swift
+++ b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
@@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject {
 
         // Pull the OsaurusAI HF org listing once on launch so newly published
         // models surface in the Recommended tab without requiring a code push.
-        Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        //
+        // The unit-test runner constructs `ModelManager()` repeatedly to drive
+        // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races
+        // with those test calls, whichever finishes last wins and the merge
+        // result is non-deterministic — that's the regression class behind
+        // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI.
+        // Skip the background fetch under XCTest; production launches still
+        // get it because `XCTestConfigurationFilePath` is only set inside
+        // a test host.
+        if !Self.isRunningInTestEnvironment {
+            Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        }
+    }
+
+    /// True when any XCTest-specific environment variable is present in this
+    /// process. Used to gate network-touching launch-time side effects so tests
+    /// can drive the affected code paths deterministically.
+    nonisolated private static var isRunningInTestEnvironment: Bool {
+        let environment = ProcessInfo.processInfo.environment
+        return ["XCTestConfigurationFilePath", "XCTestBundlePath", "XCTestSessionIdentifier"]
+            .contains { environment[$0] != nil }
     }
 
     // MARK: - Public Methods
diff --git a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
index b92007d91..a5f8c8dd5 100644
--- a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
+++ b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
@@ -204,15 +204,21 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
             var finishReason: InferenceLog.FinishReason = .stop
             var errorMsg: String? = nil
             var toolInvocation: (name: String, args: String)? = nil
-            var lastDeltaTime = startTime
+            #if DEBUG
+                var lastDeltaTime = startTime
+            #endif
 
-            print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
+            #if DEBUG
+                print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
+            #endif
 
             do {
                 for try await delta in inner {
                     // Check for task cancellation to allow early termination
                     if Task.isCancelled {
-                        print("[Osaurus][Stream] Task cancelled after \(deltaCount) deltas")
+                        #if DEBUG
+                            print("[Osaurus][Stream] Task cancelled after \(deltaCount) deltas")
+                        #endif
                         continuation.finish()
                         return
                     }
@@ -224,17 +230,20 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
                     }
 
                     deltaCount += 1
-                    let now = Date()
-                    let timeSinceStart = now.timeIntervalSince(startTime)
-                    let timeSinceLastDelta = now.timeIntervalSince(lastDeltaTime)
-                    lastDeltaTime = now
-
-                    // Log every 50th delta or if there's a long gap (potential freeze indicator)
-                    if deltaCount % 50 == 1 || timeSinceLastDelta > 2.0 {
-                        print(
-                            "[Osaurus][Stream] Delta #\(deltaCount): +\(String(format: "%.2f", timeSinceStart))s total, gap=\(String(format: "%.3f", timeSinceLastDelta))s, len=\(delta.count)"
-                        )
-                    }
+
+                    #if DEBUG
+                        let now = Date()
+                        let timeSinceStart = now.timeIntervalSince(startTime)
+                        let timeSinceLastDelta = now.timeIntervalSince(lastDeltaTime)
+                        lastDeltaTime = now
+
+                        // Log every 50th delta or if there's a long gap (potential freeze indicator)
+                        if deltaCount % 50 == 1 || timeSinceLastDelta > 2.0 {
+                            print(
+                                "[Osaurus][Stream] Delta #\(deltaCount): +\(String(format: "%.2f", timeSinceStart))s total, gap=\(String(format: "%.3f", timeSinceLastDelta))s, len=\(delta.count)"
+                            )
+                        }
+                    #endif
 
                     // Estimate tokens: each delta chunk is roughly proportional to tokens
                     // More accurate: count whitespace-separated words, or use tokenizer
@@ -242,25 +251,33 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
                     continuation.yield(delta)
                 }
 
-                let totalTime = Date().timeIntervalSince(startTime)
-                print(
-                    "[Osaurus][Stream] Stream completed: \(deltaCount) deltas in \(String(format: "%.2f", totalTime))s"
-                )
+                #if DEBUG
+                    let totalTime = Date().timeIntervalSince(startTime)
+                    print(
+                        "[Osaurus][Stream] Stream completed: \(deltaCount) deltas in \(String(format: "%.2f", totalTime))s"
+                    )
+                #endif
 
                 continuation.finish()
             } catch let inv as ServiceToolInvocation {
-                print("[Osaurus][Stream] Tool invocation: \(inv.toolName)")
+                #if DEBUG
+                    print("[Osaurus][Stream] Tool invocation: \(inv.toolName)")
+                #endif
                 toolInvocation = (inv.toolName, inv.jsonArguments)
                 finishReason = .toolCalls
                 continuation.finish(throwing: inv)
             } catch {
                 // Check if this is a CancellationError (expected when consumer stops)
                 if Task.isCancelled || error is CancellationError {
-                    print("[Osaurus][Stream] Stream cancelled after \(deltaCount) deltas")
+                    #if DEBUG
+                        print("[Osaurus][Stream] Stream cancelled after \(deltaCount) deltas")
+                    #endif
                     continuation.finish()
                     return
                 }
-                print("[Osaurus][Stream] Stream error after \(deltaCount) deltas: \(error.localizedDescription)")
+                #if DEBUG
+                    print("[Osaurus][Stream] Stream error after \(deltaCount) deltas: \(error.localizedDescription)")
+                #endif
                 finishReason = .error
                 errorMsg = error.localizedDescription
                 continuation.finish(throwing: error)
diff --git a/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift b/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift
index 9a412dd03..c7bc950b3 100644
--- a/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift
+++ b/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift
@@ -246,17 +246,19 @@ struct StreamAccumulator: AsyncSequence, Sendable {
 
                 // Log info events and surface generation stats downstream.
                 if let info = event.info {
-                    print(
-                        String(
-                            format: "[MLX] prompt: %d tokens %.1f tok/s (%.2fs) | gen: %d tokens %.1f tok/s (%.2fs)",
-                            info.promptTokenCount,
-                            info.promptTokensPerSecond,
-                            info.promptTime,
-                            info.generationTokenCount,
-                            info.tokensPerSecond,
-                            info.generateTime
+                    #if DEBUG
+                        print(
+                            String(
+                                format: "[MLX] prompt: %d tokens %.1f tok/s (%.2fs) | gen: %d tokens %.1f tok/s (%.2fs)",
+                                info.promptTokenCount,
+                                info.promptTokensPerSecond,
+                                info.promptTime,
+                                info.generationTokenCount,
+                                info.tokensPerSecond,
+                                info.generateTime
+                            )
                         )
-                    )
+                    #endif
                     // Emit GPU-accurate stats as a signpost event so they appear in
                     // Instruments and can be captured by `log stream --type signpost`.
                     accumSignposter.emitEvent(
diff --git a/Packages/OsaurusCore/Views/Chat/ChatView.swift b/Packages/OsaurusCore/Views/Chat/ChatView.swift
index f20b436f4..c9ac5aa4f 100644
--- a/Packages/OsaurusCore/Views/Chat/ChatView.swift
+++ b/Packages/OsaurusCore/Views/Chat/ChatView.swift
@@ -1144,10 +1144,12 @@ final class ChatSession: ObservableObject {
                             }
                         }
 
-                        let totalTime = Date().timeIntervalSince(streamStartTime)
-                        print(
-                            "[Osaurus][UI] Stream consumption completed: \(uiDeltaCount) deltas in \(String(format: "%.2f", totalTime))s, final contentLen=\(assistantTurn.contentLength)"
-                        )
+                        #if DEBUG
+                            let totalTime = Date().timeIntervalSince(streamStartTime)
+                            print(
+                                "[Osaurus][UI] Stream consumption completed: \(uiDeltaCount) deltas in \(String(format: "%.2f", totalTime))s, final contentLen=\(assistantTurn.contentLength)"
+                            )
+                        #endif
 
                         break  // finished normally
                     } catch let inv as ServiceToolInvocation {