From 767b7933f05ae92ac18f93e30d9b4ac9cdb71cba Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Wed, 27 May 2026 04:03:07 +0000
Subject: [PATCH 1/2] Gate hot-path stream prints behind #if DEBUG
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The audit found three high-frequency print() statements running in
release builds:

  * ChatEngine producer task — one print on stream start, one every
    50 deltas (or whenever a delta gap exceeds 2s), one on stream
    end, and one per tool invocation. Each token batch on a 100 tok/s
    local model could fire one of these.
  * StreamAccumulator — one print per MLX completion-info event,
    duplicating the same data we already emit via signposts and
    os_log (which both respect privacy redaction policies).
  * ChatView stream consumer — one print per UI stream completion.

stdout in release builds is captured by Console.app and the unified
logging system, but bypasses the privacy('public'/'private')
redaction that the matching Logger calls use. The signpost + os_log
calls in StreamAccumulator already give Instruments and 'log stream'
all the perf data, so the print is purely a duplicate.

Gate every flagged print behind '#if DEBUG'. Behavior in DEBUG is
unchanged; release builds skip the formatting, the String allocations,
and the stdout write entirely.

The lastDeltaTime tracking in ChatEngine is moved inside the same
DEBUG guard since it has no purpose outside the freeze-detection
log. startTime is kept (still used by the InsightsService log call).
Tool-call prints in ChatView (lines 1179, 1242) are left alone for
now -- they include user-supplied content and deserve a separate,
focused privacy-cleanup pass.

Co-authored-by: Michael Meding <mimeding@users.noreply.github.com>
---
 .../Services/Chat/ChatEngine.swift            | 59 ++++++++++++-------
 .../ModelRuntime/StreamAccumulator.swift      | 22 +++----
 .../OsaurusCore/Views/Chat/ChatView.swift     | 10 ++--
 3 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
index b92007d91..a5f8c8dd5 100644
--- a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
+++ b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift
@@ -204,15 +204,21 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
             var finishReason: InferenceLog.FinishReason = .stop
             var errorMsg: String? = nil
             var toolInvocation: (name: String, args: String)? = nil
-            var lastDeltaTime = startTime
+            #if DEBUG
+                var lastDeltaTime = startTime
+            #endif
 
-            print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
+            #if DEBUG
+                print("[Osaurus][Stream] Starting stream wrapper for model: \(model)")
+            #endif
 
             do {
                 for try await delta in inner {
                     // Check for task cancellation to allow early termination
                     if Task.isCancelled {
-                        print("[Osaurus][Stream] Task cancelled after \(deltaCount) deltas")
+                        #if DEBUG
+                            print("[Osaurus][Stream] Task cancelled after \(deltaCount) deltas")
+                        #endif
                         continuation.finish()
                         return
                     }
@@ -224,17 +230,20 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
                     }
 
                     deltaCount += 1
-                    let now = Date()
-                    let timeSinceStart = now.timeIntervalSince(startTime)
-                    let timeSinceLastDelta = now.timeIntervalSince(lastDeltaTime)
-                    lastDeltaTime = now
-
-                    // Log every 50th delta or if there's a long gap (potential freeze indicator)
-                    if deltaCount % 50 == 1 || timeSinceLastDelta > 2.0 {
-                        print(
-                            "[Osaurus][Stream] Delta #\(deltaCount): +\(String(format: "%.2f", timeSinceStart))s total, gap=\(String(format: "%.3f", timeSinceLastDelta))s, len=\(delta.count)"
-                        )
-                    }
+
+                    #if DEBUG
+                        let now = Date()
+                        let timeSinceStart = now.timeIntervalSince(startTime)
+                        let timeSinceLastDelta = now.timeIntervalSince(lastDeltaTime)
+                        lastDeltaTime = now
+
+                        // Log every 50th delta or if there's a long gap (potential freeze indicator)
+                        if deltaCount % 50 == 1 || timeSinceLastDelta > 2.0 {
+                            print(
+                                "[Osaurus][Stream] Delta #\(deltaCount): +\(String(format: "%.2f", timeSinceStart))s total, gap=\(String(format: "%.3f", timeSinceLastDelta))s, len=\(delta.count)"
+                            )
+                        }
+                    #endif
 
                     // Estimate tokens: each delta chunk is roughly proportional to tokens
                     // More accurate: count whitespace-separated words, or use tokenizer
@@ -242,25 +251,33 @@ actor ChatEngine: Sendable, ChatEngineProtocol {
                     continuation.yield(delta)
                 }
 
-                let totalTime = Date().timeIntervalSince(startTime)
-                print(
-                    "[Osaurus][Stream] Stream completed: \(deltaCount) deltas in \(String(format: "%.2f", totalTime))s"
-                )
+                #if DEBUG
+                    let totalTime = Date().timeIntervalSince(startTime)
+                    print(
+                        "[Osaurus][Stream] Stream completed: \(deltaCount) deltas in \(String(format: "%.2f", totalTime))s"
+                    )
+                #endif
 
                 continuation.finish()
             } catch let inv as ServiceToolInvocation {
-                print("[Osaurus][Stream] Tool invocation: \(inv.toolName)")
+                #if DEBUG
+                    print("[Osaurus][Stream] Tool invocation: \(inv.toolName)")
+                #endif
                 toolInvocation = (inv.toolName, inv.jsonArguments)
                 finishReason = .toolCalls
                 continuation.finish(throwing: inv)
             } catch {
                 // Check if this is a CancellationError (expected when consumer stops)
                 if Task.isCancelled || error is CancellationError {
-                    print("[Osaurus][Stream] Stream cancelled after \(deltaCount) deltas")
+                    #if DEBUG
+                        print("[Osaurus][Stream] Stream cancelled after \(deltaCount) deltas")
+                    #endif
                     continuation.finish()
                     return
                 }
-                print("[Osaurus][Stream] Stream error after \(deltaCount) deltas: \(error.localizedDescription)")
+                #if DEBUG
+                    print("[Osaurus][Stream] Stream error after \(deltaCount) deltas: \(error.localizedDescription)")
+                #endif
                 finishReason = .error
                 errorMsg = error.localizedDescription
                 continuation.finish(throwing: error)
diff --git a/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift b/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift
index 9a412dd03..c7bc950b3 100644
--- a/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift
+++ b/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift
@@ -246,17 +246,19 @@ struct StreamAccumulator: AsyncSequence, Sendable {
 
                 // Log info events and surface generation stats downstream.
                 if let info = event.info {
-                    print(
-                        String(
-                            format: "[MLX] prompt: %d tokens %.1f tok/s (%.2fs) | gen: %d tokens %.1f tok/s (%.2fs)",
-                            info.promptTokenCount,
-                            info.promptTokensPerSecond,
-                            info.promptTime,
-                            info.generationTokenCount,
-                            info.tokensPerSecond,
-                            info.generateTime
+                    #if DEBUG
+                        print(
+                            String(
+                                format: "[MLX] prompt: %d tokens %.1f tok/s (%.2fs) | gen: %d tokens %.1f tok/s (%.2fs)",
+                                info.promptTokenCount,
+                                info.promptTokensPerSecond,
+                                info.promptTime,
+                                info.generationTokenCount,
+                                info.tokensPerSecond,
+                                info.generateTime
+                            )
                         )
-                    )
+                    #endif
                     // Emit GPU-accurate stats as a signpost event so they appear in
                     // Instruments and can be captured by `log stream --type signpost`.
                     accumSignposter.emitEvent(
diff --git a/Packages/OsaurusCore/Views/Chat/ChatView.swift b/Packages/OsaurusCore/Views/Chat/ChatView.swift
index f20b436f4..c9ac5aa4f 100644
--- a/Packages/OsaurusCore/Views/Chat/ChatView.swift
+++ b/Packages/OsaurusCore/Views/Chat/ChatView.swift
@@ -1144,10 +1144,12 @@ final class ChatSession: ObservableObject {
                             }
                         }
 
-                        let totalTime = Date().timeIntervalSince(streamStartTime)
-                        print(
-                            "[Osaurus][UI] Stream consumption completed: \(uiDeltaCount) deltas in \(String(format: "%.2f", totalTime))s, final contentLen=\(assistantTurn.contentLength)"
-                        )
+                        #if DEBUG
+                            let totalTime = Date().timeIntervalSince(streamStartTime)
+                            print(
+                                "[Osaurus][UI] Stream consumption completed: \(uiDeltaCount) deltas in \(String(format: "%.2f", totalTime))s, final contentLen=\(assistantTurn.contentLength)"
+                            )
+                        #endif
 
                         break  // finished normally
                     } catch let inv as ServiceToolInvocation {

From 81bf0eb411e5b66f5b28935103180d863aa40303 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Wed, 27 May 2026 05:08:34 +0000
Subject: [PATCH 2/2] Fix flake: skip ModelManager launch-time HF fetch under
 xctest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ModelManager.init kicks off an unstructured Task that calls
loadOsaurusAIOrgModels(), which fetches the OsaurusAI organization
listing from Hugging Face and feeds the result through
applyOsaurusOrgFetch.

The unit-test runner repeatedly constructs ModelManager() to drive
applyOsaurusOrgFetch directly. The background launch-time fetch
races with those test calls — whichever finishes last wins, and
the merge result is non-deterministic. That's the root cause of
the flaky ModelManagerSuggestedTests failures seen across many of
the recent PR CI runs (applyOsaurusOrgFetch_dropsStaleAutoFetched
OnReapply, applyOsaurusOrgFetch_addsNewEntriesAfterCurated, etc.).

Gate the launch-time fetch on a small isRunningInTestEnvironment
helper that checks for any of XCTestConfigurationFilePath,
XCTestBundlePath, or XCTestSessionIdentifier in the process
environment. Those variables are only present inside an xctest host
process; production app launches still get the HF fetch exactly as
before.

This is a network call, so removing it under tests also has the
side benefit of making the test suite work offline / on hermetic
CI runners.

Co-authored-by: Michael Meding <mimeding@users.noreply.github.com>
---
 .../Managers/Model/ModelManager.swift         | 22 ++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
index c87d515f6..dc6595695 100644
--- a/Packages/OsaurusCore/Managers/Model/ModelManager.swift
+++ b/Packages/OsaurusCore/Managers/Model/ModelManager.swift
@@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject {
 
         // Pull the OsaurusAI HF org listing once on launch so newly published
         // models surface in the Recommended tab without requiring a code push.
-        Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        //
+        // The unit-test runner constructs `ModelManager()` repeatedly to drive
+        // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races
+        // with those test calls, whichever finishes last wins and the merge
+        // result is non-deterministic — that's the regression class behind
+        // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI.
+        // Skip the background fetch under XCTest; production launches still
+        // get it because `XCTestConfigurationFilePath` is only set inside
+        // a test host.
+        if !Self.isRunningInTestEnvironment {
+            Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
+        }
+    }
+
+    /// True when the current process was launched by xctest. Used to gate
+    /// network-touching launch-time side effects so tests can drive the
+    /// affected code paths deterministically.
+    nonisolated private static var isRunningInTestEnvironment: Bool {
+        ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] != nil
+            || ProcessInfo.processInfo.environment["XCTestBundlePath"] != nil
+            || ProcessInfo.processInfo.environment["XCTestSessionIdentifier"] != nil
     }
 
     // MARK: - Public Methods