From 767b7933f05ae92ac18f93e30d9b4ac9cdb71cba Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 27 May 2026 04:03:07 +0000 Subject: [PATCH 1/2] Gate hot-path stream prints behind #if DEBUG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audit found three high-frequency print() statements running in release builds: * ChatEngine producer task — one print on stream start, one every 50 deltas (or whenever a delta gap exceeds 2s), one on stream end, and one per tool invocation. Each token batch on a 100 tok/s local model could fire one of these. * StreamAccumulator — one print per MLX completion-info event, duplicating the same data we already emit via signposts and os_log (which both respect privacy redaction policies). * ChatView stream consumer — one print per UI stream completion. stdout in release builds is captured by Console.app and the unified logging system, but bypasses the privacy('public'/'private') redaction that the matching Logger calls use. The signpost + os_log calls in StreamAccumulator already give Instruments and 'log stream' all the perf data, so the print is purely a duplicate. Gate every flagged print behind '#if DEBUG'. Behavior in DEBUG is unchanged; release builds skip the formatting, the String allocations, and the stdout write entirely. The lastDeltaTime tracking in ChatEngine is moved inside the same DEBUG guard since it has no purpose outside the freeze-detection log. startTime is kept (still used by the InsightsService log call). Tool-call prints in ChatView (lines 1179, 1242) are left alone for now -- they include user-supplied content and deserve a separate, focused privacy-cleanup pass. Co-authored-by: Michael Meding --- .../Services/Chat/ChatEngine.swift | 59 ++++++++++++------- .../ModelRuntime/StreamAccumulator.swift | 22 +++---- .../OsaurusCore/Views/Chat/ChatView.swift | 10 ++-- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift index b92007d91..a5f8c8dd5 100644 --- a/Packages/OsaurusCore/Services/Chat/ChatEngine.swift +++ b/Packages/OsaurusCore/Services/Chat/ChatEngine.swift @@ -204,15 +204,21 @@ actor ChatEngine: Sendable, ChatEngineProtocol { var finishReason: InferenceLog.FinishReason = .stop var errorMsg: String? = nil var toolInvocation: (name: String, args: String)? = nil - var lastDeltaTime = startTime + #if DEBUG + var lastDeltaTime = startTime + #endif - print("[Osaurus][Stream] Starting stream wrapper for model: \(model)") + #if DEBUG + print("[Osaurus][Stream] Starting stream wrapper for model: \(model)") + #endif do { for try await delta in inner { // Check for task cancellation to allow early termination if Task.isCancelled { - print("[Osaurus][Stream] Task cancelled after \(deltaCount) deltas") + #if DEBUG + print("[Osaurus][Stream] Task cancelled after \(deltaCount) deltas") + #endif continuation.finish() return } @@ -224,17 +230,20 @@ actor ChatEngine: Sendable, ChatEngineProtocol { } deltaCount += 1 - let now = Date() - let timeSinceStart = now.timeIntervalSince(startTime) - let timeSinceLastDelta = now.timeIntervalSince(lastDeltaTime) - lastDeltaTime = now - - // Log every 50th delta or if there's a long gap (potential freeze indicator) - if deltaCount % 50 == 1 || timeSinceLastDelta > 2.0 { - print( - "[Osaurus][Stream] Delta #\(deltaCount): +\(String(format: "%.2f", timeSinceStart))s total, gap=\(String(format: "%.3f", timeSinceLastDelta))s, len=\(delta.count)" - ) - } + + #if DEBUG + let now = Date() + let timeSinceStart = now.timeIntervalSince(startTime) + let timeSinceLastDelta = now.timeIntervalSince(lastDeltaTime) + lastDeltaTime = now + + // Log every 50th delta or if there's a long gap (potential freeze indicator) + if deltaCount % 50 == 1 || timeSinceLastDelta > 2.0 { + print( + "[Osaurus][Stream] Delta #\(deltaCount): +\(String(format: "%.2f", timeSinceStart))s total, gap=\(String(format: "%.3f", timeSinceLastDelta))s, len=\(delta.count)" + ) + } + #endif // Estimate tokens: each delta chunk is roughly proportional to tokens // More accurate: count whitespace-separated words, or use tokenizer @@ -242,25 +251,33 @@ actor ChatEngine: Sendable, ChatEngineProtocol { continuation.yield(delta) } - let totalTime = Date().timeIntervalSince(startTime) - print( - "[Osaurus][Stream] Stream completed: \(deltaCount) deltas in \(String(format: "%.2f", totalTime))s" - ) + #if DEBUG + let totalTime = Date().timeIntervalSince(startTime) + print( + "[Osaurus][Stream] Stream completed: \(deltaCount) deltas in \(String(format: "%.2f", totalTime))s" + ) + #endif continuation.finish() } catch let inv as ServiceToolInvocation { - print("[Osaurus][Stream] Tool invocation: \(inv.toolName)") + #if DEBUG + print("[Osaurus][Stream] Tool invocation: \(inv.toolName)") + #endif toolInvocation = (inv.toolName, inv.jsonArguments) finishReason = .toolCalls continuation.finish(throwing: inv) } catch { // Check if this is a CancellationError (expected when consumer stops) if Task.isCancelled || error is CancellationError { - print("[Osaurus][Stream] Stream cancelled after \(deltaCount) deltas") + #if DEBUG + print("[Osaurus][Stream] Stream cancelled after \(deltaCount) deltas") + #endif continuation.finish() return } - print("[Osaurus][Stream] Stream error after \(deltaCount) deltas: \(error.localizedDescription)") + #if DEBUG + print("[Osaurus][Stream] Stream error after \(deltaCount) deltas: \(error.localizedDescription)") + #endif finishReason = .error errorMsg = error.localizedDescription continuation.finish(throwing: error) diff --git a/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift b/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift index 9a412dd03..c7bc950b3 100644 --- a/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift +++ b/Packages/OsaurusCore/Services/ModelRuntime/StreamAccumulator.swift @@ -246,17 +246,19 @@ struct StreamAccumulator: AsyncSequence, Sendable { // Log info events and surface generation stats downstream. if let info = event.info { - print( - String( - format: "[MLX] prompt: %d tokens %.1f tok/s (%.2fs) | gen: %d tokens %.1f tok/s (%.2fs)", - info.promptTokenCount, - info.promptTokensPerSecond, - info.promptTime, - info.generationTokenCount, - info.tokensPerSecond, - info.generateTime + #if DEBUG + print( + String( + format: "[MLX] prompt: %d tokens %.1f tok/s (%.2fs) | gen: %d tokens %.1f tok/s (%.2fs)", + info.promptTokenCount, + info.promptTokensPerSecond, + info.promptTime, + info.generationTokenCount, + info.tokensPerSecond, + info.generateTime + ) ) - ) + #endif // Emit GPU-accurate stats as a signpost event so they appear in // Instruments and can be captured by `log stream --type signpost`. accumSignposter.emitEvent( diff --git a/Packages/OsaurusCore/Views/Chat/ChatView.swift b/Packages/OsaurusCore/Views/Chat/ChatView.swift index f20b436f4..c9ac5aa4f 100644 --- a/Packages/OsaurusCore/Views/Chat/ChatView.swift +++ b/Packages/OsaurusCore/Views/Chat/ChatView.swift @@ -1144,10 +1144,12 @@ final class ChatSession: ObservableObject { } } - let totalTime = Date().timeIntervalSince(streamStartTime) - print( - "[Osaurus][UI] Stream consumption completed: \(uiDeltaCount) deltas in \(String(format: "%.2f", totalTime))s, final contentLen=\(assistantTurn.contentLength)" - ) + #if DEBUG + let totalTime = Date().timeIntervalSince(streamStartTime) + print( + "[Osaurus][UI] Stream consumption completed: \(uiDeltaCount) deltas in \(String(format: "%.2f", totalTime))s, final contentLen=\(assistantTurn.contentLength)" + ) + #endif break // finished normally } catch let inv as ServiceToolInvocation { From 81bf0eb411e5b66f5b28935103180d863aa40303 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 27 May 2026 05:08:34 +0000 Subject: [PATCH 2/2] Fix flake: skip ModelManager launch-time HF fetch under xctest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ModelManager.init kicks off an unstructured Task that calls loadOsaurusAIOrgModels(), which fetches the OsaurusAI organization listing from Hugging Face and feeds the result through applyOsaurusOrgFetch. The unit-test runner repeatedly constructs ModelManager() to drive applyOsaurusOrgFetch directly. The background launch-time fetch races with those test calls — whichever finishes last wins, and the merge result is non-deterministic. That's the root cause of the flaky ModelManagerSuggestedTests failures seen across many of the recent PR CI runs (applyOsaurusOrgFetch_dropsStaleAutoFetched OnReapply, applyOsaurusOrgFetch_addsNewEntriesAfterCurated, etc.). Gate the launch-time fetch on a small isRunningInTestEnvironment helper that checks for any of XCTestConfigurationFilePath, XCTestBundlePath, or XCTestSessionIdentifier in the process environment. Those variables are only present inside an xctest host process; production app launches still get the HF fetch exactly as before. This is a network call, so removing it under tests also has the side benefit of making the test suite work offline / on hermetic CI runners. Co-authored-by: Michael Meding --- .../Managers/Model/ModelManager.swift | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift index c87d515f6..dc6595695 100644 --- a/Packages/OsaurusCore/Managers/Model/ModelManager.swift +++ b/Packages/OsaurusCore/Managers/Model/ModelManager.swift @@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject { // Pull the OsaurusAI HF org listing once on launch so newly published // models surface in the Recommended tab without requiring a code push. - Task { [weak self] in await self?.loadOsaurusAIOrgModels() } + // + // The unit-test runner constructs `ModelManager()` repeatedly to drive + // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races + // with those test calls, whichever finishes last wins and the merge + // result is non-deterministic — that's the regression class behind + // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI. + // Skip the background fetch under XCTest; production launches still + // get it because `XCTestConfigurationFilePath` is only set inside + // a test host. + if !Self.isRunningInTestEnvironment { + Task { [weak self] in await self?.loadOsaurusAIOrgModels() } + } + } + + /// True when the current process was launched by xctest. Used to gate + /// network-touching launch-time side effects so tests can drive the + /// affected code paths deterministically. + nonisolated private static var isRunningInTestEnvironment: Bool { + ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] != nil + || ProcessInfo.processInfo.environment["XCTestBundlePath"] != nil + || ProcessInfo.processInfo.environment["XCTestSessionIdentifier"] != nil } // MARK: - Public Methods