diff --git a/Packages/OsaurusCore/Managers/Model/ModelManager.swift b/Packages/OsaurusCore/Managers/Model/ModelManager.swift index c87d515f6..dc6595695 100644 --- a/Packages/OsaurusCore/Managers/Model/ModelManager.swift +++ b/Packages/OsaurusCore/Managers/Model/ModelManager.swift @@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject { // Pull the OsaurusAI HF org listing once on launch so newly published // models surface in the Recommended tab without requiring a code push. - Task { [weak self] in await self?.loadOsaurusAIOrgModels() } + // + // The unit-test runner constructs `ModelManager()` repeatedly to drive + // `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races + // with those test calls, whichever finishes last wins and the merge + // result is non-deterministic — that's the regression class behind + // `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI. + // Skip the background fetch under XCTest; production launches still + // get it because `XCTestConfigurationFilePath` is only set inside + // a test host. + if !Self.isRunningInTestEnvironment { + Task { [weak self] in await self?.loadOsaurusAIOrgModels() } + } + } + + /// True when the current process was launched by xctest. Used to gate + /// network-touching launch-time side effects so tests can drive the + /// affected code paths deterministically. 
+ nonisolated private static var isRunningInTestEnvironment: Bool { + ProcessInfo.processInfo.environment["XCTestConfigurationFilePath"] != nil + || ProcessInfo.processInfo.environment["XCTestBundlePath"] != nil + || ProcessInfo.processInfo.environment["XCTestSessionIdentifier"] != nil } // MARK: - Public Methods diff --git a/Packages/OsaurusCore/Networking/HTTPHandler.swift b/Packages/OsaurusCore/Networking/HTTPHandler.swift index 0f8877549..f86096539 100644 --- a/Packages/OsaurusCore/Networking/HTTPHandler.swift +++ b/Packages/OsaurusCore/Networking/HTTPHandler.swift @@ -2010,14 +2010,22 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { session_id: req.session_id ) - var responseContent = "" + // Buffer each streamed delta and assemble the assistant turn once + // the stream ends, replacing the previous `responseContent += delta`. + // `accumulatedLength` sums the deltas' UTF-8 byte counts so the final + // `String` can reserve its storage up front. Motivation: long agent + // turns (especially on /run SSE). NOTE(review): `String` `+=` is already + // an amortized O(1) append, not O(n^2); profile to confirm the gain. + var deltaBuffer: [String] = [] + var accumulatedLength = 0 var toolInvoked: ServiceToolInvocation? do { let stream = try await chatEngine.streamChat(request: iterationReq) for try await delta in stream { if StreamingToolHint.isSentinel(delta) { continue } - responseContent += delta + deltaBuffer.append(delta) + accumulatedLength += delta.utf8.count hop { writerBound.value.writeContent( delta, @@ -2047,8 +2055,18 @@ final class HTTPHandler: ChannelInboundHandler, Sendable { return } + // Materialize the full assistant text exactly once: + // `reserveCapacity(accumulatedLength)` sizes the storage up + // front, then each buffered chunk is appended in order, so the + // appends should not need to regrow it. The tool-invocation branch below + // reads this too (as the assistant's pre-tool-call text), + // so it has to live outside the `guard`.
+ var responseContent = String() + responseContent.reserveCapacity(accumulatedLength) + for chunk in deltaBuffer { responseContent.append(chunk) } + guard let invocation = toolInvoked else { - // Final text response — done + // Final text response — done. messages.append(ChatMessage(role: "assistant", content: responseContent)) break }