Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion Packages/OsaurusCore/Managers/Model/ModelManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,27 @@ final class ModelManager: NSObject, ObservableObject {

// Pull the OsaurusAI HF org listing once on launch so newly published
// models surface in the Recommended tab without requiring a code push.
Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
//
// The unit-test runner constructs `ModelManager()` repeatedly to drive
// `applyOsaurusOrgFetch` directly. If the launch-time HF fetch races
// with those test calls, whichever finishes last wins and the merge
// result is non-deterministic — that's the regression class behind
// `ModelManagerSuggestedTests/applyOsaurusOrgFetch_*` flaking in CI.
// Skip the background fetch under XCTest; production launches still
// get it because the XCTest environment variables checked by
// `isRunningInTestEnvironment` are only set inside a test host.
if !Self.isRunningInTestEnvironment {
Task { [weak self] in await self?.loadOsaurusAIOrgModels() }
}
}

/// Reports whether the current process appears to be hosted by XCTest.
///
/// Detection is environment-based: the value is `true` as soon as any of
/// `XCTestConfigurationFilePath`, `XCTestBundlePath`, or
/// `XCTestSessionIdentifier` is present in the process environment.
/// It gates network-touching launch-time side effects so tests can drive
/// the affected code paths deterministically.
nonisolated private static var isRunningInTestEnvironment: Bool {
    // Snapshot the environment once and probe it for each marker key.
    let environment = ProcessInfo.processInfo.environment
    let xctestMarkers = [
        "XCTestConfigurationFilePath",
        "XCTestBundlePath",
        "XCTestSessionIdentifier",
    ]
    return xctestMarkers.contains { environment[$0] != nil }
}

// MARK: - Public Methods
Expand Down
24 changes: 21 additions & 3 deletions Packages/OsaurusCore/Networking/HTTPHandler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2010,14 +2010,22 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
session_id: req.session_id
)

var responseContent = ""
// Accumulate deltas in an array and build the full text once at the end.
// The previous `responseContent += delta` grew the assistant turn in
// place on every streamed chunk. NOTE(review): Swift documents `String`
// appends as amortized constant time (exponential buffer growth), so
// describing `+=` as O(n^2) looks overstated — confirm with a profile.
// Long agent turns (especially on /run SSE) reportedly made the old
// accumulation measurable on top of inference latency.
var deltaBuffer: [String] = []
var accumulatedLength = 0
var toolInvoked: ServiceToolInvocation?

do {
let stream = try await chatEngine.streamChat(request: iterationReq)
for try await delta in stream {
if StreamingToolHint.isSentinel(delta) { continue }
responseContent += delta
deltaBuffer.append(delta)
accumulatedLength += delta.utf8.count
hop {
writerBound.value.writeContent(
delta,
Expand Down Expand Up @@ -2047,8 +2055,18 @@ final class HTTPHandler: ChannelInboundHandler, Sendable {
return
}

// Materialize the full assistant text exactly once, after the
// stream has finished. `reserveCapacity(accumulatedLength)` sizes
// the storage from the summed UTF-8 byte count of the chunks, so
// the chunk-by-chunk append below should not need to regrow it.
// The tool-invocation branch below
// reads this too (as the assistant's pre-tool-call text),
// so it has to live outside the `guard`.
var responseContent = String()
responseContent.reserveCapacity(accumulatedLength)
for chunk in deltaBuffer { responseContent.append(chunk) }

guard let invocation = toolInvoked else {
// Final text response — done
// Final text response — done.
messages.append(ChatMessage(role: "assistant", content: responseContent))
break
}
Expand Down
Loading