diff --git a/CHANGELOG.md b/CHANGELOG.md index a4c7e4e..83ceaa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -665,12 +665,10 @@ The format is based on Keep a Changelog. ### Added - App Branch source card now shows an Apps-tab drag hint in the header. -- Added custom LLM model options: - - `mlx-community/Qwen3.5-0.8B-MLX-4bit` - - `mlx-community/Qwen3.5-2B-MLX-4bit` +- Added an experimental custom LLM model path for early `Qwen3.5` evaluation. ### Changed -- Upgraded `mlx-swift-lm` to a newer revision with `qwen3_5` model-type support. +- Upgraded `mlx-swift-lm` to a newer revision for early `Qwen3.5` model-type support. - Improved App Branch localization coverage for tab content and related sheets. ### Fixed diff --git a/README.md b/README.md index 6801e58..5bd5d14 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ Notes for the current MLX Audio integration: - Voxt stores MLX Audio downloads under its `mlx-audio` model storage root and checks canonical model identifiers before deciding whether a model is already installed. - Older saved model IDs are auto-migrated to the current canonical IDs for `Parakeet`, `GLM-ASR Nano`, `Voxtral Realtime`, and `FireRed ASR 2`, so existing settings should continue working after upgrade. - Alignment-only repositories are rejected explicitly; for example, `Qwen3-ForcedAligner` is not treated as a transcription model. -- The current package source is the Voxt mirror fork `hehehai/mlx-audio-swift` pinned to commit `c96fe7b8577fb1db5a9987a6582e706acb388a8e`. See [docs/MLXAudioDependency.md](docs/MLXAudioDependency.md) for the dependency policy. +- The current package source is the Voxt mirror fork `hehehai/mlx-audio-swift` pinned to commit `8ae0c745360b32c128c0ba6d4e46b27ee3214529`. See [docs/MLXAudioDependency.md](docs/MLXAudioDependency.md) for the dependency policy. 
#### Whisper (WhisperKit) diff --git a/Voxt.xcodeproj/project.pbxproj b/Voxt.xcodeproj/project.pbxproj index b44c992..0db145f 100644 --- a/Voxt.xcodeproj/project.pbxproj +++ b/Voxt.xcodeproj/project.pbxproj @@ -13,6 +13,7 @@ A1B2C3D4E5F6078901234569 /* MLXAudioSTT in Frameworks */ = {isa = PBXBuildFile; productRef = A1B2C3D4E5F607890123456A /* MLXAudioSTT */; }; B1C2D3E4F5060708090A0B0C /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = B1C2D3E4F5060708090A0B0D /* Sparkle */; }; C1D2E3F4A5060708090A0B01 /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = C1D2E3F4A5060708090A0B02 /* MLXLMCommon */; }; + C1D2E3F4A5060708090A0B06 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = C1D2E3F4A5060708090A0B07 /* MLXLLM */; }; D1E2F3A4B5C60718293A4B5C /* WhisperKit in Frameworks */ = {isa = PBXBuildFile; productRef = D1E2F3A4B5C60718293A4B5D /* WhisperKit */; }; E2A1C3B4D5F60718293A4B6C /* PermissionFlow in Frameworks */ = {isa = PBXBuildFile; productRef = E2A1C3B4D5F60718293A4B6D /* PermissionFlow */; }; E2A1C3B4D5F60718293A4B6E /* SystemSettingsKit in Frameworks */ = {isa = PBXBuildFile; productRef = E2A1C3B4D5F60718293A4B6F /* SystemSettingsKit */; }; @@ -75,6 +76,7 @@ A1B2C3D4E5F6078901234569 /* MLXAudioSTT in Frameworks */, B1C2D3E4F5060708090A0B0C /* Sparkle in Frameworks */, C1D2E3F4A5060708090A0B01 /* MLXLMCommon in Frameworks */, + C1D2E3F4A5060708090A0B06 /* MLXLLM in Frameworks */, D1E2F3A4B5C60718293A4B5C /* WhisperKit in Frameworks */, E2A1C3B4D5F60718293A4B6C /* PermissionFlow in Frameworks */, E2A1C3B4D5F60718293A4B6E /* SystemSettingsKit in Frameworks */, @@ -172,6 +174,7 @@ A1B2C3D4E5F607890123456A /* MLXAudioSTT */, B1C2D3E4F5060708090A0B0D /* Sparkle */, C1D2E3F4A5060708090A0B02 /* MLXLMCommon */, + C1D2E3F4A5060708090A0B07 /* MLXLLM */, D1E2F3A4B5C60718293A4B5D /* WhisperKit */, E2A1C3B4D5F60718293A4B6D /* PermissionFlow */, E2A1C3B4D5F60718293A4B6F /* SystemSettingsKit */, @@ -879,7 +882,7 @@ repositoryURL = 
"https://github.com/hehehai/mlx-audio-swift.git"; requirement = { kind = revision; - revision = c96fe7b8577fb1db5a9987a6582e706acb388a8e; + revision = 8ae0c745360b32c128c0ba6d4e46b27ee3214529; }; }; B1C2D3E4F5060708090A0B0E /* XCRemoteSwiftPackageReference "Sparkle" */ = { @@ -894,8 +897,8 @@ isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/ml-explore/mlx-swift-lm.git"; requirement = { - kind = revision; - revision = e33eba8513595bde535719c48fedcb10ade5af57; + kind = exactVersion; + version = 3.31.3; }; }; D1E2F3A4B5C60718293A4B5E /* XCRemoteSwiftPackageReference "WhisperKit" */ = { @@ -945,6 +948,11 @@ package = C1D2E3F4A5060708090A0B03 /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; productName = MLXLMCommon; }; + C1D2E3F4A5060708090A0B07 /* MLXLLM */ = { + isa = XCSwiftPackageProductDependency; + package = C1D2E3F4A5060708090A0B03 /* XCRemoteSwiftPackageReference "mlx-swift-lm" */; + productName = MLXLLM; + }; D1E2F3A4B5C60718293A4B5D /* WhisperKit */ = { isa = XCSwiftPackageProductDependency; package = D1E2F3A4B5C60718293A4B5E /* XCRemoteSwiftPackageReference "WhisperKit" */; diff --git a/Voxt/App/AppDelegate+EnhancementBrowserContext.swift b/Voxt/App/AppDelegate+EnhancementBrowserContext.swift index 3533ac1..574b6a5 100644 --- a/Voxt/App/AppDelegate+EnhancementBrowserContext.swift +++ b/Voxt/App/AppDelegate+EnhancementBrowserContext.swift @@ -10,17 +10,22 @@ extension AppDelegate { func activeBrowserTabURL(frontmostBundleID: String?) -> String? { guard let frontmostBundleID else { return nil } + if let deniedUntil = browserAutomationDeniedUntilByBundleID[frontmostBundleID], + deniedUntil > Date() { + return nil + } guard NSRunningApplication.runningApplications(withBundleIdentifier: frontmostBundleID) .contains(where: { !$0.isTerminated }) else { - VoxtLog.info("Browser process not running while resolving active tab URL. bundleID=\(frontmostBundleID)") + VoxtLog.model("Browser process not running while resolving active tab URL. 
bundleID=\(frontmostBundleID)") return nil } guard let provider = browserScriptProvider(for: frontmostBundleID) else { return nil } if let scriptedURL = runAppleScriptCandidates(provider.scripts, providerName: provider.name) { + browserAutomationDeniedUntilByBundleID.removeValue(forKey: frontmostBundleID) return scriptedURL } if let axURL = activeBrowserTabURLFromAccessibility(frontmostBundleID: frontmostBundleID) { - VoxtLog.info("Browser active-tab URL read succeeded via AX fallback. provider=\(provider.name)") + VoxtLog.model("Browser active-tab URL read succeeded via AX fallback. provider=\(provider.name)") return axURL } return nil @@ -129,28 +134,35 @@ extension AppDelegate { !output.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000) if index > 0 { - VoxtLog.info("Browser active-tab URL read succeeded via fallback. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs)") + VoxtLog.model("Browser active-tab URL read succeeded via fallback. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs)") } return output } if let executionError { let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000) - VoxtLog.info( + VoxtLog.model( "Browser active-tab URL candidate failed. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs), error=\(executionError)" ) lastError = executionError - if let errorNumber = executionError["NSAppleScriptErrorNumber"] as? Int, errorNumber == -600 { - break + if let errorNumber = executionError["NSAppleScriptErrorNumber"] as? 
Int { + if errorNumber == -1743 || errorNumber == -10004 { + if let frontmostBundleID = NSWorkspace.shared.frontmostApplication?.bundleIdentifier { + browserAutomationDeniedUntilByBundleID[frontmostBundleID] = Date().addingTimeInterval(300) + } + } + if errorNumber == -600 { + break + } } } else { let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000) - VoxtLog.info( + VoxtLog.model( "Browser active-tab URL candidate returned empty/timed out. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs)" ) } } if let lastError { - VoxtLog.info("Browser active-tab URL read failed. provider=\(providerName), error=\(lastError)") + VoxtLog.model("Browser active-tab URL read failed. provider=\(providerName), error=\(lastError)") } return nil } @@ -176,7 +188,7 @@ extension AppDelegate { guard let script = NSAppleScript(source: wrappedSource) else { return nil } guard let output = script.executeAndReturnError(&error).stringValue else { if logFailure, let error { - VoxtLog.info("Browser active-tab URL read failed: \(error)") + VoxtLog.model("Browser active-tab URL read failed: \(error)") } return nil } @@ -185,13 +197,13 @@ extension AppDelegate { func activeBrowserTabURLFromAccessibility(frontmostBundleID: String) -> String? 
{ guard AccessibilityPermissionManager.isTrusted() else { - VoxtLog.info("Browser active-tab AX fallback unavailable: accessibility not trusted") + VoxtLog.model("Browser active-tab AX fallback unavailable: accessibility not trusted") return nil } guard let app = NSWorkspace.shared.frontmostApplication, app.bundleIdentifier == frontmostBundleID else { - VoxtLog.info("Browser active-tab AX fallback skipped: frontmost app changed") + VoxtLog.model("Browser active-tab AX fallback skipped: frontmost app changed") return nil } @@ -207,7 +219,7 @@ extension AppDelegate { let url = axDocumentURL(from: focusedWindow) { return url } else if focusedStatus != .success { - VoxtLog.info("Browser active-tab AX fallback focused window unavailable: status=\(focusedStatus.rawValue)") + VoxtLog.model("Browser active-tab AX fallback focused window unavailable: status=\(focusedStatus.rawValue)") } var mainWindowValue: CFTypeRef? @@ -220,7 +232,7 @@ extension AppDelegate { let mainWindow = mainWindowValue { return axDocumentURL(from: mainWindow) } - VoxtLog.info("Browser active-tab AX fallback main window unavailable: status=\(mainStatus.rawValue)") + VoxtLog.model("Browser active-tab AX fallback main window unavailable: status=\(mainStatus.rawValue)") return nil } @@ -234,7 +246,7 @@ extension AppDelegate { &documentValue ) guard status == .success, let documentValue else { - VoxtLog.info("Browser active-tab AX fallback document attribute unavailable: status=\(status.rawValue)") + VoxtLog.model("Browser active-tab AX fallback document attribute unavailable: status=\(status.rawValue)") return nil } return documentValue as? 
String diff --git a/Voxt/App/AppDelegate+MeetingSummary.swift b/Voxt/App/AppDelegate+MeetingSummary.swift index cb24ea0..1d58125 100644 --- a/Voxt/App/AppDelegate+MeetingSummary.swift +++ b/Voxt/App/AppDelegate+MeetingSummary.swift @@ -49,7 +49,7 @@ extension AppDelegate { ) } - let downloadedCustomOptions: [MeetingSummaryModelOption] = CustomLLMModelManager.availableModels.compactMap { model -> MeetingSummaryModelOption? in + let downloadedCustomOptions: [MeetingSummaryModelOption] = CustomLLMModelManager.displayModels(including: customLLMManager.currentModelRepo).compactMap { model -> MeetingSummaryModelOption? in guard customLLMManager.isModelDownloaded(repo: model.id) else { return nil } diff --git a/Voxt/App/AppDelegate+RecordingSession.swift b/Voxt/App/AppDelegate+RecordingSession.swift index 60842dc..7acefb0 100644 --- a/Voxt/App/AppDelegate+RecordingSession.swift +++ b/Voxt/App/AppDelegate+RecordingSession.swift @@ -332,7 +332,7 @@ extension AppDelegate { VoxtLog.info("Transcription result received. characters=\(text.count), output=\(sessionOutputMode == .translation ? "translation" : "transcription")") VoxtLog.info("Transcription result output mode resolved as \(RecordingSessionSupport.outputLabel(for: sessionOutputMode)).", verbose: true) - VoxtLog.info( + VoxtLog.model( "Session text model routing: \(RecordingSessionSupport.textModelRoutingDescription(outputMode: sessionOutputMode, transcriptionSettings: transcriptionFeatureSettings, translationSettings: translationFeatureSettings, rewriteSettings: rewriteFeatureSettings))" ) @@ -357,7 +357,7 @@ extension AppDelegate { return } - VoxtLog.info("Transcription flow dispatch: standard. characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)") + VoxtLog.info("Transcription flow dispatch: standard. 
characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)", verbose: true) processStandardTranscription(text, sessionID: sessionID) } @@ -370,7 +370,7 @@ extension AppDelegate { let resolvedDelay = delay ?? sessionFinishDelay let finishingSessionID = activeRecordingSessionID - VoxtLog.info("Finish session scheduled. delayMs=\(Int(resolvedDelay * 1000)), displayMode=\(overlayState.displayMode), isRecording=\(overlayState.isRecording), isEnhancing=\(overlayState.isEnhancing), isRequesting=\(overlayState.isRequesting)") + VoxtLog.info("Finish session scheduled. delayMs=\(Int(resolvedDelay * 1000)), displayMode=\(overlayState.displayMode), isRecording=\(overlayState.isRecording), isEnhancing=\(overlayState.isEnhancing), isRequesting=\(overlayState.isRequesting)", verbose: true) overlayState.isCompleting = resolvedDelay > 0 if overlayState.displayMode != .answer { overlayState.isEnhancing = false @@ -394,7 +394,7 @@ extension AppDelegate { ) return } - VoxtLog.info("Finish session executing now. displayMode=\(self.overlayState.displayMode)") + VoxtLog.info("Finish session executing now. displayMode=\(self.overlayState.displayMode)", verbose: true) self.executeSessionEndPipeline(for: finishingSessionID, trigger: "finish") } } diff --git a/Voxt/App/AppDelegate+TranscriptionFlow.swift b/Voxt/App/AppDelegate+TranscriptionFlow.swift index 2455a90..4f3d11f 100644 --- a/Voxt/App/AppDelegate+TranscriptionFlow.swift +++ b/Voxt/App/AppDelegate+TranscriptionFlow.swift @@ -17,12 +17,12 @@ extension AppDelegate { func processStandardTranscription(_ text: String, sessionID: UUID) { guard shouldHandleCallbacks(for: sessionID) else { return } - VoxtLog.info("Standard transcription flow entered. characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)") + VoxtLog.info("Standard transcription flow entered. 
characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)", verbose: true) switch enhancementMode { case .off: setEnhancingState(false) overlayState.transcribedText = text - VoxtLog.info("Standard transcription committing raw text immediately. characters=\(text.count)") + VoxtLog.info("Standard transcription committing raw text immediately. characters=\(text.count)", verbose: true) commitTranscription(text, llmDurationSeconds: nil) { [weak self] in self?.finishSession(after: 0) } @@ -36,7 +36,7 @@ extension AppDelegate { ) setEnhancingState(false) overlayState.transcribedText = text - VoxtLog.info("Standard transcription falling back to raw text because custom model is unavailable. characters=\(text.count)") + VoxtLog.info("Standard transcription falling back to raw text because custom model is unavailable. characters=\(text.count)", verbose: true) commitTranscription(text, llmDurationSeconds: nil) { [weak self] in self?.finishSession(after: 0) } @@ -59,9 +59,9 @@ extension AppDelegate { let llmStartedAt = Date() if let asrAt = self.transcriptionResultReceivedAt { let handoffMs = Int(llmStartedAt.timeIntervalSince(asrAt) * 1000) - VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=\(max(handoffMs, 0)), inputChars=\(text.count)") + VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=\(max(handoffMs, 0)), inputChars=\(text.count)", verbose: true) } else { - VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=unknown, inputChars=\(text.count)") + VoxtLog.info("Enhancement handoff. 
mode=\(self.enhancementMode.rawValue), handoffMs=unknown, inputChars=\(text.count)", verbose: true) } do { let enhanced = try await self.runStandardTranscriptionPipeline(text: text) diff --git a/Voxt/App/VoxtApp.swift b/Voxt/App/VoxtApp.swift index d64ff49..1692e12 100644 --- a/Voxt/App/VoxtApp.swift +++ b/Voxt/App/VoxtApp.swift @@ -186,6 +186,7 @@ class AppDelegate: NSObject, NSApplicationDelegate { var currentEndingSessionID: UUID? var lastCompletedSessionEndSessionID: UUID? var isSessionCancellationRequested = false + var browserAutomationDeniedUntilByBundleID: [String: Date] = [:] var pendingCompletedHistoryAudioArchiveURL: URL? var latestInjectableOutputText: String? var sessionTargetApplicationPID: pid_t? diff --git a/Voxt/Settings/AboutSettingsView.swift b/Voxt/Settings/AboutSettingsView.swift index 6f324cc..7000294 100644 --- a/Voxt/Settings/AboutSettingsView.swift +++ b/Voxt/Settings/AboutSettingsView.swift @@ -91,8 +91,8 @@ struct AboutSettingsView: View { Text("Thanks") .font(.headline) Link( - "github.com/Blaizzy/mlx-audio-swift", - destination: URL(string: "https://github.com/Blaizzy/mlx-audio-swift")! + "github.com/hehehai/mlx-audio-swift", + destination: URL(string: "https://github.com/hehehai/mlx-audio-swift")! 
) .font(.caption) Link( diff --git a/Voxt/Settings/AppPreferenceKey.swift b/Voxt/Settings/AppPreferenceKey.swift index d4dad7a..97c36ad 100644 --- a/Voxt/Settings/AppPreferenceKey.swift +++ b/Voxt/Settings/AppPreferenceKey.swift @@ -107,6 +107,8 @@ enum AppPreferenceKey { static let autoCheckForUpdates = "autoCheckForUpdates" static let hotkeyDebugLoggingEnabled = "hotkeyDebugLoggingEnabled" static let llmDebugLoggingEnabled = "llmDebugLoggingEnabled" + static let llmDebugCustomPrompt = "llmDebugCustomPrompt" + static let llmDebugPresetPromptOverrides = "llmDebugPresetPromptOverrides" static let useSystemProxy = "useSystemProxy" static let networkProxyMode = "networkProxyMode" static let customProxyScheme = "customProxyScheme" diff --git a/Voxt/Settings/FeatureModelCatalogBuilder.swift b/Voxt/Settings/FeatureModelCatalogBuilder.swift index e837acc..cf2e2c1 100644 --- a/Voxt/Settings/FeatureModelCatalogBuilder.swift +++ b/Voxt/Settings/FeatureModelCatalogBuilder.swift @@ -96,6 +96,7 @@ struct FeatureModelCatalogBuilder { ), statusText: localized("Works immediately with no model download."), usageLocations: usageLabels(for: .dictation), + badgeText: nil, isSelectable: true, disabledReason: nil ) @@ -108,8 +109,10 @@ struct FeatureModelCatalogBuilder { selectionID: selectionID, title: model.title, engine: localized("MLX Audio"), - sizeText: isInstalled ? mlxModelManager.modelSizeOnDisk(repo: model.id) : mlxModelManager.remoteSizeText(repo: model.id), - ratingText: model.id.contains("1.7B") || model.id.contains("FireRed") || model.id.localizedCaseInsensitiveContains("cohere") ? "4.8" : "4.3", + sizeText: isInstalled + ? (mlxModelManager.cachedModelSizeText(repo: model.id) ?? 
mlxModelManager.remoteSizeText(repo: model.id)) + : mlxModelManager.remoteSizeText(repo: model.id), + ratingText: MLXModelManager.ratingText(for: model.id), filterTags: featureFilterTags( base: [localized("Local")] + mlxSpeedTags(for: model.id), installed: isInstalled, @@ -125,6 +128,7 @@ struct FeatureModelCatalogBuilder { ), statusText: isInstalled ? localized("Installed") : localized("Not installed"), usageLocations: usageLabels(for: selectionID), + badgeText: nil, isSelectable: isInstalled, disabledReason: isInstalled ? nil : localized("Install this model in Model settings first.") ) @@ -137,8 +141,10 @@ struct FeatureModelCatalogBuilder { selectionID: selectionID, title: model.title, engine: localized("Whisper"), - sizeText: isInstalled ? whisperModelManager.modelSizeOnDisk(id: model.id) : whisperModelManager.remoteSizeText(id: model.id), - ratingText: model.id == "large-v3" ? "4.9" : (model.id == "medium" ? "4.7" : "4.1"), + sizeText: isInstalled + ? (whisperModelManager.cachedModelSizeText(id: model.id) ?? whisperModelManager.remoteSizeText(id: model.id)) + : whisperModelManager.remoteSizeText(id: model.id), + ratingText: WhisperKitModelManager.ratingText(for: model.id), filterTags: featureFilterTags( base: [localized("Local")] + whisperSpeedTags(for: model.id), installed: isInstalled, @@ -154,6 +160,7 @@ struct FeatureModelCatalogBuilder { ), statusText: isInstalled ? localized("Installed") : localized("Not installed"), usageLocations: usageLabels(for: selectionID), + badgeText: nil, isSelectable: isInstalled, disabledReason: isInstalled ? nil : localized("Install this model in Model settings first.") ) @@ -190,6 +197,7 @@ struct FeatureModelCatalogBuilder { ), statusText: configuration.isConfigured ? localized("Configured") : localized("Not configured"), usageLocations: usageLabels(for: selectionID), + badgeText: nil, isSelectable: configuration.isConfigured, disabledReason: configuration.isConfigured ? 
nil : localized("Configure this provider in Model settings first.") ) @@ -217,6 +225,7 @@ struct FeatureModelCatalogBuilder { ), statusText: localized("Available on this Mac"), usageLocations: usageLabels(for: .appleIntelligence), + badgeText: nil, isSelectable: true, disabledReason: nil ) @@ -230,8 +239,10 @@ struct FeatureModelCatalogBuilder { selectionID: selectionID, title: model.title, engine: localized("Local LLM"), - sizeText: isInstalled ? customLLMManager.modelSizeOnDisk(repo: model.id) : customLLMManager.remoteSizeText(repo: model.id), - ratingText: model.id.contains("8B") || model.id.contains("9B") ? "4.8" : "4.3", + sizeText: isInstalled + ? (customLLMManager.cachedModelSizeText(repo: model.id) ?? customLLMManager.remoteSizeText(repo: model.id)) + : customLLMManager.remoteSizeText(repo: model.id), + ratingText: CustomLLMModelManager.ratingText(for: model.id), filterTags: featureFilterTags( base: [localized("Local")] + llmSpeedTags(for: model.id), installed: isInstalled, @@ -247,6 +258,16 @@ struct FeatureModelCatalogBuilder { ), statusText: isInstalled ? localized("Installed") : localized("Not installed"), usageLocations: usageLabels(for: selectionID), + badgeText: { + switch CustomLLMModelManager.releaseStatus(for: model.id) { + case .deprecatedSoon: + return localized("即将下线") + case .new: + return localized("New") + case .standard: + return nil + } + }(), isSelectable: isInstalled, disabledReason: isInstalled ? nil : localized("Install this model in Model settings first.") ) @@ -284,6 +305,7 @@ struct FeatureModelCatalogBuilder { ), statusText: isConfigured ? localized("Configured") : localized("Not configured"), usageLocations: usageLabels(for: selectionID), + badgeText: nil, isSelectable: isConfigured, disabledReason: isConfigured ? nil : localized("Configure this provider in Model settings first.") ) @@ -336,6 +358,7 @@ struct FeatureModelCatalogBuilder { ), statusText: whisperSelectable ? 
localized("Ready when Whisper ASR is selected") : localized("Unavailable"), usageLocations: usageLabels(for: .whisperDirectTranslate), + badgeText: nil, isSelectable: whisperSelectable, disabledReason: whisperDisabledReason ), @@ -413,45 +436,15 @@ struct FeatureModelCatalogBuilder { } private func mlxSpeedTags(for repo: String) -> [String] { - var tags = [String]() - if mlxSupportsMultilingual(repo) { - tags.append(localized("Multilingual")) - } - if MLXModelManager.isRealtimeCapableModelRepo(repo) { - tags.append(contentsOf: [localized("Realtime"), localized("Fast")]) - return deduplicatedFeatureTags(tags) - } - if repo.contains("0.6B") || repo.contains("Nano") { - tags.append(localized("Fast")) - } - if repo.contains("1.7B") || repo.contains("FireRed") || repo.localizedCaseInsensitiveContains("cohere") { - tags.append(localized("Accurate")) - } - return deduplicatedFeatureTags(tags) + deduplicatedFeatureTags(MLXModelManager.catalogTagKeys(for: repo).map(localized)) } private func whisperSpeedTags(for modelID: String) -> [String] { - var tags = [localized("Multilingual")] - switch modelID { - case "tiny", "base": - tags.append(localized("Fast")) - case "medium", "large-v3": - tags.append(localized("Accurate")) - default: - break - } - return deduplicatedFeatureTags(tags) + deduplicatedFeatureTags(WhisperKitModelManager.catalogTagKeys(for: modelID).map(localized)) } private func llmSpeedTags(for repo: String) -> [String] { - var tags = [String]() - if repo.contains("1B") || repo.contains("1.5B") || repo.contains("2B") { - tags.append(localized("Fast")) - } - if repo.contains("8B") || repo.contains("9B") { - tags.append(localized("Accurate")) - } - return deduplicatedFeatureTags(tags) + deduplicatedFeatureTags(CustomLLMModelManager.catalogTagKeys(for: repo).map(localized)) } private func remoteASRTags( @@ -485,23 +478,7 @@ struct FeatureModelCatalogBuilder { } private func mlxSupportsMultilingual(_ repo: String) -> Bool { - let key = repo.lowercased() - if 
key.contains("parakeet") { - return false - } - if key.contains("qwen3-asr") - || key.contains("voxtral") - || key.contains("cohere") - || key.contains("sensevoice") - || key.contains("granite") - || key.contains("glm-asr") - || key.contains("firered") { - return true - } - guard let option = MLXModelManager.availableModels.first(where: { $0.id == repo }) else { - return false - } - return option.description.localizedCaseInsensitiveContains("multilingual") + MLXModelManager.isMultilingualModelRepo(repo) } private func primaryLanguageSupportTag(for selectionID: FeatureModelSelectionID) -> String? { diff --git a/Voxt/Settings/FeatureModelSelectorDialog.swift b/Voxt/Settings/FeatureModelSelectorDialog.swift index 5427312..c369e2c 100644 --- a/Voxt/Settings/FeatureModelSelectorDialog.swift +++ b/Voxt/Settings/FeatureModelSelectorDialog.swift @@ -356,6 +356,18 @@ private struct FeatureModelSelectorRow: View { Text(titleOverride ?? entry.title) .font(.headline) + if let badgeText = entry.badgeText { + Text(badgeText) + .font(.caption.weight(.semibold)) + .foregroundStyle(Color.orange) + .padding(.horizontal, 7) + .padding(.vertical, 3) + .background( + Capsule(style: .continuous) + .fill(Color.orange.opacity(0.14)) + ) + } + if showsEngine { Text(entry.engine) .font(.caption.weight(.medium)) @@ -457,6 +469,18 @@ private struct FeatureModelSelectorGroupCard: View { Text(group.title) .font(.headline) + if let badgeText = group.badgeText { + Text(badgeText) + .font(.caption.weight(.semibold)) + .foregroundStyle(Color.orange) + .padding(.horizontal, 7) + .padding(.vertical, 3) + .background( + Capsule(style: .continuous) + .fill(Color.orange.opacity(0.14)) + ) + } + Text(group.engine) .font(.caption.weight(.medium)) .foregroundStyle(.secondary) @@ -637,7 +661,7 @@ private enum FeatureSelectorTagPriority { static var groups: [[String]] { [ [localized("Local"), localized("Remote")], - [localized("Fast"), localized("Accurate"), localized("Realtime")], + [localized("Fast"), 
localized("Balanced"), localized("Accurate"), localized("Realtime")], [localized("Installed"), localized("Configured"), localized("In Use")] ] } diff --git a/Voxt/Settings/FeatureModelSelectorSupport.swift b/Voxt/Settings/FeatureModelSelectorSupport.swift index efa8e66..7d0f958 100644 --- a/Voxt/Settings/FeatureModelSelectorSupport.swift +++ b/Voxt/Settings/FeatureModelSelectorSupport.swift @@ -42,6 +42,7 @@ struct FeatureModelSelectorEntry: Identifiable { let displayTags: [String] let statusText: String let usageLocations: [String] + let badgeText: String? let isSelectable: Bool let disabledReason: String? diff --git a/Voxt/Settings/GeneralSettingsSections.swift b/Voxt/Settings/GeneralSettingsSections.swift index 373261f..7d628ef 100644 --- a/Voxt/Settings/GeneralSettingsSections.swift +++ b/Voxt/Settings/GeneralSettingsSections.swift @@ -392,8 +392,8 @@ struct GeneralLoggingCard: View { ) GeneralToggleRow( - title: "Enable LLM debug logs", - description: "Records local and remote LLM request details to help inspect model calls and responses.", + title: "Enable model debug logs", + description: "Records local and remote model details, including LLM, ASR, model downloads, and model routing, for debugging.", isOn: $llmDebugLoggingEnabled ) } diff --git a/Voxt/Settings/LocalModelSeriesGrouping.swift b/Voxt/Settings/LocalModelSeriesGrouping.swift index 2342c6a..417de54 100644 --- a/Voxt/Settings/LocalModelSeriesGrouping.swift +++ b/Voxt/Settings/LocalModelSeriesGrouping.swift @@ -46,6 +46,7 @@ struct FeatureModelSelectorGroupSection: Identifiable { let usageLocations: [String] let installedCount: Int let ratingText: String + let badgeText: String? 
let entries: [FeatureModelSelectorEntry] let defaultExpanded: Bool } @@ -133,6 +134,7 @@ enum LocalModelSeriesGrouping { usageLocations: orderedUsageLocations(from: groupEntries.flatMap { $0.usageLocations }), installedCount: groupEntries.filter { $0.filterTags.contains(localized("Installed")) }.count, ratingText: averageRatingText(from: groupEntries.map { $0.ratingText }), + badgeText: groupEntries.compactMap { $0.badgeText }.first, entries: groupEntries, defaultExpanded: groupEntries.contains(where: { $0.selectionID == selectedID || !$0.usageLocations.isEmpty @@ -152,6 +154,7 @@ enum LocalModelSeriesGrouping { localized("Remote"), localized("Built-in"), localized("Fast"), + localized("Balanced"), localized("Accurate"), localized("Realtime"), localized("Supports Primary Language"), diff --git a/Voxt/Settings/ModelSettingsCatalogBuilder.swift b/Voxt/Settings/ModelSettingsCatalogBuilder.swift index 8c7eb6b..92c46ec 100644 --- a/Voxt/Settings/ModelSettingsCatalogBuilder.swift +++ b/Voxt/Settings/ModelSettingsCatalogBuilder.swift @@ -129,8 +129,10 @@ struct ModelCatalogBuilder { id: "mlx:\(repo)", title: mlxModelManager.displayTitle(for: repo), engine: localized("MLX Audio"), - sizeText: isInstalled ? mlxModelManager.modelSizeOnDisk(repo: repo) : mlxModelManager.remoteSizeText(repo: repo), - ratingText: repo.contains("1.7B") || repo.contains("FireRed") || repo.localizedCaseInsensitiveContains("cohere") ? "4.8" : "4.3", + sizeText: isInstalled + ? (mlxModelManager.cachedModelSizeText(repo: repo) ?? mlxModelManager.remoteSizeText(repo: repo)) + : mlxModelManager.remoteSizeText(repo: repo), + ratingText: MLXModelManager.ratingText(for: repo), filterTags: catalogFilterTags( base: [localized("Local")] + mlxCatalogTags(for: repo), installed: isInstalled, @@ -207,8 +209,10 @@ struct ModelCatalogBuilder { id: "whisper:\(modelID)", title: whisperModelManager.displayTitle(for: modelID), engine: localized("Whisper"), - sizeText: isInstalled ? 
whisperModelManager.modelSizeOnDisk(id: modelID) : whisperModelManager.remoteSizeText(id: modelID), - ratingText: modelID == "large-v3" ? "4.9" : (modelID == "medium" ? "4.7" : "4.2"), + sizeText: isInstalled + ? (whisperModelManager.cachedModelSizeText(id: modelID) ?? whisperModelManager.remoteSizeText(id: modelID)) + : whisperModelManager.remoteSizeText(id: modelID), + ratingText: WhisperKitModelManager.ratingText(for: modelID), filterTags: catalogFilterTags( base: [localized("Local")] + whisperCatalogTags(for: modelID), installed: isInstalled, @@ -330,8 +334,10 @@ struct ModelCatalogBuilder { id: "local-llm:\(repo)", title: customLLMManager.displayTitle(for: repo), engine: localized("Local LLM"), - sizeText: isInstalled ? customLLMManager.modelSizeOnDisk(repo: repo) : customLLMManager.remoteSizeText(repo: repo), - ratingText: repo.contains("8B") || repo.contains("9B") ? "4.8" : "4.3", + sizeText: isInstalled + ? (customLLMManager.cachedModelSizeText(repo: repo) ?? customLLMManager.remoteSizeText(repo: repo)) + : customLLMManager.remoteSizeText(repo: repo), + ratingText: CustomLLMModelManager.ratingText(for: repo), filterTags: catalogFilterTags( base: [localized("Local")] + llmCatalogTags(for: repo), installed: isInstalled, @@ -456,45 +462,15 @@ struct ModelCatalogBuilder { } private func mlxCatalogTags(for repo: String) -> [String] { - var tags = [String]() - if mlxSupportsMultilingual(repo) { - tags.append(localized("Multilingual")) - } - if MLXModelManager.isRealtimeCapableModelRepo(repo) { - tags.append(contentsOf: [localized("Realtime"), localized("Fast")]) - return deduplicatedTags(tags) - } - if repo.contains("0.6B") || repo.contains("Nano") { - tags.append(localized("Fast")) - } - if repo.contains("1.7B") || repo.contains("FireRed") || repo.localizedCaseInsensitiveContains("cohere") { - tags.append(localized("Accurate")) - } - return deduplicatedTags(tags) + deduplicatedTags(MLXModelManager.catalogTagKeys(for: repo).map(localized)) } private func 
whisperCatalogTags(for modelID: String) -> [String] { - var tags = [localized("Multilingual")] - switch modelID { - case "tiny", "base": - tags.append(localized("Fast")) - case "medium", "large-v3": - tags.append(localized("Accurate")) - default: - break - } - return deduplicatedTags(tags) + deduplicatedTags(WhisperKitModelManager.catalogTagKeys(for: modelID).map(localized)) } private func llmCatalogTags(for repo: String) -> [String] { - var tags = [String]() - if repo.contains("1B") || repo.contains("1.5B") || repo.contains("2B") { - tags.append(localized("Fast")) - } - if repo.contains("8B") || repo.contains("9B") { - tags.append(localized("Accurate")) - } - return deduplicatedTags(tags) + deduplicatedTags(CustomLLMModelManager.catalogTagKeys(for: repo).map(localized)) } private func remoteASRCatalogTags( @@ -528,23 +504,7 @@ struct ModelCatalogBuilder { } private func mlxSupportsMultilingual(_ repo: String) -> Bool { - let key = repo.lowercased() - if key.contains("parakeet") { - return false - } - if key.contains("qwen3-asr") - || key.contains("voxtral") - || key.contains("cohere") - || key.contains("sensevoice") - || key.contains("granite") - || key.contains("glm-asr") - || key.contains("firered") { - return true - } - guard let option = MLXModelManager.availableModels.first(where: { MLXModelManager.canonicalModelRepo($0.id) == repo }) else { - return false - } - return option.description.localizedCaseInsensitiveContains("multilingual") + MLXModelManager.isMultilingualModelRepo(repo) } private func primaryLanguageSupportTag(for selectionID: FeatureModelSelectionID) -> String? 
{ diff --git a/Voxt/Settings/ModelSettingsCatalogComponents.swift b/Voxt/Settings/ModelSettingsCatalogComponents.swift index 52329c6..ae3b0cd 100644 --- a/Voxt/Settings/ModelSettingsCatalogComponents.swift +++ b/Voxt/Settings/ModelSettingsCatalogComponents.swift @@ -29,7 +29,7 @@ enum ModelCatalogTag { static var groups: [[String]] { [ [localized("Local"), localized("Remote")], - [localized("Fast"), localized("Accurate"), localized("Realtime")], + [localized("Fast"), localized("Balanced"), localized("Accurate"), localized("Realtime")], [localized("Installed"), localized("Configured"), localized("In Use")] ] } diff --git a/Voxt/Settings/ModelSettingsComponents.swift b/Voxt/Settings/ModelSettingsComponents.swift index ab93685..a9f62aa 100644 --- a/Voxt/Settings/ModelSettingsComponents.swift +++ b/Voxt/Settings/ModelSettingsComponents.swift @@ -30,7 +30,7 @@ struct PromptEditorView: View { } } -struct PromptTemplateVariableDescriptor: Identifiable { +struct PromptTemplateVariableDescriptor: Identifiable, Hashable { let token: String let tipKey: String diff --git a/Voxt/Settings/ModelSettingsView+ModelActions.swift b/Voxt/Settings/ModelSettingsView+ModelActions.swift index 892311a..4946a37 100644 --- a/Voxt/Settings/ModelSettingsView+ModelActions.swift +++ b/Voxt/Settings/ModelSettingsView+ModelActions.swift @@ -118,7 +118,7 @@ extension ModelSettingsView { } var customLLMRows: [ModelTableRow] { - CustomLLMModelManager.availableModels.map { model in + CustomLLMModelManager.displayModels(including: customLLMRepo).map { model in let isDownloaded = customLLMManager.isModelDownloaded(repo: model.id) let actions = customLLMActions(for: model.id, isDownloaded: isDownloaded) @@ -154,7 +154,18 @@ extension ModelSettingsView { .translationCustomLLM(repo), .rewriteCustomLLM(repo) ] - return missingConfigurationIssues.contains(where: { scopes.contains($0.scope) }) ? 
AppLocalization.localizedString("Needs Setup") : nil + if missingConfigurationIssues.contains(where: { scopes.contains($0.scope) }) { + return AppLocalization.localizedString("Needs Setup") + } + + switch CustomLLMModelManager.releaseStatus(for: repo) { + case .deprecatedSoon: + return AppLocalization.localizedString("Deprecated Soon") + case .new: + return AppLocalization.localizedString("New") + case .standard: + return nil + } } private func whisperActions(for modelID: String, isDownloaded: Bool) -> [ModelTableAction] { @@ -404,7 +415,7 @@ extension ModelSettingsView { } func isCurrentCustomLLM(_ repo: String) -> Bool { - repo == customLLMRepo + CustomLLMModelManager.canonicalModelRepo(repo) == CustomLLMModelManager.canonicalModelRepo(customLLMRepo) } func isDownloadingCustomLLM(_ repo: String) -> Bool { @@ -477,6 +488,7 @@ extension ModelSettingsView { configuration, updating: remoteASRProviderConfigurationsRaw ) + NotificationCenter.default.post(name: .voxtRemoteProviderConfigurationsDidChange, object: nil) } func remoteASRStatusText( @@ -566,6 +578,7 @@ extension ModelSettingsView { configuration, updating: remoteLLMProviderConfigurationsRaw ) + NotificationCenter.default.post(name: .voxtRemoteProviderConfigurationsDidChange, object: nil) } func updateMirrorSetting() { diff --git a/Voxt/Settings/ModelSettingsView+Selection.swift b/Voxt/Settings/ModelSettingsView+Selection.swift index 2f372e5..1f37d01 100644 --- a/Voxt/Settings/ModelSettingsView+Selection.swift +++ b/Voxt/Settings/ModelSettingsView+Selection.swift @@ -1,6 +1,15 @@ import SwiftUI extension ModelSettingsView { + private func installedCustomLLMOptions(including currentRepo: String) -> [TranslationModelOption] { + CustomLLMModelManager.displayModels(including: currentRepo).compactMap { model in + guard customLLMManager.isModelDownloaded(repo: model.id) else { + return nil + } + return TranslationModelOption(id: model.id, title: model.title) + } + } + var whisperModelSelectionBinding: Binding { Binding( get: 
{ @@ -31,12 +40,7 @@ extension ModelSettingsView { } var installedCustomLLMOptions: [TranslationModelOption] { - CustomLLMModelManager.availableModels.compactMap { model in - guard customLLMManager.isModelDownloaded(repo: model.id) else { - return nil - } - return TranslationModelOption(id: model.id, title: model.title) - } + installedCustomLLMOptions(including: customLLMRepo) } var configuredRemoteLLMOptions: [TranslationModelOption] { @@ -59,7 +63,7 @@ extension ModelSettingsView { case .remoteLLM: return configuredRemoteLLMOptions case .customLLM: - return installedCustomLLMOptions + return installedCustomLLMOptions(including: translationCustomLLMRepo) case .whisperKit: return [] } @@ -70,7 +74,7 @@ extension ModelSettingsView { case .remoteLLM: return configuredRemoteLLMOptions case .customLLM: - return installedCustomLLMOptions + return installedCustomLLMOptions(including: rewriteCustomLLMRepo) } } @@ -92,12 +96,17 @@ extension ModelSettingsView { var resolvedTranslationSelection: String { let options = translationModelOptions + let rawSelection = currentTranslationSelectionRaw + let canonicalSelection = CustomLLMModelManager.canonicalModelRepo(rawSelection) guard !options.isEmpty else { - return currentTranslationSelectionRaw + return selectedTranslationModelProvider == .customLLM ? canonicalSelection : rawSelection } - if options.contains(where: { $0.id == currentTranslationSelectionRaw }) { - return currentTranslationSelectionRaw + let selectionToMatch = selectedTranslationModelProvider == .customLLM + ? 
canonicalSelection + : rawSelection + if options.contains(where: { $0.id == selectionToMatch }) { + return selectionToMatch } return options[0].id } @@ -118,12 +127,17 @@ extension ModelSettingsView { var resolvedRewriteSelection: String { let options = rewriteModelOptions + let rawSelection = currentRewriteSelectionRaw + let canonicalSelection = CustomLLMModelManager.canonicalModelRepo(rawSelection) guard !options.isEmpty else { - return currentRewriteSelectionRaw + return selectedRewriteModelProvider == .customLLM ? canonicalSelection : rawSelection } - if options.contains(where: { $0.id == currentRewriteSelectionRaw }) { - return currentRewriteSelectionRaw + let selectionToMatch = selectedRewriteModelProvider == .customLLM + ? canonicalSelection + : rawSelection + if options.contains(where: { $0.id == selectionToMatch }) { + return selectionToMatch } return options[0].id } @@ -227,12 +241,9 @@ extension ModelSettingsView { translationRemoteLLMProviderRaw = first.id } case .customLLM: - let options = installedCustomLLMOptions - if let first = options.first { - if !options.contains(where: { $0.id == translationCustomLLMRepo }) { - translationCustomLLMRepo = first.id - } - } else { + if translationCustomLLMRepo.isEmpty { + translationCustomLLMRepo = customLLMRepo + } else if !CustomLLMModelManager.isSupportedModelRepo(translationCustomLLMRepo) { translationCustomLLMRepo = customLLMRepo } case .whisperKit: @@ -252,12 +263,9 @@ extension ModelSettingsView { rewriteRemoteLLMProviderRaw = first.id } case .customLLM: - let options = installedCustomLLMOptions - if let first = options.first { - if !options.contains(where: { $0.id == rewriteCustomLLMRepo }) { - rewriteCustomLLMRepo = first.id - } - } else { + if rewriteCustomLLMRepo.isEmpty { + rewriteCustomLLMRepo = customLLMRepo + } else if !CustomLLMModelManager.isSupportedModelRepo(rewriteCustomLLMRepo) { rewriteCustomLLMRepo = customLLMRepo } } diff --git a/Voxt/Settings/ModelSettingsView.swift 
b/Voxt/Settings/ModelSettingsView.swift index fede570..c5ac31e 100644 --- a/Voxt/Settings/ModelSettingsView.swift +++ b/Voxt/Settings/ModelSettingsView.swift @@ -28,6 +28,22 @@ struct ModelSettingsView: View { let detailText: String } + private struct CatalogSnapshot { + let allEntries: [ModelCatalogEntry] + let availableTags: [String] + let availableTagGroups: [[String]] + let filteredEntries: [ModelCatalogEntry] + let displayItems: [ModelCatalogDisplayItem] + + static let empty = CatalogSnapshot( + allEntries: [], + availableTags: [], + availableTagGroups: [], + filteredEntries: [], + displayItems: [] + ) + } + @AppStorage(AppPreferenceKey.transcriptionEngine) var engineRaw = TranscriptionEngine.mlxAudio.rawValue @AppStorage(AppPreferenceKey.enhancementMode) var enhancementModeRaw = EnhancementMode.off.rawValue @AppStorage(AppPreferenceKey.enhancementSystemPrompt) var systemPrompt = "" @@ -89,6 +105,7 @@ struct ModelSettingsView: View { @State private var collapsedModelGroupIDs = Set() @State private var globalDownloadEndpointResult: DownloadEndpointCheckResult? @State private var chinaDownloadEndpointResult: DownloadEndpointCheckResult? 
+ @State private var catalogSnapshot = CatalogSnapshot.empty let modelStateRefreshTimer = Timer.publish(every: 2.5, on: .main, in: .common).autoconnect() @@ -211,14 +228,7 @@ struct ModelSettingsView: View { ) } - private var allEntries: [ModelCatalogEntry] { - switch catalogTab { - case .asr: - return prioritizedEntries(catalogBuilder.asrEntries()) - case .llm: - return prioritizedEntries(catalogBuilder.llmEntries()) - } - } + private var allEntries: [ModelCatalogEntry] { catalogSnapshot.allEntries } private var locationScopedEntriesForTags: [ModelCatalogEntry] { if selectedTags.contains(localized("Local")) { @@ -230,37 +240,13 @@ struct ModelSettingsView: View { return allEntries } - private var availableTags: [String] { - let locationTags = Set(allEntries.flatMap(\.filterTags)).intersection(ModelCatalogTag.locationTags) - let tags = locationTags.union(Set(locationScopedEntriesForTags.flatMap(\.filterTags))) - return ModelCatalogTag.priority.compactMap { tags.contains($0) ? $0 : nil } - } + private var availableTags: [String] { catalogSnapshot.availableTags } - private var availableTagGroups: [[String]] { - let available = Set(availableTags) - var groups = [[String]]() - let locationGroup = ModelCatalogTag.groups[0].filter { available.contains($0) } - if !locationGroup.isEmpty { - groups.append(locationGroup) - } - groups.append( - contentsOf: ModelCatalogTag.groups.dropFirst() - .map { group in - group.filter { available.contains($0) } - } - .filter { !$0.isEmpty } - ) - return groups - } + private var availableTagGroups: [[String]] { catalogSnapshot.availableTagGroups } - private var filteredEntries: [ModelCatalogEntry] { - guard !selectedTags.isEmpty else { return allEntries } - return allEntries.filter { selectedTags.isSubset(of: Set($0.filterTags)) } - } + private var filteredEntries: [ModelCatalogEntry] { catalogSnapshot.filteredEntries } - private var displayItems: [ModelCatalogDisplayItem] { - LocalModelSeriesGrouping.modelCatalogItems(from: 
filteredEntries) - } + private var displayItems: [ModelCatalogDisplayItem] { catalogSnapshot.displayItems } private func prioritizedEntries(_ entries: [ModelCatalogEntry]) -> [ModelCatalogEntry] { entries.enumerated() @@ -374,93 +360,141 @@ struct ModelSettingsView: View { } private var contentWithLifecycle: some View { - mainContent - .onAppear(perform: handleOnAppear) - .onAppear(perform: reloadCachedConfigurationState) - .onAppear(perform: refreshModelStorageDisplayPath) - .onChange(of: modelRepo) { _, newValue in - let canonicalRepo = MLXModelManager.canonicalModelRepo(newValue) - if canonicalRepo != newValue { - modelRepo = canonicalRepo - return - } - mlxModelManager.updateModel(repo: canonicalRepo) - } - .onChange(of: whisperModelID) { _, newValue in - let canonicalModelID = WhisperKitModelManager.canonicalModelID(newValue) - if canonicalModelID != newValue { - whisperModelID = canonicalModelID - return - } - whisperModelManager.updateModel(id: canonicalModelID) - } - .onChange(of: whisperKeepResidentLoaded) { _, _ in - whisperModelManager.refreshResidencyPolicy() - guard selectedEngine == .whisperKit, whisperKeepResidentLoaded else { return } - Task { @MainActor in - whisperModelManager.beginActiveUse() - defer { whisperModelManager.endActiveUse() } - _ = try? await whisperModelManager.loadWhisper() - } - } - .onChange(of: engineRaw) { _, _ in - whisperModelManager.refreshResidencyPolicy() - guard selectedEngine == .whisperKit, whisperKeepResidentLoaded else { return } - Task { @MainActor in - whisperModelManager.beginActiveUse() - defer { whisperModelManager.endActiveUse() } - _ = try? 
await whisperModelManager.loadWhisper() - } - } - .onChange(of: customLLMRepo) { _, newValue in - customLLMManager.updateModel(repo: newValue) - ensureTranslationModelSelectionConsistency() - ensureRewriteModelSelectionConsistency() - } - .onChange(of: translationModelProviderRaw) { _, _ in - syncTranslationFallbackProvider() - ensureTranslationModelSelectionConsistency() - } - .onChange(of: rewriteModelProviderRaw) { _, _ in - ensureRewriteModelSelectionConsistency() - } - .onChange(of: remoteLLMProviderConfigurationsRaw) { _, _ in - cachedRemoteLLMConfigurations = RemoteModelConfigurationStore.loadConfigurations( - from: remoteLLMProviderConfigurationsRaw, - sensitiveValueLoading: .metadataOnly - ) - ensureTranslationModelSelectionConsistency() - ensureRewriteModelSelectionConsistency() - } - .onChange(of: remoteASRProviderConfigurationsRaw) { _, _ in - cachedRemoteASRConfigurations = RemoteModelConfigurationStore.loadConfigurations( - from: remoteASRProviderConfigurationsRaw, - sensitiveValueLoading: .metadataOnly - ) - } - .onChange(of: useHfMirror) { _, _ in - updateMirrorSetting() - } - .onChange(of: modelStorageRootPath) { _, _ in - refreshModelStorageDisplayPath() - } - .onChange(of: featureSettingsRaw) { _, _ in - cachedFeatureSettings = FeatureSettingsStore.load(defaults: .standard) - pruneSelectedTags() - } - .onChange(of: catalogTab) { _, _ in - pruneSelectedTags() - } - .onChange(of: selectedTags) { _, _ in - pruneSelectedTags() - } - .onReceive(modelStateRefreshTimer) { _ in - guard isActive else { return } - guard mainWindowState.isVisible else { return } - guard shouldPollModelState else { return } - refreshModelInstallStateIfNeeded() - pruneSelectedTags() - } + let appeared = AnyView( + mainContent + .onAppear(perform: handleOnAppear) + .onAppear(perform: reloadCachedConfigurationState) + .onAppear(perform: refreshModelStorageDisplayPath) + .onAppear(perform: refreshCatalogSnapshot) + ) + + let modelObserved = AnyView( + appeared + .onChange(of: 
modelRepo) { _, newValue in + let canonicalRepo = MLXModelManager.canonicalModelRepo(newValue) + if canonicalRepo != newValue { + modelRepo = canonicalRepo + return + } + mlxModelManager.updateModel(repo: canonicalRepo) + refreshCatalogSnapshot() + } + .onChange(of: whisperModelID) { _, newValue in + let canonicalModelID = WhisperKitModelManager.canonicalModelID(newValue) + if canonicalModelID != newValue { + whisperModelID = canonicalModelID + return + } + whisperModelManager.updateModel(id: canonicalModelID) + refreshCatalogSnapshot() + } + .onChange(of: whisperKeepResidentLoaded) { _, _ in + whisperModelManager.refreshResidencyPolicy() + guard selectedEngine == .whisperKit, whisperKeepResidentLoaded else { return } + Task { @MainActor in + whisperModelManager.beginActiveUse() + defer { whisperModelManager.endActiveUse() } + _ = try? await whisperModelManager.loadWhisper() + } + } + .onChange(of: engineRaw) { _, _ in + whisperModelManager.refreshResidencyPolicy() + guard selectedEngine == .whisperKit, whisperKeepResidentLoaded else { return } + Task { @MainActor in + whisperModelManager.beginActiveUse() + defer { whisperModelManager.endActiveUse() } + _ = try? 
await whisperModelManager.loadWhisper() + } + } + ) + + let configurationObserved = AnyView( + modelObserved + .onChange(of: customLLMRepo) { _, newValue in + customLLMManager.updateModel(repo: newValue) + ensureTranslationModelSelectionConsistency() + ensureRewriteModelSelectionConsistency() + refreshCatalogSnapshot() + } + .onChange(of: translationModelProviderRaw) { _, _ in + syncTranslationFallbackProvider() + ensureTranslationModelSelectionConsistency() + refreshCatalogSnapshot() + } + .onChange(of: rewriteModelProviderRaw) { _, _ in + ensureRewriteModelSelectionConsistency() + refreshCatalogSnapshot() + } + .onChange(of: remoteLLMProviderConfigurationsRaw) { _, _ in + cachedRemoteLLMConfigurations = RemoteModelConfigurationStore.loadConfigurations( + from: remoteLLMProviderConfigurationsRaw, + sensitiveValueLoading: .metadataOnly + ) + ensureTranslationModelSelectionConsistency() + ensureRewriteModelSelectionConsistency() + refreshCatalogSnapshot() + } + .onChange(of: remoteASRProviderConfigurationsRaw) { _, _ in + cachedRemoteASRConfigurations = RemoteModelConfigurationStore.loadConfigurations( + from: remoteASRProviderConfigurationsRaw, + sensitiveValueLoading: .metadataOnly + ) + refreshCatalogSnapshot() + } + .onChange(of: useHfMirror) { _, _ in + updateMirrorSetting() + } + .onChange(of: modelStorageRootPath) { _, _ in + refreshModelStorageDisplayPath() + } + .onChange(of: featureSettingsRaw) { _, _ in + cachedFeatureSettings = FeatureSettingsStore.load(defaults: .standard) + pruneSelectedTags() + refreshCatalogSnapshot() + } + .onChange(of: catalogTab) { _, _ in + pruneSelectedTags() + refreshCatalogSnapshot() + } + .onChange(of: selectedTags) { _, _ in + pruneSelectedTags() + refreshCatalogSnapshot() + } + ) + + let stateObserved = AnyView( + configurationObserved + .onChange(of: mlxModelManager.state) { _, _ in + refreshCatalogSnapshot() + } + .onChange(of: whisperModelManager.state) { _, _ in + refreshCatalogSnapshot() + } + .onChange(of: 
customLLMManager.state) { _, _ in + refreshCatalogSnapshot() + } + .onChange(of: mlxModelManager.remoteSizeTextByRepo) { _, _ in + refreshCatalogSnapshot() + } + .onChange(of: whisperModelManager.remoteSizeTextByID) { _, _ in + refreshCatalogSnapshot() + } + .onChange(of: customLLMManager.remoteSizeTextByRepo) { _, _ in + refreshCatalogSnapshot() + } + ) + + return AnyView( + stateObserved + .onReceive(modelStateRefreshTimer) { _ in + guard isActive else { return } + guard mainWindowState.isVisible else { return } + guard shouldPollModelState else { return } + refreshModelInstallStateIfNeeded() + pruneSelectedTags() + refreshCatalogSnapshot() + } + ) } private var contentWithSheets: some View { @@ -526,6 +560,55 @@ struct ModelSettingsView: View { from: remoteLLMProviderConfigurationsRaw, sensitiveValueLoading: .metadataOnly ) + refreshCatalogSnapshot() + } + + private func refreshCatalogSnapshot() { + let entries = switch catalogTab { + case .asr: + prioritizedEntries(catalogBuilder.asrEntries()) + case .llm: + prioritizedEntries(catalogBuilder.llmEntries()) + } + + let locationTags = Set(entries.flatMap(\.filterTags)).intersection(ModelCatalogTag.locationTags) + let locationScopedEntries: [ModelCatalogEntry] + if selectedTags.contains(localized("Local")) { + locationScopedEntries = entries.filter { $0.filterTags.contains(localized("Local")) } + } else if selectedTags.contains(localized("Remote")) { + locationScopedEntries = entries.filter { $0.filterTags.contains(localized("Remote")) } + } else { + locationScopedEntries = entries + } + + let tagSet = locationTags.union(Set(locationScopedEntries.flatMap(\.filterTags))) + let availableTags = ModelCatalogTag.priority.compactMap { tagSet.contains($0) ? 
$0 : nil } + let available = Set(availableTags) + var availableTagGroups = [[String]]() + let locationGroup = ModelCatalogTag.groups[0].filter { available.contains($0) } + if !locationGroup.isEmpty { + availableTagGroups.append(locationGroup) + } + availableTagGroups.append( + contentsOf: ModelCatalogTag.groups.dropFirst() + .map { $0.filter { available.contains($0) } } + .filter { !$0.isEmpty } + ) + + let filteredEntries: [ModelCatalogEntry] + if selectedTags.isEmpty { + filteredEntries = entries + } else { + filteredEntries = entries.filter { selectedTags.isSubset(of: Set($0.filterTags)) } + } + + catalogSnapshot = CatalogSnapshot( + allEntries: entries, + availableTags: availableTags, + availableTagGroups: availableTagGroups, + filteredEntries: filteredEntries, + displayItems: LocalModelSeriesGrouping.modelCatalogItems(from: filteredEntries) + ) } private func chooseModelStorageDirectory() { @@ -645,28 +728,36 @@ struct ModelSettingsView: View { Spacer(minLength: 0) if !missingConfigurationIssues.isEmpty { - HStack(spacing: 6) { - Image(systemName: "exclamationmark.triangle.fill") - .foregroundStyle(.orange) - Text( - missingConfigurationIssues.count == 1 - ? localized("1 model needs setup") - : AppLocalization.format("%d models need setup", missingConfigurationIssues.count) + Menu { + ForEach(missingConfigurationIssueDescriptions, id: \.self) { description in + Text(description) + } + } label: { + HStack(spacing: 6) { + Image(systemName: "exclamationmark.triangle.fill") + .foregroundStyle(.orange) + Text( + missingConfigurationIssues.count == 1 + ? 
localized("1 model needs setup") + : AppLocalization.format("%d models need setup", missingConfigurationIssues.count) + ) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + } + .padding(.horizontal, 8) + .padding(.vertical, 5) + .background( + Capsule(style: .continuous) + .fill(Color.orange.opacity(0.10)) + ) + .overlay( + Capsule(style: .continuous) + .stroke(Color.orange.opacity(0.18), lineWidth: 1) ) - .font(.caption) - .foregroundStyle(.secondary) - .lineLimit(1) } - .padding(.horizontal, 8) - .padding(.vertical, 5) - .background( - Capsule(style: .continuous) - .fill(Color.orange.opacity(0.10)) - ) - .overlay( - Capsule(style: .continuous) - .stroke(Color.orange.opacity(0.18), lineWidth: 1) - ) + .menuStyle(.borderlessButton) + .help(missingConfigurationIssueDescriptions.joined(separator: "\n")) } Text(AppLocalization.format("%d items", filteredEntries.count)) @@ -679,6 +770,11 @@ struct ModelSettingsView: View { Image(systemName: "gearshape") } .buttonStyle(SettingsCompactIconButtonStyle()) + + Button(action: openModelDebugWindow) { + Text(localized("Debug")) + } + .buttonStyle(SettingsPillButtonStyle(horizontalPadding: 12)) } } @@ -860,4 +956,43 @@ struct ModelSettingsView: View { return false } } + + private func openModelDebugWindow() { + guard let appDelegate = AppDelegate.shared else { return } + switch catalogTab { + case .asr: + ASRDebugWindowManager.shared.present(appDelegate: appDelegate) + case .llm: + LLMDebugWindowManager.shared.present(appDelegate: appDelegate) + } + } + + private var missingConfigurationIssueDescriptions: [String] { + missingConfigurationIssues.map(missingConfigurationIssueDescription(for:)) + } + + private func missingConfigurationIssueDescription( + for issue: ConfigurationTransferManager.MissingConfigurationIssue + ) -> String { + switch issue.scope { + case .remoteASRProvider(let provider): + return AppLocalization.format("%@ %@: %@", provider.title, localized("ASR"), issue.message) + case 
.remoteLLMProvider(let provider): + return AppLocalization.format("%@ %@: %@", provider.title, localized("LLM"), issue.message) + case .mlxModel(let repo): + return AppLocalization.format("%@ %@: %@", mlxModelManager.displayTitle(for: repo), localized("ASR"), issue.message) + case .whisperModel(let modelID): + return AppLocalization.format("%@ %@: %@", whisperModelManager.displayTitle(for: modelID), localized("Whisper"), issue.message) + case .customLLMModel(let repo): + return AppLocalization.format("%@ %@: %@", customLLMManager.displayTitle(for: repo), localized("LLM"), issue.message) + case .translationRemoteLLM(let provider): + return AppLocalization.format("%@ %@: %@", provider.title, localized("Translation"), issue.message) + case .rewriteRemoteLLM(let provider): + return AppLocalization.format("%@ %@: %@", provider.title, localized("Rewrite"), issue.message) + case .translationCustomLLM(let repo): + return AppLocalization.format("%@ %@: %@", customLLMManager.displayTitle(for: repo), localized("Translation"), issue.message) + case .rewriteCustomLLM(let repo): + return AppLocalization.format("%@ %@: %@", customLLMManager.displayTitle(for: repo), localized("Rewrite"), issue.message) + } + } } diff --git a/Voxt/Settings/OnboardingSettingsView+Data.swift b/Voxt/Settings/OnboardingSettingsView+Data.swift index f642fd1..9b49c6e 100644 --- a/Voxt/Settings/OnboardingSettingsView+Data.swift +++ b/Voxt/Settings/OnboardingSettingsView+Data.swift @@ -544,6 +544,7 @@ extension OnboardingSettingsView { configuration, updating: remoteASRProviderConfigurationsRaw ) + NotificationCenter.default.post(name: .voxtRemoteProviderConfigurationsDidChange, object: nil) } func saveRemoteLLMConfiguration(_ configuration: RemoteProviderConfiguration) { @@ -551,6 +552,7 @@ extension OnboardingSettingsView { configuration, updating: remoteLLMProviderConfigurationsRaw ) + NotificationCenter.default.post(name: .voxtRemoteProviderConfigurationsDidChange, object: nil) } func 
exportConfiguration() { diff --git a/Voxt/Settings/OnboardingSettingsView+Steps.swift b/Voxt/Settings/OnboardingSettingsView+Steps.swift index 36f34d1..f512137 100644 --- a/Voxt/Settings/OnboardingSettingsView+Steps.swift +++ b/Voxt/Settings/OnboardingSettingsView+Steps.swift @@ -293,7 +293,7 @@ extension OnboardingSettingsView { rewriteCustomLLMRepo = newValue } ), - options: CustomLLMModelManager.availableModels.map { model in + options: CustomLLMModelManager.displayModels(including: customLLMRepo).map { model in SettingsMenuOption(value: model.id, title: model.title) }, selectedTitle: customLLMManager.displayTitle(for: customLLMRepo), @@ -845,7 +845,7 @@ extension OnboardingSettingsView { } func customLLMDescription(for repo: String) -> String { - guard let description = CustomLLMModelManager.availableModels.first(where: { $0.id == repo })?.description else { + guard let description = customLLMManager.description(for: repo) else { return "" } return AppLocalization.localizedString(description) diff --git a/Voxt/Settings/PermissionsSettingsView.swift b/Voxt/Settings/PermissionsSettingsView.swift index d30f9c7..d0a8463 100644 --- a/Voxt/Settings/PermissionsSettingsView.swift +++ b/Voxt/Settings/PermissionsSettingsView.swift @@ -498,19 +498,24 @@ struct PermissionsSettingsView: View { private func requestBrowserAutomationPermission(_ target: BrowserAutomationTarget) { browserAutomationRequestsInFlight.insert(target.bundleID) - VoxtLog.info("Browser automation permission request triggered: target=\(target.bundleID)") + VoxtLog.model("Browser automation permission request triggered: target=\(target.bundleID)") Task { @MainActor in defer { browserAutomationRequestsInFlight.remove(target.bundleID) } let status = automationPermissionStatus(for: target.bundleID, askUserIfNeeded: true) - let enabled = (status == noErr) + let scriptProbe = isApplicationRunning(bundleID: target.bundleID) + ? 
runAppleScriptCandidates(target.scripts) + : ScriptProbeResult(success: false, permissionDenied: false, appNotRunning: true, lastErrorCode: nil) + let enabled = scriptProbe.success || (status == noErr && !scriptProbe.permissionDenied) browserAutomationStates[target.bundleID] = enabled ? .enabled : .disabled if enabled { browserAutomationMessages[target.bundleID] = AppLocalization.localizedString("Authorization granted.") + } else if scriptProbe.appNotRunning { + browserAutomationMessages[target.bundleID] = AppLocalization.localizedString("Open the browser and try again to complete the authorization check.") } else { browserAutomationMessages[target.bundleID] = AppLocalization.localizedString("Authorization not granted.") } - VoxtLog.info( + VoxtLog.model( "Browser automation permission status: target=\(target.bundleID), state=\(enabled ? "enabled" : "disabled"), status=\(status)" ) } @@ -518,6 +523,15 @@ struct PermissionsSettingsView: View { private func probeBrowserAutomationState(_ target: BrowserAutomationTarget) -> PermissionState { let status = automationPermissionStatus(for: target.bundleID, askUserIfNeeded: false) + if isApplicationRunning(bundleID: target.bundleID) { + let scriptProbe = runAppleScriptCandidates(target.scripts) + if scriptProbe.success { + return .enabled + } + if scriptProbe.permissionDenied { + return .disabled + } + } return status == noErr ? 
.enabled : .disabled } @@ -597,6 +611,11 @@ struct PermissionsSettingsView: View { ) } + private func isApplicationRunning(bundleID: String) -> Bool { + NSRunningApplication.runningApplications(withBundleIdentifier: bundleID) + .contains(where: { !$0.isTerminated }) + } + private func openSettings(for kind: SettingsPermissionKind) { PermissionGuidance.openSettings(for: kind) } diff --git a/Voxt/Settings/SettingsTypes.swift b/Voxt/Settings/SettingsTypes.swift index 0a006c7..41bbe01 100644 --- a/Voxt/Settings/SettingsTypes.swift +++ b/Voxt/Settings/SettingsTypes.swift @@ -5,6 +5,7 @@ extension Notification.Name { static let voxtSettingsNavigate = Notification.Name("voxt.settings.navigate") static let voxtInterfaceLanguageDidChange = Notification.Name("voxt.interfaceLanguage.didChange") static let voxtConfigurationDidImport = Notification.Name("voxt.configuration.didImport") + static let voxtRemoteProviderConfigurationsDidChange = Notification.Name("voxt.remoteProviderConfigurations.didChange") static let voxtPermissionsDidChange = Notification.Name("voxt.permissions.didChange") static let voxtSelectedInputDeviceDidChange = Notification.Name("voxt.selectedInputDevice.didChange") static let voxtAudioInputDevicesDidChange = Notification.Name("voxt.audioInputDevices.didChange") diff --git a/Voxt/Settings/SettingsView.swift b/Voxt/Settings/SettingsView.swift index def936a..23dc534 100644 --- a/Voxt/Settings/SettingsView.swift +++ b/Voxt/Settings/SettingsView.swift @@ -27,6 +27,8 @@ struct SettingsView: View { @AppStorage(AppPreferenceKey.muteSystemAudioWhileRecording) private var muteSystemAudioWhileRecording = false @AppStorage(AppPreferenceKey.transcriptionEngine) private var transcriptionEngineRaw = TranscriptionEngine.mlxAudio.rawValue @AppStorage(AppPreferenceKey.featureSettings) private var featureSettingsRaw = "" + @AppStorage(AppPreferenceKey.remoteASRProviderConfigurations) private var remoteASRProviderConfigurationsRaw = "" + 
@AppStorage(AppPreferenceKey.remoteLLMProviderConfigurations) private var remoteLLMProviderConfigurationsRaw = "" @State private var selectedTab: SettingsTab @State private var selectedFeatureTab: FeatureSettingsTab @State private var sidebarMode: SettingsSidebarMode @@ -137,6 +139,9 @@ struct SettingsView: View { dictionaryStore.reloadAsync() dictionarySuggestionStore.reloadAsync() } + .onReceive(NotificationCenter.default.publisher(for: .voxtRemoteProviderConfigurationsDidChange)) { _ in + refreshModelConfigurationBadge() + } .onReceive(NotificationCenter.default.publisher(for: .voxtPermissionsDidChange)) { _ in refreshPermissionBadge() } @@ -174,6 +179,12 @@ struct SettingsView: View { selectedFeatureTab = .transcription } } + .onChange(of: remoteASRProviderConfigurationsRaw) { _, _ in + refreshModelConfigurationBadge() + } + .onChange(of: remoteLLMProviderConfigurationsRaw) { _, _ in + refreshModelConfigurationBadge() + } .onChange(of: selectedTab) { _, tab in if Self.isStaticTab(tab) { initializedStaticTabs.insert(tab) diff --git a/Voxt/Support/CustomLLMModelDownloadSupport.swift b/Voxt/Support/CustomLLMModelDownloadSupport.swift index d8e8de0..9d6b982 100644 --- a/Voxt/Support/CustomLLMModelDownloadSupport.swift +++ b/Voxt/Support/CustomLLMModelDownloadSupport.swift @@ -2,6 +2,11 @@ import Foundation import HuggingFace enum CustomLLMModelDownloadSupport { + private static let chatTemplateFileNames: Set = [ + "chat_template.jinja", + "chat_template.json", + ] + struct DownloadContext { let repoID: Repo.ID let entries: [MLXModelDownloadSupport.ModelFileEntry] @@ -102,4 +107,133 @@ enum CustomLLMModelDownloadSupport { ) } } + + static func hasUsableChatTemplate(in directory: URL, fileManager: FileManager = .default) -> Bool { + for fileName in chatTemplateFileNames { + if fileManager.fileExists(atPath: directory.appendingPathComponent(fileName).path) { + return true + } + } + + let tokenizerConfigURL = directory.appendingPathComponent("tokenizer_config.json") + 
guard fileManager.fileExists(atPath: tokenizerConfigURL.path), + let data = try? Data(contentsOf: tokenizerConfigURL), + let object = try? JSONSerialization.jsonObject(with: data) as? [String: Any] + else { + return false + } + + if let template = object["chat_template"] as? String { + return !template.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty + } + return false + } + + static func repairMissingChatTemplateIfNeeded( + repo: String, + directory: URL, + preferredBaseURL: URL, + mirrorBaseURL: URL, + userAgent: String, + token: String? + ) async { + guard !hasUsableChatTemplate(in: directory) else { return } + + do { + let repaired = try await downloadMissingChatTemplateFilesIfNeeded( + repo: repo, + directory: directory, + baseURL: preferredBaseURL, + userAgent: userAgent, + token: token + ) + if repaired { + VoxtLog.model("Custom LLM chat template repaired from repo metadata: \(repo)") + return + } + } catch { + if let fallbackBaseURL = fallbackHubBaseURL( + from: preferredBaseURL, + mirrorBaseURL: mirrorBaseURL + ) { + do { + let repaired = try await downloadMissingChatTemplateFilesIfNeeded( + repo: repo, + directory: directory, + baseURL: fallbackBaseURL, + userAgent: userAgent, + token: token + ) + if repaired { + VoxtLog.model("Custom LLM chat template repaired from mirror metadata: \(repo)") + return + } + } catch { + VoxtLog.warning("Custom LLM chat template repair failed via mirror. repo=\(repo), error=\(error.localizedDescription)") + } + } else { + VoxtLog.warning("Custom LLM chat template repair failed. repo=\(repo), error=\(error.localizedDescription)") + } + } + } + + private static func downloadMissingChatTemplateFilesIfNeeded( + repo: String, + directory: URL, + baseURL: URL, + userAgent: String, + token: String? 
+ ) async throws -> Bool { + guard let repoID = Repo.ID(rawValue: repo) else { return false } + + let session = MLXModelDownloadSupport.makeDownloadSession(for: baseURL) + let entries = try await MLXModelDownloadSupport.fetchModelEntries( + repo: repoID.description, + baseURL: baseURL, + session: session, + userAgent: userAgent + ) + + let templateEntries = entries.filter { entry in + chatTemplateFileNames.contains(URL(fileURLWithPath: entry.path).lastPathComponent.lowercased()) + } + + guard !templateEntries.isEmpty else { return false } + + var downloadedAny = false + for entry in templateEntries { + let destination = try CustomLLMModelStorageSupport.destinationFileURL( + for: entry.path, + under: directory + ) + if MLXModelDownloadSupport.canReuseExistingDownload( + at: destination, + expectedSize: entry.size, + fileManager: .default + ) { + continue + } + + let descriptor = ResumableDownloadDescriptor( + sourceURL: try MLXModelDownloadSupport.fileResolveURL( + baseURL: baseURL, + repo: repoID.description, + path: entry.path + ), + destinationURL: destination, + relativePath: entry.path, + expectedSize: entry.size, + userAgent: userAgent, + bearerToken: token, + disableProxy: MLXModelDownloadSupport.isMirrorHost(baseURL) + ) + _ = try await ResumableModelDownloadSupport.download( + descriptor, + progress: Progress(totalUnitCount: max(entry.size ?? 
1, 1)) + ) + downloadedAny = true + } + + return downloadedAny && hasUsableChatTemplate(in: directory) + } } diff --git a/Voxt/Support/CustomLLMModelManager.swift b/Voxt/Support/CustomLLMModelManager.swift index f7707df..515124f 100644 --- a/Voxt/Support/CustomLLMModelManager.swift +++ b/Voxt/Support/CustomLLMModelManager.swift @@ -2,7 +2,60 @@ import Foundation import HuggingFace import Combine import MLX +import MLXLLM import MLXLMCommon +import Tokenizers + +private struct LocalTokenizerBridge: MLXLMCommon.Tokenizer { + private let upstream: any Tokenizers.Tokenizer + + init(_ upstream: any Tokenizers.Tokenizer) { + self.upstream = upstream + } + + func encode(text: String, addSpecialTokens: Bool) -> [Int] { + upstream.encode(text: text, addSpecialTokens: addSpecialTokens) + } + + func decode(tokenIds: [Int], skipSpecialTokens: Bool) -> String { + upstream.decode(tokens: tokenIds, skipSpecialTokens: skipSpecialTokens) + } + + func convertTokenToId(_ token: String) -> Int? { + upstream.convertTokenToId(token) + } + + func convertIdToToken(_ id: Int) -> String? { + upstream.convertIdToToken(id) + } + + var bosToken: String? { upstream.bosToken } + var eosToken: String? { upstream.eosToken } + var unknownToken: String? { upstream.unknownToken } + + func applyChatTemplate( + messages: [[String: any Sendable]], + tools: [[String: any Sendable]]?, + additionalContext: [String: any Sendable]? 
+ ) throws -> [Int] { + do { + return try upstream.applyChatTemplate( + messages: messages, + tools: tools, + additionalContext: additionalContext + ) + } catch Tokenizers.TokenizerError.missingChatTemplate { + throw MLXLMCommon.TokenizerError.missingChatTemplate + } + } +} + +private struct LocalTokenizerLoader: MLXLMCommon.TokenizerLoader { + func load(from directory: URL) async throws -> any MLXLMCommon.Tokenizer { + let tokenizer = try await Tokenizers.AutoTokenizer.from(modelFolder: directory) + return LocalTokenizerBridge(tokenizer) + } +} @MainActor class CustomLLMModelManager: ObservableObject { @@ -52,6 +105,7 @@ class CustomLLMModelManager: ObservableObject { nonisolated static let defaultModelRepo = CustomLLMModelCatalog.defaultModelRepo nonisolated static let availableModels = CustomLLMModelCatalog.availableModels + nonisolated static let supportedModels = CustomLLMModelCatalog.supportedModels @Published private(set) var state: ModelState = .notDownloaded @Published private(set) var sizeState: ModelSizeState = .unknown @@ -59,6 +113,7 @@ class CustomLLMModelManager: ObservableObject { @Published private(set) var pausedStatusMessage: String? private var downloadedStateByRepo: [String: Bool] = [:] + private var downloadedStateCachePrimed = false private var localSizeTextByRepo: [String: String] = [:] private var modelRepo: String private var hubBaseURL: URL @@ -76,24 +131,32 @@ class CustomLLMModelManager: ObservableObject { private var activeInferenceCount = 0 init(modelRepo: String, hubBaseURL: URL = URL(string: "https://huggingface.co")!) 
{ - let repoSelection = CustomLLMRepoSelection.resolve( - requestedRepo: modelRepo, - supportedRepos: Self.availableModels.map(\.id), - fallbackRepo: Self.defaultModelRepo - ) + let repoSelection = Self.resolveModelRepo(modelRepo) + let repoWasSupported = Self.isSupportedModelRepo(modelRepo) self.modelRepo = repoSelection.effectiveRepo self.hubBaseURL = hubBaseURL self.remoteSizeTextByRepo = CustomLLMModelStorageSupport.loadPersistedRemoteSizeCache() - if repoSelection.didFallback { + if !repoWasSupported { VoxtLog.warning("Unsupported custom LLM repo '\(modelRepo)' found in settings. Falling back to \(repoSelection.effectiveRepo).") + } else if repoSelection.effectiveRepo != modelRepo { + VoxtLog.info("Canonicalized custom LLM repo '\(modelRepo)' -> '\(repoSelection.effectiveRepo)'") } - VoxtLog.info("Custom LLM manager initialized. repo=\(repoSelection.effectiveRepo), hub=\(hubBaseURL.absoluteString)") + VoxtLog.model("Custom LLM manager initialized. repo=\(repoSelection.effectiveRepo), hub=\(hubBaseURL.absoluteString)") checkExistingModel() } var currentModelRepo: String { modelRepo } + func isModelLoaded(repo: String) -> Bool { + let canonicalRepo = Self.canonicalModelRepo(repo) + return inferenceContainer != nil && inferenceModelRepo == canonicalRepo + } + func enhance(_ rawText: String, systemPrompt: String) async throws -> String { + try await enhance(rawText, systemPrompt: systemPrompt, modelRepo: modelRepo) + } + + func enhance(_ rawText: String, systemPrompt: String, modelRepo: String) async throws -> String { let input = rawText.trimmingCharacters(in: .whitespacesAndNewlines) guard !input.isEmpty else { return rawText } let request = CustomLLMRequestPlanBuilder.enhancement( @@ -315,7 +378,20 @@ class CustomLLMModelManager: ObservableObject { userInfo: [NSLocalizedDescriptionKey: "Invalid local model path."] ) } - let container = try await loadModelContainer(directory: directory) + let token = ProcessInfo.processInfo.environment["HF_TOKEN"] + ?? 
Bundle.main.object(forInfoDictionaryKey: "HF_TOKEN") as? String + await CustomLLMModelDownloadSupport.repairMissingChatTemplateIfNeeded( + repo: repo, + directory: directory, + preferredBaseURL: hubBaseURL, + mirrorBaseURL: Self.mirrorHubBaseURL, + userAgent: Self.hubUserAgent, + token: token + ) + let container = try await loadModelContainer( + from: directory, + using: LocalTokenizerLoader() + ) inferenceContainer = container inferenceModelRepo = repo return container @@ -325,21 +401,44 @@ class CustomLLMModelManager: ObservableObject { CustomLLMModelCatalog.displayTitle(for: repo) } + func description(for repo: String) -> String? { + CustomLLMModelCatalog.description(for: repo) + } + + nonisolated static func ratingText(for repo: String) -> String { + CustomLLMModelCatalog.ratingText(for: repo) + } + + nonisolated static func catalogTagKeys(for repo: String) -> [String] { + CustomLLMModelCatalog.catalogTagKeys(for: repo) + } + nonisolated static func fallbackRemoteSizeText(repo: String) -> String? { CustomLLMModelCatalog.fallbackRemoteSizeText(repo: repo) } + nonisolated static func canonicalModelRepo(_ repo: String) -> String { + CustomLLMModelCatalog.canonicalModelRepo(repo) + } + + nonisolated static func displayModels(including repo: String? 
= nil) -> [ModelOption] { + CustomLLMModelCatalog.displayModels(including: repo) + } + + nonisolated static func releaseStatus(for repo: String) -> CustomLLMModelCatalog.ReleaseStatus { + CustomLLMModelCatalog.releaseStatus(for: repo) + } + func updateModel(repo: String) { - let repoSelection = CustomLLMRepoSelection.resolve( - requestedRepo: repo, - supportedRepos: Self.availableModels.map(\.id), - fallbackRepo: Self.defaultModelRepo - ) + let repoSelection = Self.resolveModelRepo(repo) + let repoWasSupported = Self.isSupportedModelRepo(repo) guard repoSelection.effectiveRepo != modelRepo else { return } - if repoSelection.didFallback { + if !repoWasSupported { VoxtLog.warning("Unsupported custom LLM repo '\(repo)' requested. Falling back to \(repoSelection.effectiveRepo).") + } else if repoSelection.effectiveRepo != repo { + VoxtLog.info("Canonicalized custom LLM repo '\(repo)' -> '\(repoSelection.effectiveRepo)'") } - VoxtLog.info("Custom LLM model changed: \(modelRepo) -> \(repoSelection.effectiveRepo)") + VoxtLog.model("Custom LLM model changed: \(modelRepo) -> \(repoSelection.effectiveRepo)") modelRepo = repoSelection.effectiveRepo releaseInferenceResources(resetActiveInferenceCount: true) lastLoggedModelPresence = nil @@ -352,14 +451,28 @@ class CustomLLMModelManager: ObservableObject { CustomLLMModelCatalog.isSupportedModelRepo(repo) } + private nonisolated static func resolveModelRepo(_ requestedRepo: String) -> CustomLLMRepoSelection { + guard CustomLLMModelCatalog.isSupportedModelRepo(requestedRepo) else { + return CustomLLMRepoSelection( + requestedRepo: requestedRepo, + effectiveRepo: defaultModelRepo + ) + } + return CustomLLMRepoSelection( + requestedRepo: requestedRepo, + effectiveRepo: CustomLLMModelCatalog.canonicalModelRepo(requestedRepo) + ) + } + func updateHubBaseURL(_ url: URL) { guard url != hubBaseURL else { return } - VoxtLog.info("Custom LLM hub base URL changed: \(hubBaseURL.absoluteString) -> \(url.absoluteString)") + 
VoxtLog.model("Custom LLM hub base URL changed: \(hubBaseURL.absoluteString) -> \(url.absoluteString)") hubBaseURL = url fetchRemoteSize() } func isModelDownloaded(repo: String) -> Bool { + primeDownloadedStateCacheIfNeeded() if let cached = downloadedStateByRepo[repo] { return cached } @@ -384,6 +497,10 @@ class CustomLLMModelManager: ObservableObject { return text } + func cachedModelSizeText(repo: String) -> String? { + localSizeTextByRepo[repo] + } + func modelDirectoryURL(repo: String) -> URL? { guard let modelDir = cacheDirectory(for: repo), FileManager.default.fileExists(atPath: modelDir.path) @@ -433,7 +550,7 @@ class CustomLLMModelManager: ObservableObject { state = isDownloaded ? .downloaded : .notDownloaded let downloaded = (state == .downloaded) if lastLoggedModelPresence?.repo != modelRepo || lastLoggedModelPresence?.downloaded != downloaded { - VoxtLog.info("Custom LLM local model state refreshed: repo=\(modelRepo), downloaded=\(downloaded)") + VoxtLog.model("Custom LLM local model state refreshed: repo=\(modelRepo), downloaded=\(downloaded)") lastLoggedModelPresence = (modelRepo, downloaded) } } @@ -477,14 +594,15 @@ class CustomLLMModelManager: ObservableObject { VoxtLog.error("Custom LLM download produced incomplete files: \(modelRepo)") return } - state = .downloaded - VoxtLog.info("Custom LLM download completed: \(modelRepo)") + invalidateLocalCache(for: modelRepo) + checkExistingModel() + VoxtLog.model("Custom LLM download completed: \(modelRepo)") } catch is CancellationError { cancelDownloadProgressTask() switch downloadStopAction { case .pause: pausedStatusMessage = nil - VoxtLog.info("Custom LLM download paused: \(modelRepo)") + VoxtLog.model("Custom LLM download paused: \(modelRepo)") case .cancel, .none: pausedStatusMessage = nil if let modelDir = cacheDirectory(for: modelRepo) { @@ -530,7 +648,7 @@ class CustomLLMModelManager: ObservableObject { } func cancelDownload() { - VoxtLog.info("Custom LLM download cancellation requested: 
\(modelRepo)") + VoxtLog.model("Custom LLM download cancellation requested: \(modelRepo)") if downloadTask != nil { downloadStopAction = .cancel pausedStatusMessage = nil @@ -547,7 +665,7 @@ class CustomLLMModelManager: ObservableObject { } invalidateLocalCache(for: modelRepo) state = .notDownloaded - VoxtLog.info("Custom LLM download cancelled from paused state: \(modelRepo)") + VoxtLog.model("Custom LLM download cancelled from paused state: \(modelRepo)") } private func cancelDownloadProgressTask() { @@ -586,7 +704,7 @@ class CustomLLMModelManager: ObservableObject { userAgent: Self.hubUserAgent, token: token ) - VoxtLog.info("Custom LLM download started: repo=\(context.repoID.description), files=\(context.entries.count), baseURL=\(baseURL.absoluteString)") + VoxtLog.model("Custom LLM download started: repo=\(context.repoID.description), files=\(context.entries.count), baseURL=\(baseURL.absoluteString)") let totalBytes = context.totalBytes let totalFiles = context.entries.count @@ -757,6 +875,22 @@ class CustomLLMModelManager: ObservableObject { localSizeTextByRepo.removeValue(forKey: repo) } + private func primeDownloadedStateCacheIfNeeded() { + guard !downloadedStateCachePrimed else { return } + downloadedStateCachePrimed = true + + for model in Self.supportedModels { + let canonicalRepo = Self.canonicalModelRepo(model.id) + guard downloadedStateByRepo[canonicalRepo] == nil else { continue } + guard let modelDir = cacheDirectory(for: canonicalRepo), + FileManager.default.fileExists(atPath: modelDir.path) else { + downloadedStateByRepo[canonicalRepo] = false + continue + } + downloadedStateByRepo[canonicalRepo] = CustomLLMModelStorageSupport.isModelDirectoryValid(modelDir) + } + } + private func fetchRemoteSize() { sizeTask?.cancel() let repo = modelRepo diff --git a/Voxt/Support/CustomLLMModelSupport.swift b/Voxt/Support/CustomLLMModelSupport.swift index 0f11dca..03e9493 100644 --- a/Voxt/Support/CustomLLMModelSupport.swift +++ 
b/Voxt/Support/CustomLLMModelSupport.swift @@ -16,9 +16,26 @@ enum CustomLLMModelBehaviorResolver { let family = CustomLLMModelFamily.resolve(for: repo) return CustomLLMModelBehavior( family: family, - disablesThinking: family == .qwen3 + disablesThinking: disablesThinking(for: repo, family: family) ) } + + private static func disablesThinking( + for repo: String, + family: CustomLLMModelFamily + ) -> Bool { + let normalizedRepo = repo.lowercased() + if family == .qwen3 { + return true + } + if normalizedRepo.contains("glm-z1") || normalizedRepo.contains("glmz1") { + return true + } + if normalizedRepo.contains("acereason") { + return true + } + return false + } } enum CustomLLMTaskKind: Equatable { @@ -284,7 +301,12 @@ enum CustomLLMModelFamily: Equatable { let normalizedRepo = repo.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() if normalizedRepo.contains("qwen3") { return .qwen3 } if normalizedRepo.contains("qwen2") { return .qwen2 } - if normalizedRepo.contains("glm-4") || normalizedRepo.contains("glm4") { return .glm4 } + if normalizedRepo.contains("glm-4") + || normalizedRepo.contains("glm4") + || normalizedRepo.contains("glm-z1") + || normalizedRepo.contains("glmz1") { + return .glm4 + } if normalizedRepo.contains("llama") { return .llama } if normalizedRepo.contains("mistral") { return .mistral } if normalizedRepo.contains("gemma") { return .gemma } @@ -321,80 +343,323 @@ enum CustomLLMOutputSanitizer { } struct CustomLLMModelCatalog { + enum Visibility: String, Hashable { + case visible + case hiddenCompat + } + + enum ReleaseStatus: String, Hashable { + case standard + case new + case deprecatedSoon + } + struct Option: Identifiable, Hashable { let id: String let title: String let description: String + let visibility: Visibility + let releaseStatus: ReleaseStatus + } + + private struct PresentationMetadata { + let ratingText: String + let tagKeys: [String] } nonisolated static let defaultModelRepo = "Qwen/Qwen2-1.5B-Instruct" - nonisolated 
static let availableModels: [Option] = [ + nonisolated private static let compatibilityAliases: [String: String] = [ + "Qwen/Qwen3-8B-4bit": "mlx-community/Qwen3-8B-4bit", + "Qwen/Qwen2.5-7B-Instruct": "mlx-community/Qwen2.5-7B-Instruct-4bit", + "mlx-community/Qwen3.5-2B-MLX-4bit": "mlx-community/Qwen3.5-2B-4bit", + ] + + nonisolated private static let deprecatedSoonRepos: Set = [] + + nonisolated private static let visibleModels: [Option] = [ Option( id: "Qwen/Qwen2-1.5B-Instruct", title: "Qwen2 1.5B Instruct", - description: "General-purpose instruction model for prompt-based text cleanup." + description: "General-purpose instruction model for prompt-based text cleanup.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "Qwen/Qwen2.5-3B-Instruct", title: "Qwen2.5 3B Instruct", - description: "Larger instruction model with stronger reasoning and formatting quality." + description: "Larger instruction model with stronger reasoning and formatting quality.", + visibility: .visible, + releaseStatus: .standard + ), + Option( + id: "mlx-community/Qwen3-0.6B-4bit", + title: "Qwen3 0.6B (4bit)", + description: "Smallest official Qwen3 local model for fast low-memory prompts.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Qwen3-1.7B-4bit", + title: "Qwen3 1.7B (4bit)", + description: "Compact Qwen3 model with better quality than 0.6B while staying lightweight.", + visibility: .visible, + releaseStatus: .new ), Option( id: "mlx-community/Qwen3-4B-4bit", title: "Qwen3 4B (4bit)", - description: "Balanced Qwen3 model for quality and performance." + description: "Balanced Qwen3 model for quality and performance.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/Qwen3-8B-4bit", title: "Qwen3 8B (4bit)", - description: "Higher-quality Qwen3 model for stronger enhancement results." 
+ description: "Higher-quality Qwen3 model for stronger enhancement results.", + visibility: .visible, + releaseStatus: .standard + ), + Option( + id: "mlx-community/Qwen3.5-2B-4bit", + title: "Qwen3.5 2B (4bit)", + description: "Official Qwen3.5 local model using the upstream-supported inference path.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Qwen3.5-4B-4bit", + title: "Qwen3.5 4B (4bit)", + description: "Recommended Qwen3.5 upgrade for most home Macs with a strong quality-to-size balance.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Qwen3.5-0.8B-4bit-OptiQ", + title: "Qwen3.5 0.8B OptiQ (4bit)", + description: "Ultra-light Qwen3.5 option for low-storage Macs that still benefits from mixed-precision OptiQ quantization.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Qwen3.5-4B-OptiQ-4bit", + title: "Qwen3.5 4B OptiQ (4bit)", + description: "Mixed-precision Qwen3.5 variant tuned for a stronger quality-to-size balance on home Macs.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Qwen3.5-9B-OptiQ-4bit", + title: "Qwen3.5 9B OptiQ (4bit)", + description: "Higher-quality Qwen3.5 option for higher-memory Macs using Apple-Silicon-optimized mixed precision.", + visibility: .visible, + releaseStatus: .new ), Option( id: "mlx-community/GLM-4-9B-0414-4bit", title: "GLM-4 9B (4bit)", - description: "GLM-4 model variant with strong multilingual instruction following." 
+ description: "GLM-4 model variant with strong multilingual instruction following.", + visibility: .visible, + releaseStatus: .standard + ), + Option( + id: "mlx-community/glm-4-9b-chat-1m-4bit", + title: "GLM-4 9B Chat 1M (4bit)", + description: "Long-context GLM option that stays within home-Mac-friendly download size limits.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/GLM-Z1-9B-0414-4bit", + title: "GLM-Z1 9B (4bit)", + description: "Reasoning-oriented GLM variant that keeps bilingual quality while staying under the home-Mac size target.", + visibility: .visible, + releaseStatus: .new ), Option( id: "mlx-community/Llama-3.2-3B-Instruct-4bit", title: "Llama 3.2 3B Instruct (4bit)", - description: "Lightweight Llama 3.2 model for fast local enhancement." + description: "Lightweight Llama 3.2 model for fast local enhancement.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/Llama-3.2-1B-Instruct-4bit", title: "Llama 3.2 1B Instruct (4bit)", - description: "Smallest Llama 3.2 option with minimal memory footprint." + description: "Smallest Llama 3.2 option with minimal memory footprint.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/Meta-Llama-3-8B-Instruct-4bit", title: "Meta Llama 3 8B Instruct (4bit)", - description: "General-purpose 8B instruction model with strong quality." + description: "General-purpose 8B instruction model with strong quality.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit", title: "Meta Llama 3.1 8B Instruct (4bit)", - description: "Refined 8B Llama 3.1 instruction model." + description: "Refined 8B Llama 3.1 instruction model.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/Mistral-7B-Instruct-v0.3-4bit", title: "Mistral 7B Instruct v0.3 (4bit)", - description: "Reliable 7B instruction model for concise formatting tasks." 
+ description: "Reliable 7B instruction model for concise formatting tasks.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/Mistral-Nemo-Instruct-2407-4bit", title: "Mistral Nemo Instruct 2407 (4bit)", - description: "Nemo-based Mistral model with improved instruction quality." + description: "Nemo-based Mistral model with improved instruction quality.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/gemma-2-2b-it-4bit", title: "Gemma 2 2B IT (4bit)", - description: "Compact Gemma 2 instruction-tuned model." + description: "Compact Gemma 2 instruction-tuned model.", + visibility: .visible, + releaseStatus: .standard ), Option( id: "mlx-community/gemma-2-9b-it-4bit", title: "Gemma 2 9B IT (4bit)", - description: "Higher-capacity Gemma 2 model for better quality output." - ) + description: "Higher-capacity Gemma 2 model for better quality output.", + visibility: .visible, + releaseStatus: .standard + ), + Option( + id: "mlx-community/gemma-4-e2b-it-4bit", + title: "Gemma 4 E2B IT (4bit)", + description: "Official Gemma 4 compact text model with stronger newer prompting behavior.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/gemma-4-e4b-it-4bit", + title: "Gemma 4 E4B IT (4bit)", + description: "Higher-capacity Gemma 4 option for stronger local text generation quality.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Phi-3.5-mini-instruct-4bit", + title: "Phi 3.5 Mini Instruct (4bit)", + description: "Compact Phi 3.5 model suitable for lightweight local generation tasks.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/Phi-3.5-MoE-instruct-4bit", + title: "Phi 3.5 MoE Instruct (4bit)", + description: "Phi 3.5 MoE model with stronger quality when more memory is available.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/internlm2_5-7b-chat-4bit", + title: 
"InternLM2.5 7B Chat (4bit)", + description: "Chinese-friendly 7B chat model that adds a strong new bilingual option for home Macs.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/MiniCPM4-8B-4bit", + title: "MiniCPM4 8B (4bit)", + description: "Recommended MiniCPM family model with practical size and strong bilingual general-purpose quality.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/granite-3.3-2b-instruct-4bit", + title: "Granite 3.3 2B Instruct (4bit)", + description: "Compact IBM Granite model for structured local text generation.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/MiMo-7B-SFT-4bit", + title: "MiMo 7B SFT (4bit)", + description: "MiMo family model newly supported by upstream MLX Swift LM.", + visibility: .visible, + releaseStatus: .new + ), + Option( + id: "mlx-community/AceReason-Nemotron-7B-4bit", + title: "AceReason Nemotron 7B (4bit)", + description: "Nemotron-based reasoning model now available in the official upstream path.", + visibility: .visible, + releaseStatus: .new + ), + ] + + nonisolated private static let hiddenCompatibilityModels: [Option] = [ + Option( + id: "mlx-community/Qwen2.5-7B-Instruct-4bit", + title: "Qwen2.5 7B Instruct (4bit)", + description: "Compatibility-only official Qwen2.5 model preserved for existing selections.", + visibility: .hiddenCompat, + releaseStatus: .standard + ), + Option( + id: "mlx-community/Qwen3-30B-A3B-4bit", + title: "Qwen3 30B A3B (4bit)", + description: "Compatibility-only Qwen3 MoE model hidden from the main picker because its download size is too large for most home Macs.", + visibility: .hiddenCompat, + releaseStatus: .standard + ), + Option( + id: "mlx-community/GLM-4.7-Flash-4bit", + title: "GLM-4.7 Flash (4bit)", + description: "Compatibility-only GLM model hidden from the main picker because its download size is too large for most home Macs.", + visibility: .hiddenCompat, + 
releaseStatus: .standard + ), + ] + + nonisolated static let availableModels: [Option] = visibleModels + + nonisolated static let supportedModels: [Option] = visibleModels + hiddenCompatibilityModels + + nonisolated private static let presentationByRepo: [String: PresentationMetadata] = [ + "Qwen/Qwen2-1.5B-Instruct": PresentationMetadata(ratingText: "4.0", tagKeys: ["Fast"]), + "Qwen/Qwen2.5-3B-Instruct": PresentationMetadata(ratingText: "4.3", tagKeys: ["Balanced"]), + "mlx-community/Qwen3-0.6B-4bit": PresentationMetadata(ratingText: "4.0", tagKeys: ["Fast"]), + "mlx-community/Qwen3-1.7B-4bit": PresentationMetadata(ratingText: "4.2", tagKeys: ["Fast"]), + "mlx-community/Qwen3-4B-4bit": PresentationMetadata(ratingText: "4.6", tagKeys: ["Balanced"]), + "mlx-community/Qwen3-8B-4bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Accurate"]), + "mlx-community/Qwen3.5-2B-4bit": PresentationMetadata(ratingText: "4.3", tagKeys: ["Fast"]), + "mlx-community/Qwen3.5-4B-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Balanced"]), + "mlx-community/Qwen3.5-0.8B-4bit-OptiQ": PresentationMetadata(ratingText: "4.1", tagKeys: ["Fast"]), + "mlx-community/Qwen3.5-4B-OptiQ-4bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Balanced"]), + "mlx-community/Qwen3.5-9B-OptiQ-4bit": PresentationMetadata(ratingText: "4.9", tagKeys: ["Accurate"]), + "mlx-community/GLM-4-9B-0414-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Accurate"]), + "mlx-community/glm-4-9b-chat-1m-4bit": PresentationMetadata(ratingText: "4.6", tagKeys: ["Accurate"]), + "mlx-community/GLM-Z1-9B-0414-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Accurate"]), + "mlx-community/Llama-3.2-3B-Instruct-4bit": PresentationMetadata(ratingText: "4.2", tagKeys: ["Balanced"]), + "mlx-community/Llama-3.2-1B-Instruct-4bit": PresentationMetadata(ratingText: "4.0", tagKeys: ["Fast"]), + "mlx-community/Meta-Llama-3-8B-Instruct-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: 
["Accurate"]), + "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Accurate"]), + "mlx-community/Mistral-7B-Instruct-v0.3-4bit": PresentationMetadata(ratingText: "4.6", tagKeys: ["Balanced"]), + "mlx-community/Mistral-Nemo-Instruct-2407-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Accurate"]), + "mlx-community/gemma-2-2b-it-4bit": PresentationMetadata(ratingText: "4.1", tagKeys: ["Fast"]), + "mlx-community/gemma-2-9b-it-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Accurate"]), + "mlx-community/gemma-4-e2b-it-4bit": PresentationMetadata(ratingText: "4.3", tagKeys: ["Fast"]), + "mlx-community/gemma-4-e4b-it-4bit": PresentationMetadata(ratingText: "4.6", tagKeys: ["Balanced"]), + "mlx-community/Phi-3.5-mini-instruct-4bit": PresentationMetadata(ratingText: "4.2", tagKeys: ["Fast"]), + "mlx-community/Phi-3.5-MoE-instruct-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Accurate"]), + "mlx-community/internlm2_5-7b-chat-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Accurate"]), + "mlx-community/MiniCPM4-8B-4bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Accurate"]), + "mlx-community/granite-3.3-2b-instruct-4bit": PresentationMetadata(ratingText: "4.1", tagKeys: ["Fast"]), + "mlx-community/MiMo-7B-SFT-4bit": PresentationMetadata(ratingText: "4.4", tagKeys: ["Balanced"]), + "mlx-community/AceReason-Nemotron-7B-4bit": PresentationMetadata(ratingText: "4.5", tagKeys: ["Accurate"]), + "mlx-community/Qwen2.5-7B-Instruct-4bit": PresentationMetadata(ratingText: "4.5", tagKeys: ["Balanced"]), + "mlx-community/Qwen3-30B-A3B-4bit": PresentationMetadata(ratingText: "4.9", tagKeys: ["Accurate"]), + "mlx-community/GLM-4.7-Flash-4bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Accurate"]), ] nonisolated private static let knownRemoteSizeBytesByRepo: [String: Int64] = [ @@ -402,23 +667,70 @@ struct CustomLLMModelCatalog { "Qwen/Qwen2.5-3B-Instruct": 6_183_464_935, 
"mlx-community/Qwen3-4B-4bit": 2_278_972_183, "mlx-community/Qwen3-8B-4bit": 4_623_784_971, + "mlx-community/Qwen3.5-0.8B-4bit-OptiQ": 598_000_000, + "mlx-community/Qwen3.5-4B-4bit": 3_060_000_000, + "mlx-community/Qwen3.5-4B-OptiQ-4bit": 2_970_000_000, + "mlx-community/Qwen3.5-9B-OptiQ-4bit": 6_040_000_000, "mlx-community/GLM-4-9B-0414-4bit": 5_309_031_270, + "mlx-community/glm-4-9b-chat-1m-4bit": 5_360_000_000, + "mlx-community/GLM-Z1-9B-0414-4bit": 5_290_000_000, + "mlx-community/GLM-4.7-Flash-4bit": 16_900_000_000, "mlx-community/Llama-3.2-3B-Instruct-4bit": 1_824_825_759, "mlx-community/Llama-3.2-1B-Instruct-4bit": 712_593_855, "mlx-community/Meta-Llama-3-8B-Instruct-4bit": 5_281_878_323, "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit": 4_526_698_444, "mlx-community/Mistral-7B-Instruct-v0.3-4bit": 4_080_222_853, "mlx-community/Mistral-Nemo-Instruct-2407-4bit": 6_905_203_123, + "mlx-community/internlm2_5-7b-chat-4bit": 4_350_000_000, + "mlx-community/MiniCPM4-8B-4bit": 4_610_000_000, "mlx-community/gemma-2-2b-it-4bit": 1_492_852_888, "mlx-community/gemma-2-9b-it-4bit": 5_217_089_310, ] + nonisolated static func canonicalModelRepo(_ repo: String) -> String { + let trimmed = repo.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return defaultModelRepo } + return compatibilityAliases[trimmed] ?? trimmed + } + + nonisolated static func option(for repo: String) -> Option? { + let canonicalRepo = canonicalModelRepo(repo) + return supportedModels.first(where: { $0.id == canonicalRepo }) + } + + nonisolated static func displayModels(including repo: String? = nil) -> [Option] { + guard let repo else { return availableModels } + guard let option = option(for: repo), option.visibility == .hiddenCompat else { + return availableModels + } + return availableModels + [option] + } + nonisolated static func displayTitle(for repo: String) -> String { - availableModels.first(where: { $0.id == repo })?.title ?? repo + option(for: repo)?.title ?? 
repo + } + + nonisolated static func description(for repo: String) -> String? { + option(for: repo)?.description + } + + nonisolated static func ratingText(for repo: String) -> String { + presentationByRepo[canonicalModelRepo(repo)]?.ratingText ?? "4.0" + } + + nonisolated static func catalogTagKeys(for repo: String) -> [String] { + presentationByRepo[canonicalModelRepo(repo)]?.tagKeys ?? [] } nonisolated static func isSupportedModelRepo(_ repo: String) -> Bool { - CustomLLMRepoSelection.isSupported(repo: repo, supportedRepos: availableModels.map(\.id)) + option(for: repo) != nil + } + + nonisolated static func releaseStatus(for repo: String) -> ReleaseStatus { + if deprecatedSoonRepos.contains(canonicalModelRepo(repo)) { + return .deprecatedSoon + } + return option(for: repo)?.releaseStatus ?? .standard } nonisolated static func fallbackRemoteSizeText(repo: String) -> String? { @@ -426,7 +738,7 @@ struct CustomLLMModelCatalog { } nonisolated static func fallbackRemoteSizeInfo(repo: String) -> (bytes: Int64, text: String)? 
{ - guard let bytes = knownRemoteSizeBytesByRepo[repo] else { return nil } + guard let bytes = knownRemoteSizeBytesByRepo[canonicalModelRepo(repo)] else { return nil } return (bytes, CustomLLMModelStorageSupport.formatByteCount(bytes)) } } diff --git a/Voxt/Support/ModelDebugSupport.swift b/Voxt/Support/ModelDebugSupport.swift new file mode 100644 index 0000000..12ae673 --- /dev/null +++ b/Voxt/Support/ModelDebugSupport.swift @@ -0,0 +1,509 @@ +import Foundation +import AVFoundation + +struct ASRDebugModelOption: Identifiable, Hashable { + enum Selection: Hashable { + case mlx(repo: String) + case whisper(modelID: String) + case remote(provider: RemoteASRProvider, configuration: RemoteProviderConfiguration) + } + + let id: String + let title: String + let subtitle: String + let selection: Selection +} + +struct LLMDebugModelOption: Identifiable, Hashable { + enum Selection: Hashable { + case local(repo: String) + case remote(provider: RemoteLLMProvider, configuration: RemoteProviderConfiguration) + } + + let id: String + let title: String + let subtitle: String + let selection: Selection +} + +enum LLMDebugPresetKind: Hashable { + case custom + case enhancement + case translation + case rewrite + case meetingSummary + case appGroup(groupID: UUID) +} + +struct LLMDebugPresetOption: Identifiable, Hashable { + let id: String + let title: String + let subtitle: String + let kind: LLMDebugPresetKind + let promptTemplate: String + let variables: [PromptTemplateVariableDescriptor] + let defaultValues: [String: String] +} + +struct LLMDebugResolvedPrompt: Equatable { + let content: String + let inputSummary: String +} + +struct DebugAudioClip: Identifiable, Equatable { + let id: UUID + let fileURL: URL + let durationSeconds: Double + let sampleRate: Double + let createdAt: Date + + var summaryText: String { + let duration = String(format: "%.1f", durationSeconds) + let rate = Int(sampleRate.rounded()) + return "\(duration)s · \(rate) Hz" + } +} + +enum LLMDebugPresetStore 
{ + static let customPresetID = "custom" + + static func customPrompt(defaults: UserDefaults = .standard) -> String { + defaults.string(forKey: AppPreferenceKey.llmDebugCustomPrompt)? + .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + } + + static func saveCustomPrompt(_ prompt: String, defaults: UserDefaults = .standard) { + defaults.set(prompt, forKey: AppPreferenceKey.llmDebugCustomPrompt) + } + + static func promptOverride(for presetID: String, defaults: UserDefaults = .standard) -> String? { + promptOverrides(defaults: defaults)[presetID] + } + + static func savePromptOverride(_ prompt: String, for presetID: String, defaults: UserDefaults = .standard) { + var overrides = promptOverrides(defaults: defaults) + overrides[presetID] = prompt + savePromptOverrides(overrides, defaults: defaults) + } + + private static func promptOverrides(defaults: UserDefaults) -> [String: String] { + guard let data = defaults.data(forKey: AppPreferenceKey.llmDebugPresetPromptOverrides), + let decoded = try? JSONDecoder().decode([String: String].self, from: data) + else { + return [:] + } + return decoded + } + + private static func savePromptOverrides(_ overrides: [String: String], defaults: UserDefaults) { + guard let data = try? JSONEncoder().encode(overrides) else { return } + defaults.set(data, forKey: AppPreferenceKey.llmDebugPresetPromptOverrides) + } +} + +enum ModelDebugCatalog { + static func availableASRModels( + mlxModelManager: MLXModelManager, + whisperModelManager: WhisperKitModelManager, + remoteASRConfigurations: [String: RemoteProviderConfiguration] + ) -> [ASRDebugModelOption] { + let downloadedMLXRepos = Set( + MLXModelManager.availableModels.compactMap { model in + mlxModelManager.isModelDownloaded(repo: model.id) ? model.id : nil + } + ) + let downloadedWhisperModelIDs = Set( + WhisperKitModelManager.availableModels.compactMap { model in + whisperModelManager.isModelDownloaded(id: model.id) ? 
model.id : nil + } + ) + + return availableASRModels( + downloadedMLXRepos: downloadedMLXRepos, + downloadedWhisperModelIDs: downloadedWhisperModelIDs, + remoteASRConfigurations: remoteASRConfigurations + ) + } + + static func availableASRModels( + downloadedMLXRepos: Set<String>, + downloadedWhisperModelIDs: Set<String>, + remoteASRConfigurations: [String: RemoteProviderConfiguration] + ) -> [ASRDebugModelOption] { + var options: [ASRDebugModelOption] = [] + + let localMLX = MLXModelManager.availableModels.compactMap { model -> ASRDebugModelOption? in + guard downloadedMLXRepos.contains(model.id) else { return nil } + return ASRDebugModelOption( + id: "mlx:\(model.id)", + title: MLXModelCatalog.displayTitle(for: model.id), + subtitle: AppLocalization.localizedString("Local MLX Audio"), + selection: .mlx(repo: model.id) + ) + } + options.append(contentsOf: localMLX) + + let localWhisper = WhisperKitModelManager.availableModels.compactMap { model -> ASRDebugModelOption? in + guard downloadedWhisperModelIDs.contains(model.id) else { return nil } + return ASRDebugModelOption( + id: "whisper:\(model.id)", + title: WhisperKitModelCatalog.displayTitle(for: model.id), + subtitle: AppLocalization.localizedString("Local Whisper"), + selection: .whisper(modelID: model.id) + ) + } + options.append(contentsOf: localWhisper) + + let remote = RemoteASRProvider.allCases.compactMap { provider -> ASRDebugModelOption? 
in + let configuration = RemoteModelConfigurationStore.resolvedASRConfiguration( + provider: provider, + stored: remoteASRConfigurations + ) + guard configuration.isConfigured, configuration.hasUsableModel else { return nil } + return ASRDebugModelOption( + id: "remote-asr:\(provider.rawValue)", + title: "\(provider.title) · \(configuration.model)", + subtitle: AppLocalization.localizedString("Configured Remote ASR"), + selection: .remote(provider: provider, configuration: configuration) + ) + } + options.append(contentsOf: remote) + + return options + } + + static func availableLLMModels( + customLLMManager: CustomLLMModelManager, + remoteLLMConfigurations: [String: RemoteProviderConfiguration] + ) -> [LLMDebugModelOption] { + let downloadedLocalRepos = Set( + CustomLLMModelCatalog.displayModels(including: customLLMManager.currentModelRepo) + .compactMap { model in + customLLMManager.isModelDownloaded(repo: model.id) ? model.id : nil + } + ) + + return availableLLMModels( + downloadedLocalRepos: downloadedLocalRepos, + currentLocalRepo: customLLMManager.currentModelRepo, + remoteLLMConfigurations: remoteLLMConfigurations + ) + } + + static func availableLLMModels( + downloadedLocalRepos: Set<String>, + currentLocalRepo: String?, + remoteLLMConfigurations: [String: RemoteProviderConfiguration] + ) -> [LLMDebugModelOption] { + var options: [LLMDebugModelOption] = [] + + let local = CustomLLMModelCatalog.displayModels(including: currentLocalRepo) + .compactMap { model -> LLMDebugModelOption? in + guard downloadedLocalRepos.contains(model.id) else { return nil } + return LLMDebugModelOption( + id: "local-llm:\(model.id)", + title: CustomLLMModelCatalog.displayTitle(for: model.id), + subtitle: AppLocalization.localizedString("Local Custom LLM"), + selection: .local(repo: model.id) + ) + } + options.append(contentsOf: local) + + let remote = RemoteLLMProvider.allCases.compactMap { provider -> LLMDebugModelOption? 
in + let configuration = RemoteModelConfigurationStore.resolvedLLMConfiguration( + provider: provider, + stored: remoteLLMConfigurations + ) + guard configuration.isConfigured, configuration.hasUsableModel else { return nil } + return LLMDebugModelOption( + id: "remote-llm:\(provider.rawValue)", + title: "\(provider.title) · \(configuration.model)", + subtitle: AppLocalization.localizedString("Configured Remote LLM"), + selection: .remote(provider: provider, configuration: configuration) + ) + } + options.append(contentsOf: remote) + + return options + } + + static func availableLLMPresets(defaults: UserDefaults = .standard) -> [LLMDebugPresetOption] { + let userMainLanguage = userMainLanguagePromptValue(defaults: defaults) + let targetLanguage = TranslationTargetLanguage( + rawValue: defaults.string(forKey: AppPreferenceKey.translationTargetLanguage) ?? "" + ) ?? .english + + var presets: [LLMDebugPresetOption] = [ + LLMDebugPresetOption( + id: LLMDebugPresetStore.customPresetID, + title: AppLocalization.localizedString("Custom"), + subtitle: AppLocalization.localizedString("Debug-only preset"), + kind: .custom, + promptTemplate: LLMDebugPresetStore.customPrompt(defaults: defaults), + variables: [], + defaultValues: [:] + ), + LLMDebugPresetOption( + id: "builtin:enhancement", + title: AppLocalization.localizedString("Transcription Enhancement"), + subtitle: AppLocalization.localizedString("Built-in preset"), + kind: .enhancement, + promptTemplate: LLMDebugPresetStore.promptOverride(for: "builtin:enhancement", defaults: defaults) + ?? 
AppPromptDefaults.resolvedStoredText( + defaults.string(forKey: AppPreferenceKey.enhancementSystemPrompt), + kind: .enhancement, + defaults: defaults + ), + variables: ModelSettingsPromptVariables.enhancement, + defaultValues: [ + AppDelegate.rawTranscriptionTemplateVariable: "", + AppDelegate.userMainLanguageTemplateVariable: userMainLanguage + ] + ), + LLMDebugPresetOption( + id: "builtin:translation", + title: AppLocalization.localizedString("Translation"), + subtitle: AppLocalization.localizedString("Built-in preset"), + kind: .translation, + promptTemplate: LLMDebugPresetStore.promptOverride(for: "builtin:translation", defaults: defaults) + ?? AppPromptDefaults.resolvedStoredText( + defaults.string(forKey: AppPreferenceKey.translationSystemPrompt), + kind: .translation, + defaults: defaults + ), + variables: ModelSettingsPromptVariables.translation, + defaultValues: [ + "{{TARGET_LANGUAGE}}": targetLanguage.instructionName, + AppDelegate.userMainLanguageTemplateVariable: userMainLanguage, + "{{SOURCE_TEXT}}": "" + ] + ), + LLMDebugPresetOption( + id: "builtin:rewrite", + title: AppLocalization.localizedString("Rewrite"), + subtitle: AppLocalization.localizedString("Built-in preset"), + kind: .rewrite, + promptTemplate: LLMDebugPresetStore.promptOverride(for: "builtin:rewrite", defaults: defaults) + ?? AppPromptDefaults.resolvedStoredText( + defaults.string(forKey: AppPreferenceKey.rewriteSystemPrompt), + kind: .rewrite, + defaults: defaults + ), + variables: ModelSettingsPromptVariables.rewrite, + defaultValues: [ + "{{DICTATED_PROMPT}}": "", + "{{SOURCE_TEXT}}": "" + ] + ), + LLMDebugPresetOption( + id: "builtin:meeting-summary", + title: AppLocalization.localizedString("Meeting Summary"), + subtitle: AppLocalization.localizedString("Built-in preset"), + kind: .meetingSummary, + promptTemplate: LLMDebugPresetStore.promptOverride(for: "builtin:meeting-summary", defaults: defaults) + ?? 
AppPromptDefaults.resolvedStoredText( + defaults.string(forKey: AppPreferenceKey.meetingSummaryPromptTemplate), + kind: .meetingSummary, + defaults: defaults + ), + variables: MeetingSummarySupport.promptTemplateVariables.map { + PromptTemplateVariableDescriptor(token: $0, tipKey: "Template tip \($0)") + }, + defaultValues: [ + AppPreferenceKey.asrUserMainLanguageTemplateVariable: userMainLanguage, + "{{MEETING_RECORD}}": "" + ] + ) + ] + + let groups = loadAppBranchGroups(defaults: defaults) + presets.append( + contentsOf: groups.compactMap { group -> LLMDebugPresetOption? in + let trimmedPrompt = group.prompt.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedPrompt.isEmpty else { return nil } + return LLMDebugPresetOption( + id: "group:\(group.id.uuidString)", + title: AppLocalization.format("App Enhancement · %@", group.name), + subtitle: AppLocalization.localizedString("Saved group preset"), + kind: .appGroup(groupID: group.id), + promptTemplate: LLMDebugPresetStore.promptOverride(for: "group:\(group.id.uuidString)", defaults: defaults) ?? trimmedPrompt, + variables: ModelSettingsPromptVariables.enhancement, + defaultValues: [ + AppDelegate.rawTranscriptionTemplateVariable: "", + AppDelegate.userMainLanguageTemplateVariable: userMainLanguage + ] + ) + } + ) + + return presets + } + + private static func userMainLanguagePromptValue(defaults: UserDefaults) -> String { + let selectedCodes = UserMainLanguageOption.storedSelection( + from: defaults.string(forKey: AppPreferenceKey.userMainLanguageCodes) + ) + if let firstCode = selectedCodes.first, + let option = UserMainLanguageOption.option(for: firstCode) { + return option.promptName + } + return UserMainLanguageOption.fallbackOption().promptName + } + + private static func loadAppBranchGroups(defaults: UserDefaults) -> [AppBranchGroup] { + guard let data = defaults.data(forKey: AppPreferenceKey.appBranchGroups), + let groups = try? 
JSONDecoder().decode([AppBranchGroup].self, from: data) + else { + return [] + } + return groups + } +} + +enum ModelDebugPromptResolver { + static func resolve( + preset: LLMDebugPresetOption, + values: [String: String], + defaults: UserDefaults = .standard + ) -> LLMDebugResolvedPrompt { + let mergedValues = preset.defaultValues.merging(values) { _, rhs in rhs } + switch preset.kind { + case .custom: + return LLMDebugResolvedPrompt( + content: preset.promptTemplate, + inputSummary: "" + ) + case .enhancement, .appGroup: + let rawTranscription = mergedValues[AppDelegate.rawTranscriptionTemplateVariable] ?? "" + let userMainLanguage = mergedValues[AppDelegate.userMainLanguageTemplateVariable] ?? "" + let content = resolveEnhancementPrompt( + template: preset.promptTemplate, + rawTranscription: rawTranscription, + userMainLanguage: userMainLanguage + ) + return LLMDebugResolvedPrompt( + content: content, + inputSummary: rawTranscription + ) + case .translation: + let sourceText = mergedValues["{{SOURCE_TEXT}}"] ?? "" + let targetLanguage = TranslationTargetLanguage( + rawValue: defaults.string(forKey: AppPreferenceKey.translationTargetLanguage) ?? "" + ) ?? .english + let languageName = mergedValues["{{TARGET_LANGUAGE}}"]?.trimmingCharacters(in: .whitespacesAndNewlines) + let resolvedLanguage = TranslationTargetLanguage.allCases.first(where: { + $0.instructionName.caseInsensitiveCompare(languageName ?? "") == .orderedSame + }) ?? targetLanguage + let content = TranslationPromptBuilder.build( + systemPrompt: preset.promptTemplate, + targetLanguage: resolvedLanguage, + sourceText: sourceText, + userMainLanguagePromptValue: mergedValues[AppDelegate.userMainLanguageTemplateVariable] ?? "", + strict: true + ) + return LLMDebugResolvedPrompt( + content: content, + inputSummary: sourceText + ) + case .rewrite: + let dictatedPrompt = mergedValues["{{DICTATED_PROMPT}}"] ?? "" + let sourceText = mergedValues["{{SOURCE_TEXT}}"] ?? 
"" + let content = RewritePromptBuilder.build( + systemPrompt: preset.promptTemplate, + dictatedPrompt: dictatedPrompt, + sourceText: sourceText, + structuredAnswerOutput: false, + directAnswerMode: sourceText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty, + forceNonEmptyAnswer: false + ) + return LLMDebugResolvedPrompt( + content: content, + inputSummary: sourceText.isEmpty ? dictatedPrompt : sourceText + ) + case .meetingSummary: + let content = MeetingSummarySupport.summaryPrompt( + transcript: mergedValues["{{MEETING_RECORD}}"] ?? "", + settings: MeetingSummarySettingsSnapshot( + autoGenerate: false, + promptTemplate: preset.promptTemplate, + modelSelectionID: nil + ), + userMainLanguage: mergedValues[AppPreferenceKey.asrUserMainLanguageTemplateVariable] ?? "" + ) + return LLMDebugResolvedPrompt( + content: content, + inputSummary: mergedValues["{{MEETING_RECORD}}"] ?? "" + ) + } + } + + private static func resolveEnhancementPrompt( + template: String, + rawTranscription: String, + userMainLanguage: String + ) -> String { + let resolved = template + .replacingOccurrences(of: AppDelegate.rawTranscriptionTemplateVariable, with: rawTranscription) + .replacingOccurrences(of: AppDelegate.userMainLanguageTemplateVariable, with: userMainLanguage) + + let languageRules = """ + Runtime language preservation rules: + - User main language: \(userMainLanguage). + - It is guidance for punctuation, formatting, filler-word cleanup, and semantic disambiguation only. + - It is not a target output language and must not trigger translation. + - Preserve the original language distribution and wording in the transcription. 
+ """ + + return [resolved, languageRules] + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + .joined(separator: "\n\n") + } +} + +enum DebugAudioClipIO { + static func temporaryClipURL() -> URL { + FileManager.default.temporaryDirectory + .appendingPathComponent("Voxt-Debug-\(UUID().uuidString)") + .appendingPathExtension("wav") + } + + static func clip(for fileURL: URL) throws -> DebugAudioClip { + let file = try AVAudioFile(forReading: fileURL) + let sampleRate = file.processingFormat.sampleRate + let duration = sampleRate > 0 + ? Double(file.length) / sampleRate + : 0 + return DebugAudioClip( + id: UUID(), + fileURL: fileURL, + durationSeconds: duration, + sampleRate: sampleRate, + createdAt: Date() + ) + } + + static func loadMonoSamples(from fileURL: URL) throws -> (samples: [Float], sampleRate: Double) { + let file = try AVAudioFile(forReading: fileURL) + let format = file.processingFormat + let frameCount = AVAudioFrameCount(file.length) + guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { + throw NSError( + domain: "Voxt.ModelDebug", + code: -1, + userInfo: [NSLocalizedDescriptionKey: AppLocalization.localizedString("Unable to allocate audio buffer.")] + ) + } + try file.read(into: buffer) + + if let mono = AudioLevelMeter.monoSamples(from: buffer) { + return (mono, format.sampleRate) + } + + throw NSError( + domain: "Voxt.ModelDebug", + code: -2, + userInfo: [NSLocalizedDescriptionKey: AppLocalization.localizedString("Unable to decode audio samples.")] + ) + } +} diff --git a/Voxt/Support/VoxtLog.swift b/Voxt/Support/VoxtLog.swift index 28e4060..75e69a7 100644 --- a/Voxt/Support/VoxtLog.swift +++ b/Voxt/Support/VoxtLog.swift @@ -28,6 +28,11 @@ enum VoxtLog { log(message(), level: .info) } + static func model(_ message: @autoclosure () -> String) { + guard UserDefaults.standard.bool(forKey: AppPreferenceKey.llmDebugLoggingEnabled) else { return } + log(message(), level: .info) + } 
+ static func llmPreview(_ text: String, limit: Int = 1200) -> String { let normalized = text .replacingOccurrences(of: "\r\n", with: "\n") diff --git a/Voxt/Support/VoxtNetworkSession.swift b/Voxt/Support/VoxtNetworkSession.swift index 718e4ff..87b71e3 100644 --- a/Voxt/Support/VoxtNetworkSession.swift +++ b/Voxt/Support/VoxtNetworkSession.swift @@ -201,6 +201,41 @@ enum VoxtNetworkSession { ) } + static func activeProxyUnavailableMessage(for error: Error) -> String? { + let nsError = error as NSError + let description = nsError.localizedDescription.lowercased() + let socketNotConnected = description.contains("socket is not connected") + || description.contains("socket未连接") + let likelyProxyFailure = + socketNotConnected + || nsError.code == NSURLErrorCannotConnectToHost + || nsError.code == NSURLErrorNetworkConnectionLost + || nsError.code == NSURLErrorCannotFindHost + + guard likelyProxyFailure else { return nil } + + let settings = currentProxySettings + switch settings.mode { + case .system: + let status = currentSystemProxyStatus + guard status.hasEnabledProxy, let proxySummary = status.preferredSummary else { return nil } + return AppLocalization.format( + "Voxt is using the macOS system proxy (%@), but that proxy is unreachable. Make sure Clash/your proxy app is running, or switch Voxt to Direct Connection if you don't need a proxy.", + proxySummary + ) + case .custom: + guard settings.hasValidCustomEndpoint, let port = settings.port else { return nil } + return AppLocalization.format( + "Voxt is using the custom proxy (%@://%@:%d), but that proxy is unreachable. 
Check the proxy address, port, and whether the proxy app is running.", + settings.scheme.rawValue.uppercased(), + settings.host, + port + ) + case .disabled: + return nil + } + } + static var currentProxySettings: ProxySettings { let credentials = currentProxyCredentials() let defaults = UserDefaults.standard diff --git a/Voxt/Transcription/MLXModelManager+Download.swift b/Voxt/Transcription/MLXModelManager+Download.swift index 1469a02..d14521b 100644 --- a/Voxt/Transcription/MLXModelManager+Download.swift +++ b/Voxt/Transcription/MLXModelManager+Download.swift @@ -3,7 +3,7 @@ import CFNetwork import HuggingFace enum MLXModelDownloadSupport { - private static let modelEntryAllowedExtensions: Set<String> = ["safetensors", "json", "txt", "wav"] + private static let modelEntryAllowedExtensions: Set<String> = ["safetensors", "json", "txt", "wav", "jinja"] private static let byteFormatter: ByteCountFormatter = { let formatter = ByteCountFormatter() formatter.allowedUnits = [.useMB, .useGB] @@ -1014,9 +1014,9 @@ enum ResumableModelDownloadSupport { if let etag = existingState?.etag, !etag.isEmpty { request.setValue(etag, forHTTPHeaderField: "If-Range") } - VoxtLog.info("Resumable download resuming: file=\(descriptor.relativePath), offset=\(initialBytes)") + VoxtLog.model("Resumable download resuming: file=\(descriptor.relativePath), offset=\(initialBytes)") } else { - VoxtLog.info("Resumable download starting: file=\(descriptor.relativePath), url=\(descriptor.sourceURL.absoluteString)") + VoxtLog.model("Resumable download starting: file=\(descriptor.relativePath), url=\(descriptor.sourceURL.absoluteString)") } let task = session.dataTask(with: request) @@ -1131,7 +1131,7 @@ enum ResumableModelDownloadSupport { } try FileManager.default.moveItem(at: partURL, to: descriptor.destinationURL) try? 
FileManager.default.removeItem(at: stateURL) - VoxtLog.info("Resumable download completed: file=\(descriptor.relativePath), bytes=\(result.bytesDownloaded), resumedFrom=\(result.resumedFromBytes)") + VoxtLog.model("Resumable download completed: file=\(descriptor.relativePath), bytes=\(result.bytesDownloaded), resumedFrom=\(result.resumedFromBytes)") } private static func partialFileURL(for destinationURL: URL) -> URL { diff --git a/Voxt/Transcription/MLXModelManager.swift b/Voxt/Transcription/MLXModelManager.swift index a7180dc..2ee3777 100644 --- a/Voxt/Transcription/MLXModelManager.swift +++ b/Voxt/Transcription/MLXModelManager.swift @@ -2,6 +2,7 @@ import Foundation import Combine import CFNetwork import MLX +import MLXAudioCore import MLXAudioSTT import HuggingFace @@ -72,6 +73,7 @@ class MLXModelManager: ObservableObject { @Published private(set) var pausedStatusMessage: String? private var downloadedStateByRepo: [String: Bool] = [:] + private var downloadedStateCachePrimed = false private var localSizeTextByRepo: [String: String] = [:] private var modelRepo: String private var hubBaseURL: URL @@ -107,8 +109,21 @@ class MLXModelManager: ObservableObject { MLXModelCatalog.fallbackRemoteSizeText(repo: repo) } + nonisolated static func ratingText(for repo: String) -> String { + MLXModelCatalog.ratingText(for: repo) + } + + nonisolated static func catalogTagKeys(for repo: String) -> [String] { + MLXModelCatalog.catalogTagKeys(for: repo) + } + + nonisolated static func isMultilingualModelRepo(_ repo: String) -> Bool { + MLXModelCatalog.isMultilingualModelRepo(repo) + } + func isModelDownloaded(repo: String) -> Bool { let canonicalRepo = Self.canonicalModelRepo(repo) + primeDownloadedStateCacheIfNeeded() if let cached = downloadedStateByRepo[canonicalRepo] { return cached } @@ -134,6 +149,11 @@ class MLXModelManager: ObservableObject { return text } + func cachedModelSizeText(repo: String) -> String? 
{ + let canonicalRepo = Self.canonicalModelRepo(repo) + return localSizeTextByRepo[canonicalRepo] + } + func modelDirectoryURL(repo: String) -> URL? { let canonicalRepo = Self.canonicalModelRepo(repo) guard let modelDir = cacheDirectory(for: canonicalRepo), @@ -153,7 +173,10 @@ class MLXModelManager: ObservableObject { } if let repoID = Repo.ID(rawValue: canonicalRepo) { - MLXModelStorageSupport.clearHubCache(for: repoID) + MLXModelStorageSupport.clearHubCache( + for: repoID, + rootDirectory: ModelStorageDirectoryManager.resolvedRootURL() + ) } if let modelDir = cacheDirectory(for: canonicalRepo) { do { @@ -445,6 +468,25 @@ class MLXModelManager: ObservableObject { localSizeTextByRepo.removeValue(forKey: repo) } + private func primeDownloadedStateCacheIfNeeded() { + guard !downloadedStateCachePrimed else { return } + downloadedStateCachePrimed = true + + for model in Self.availableModels { + let canonicalRepo = Self.canonicalModelRepo(model.id) + guard downloadedStateByRepo[canonicalRepo] == nil else { continue } + guard let modelDir = cacheDirectory(for: canonicalRepo), + FileManager.default.fileExists(atPath: modelDir.path) else { + downloadedStateByRepo[canonicalRepo] = false + continue + } + downloadedStateByRepo[canonicalRepo] = MLXModelDownloadSupport.isModelDirectoryValid( + modelDir, + fileManager: .default + ) + } + } + private func readyModel(for repo: String) throws -> any STTGenerationModel { guard let model = loadedModel, loadedRepo == repo else { throw NSError( @@ -458,6 +500,7 @@ class MLXModelManager: ObservableObject { private static func loadSTTModel(for repo: String) async throws -> any STTGenerationModel { let lower = repo.lowercased() + let cache = activeHubCache() if lower.contains("forcedaligner") { throw NSError( domain: "MLXModelManager", @@ -466,31 +509,70 @@ class MLXModelManager: ObservableObject { ) } if lower.contains("glmasr") || lower.contains("glm-asr") { - return try await GLMASRModel.fromPretrained(repo) + return try await 
GLMASRModel.fromPretrained(repo, cache: cache) } if lower.contains("firered") { - return try await FireRedASR2Model.fromPretrained(repo) + return try await FireRedASR2Model.fromPretrained(repo, cache: cache) } if lower.contains("sensevoice") { - return try await SenseVoiceModel.fromPretrained(repo) + return try await SenseVoiceModel.fromPretrained(repo, cache: cache) } if lower.contains("qwen3-asr") || lower.contains("qwen3_asr") { - return try await Qwen3ASRModel.fromPretrained(repo) + return try await Qwen3ASRModel.fromPretrained(repo, cache: cache) } if lower.contains("voxtral") { - return try await VoxtralRealtimeModel.fromPretrained(repo) + return try await loadVoxtralModel(repo: repo, cache: cache) } if lower.contains("cohere") { - return try await CohereTranscribeModel.fromPretrained(repo) + return try await loadCohereModel(repo: repo, cache: cache) } if lower.contains("parakeet") { - return try await ParakeetModel.fromPretrained(repo) + return try await ParakeetModel.fromPretrained(repo, cache: cache) } if lower.contains("granite") { - return try await GraniteSpeechModel.fromPretrained(repo) + return try await GraniteSpeechModel.fromPretrained(repo, cache: cache) + } + + return try await Qwen3ASRModel.fromPretrained(repo, cache: cache) + } + + private static func loadVoxtralModel(repo: String, cache: HubCache) async throws -> VoxtralRealtimeModel { + guard let repoID = Repo.ID(rawValue: repo) else { + throw NSError( + domain: "MLXModelManager", + code: 1002, + userInfo: [NSLocalizedDescriptionKey: "Invalid repository ID: \(repo)"] + ) + } + + let modelDir = try await ModelUtils.resolveOrDownloadModel( + repoID: repoID, + requiredExtension: "safetensors", + cache: cache + ) + return try VoxtralRealtimeModel.fromDirectory(modelDir) + } + + private static func loadCohereModel(repo: String, cache: HubCache) async throws -> CohereTranscribeModel { + guard let repoID = Repo.ID(rawValue: repo) else { + throw NSError( + domain: "MLXModelManager", + code: 1003, + 
userInfo: [NSLocalizedDescriptionKey: "Invalid repository ID: \(repo)"] + ) } - return try await Qwen3ASRModel.fromPretrained(repo) + let modelDir = try await ModelUtils.resolveOrDownloadModel( + repoID: repoID, + requiredExtension: "safetensors", + additionalMatchingPatterns: ["*.model"], + cache: cache + ) + return try CohereTranscribeModel.fromDirectory(modelDir) + } + + static func activeHubCache() -> HubCache { + MLXModelStorageSupport.hubCache(rootDirectory: ModelStorageDirectoryManager.resolvedRootURL()) } private func cacheDirectory(for repo: String) -> URL? { @@ -1058,7 +1140,10 @@ class MLXModelManager: ObservableObject { private func clearCurrentRepoHubCache() { guard let repoID = Repo.ID(rawValue: modelRepo) else { return } - MLXModelStorageSupport.clearHubCache(for: repoID) + MLXModelStorageSupport.clearHubCache( + for: repoID, + rootDirectory: ModelStorageDirectoryManager.resolvedRootURL() + ) } } diff --git a/Voxt/Transcription/MLXModelSupport.swift b/Voxt/Transcription/MLXModelSupport.swift index a4a488c..4cb5c51 100644 --- a/Voxt/Transcription/MLXModelSupport.swift +++ b/Voxt/Transcription/MLXModelSupport.swift @@ -8,6 +8,11 @@ struct MLXModelCatalog { let description: String } + private struct PresentationMetadata { + let ratingText: String + let tagKeys: [String] + } + nonisolated static let defaultModelRepo = "mlx-community/Qwen3-ASR-0.6B-4bit" nonisolated private static let realtimeCapableModelRepos: Set<String> = [ @@ -152,6 +157,34 @@ struct MLXModelCatalog { ) ] + nonisolated private static let presentationByRepo: [String: PresentationMetadata] = [ + "mlx-community/Qwen3-ASR-0.6B-4bit": PresentationMetadata(ratingText: "4.4", tagKeys: ["Multilingual", "Fast"]), + "mlx-community/Qwen3-ASR-0.6B-6bit": PresentationMetadata(ratingText: "4.5", tagKeys: ["Multilingual", "Balanced"]), + "mlx-community/Qwen3-ASR-0.6B-8bit": PresentationMetadata(ratingText: "4.6", tagKeys: ["Multilingual", "Balanced"]), + "mlx-community/Qwen3-ASR-0.6B-bf16": 
PresentationMetadata(ratingText: "4.7", tagKeys: ["Multilingual", "Accurate"]), + "mlx-community/Qwen3-ASR-1.7B-4bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Multilingual", "Balanced"]), + "mlx-community/Qwen3-ASR-1.7B-6bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Multilingual", "Accurate"]), + "mlx-community/Qwen3-ASR-1.7B-8bit": PresentationMetadata(ratingText: "4.8", tagKeys: ["Multilingual", "Accurate"]), + "mlx-community/Qwen3-ASR-1.7B-bf16": PresentationMetadata(ratingText: "4.9", tagKeys: ["Multilingual", "Accurate"]), + "mlx-community/Voxtral-Mini-4B-Realtime-2602-4bit": PresentationMetadata(ratingText: "4.6", tagKeys: ["Multilingual", "Realtime", "Fast"]), + "mlx-community/Voxtral-Mini-4B-Realtime-6bit": PresentationMetadata(ratingText: "4.7", tagKeys: ["Multilingual", "Realtime", "Balanced"]), + "mlx-community/Voxtral-Mini-4B-Realtime-2602-fp16": PresentationMetadata(ratingText: "4.7", tagKeys: ["Multilingual", "Realtime", "Accurate"]), + "beshkenadze/cohere-transcribe-03-2026-mlx-fp16": PresentationMetadata(ratingText: "4.8", tagKeys: ["Multilingual", "Accurate"]), + "mlx-community/parakeet-tdt_ctc-110m": PresentationMetadata(ratingText: "4.0", tagKeys: ["Fast"]), + "mlx-community/parakeet-tdt-0.6b-v2": PresentationMetadata(ratingText: "4.2", tagKeys: ["Fast"]), + "mlx-community/parakeet-tdt-0.6b-v3": PresentationMetadata(ratingText: "4.3", tagKeys: ["Fast"]), + "mlx-community/parakeet-ctc-0.6b": PresentationMetadata(ratingText: "4.2", tagKeys: ["Balanced"]), + "mlx-community/parakeet-rnnt-0.6b": PresentationMetadata(ratingText: "4.3", tagKeys: ["Balanced"]), + "mlx-community/parakeet-tdt-1.1b": PresentationMetadata(ratingText: "4.6", tagKeys: ["Accurate"]), + "mlx-community/parakeet-tdt_ctc-1.1b": PresentationMetadata(ratingText: "4.6", tagKeys: ["Accurate"]), + "mlx-community/parakeet-ctc-1.1b": PresentationMetadata(ratingText: "4.5", tagKeys: ["Accurate"]), + "mlx-community/parakeet-rnnt-1.1b": PresentationMetadata(ratingText: 
"4.5", tagKeys: ["Accurate"]), + "mlx-community/GLM-ASR-Nano-2512-4bit": PresentationMetadata(ratingText: "4.1", tagKeys: ["Multilingual", "Fast"]), + "mlx-community/granite-4.0-1b-speech-5bit": PresentationMetadata(ratingText: "4.5", tagKeys: ["Multilingual", "Balanced"]), + "mlx-community/FireRedASR2-AED-mlx": PresentationMetadata(ratingText: "4.8", tagKeys: ["Multilingual", "Accurate"]), + "mlx-community/SenseVoiceSmall": PresentationMetadata(ratingText: "4.5", tagKeys: ["Multilingual", "Fast"]), + ] + nonisolated private static let knownRemoteSizeBytesByRepo: [String: Int64] = [ "mlx-community/Qwen3-ASR-0.6B-4bit": 712_781_279, "mlx-community/Qwen3-ASR-0.6B-6bit": 861_777_567, @@ -193,6 +226,18 @@ struct MLXModelCatalog { realtimeCapableModelRepos.contains(canonicalModelRepo(repo)) } + nonisolated static func ratingText(for repo: String) -> String { + presentationByRepo[canonicalModelRepo(repo)]?.ratingText ?? "4.3" + } + + nonisolated static func catalogTagKeys(for repo: String) -> [String] { + presentationByRepo[canonicalModelRepo(repo)]?.tagKeys ?? [] + } + + nonisolated static func isMultilingualModelRepo(_ repo: String) -> Bool { + catalogTagKeys(for: repo).contains("Multilingual") + } + nonisolated static func fallbackRemoteSizeText(repo: String) -> String? 
{ fallbackRemoteSizeInfo(repo: repo)?.text } @@ -235,6 +280,10 @@ enum MLXModelStorageSupport { .appendingPathComponent(modelSubdir) } + nonisolated static func hubCache(rootDirectory: URL) -> HubCache { + HubCache(cacheDirectory: rootDirectory) + } + nonisolated static func destinationFileURL(for entryPath: String, under directory: URL) throws -> URL { let base = directory.standardizedFileURL let destination = base.appendingPathComponent(entryPath).standardizedFileURL @@ -253,8 +302,8 @@ enum MLXModelStorageSupport { return destination } - nonisolated static func clearHubCache(for repoID: Repo.ID) { - let cache = HubCache.default + nonisolated static func clearHubCache(for repoID: Repo.ID, rootDirectory: URL = HubCache.default.cacheDirectory) { + let cache = hubCache(rootDirectory: rootDirectory) let repoDir = cache.repoDirectory(repo: repoID, kind: .model) let metadataDir = cache.metadataDirectory(repo: repoID, kind: .model) try? FileManager.default.removeItem(at: repoDir) diff --git a/Voxt/Transcription/MLXTranscriber.swift b/Voxt/Transcription/MLXTranscriber.swift index 968b383..c2117c9 100644 --- a/Voxt/Transcription/MLXTranscriber.swift +++ b/Voxt/Transcription/MLXTranscriber.swift @@ -962,6 +962,14 @@ class MLXTranscriber: ObservableObject, TranscriberProtocol { } } + func transcribeAudioFile(_ fileURL: URL) async throws -> String { + let loaded = try DebugAudioClipIO.loadMonoSamples(from: fileURL) + return await transcribeMeetingChunk( + samples: loaded.samples, + sampleRate: loaded.sampleRate + ) ?? 
"" + } + private func applyPreferredInputDeviceIfNeeded(inputNode: AVAudioInputNode) { guard let preferredInputDeviceID else { return } guard let audioUnit = inputNode.audioUnit else { return } diff --git a/Voxt/Transcription/RemoteASRStreamingContexts.swift b/Voxt/Transcription/RemoteASRStreamingContexts.swift index ed1d315..b6aaf10 100644 --- a/Voxt/Transcription/RemoteASRStreamingContexts.swift +++ b/Voxt/Transcription/RemoteASRStreamingContexts.swift @@ -1,5 +1,19 @@ import Foundation +actor AsyncGate { + private var isOpen = false + + func open() { + isOpen = true + } + + func wait() async { + while !isOpen { + try? await Task.sleep(for: .milliseconds(30)) + } + } +} + @MainActor final class AliyunQwenStreamingContext { let session: URLSession diff --git a/Voxt/Transcription/RemoteASRTranscriber+AliyunDebug.swift b/Voxt/Transcription/RemoteASRTranscriber+AliyunDebug.swift new file mode 100644 index 0000000..4945a6e --- /dev/null +++ b/Voxt/Transcription/RemoteASRTranscriber+AliyunDebug.swift @@ -0,0 +1,317 @@ +import Foundation + +extension RemoteASRTranscriber { + func transcribeAliyunFunRealtimeFile( + fileURL: URL, + token: String, + model: String, + endpoint: String, + hintPayload: ResolvedASRHintPayload + ) async throws -> String { + guard let wsURL = URL(string: RemoteASREndpointSupport.resolvedAliyunFunRealtimeEndpoint(endpoint)) else { + throw NSError(domain: "Voxt.RemoteASR", code: -41, userInfo: [NSLocalizedDescriptionKey: "Invalid Aliyun realtime WebSocket endpoint URL."]) + } + + let (samples, sampleRate) = try DebugAudioClipIO.loadMonoSamples(from: fileURL) + guard let pcmData = Self.makePCM16MonoData(from: samples, inputSampleRate: sampleRate), + !pcmData.isEmpty else { + throw NSError( + domain: "Voxt.RemoteASR", + code: -52, + userInfo: [NSLocalizedDescriptionKey: "Unable to decode audio samples."] + ) + } + + var request = URLRequest(url: wsURL) + request.timeoutInterval = 45 + request.setValue("Bearer \(token)", forHTTPHeaderField: 
"Authorization") + let managedSocket = VoxtNetworkSession.makeWebSocketTask(with: request) + let ws = managedSocket.task + ws.resume() + defer { + ws.cancel(with: .goingAway, reason: nil) + managedSocket.session.invalidateAndCancel() + } + + let taskID = AliyunMeetingASRConfiguration.makeRealtimeTaskID() + let responseState = AliyunFunResponseState() + let startSignal = AsyncGate() + let receiveTask = Task { + do { + while !Task.isCancelled { + let message = try await ws.receive() + let text: String + switch message { + case .string(let value): + text = value + case .data(let data): + guard let value = String(data: data, encoding: .utf8) else { continue } + text = value + @unknown default: + continue + } + try await self.handleAliyunFunDebugMessage( + text, + responseState: responseState, + startSignal: startSignal + ) + } + } catch { + await responseState.markCompletedWithError(error) + await startSignal.open() + } + } + + var parameters: [String: Any] = [ + "sample_rate": 16000, + "format": "pcm" + ] + if !hintPayload.languageHints.isEmpty { + parameters["language_hints"] = hintPayload.languageHints + } + + sendAliyunFunControl( + action: "run-task", + through: ws, + taskID: taskID, + model: model, + parameters: parameters + ) { error in + Task { + if let error { + await responseState.markCompletedWithError(error) + await startSignal.open() + } else { + await responseState.markRunRequested() + } + } + } + + await startSignal.wait() + + let chunkSize = 3200 + var offset = 0 + while offset < pcmData.count { + let end = min(offset + chunkSize, pcmData.count) + let chunk = Data(pcmData[offset.. 
String { + let resolvedEndpoint = RemoteASREndpointSupport.resolvedAliyunQwenRealtimeEndpoint(endpoint, model: model) + guard let wsURL = URL(string: resolvedEndpoint) else { + throw NSError(domain: "Voxt.RemoteASR", code: -45, userInfo: [NSLocalizedDescriptionKey: "Invalid Aliyun Qwen realtime WebSocket endpoint URL."]) + } + + let (samples, sampleRate) = try DebugAudioClipIO.loadMonoSamples(from: fileURL) + guard let pcmData = Self.makePCM16MonoData(from: samples, inputSampleRate: sampleRate), + !pcmData.isEmpty else { + throw NSError( + domain: "Voxt.RemoteASR", + code: -52, + userInfo: [NSLocalizedDescriptionKey: "Unable to decode audio samples."] + ) + } + + var request = URLRequest(url: wsURL) + request.timeoutInterval = 45 + request.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + let managedSocket = VoxtNetworkSession.makeWebSocketTask(with: request) + let ws = managedSocket.task + ws.resume() + defer { + ws.cancel(with: .goingAway, reason: nil) + managedSocket.session.invalidateAndCancel() + } + + let responseState = AliyunQwenResponseState() + let startSignal = AsyncGate() + let receiveTask = Task { + do { + while !Task.isCancelled { + let message = try await ws.receive() + let text: String + switch message { + case .string(let value): + text = value + case .data(let data): + guard let value = String(data: data, encoding: .utf8) else { continue } + text = value + @unknown default: + continue + } + try await self.handleAliyunQwenDebugMessage( + text, + responseState: responseState, + startSignal: startSignal + ) + } + } catch { + await responseState.markCompletedWithError(error) + await startSignal.open() + } + } + + sendAliyunQwenSessionUpdate(through: ws, hintPayload: hintPayload) { error in + Task { + if let error { + await responseState.markCompletedWithError(error) + await startSignal.open() + } + } + } + + await startSignal.wait() + + let chunkSize = 3200 + var offset = 0 + while offset < pcmData.count { + let end = min(offset + 
chunkSize, pcmData.count) + let chunk = Data(pcmData[offset.. [String: Any]? in + output["sentence"] as? [String: Any] + } ?? [:] + let partialText = (sentence["text"] as? String ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + let isSentenceEnd = sentence["sentence_end"] as? Bool ?? false + if !partialText.isEmpty { + _ = await responseState.updateWithSentence(partialText, isSentenceEnd: isSentenceEnd) + } + return + } + + if event == "task-finished" { + await responseState.markTaskFinished() + return + } + } + + func handleAliyunQwenDebugMessage( + _ text: String, + responseState: AliyunQwenResponseState, + startSignal: AsyncGate + ) async throws { + guard let data = text.data(using: .utf8), + let object = try JSONSerialization.jsonObject(with: data) as? [String: Any] else { + return + } + let type = (object["type"] as? String ?? "").lowercased() + if type == "error" { + let detail = (object["message"] as? String) ?? "Aliyun Qwen realtime ASR task failed." + throw NSError(domain: "Voxt.RemoteASR", code: -46, userInfo: [NSLocalizedDescriptionKey: detail]) + } + + if type == "session.updated" { + await startSignal.open() + return + } + + if type == "conversation.item.input_audio_transcription.text" { + let partial = (object["text"] as? String ?? "").trimmingCharacters(in: .whitespacesAndNewlines) + if !partial.isEmpty { + _ = await responseState.setPartial(partial) + } + return + } + + if type == "conversation.item.input_audio_transcription.completed" { + let final = (object["transcript"] as? String ?? 
"").trimmingCharacters(in: .whitespacesAndNewlines) + if !final.isEmpty { + _ = await responseState.commit(final) + } + return + } + + if type == "session.finished" { + await responseState.markSessionFinished() + return + } + } +} diff --git a/Voxt/Transcription/RemoteASRTranscriber.swift b/Voxt/Transcription/RemoteASRTranscriber.swift index 17a12c9..f1a0c15 100644 --- a/Voxt/Transcription/RemoteASRTranscriber.swift +++ b/Voxt/Transcription/RemoteASRTranscriber.swift @@ -367,7 +367,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { } } - private func resolveStreamingResult( + func resolveStreamingResult( warningMessage: String, waitForFinal: @escaping @Sendable () async throws -> String, fallback: @escaping @Sendable () async -> String @@ -375,9 +375,14 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { do { return try await waitForFinal() } catch { - VoxtLog.warning("\(warningMessage): \(error.localizedDescription)") - notifyRuntimeFailure(error) - return await fallback() + let fallbackText = await fallback() + if fallbackText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + VoxtLog.warning("\(warningMessage): \(error.localizedDescription)") + notifyRuntimeFailure(error) + } else { + VoxtLog.info("\(warningMessage): recovered with partial text fallback.", verbose: true) + } + return fallbackText } } @@ -486,6 +491,39 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { ) } + func transcribeDebugAudioFile( + _ fileURL: URL, + provider: RemoteASRProvider, + configuration: RemoteProviderConfiguration + ) async throws -> String { + guard configuration.isConfigured else { + throw NSError( + domain: "Voxt.RemoteASR", + code: -111, + userInfo: [NSLocalizedDescriptionKey: "Remote ASR is not configured yet."] + ) + } + let hintPayload = resolvedHintPayload(for: provider, configuration: configuration) + do { + return try await transcribeAudioFile( + fileURL: fileURL, + 
provider: provider, + configuration: configuration, + hintPayload: hintPayload + ) + } catch { + let message = userVisibleRemoteErrorMessage(for: error) + throw NSError( + domain: "Voxt.RemoteASR", + code: (error as NSError).code, + userInfo: [ + NSLocalizedDescriptionKey: message, + NSUnderlyingErrorKey: error, + ] + ) + } + } + private func resolvedHintPayload( for provider: RemoteASRProvider, configuration: RemoteProviderConfiguration @@ -645,7 +683,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { let accessToken = configuration.accessToken.trimmingCharacters(in: .whitespacesAndNewlines) let appID = configuration.appID.trimmingCharacters(in: .whitespacesAndNewlines) let resourceID = RemoteASREndpointSupport.resolvedDoubaoResourceID(from: configuration) - let endpoint = RemoteASREndpointSupport.resolvedDoubaoEndpoint(from: configuration) + let endpoint = RemoteASREndpointSupport.resolvedDoubaoStreamingEndpoint(from: configuration) guard !accessToken.isEmpty else { throw NSError(domain: "Voxt.RemoteASR", code: -3, userInfo: [NSLocalizedDescriptionKey: "Doubao Access Token is empty."]) @@ -664,7 +702,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { configuration: configuration ) } - return try await transcribeDoubaoWebSocket( + return try await transcribeDoubaoStreamingFileWebSocket( fileURL: fileURL, appID: appID, accessToken: accessToken, @@ -752,6 +790,24 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { guard !token.isEmpty else { throw NSError(domain: "Voxt.RemoteASR", code: -30, userInfo: [NSLocalizedDescriptionKey: "Aliyun Bailian API key is empty."]) } + if RemoteASREndpointSupport.isAliyunQwenRealtimeModel(model) { + return try await transcribeAliyunQwenRealtimeFile( + fileURL: fileURL, + token: token, + model: model, + endpoint: configuration.endpoint, + hintPayload: resolvedHintPayload(for: .aliyunBailianASR, configuration: configuration) + ) + } + if 
RemoteASREndpointSupport.isAliyunFunRealtimeModel(model) { + return try await transcribeAliyunFunRealtimeFile( + fileURL: fileURL, + token: token, + model: model, + endpoint: configuration.endpoint, + hintPayload: resolvedHintPayload(for: .aliyunBailianASR, configuration: configuration) + ) + } if let validationError = AliyunMeetingASRConfiguration.validationError(model: model, endpoint: configuration.endpoint) { throw NSError(domain: "Voxt.RemoteASR", code: -36, userInfo: [NSLocalizedDescriptionKey: validationError]) } @@ -1002,7 +1058,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { audioLevel = 0 } - private func sendAliyunFunControl( + func sendAliyunFunControl( action: String, through ws: URLSessionWebSocketTask, taskID: String, @@ -1190,7 +1246,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { isRecording = true } - private func sendAliyunQwenSessionUpdate( + func sendAliyunQwenSessionUpdate( through ws: URLSessionWebSocketTask, hintPayload: ResolvedASRHintPayload, onError: @escaping (Error?) -> Void @@ -1217,7 +1273,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { sendAliyunQwenEvent(payload: payload, through: ws, onError: onError) } - private func sendAliyunQwenAudioAppend( + func sendAliyunQwenAudioAppend( _ audio: Data, through ws: URLSessionWebSocketTask, onError: @escaping (Error?) -> Void @@ -1230,7 +1286,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { sendAliyunQwenEvent(payload: payload, through: ws, onError: onError) } - private func sendAliyunQwenEvent( + func sendAliyunQwenEvent( type: String, through ws: URLSessionWebSocketTask, onError: @escaping (Error?) 
-> Void @@ -1242,7 +1298,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { sendAliyunQwenEvent(payload: payload, through: ws, onError: onError) } - private func sendAliyunQwenEvent( + func sendAliyunQwenEvent( payload: [String: Any], through ws: URLSessionWebSocketTask, onError: @escaping (Error?) -> Void @@ -1365,7 +1421,136 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { try? await Task.sleep(for: .milliseconds(24)) } - let finalText = try await responseState.waitForFinalResult(timeoutSeconds: 20) + let finalText = await resolveStreamingResult( + warningMessage: "Doubao non-stream file result wait failed" + ) { + try await responseState.waitForFinalResult(timeoutSeconds: 20) + } fallback: { + await responseState.currentText() + } + receiveTask.cancel() + return finalText + } + + private func transcribeDoubaoStreamingFileWebSocket( + fileURL: URL, + appID: String, + accessToken: String, + resourceID: String, + endpoint: String, + hintPayload: ResolvedASRHintPayload, + configuration: RemoteProviderConfiguration + ) async throws -> String { + guard let wsURL = URL(string: endpoint) else { + throw NSError(domain: "Voxt.RemoteASR", code: -5, userInfo: [NSLocalizedDescriptionKey: "Invalid Doubao endpoint URL."]) + } + + let (samples, sampleRate) = try DebugAudioClipIO.loadMonoSamples(from: fileURL) + guard let pcmData = Self.makePCM16MonoData(from: samples, inputSampleRate: sampleRate), + !pcmData.isEmpty else { + throw NSError( + domain: "Voxt.RemoteASR", + code: -52, + userInfo: [NSLocalizedDescriptionKey: "Unable to decode audio samples."] + ) + } + + var request = URLRequest(url: wsURL) + request.timeoutInterval = 45 + request.setValue(appID, forHTTPHeaderField: "X-Api-App-Key") + request.setValue(accessToken, forHTTPHeaderField: "X-Api-Access-Key") + request.setValue(resourceID, forHTTPHeaderField: "X-Api-Resource-Id") + let requestID = UUID().uuidString.lowercased() + request.setValue(requestID, 
forHTTPHeaderField: "X-Api-Request-Id") + request.setValue(requestID, forHTTPHeaderField: "X-Api-Connect-Id") + VoxtLog.info( + "Doubao websocket connect. endpoint=\(endpoint), resource=\(resourceID)" + ) + + let managedSocket = VoxtNetworkSession.makeWebSocketTask(with: request) + let ws = managedSocket.task + ws.resume() + defer { + ws.cancel(with: .goingAway, reason: nil) + managedSocket.session.invalidateAndCancel() + } + + let reqID = UUID().uuidString.lowercased() + try await sendDoubaoFullRequest( + ws: ws, + reqID: reqID, + sequence: 1, + hintPayload: hintPayload, + audioFormat: DoubaoASRConfiguration.streamingAudioFormat, + configuration: configuration + ) + + let responseState = DoubaoResponseState { [weak self] error in + Task { @MainActor [weak self] in + self?.notifyRuntimeFailure(error) + } + } + let receiveTask = Task { + do { + while !Task.isCancelled { + let message = try await ws.receive() + guard case .data(let payloadData) = message else { continue } + if let parsed = try self.parseDoubaoServerPacket(payloadData) { + if let text = parsed.text, !text.isEmpty { + _ = await responseState.replace(text: text, isFinal: parsed.isFinal) + } else if parsed.isFinal { + await responseState.markFinal() + } + } + } + } catch { + if let detail = await self.fetchDoubaoHandshakeFailureDetail( + error: error, + endpoint: endpoint, + resourceID: resourceID, + appID: appID, + accessToken: accessToken + ) { + VoxtLog.warning("Doubao websocket receive failed. 
detail=\(detail)") + let detailedError = NSError( + domain: "Voxt.RemoteASR", + code: (error as NSError).code, + userInfo: [NSLocalizedDescriptionKey: detail] + ) + await responseState.markCompletedWithError(detailedError) + } else { + await responseState.markCompletedWithError(error) + } + } + } + + var offset = 0 + let chunkSize = DoubaoASRConfiguration.recommendedStreamingPacketBytes + var sequence: Int32 = 2 + while offset < pcmData.count { + let end = min(offset + chunkSize, pcmData.count) + let chunk = pcmData[offset..= pcmData.count + try await sendDoubaoAudioPacket( + ws: ws, + payload: Data(chunk), + isLast: isLast, + sequence: sequence + ) + if !isLast { + sequence += 1 + } + offset = end + try? await Task.sleep(for: .milliseconds(24)) + } + + let finalText = await resolveStreamingResult( + warningMessage: "Doubao async file result wait failed" + ) { + try await responseState.waitForFinalResult(timeoutSeconds: 20) + } fallback: { + await responseState.currentText() + } receiveTask.cancel() return finalText } @@ -1398,7 +1583,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { let requestID = UUID().uuidString.lowercased() request.setValue(requestID, forHTTPHeaderField: "X-Api-Request-Id") request.setValue(requestID, forHTTPHeaderField: "X-Api-Connect-Id") - VoxtLog.info( + VoxtLog.model( "Doubao stream connect. endpoint=\(endpoint), resource=\(resourceID)" ) @@ -2048,6 +2233,24 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { return output } + nonisolated static func makePCM16MonoData(from samples: [Float], inputSampleRate: Double) -> Data? 
{ + guard !samples.isEmpty, inputSampleRate > 0 else { return nil } + let targetRate = 16000.0 + let ratio = targetRate / inputSampleRate + let outputCount = max(Int(Double(samples.count) * ratio), 1) + var data = Data(count: outputCount * MemoryLayout.size) + data.withUnsafeMutableBytes { rawBuffer in + let out = rawBuffer.bindMemory(to: Int16.self) + for index in 0.. String { + WhisperKitModelCatalog.ratingText(for: id) + } + + nonisolated static func catalogTagKeys(for id: String) -> [String] { + WhisperKitModelCatalog.catalogTagKeys(for: id) + } + func updateModel(id: String) { let canonicalModelID = Self.canonicalModelID(id) guard canonicalModelID != modelID else { return } @@ -389,6 +398,7 @@ final class WhisperKitModelManager: ObservableObject { func isModelDownloaded(id: String) -> Bool { let canonicalModelID = Self.canonicalModelID(id) + primeDirectoryLookupCacheIfNeeded() if let cached = downloadedStateByID[canonicalModelID] { return cached } @@ -407,6 +417,7 @@ final class WhisperKitModelManager: ObservableObject { private func firstModelDirectoryURL(id: String, requireValid: Bool) -> URL? { let canonicalModelID = Self.canonicalModelID(id) + primeDirectoryLookupCacheIfNeeded() if let cached = directoryLookupCacheByID[canonicalModelID] { return requireValid ? cached.validURL : (cached.rawURL ?? 
cached.validURL) } @@ -436,6 +447,45 @@ final class WhisperKitModelManager: ObservableObject { return nil } + private func primeDirectoryLookupCacheIfNeeded() { + guard !directoryLookupCachePrimed else { return } + directoryLookupCachePrimed = true + + let expectedModelIDs = Set(Self.availableModels.map { Self.canonicalModelID($0.id) }) + guard let enumerator = FileManager.default.enumerator( + at: downloadRootURL(), + includingPropertiesForKeys: [.isDirectoryKey], + options: [.skipsHiddenFiles] + ) else { + for modelID in expectedModelIDs { + downloadedStateByID[modelID] = false + directoryLookupCacheByID[modelID] = DirectoryLookupCache(validURL: nil, rawURL: nil) + } + return + } + + for case let fileURL as URL in enumerator { + let folderName = fileURL.lastPathComponent + guard folderName.hasPrefix("openai_whisper-") else { continue } + let rawModelID = String(folderName.dropFirst("openai_whisper-".count)) + let canonicalModelID = Self.canonicalModelID(rawModelID) + guard expectedModelIDs.contains(canonicalModelID) else { continue } + guard directoryLookupCacheByID[canonicalModelID] == nil else { continue } + + let isValid = WhisperModelArtifacts.isValidModelDirectory(fileURL) + directoryLookupCacheByID[canonicalModelID] = DirectoryLookupCache( + validURL: isValid ? fileURL : nil, + rawURL: fileURL + ) + downloadedStateByID[canonicalModelID] = isValid + } + + for modelID in expectedModelIDs where directoryLookupCacheByID[modelID] == nil { + downloadedStateByID[modelID] = false + directoryLookupCacheByID[modelID] = DirectoryLookupCache(validURL: nil, rawURL: nil) + } + } + private func removeModelDirectoryIfPresent(id: String) { if let directoryURL = rawModelDirectoryURL(id: id) { try? removeModelDirectory(at: directoryURL, modelID: id, updatesCurrentState: false) @@ -487,6 +537,11 @@ final class WhisperKitModelManager: ObservableObject { return text } + func cachedModelSizeText(id: String) -> String? 
{ + let canonicalModelID = Self.canonicalModelID(id) + return localSizeTextByID[canonicalModelID] + } + func remoteSizeText(id: String) -> String { let canonicalModelID = Self.canonicalModelID(id) return remoteSizeTextByID[canonicalModelID] diff --git a/Voxt/Transcription/WhisperKitModelSupport.swift b/Voxt/Transcription/WhisperKitModelSupport.swift index 83b77cb..3a8047e 100644 --- a/Voxt/Transcription/WhisperKitModelSupport.swift +++ b/Voxt/Transcription/WhisperKitModelSupport.swift @@ -8,6 +8,11 @@ struct WhisperKitModelCatalog { let remoteSizeText: String } + private struct PresentationMetadata { + let ratingText: String + let tagKeys: [String] + } + nonisolated static let defaultModelID = "base" nonisolated static let availableModels: [Option] = [ @@ -43,6 +48,14 @@ struct WhisperKitModelCatalog { ), ] + nonisolated private static let presentationByID: [String: PresentationMetadata] = [ + "tiny": PresentationMetadata(ratingText: "4.0", tagKeys: ["Multilingual", "Fast"]), + "base": PresentationMetadata(ratingText: "4.3", tagKeys: ["Multilingual", "Balanced"]), + "small": PresentationMetadata(ratingText: "4.5", tagKeys: ["Multilingual", "Balanced"]), + "medium": PresentationMetadata(ratingText: "4.7", tagKeys: ["Multilingual", "Accurate"]), + "large-v3": PresentationMetadata(ratingText: "4.9", tagKeys: ["Multilingual", "Accurate"]), + ] + nonisolated private static let knownRemoteSizeBytesByID: [String: Int64] = [ "tiny": 76_635_397, "base": 146_719_453, @@ -60,6 +73,14 @@ struct WhisperKitModelCatalog { return availableModels.first(where: { $0.id == canonicalModelID })?.title ?? canonicalModelID } + nonisolated static func ratingText(for modelID: String) -> String { + presentationByID[canonicalModelID(modelID)]?.ratingText ?? "4.3" + } + + nonisolated static func catalogTagKeys(for modelID: String) -> [String] { + presentationByID[canonicalModelID(modelID)]?.tagKeys ?? ["Multilingual"] + } + nonisolated static func fallbackRemoteSizeText(id: String) -> String? 
{ fallbackRemoteSizeInfo(id: id)?.text } diff --git a/Voxt/Transcription/WhisperKitTranscriber.swift b/Voxt/Transcription/WhisperKitTranscriber.swift index 8b7b70e..489e400 100644 --- a/Voxt/Transcription/WhisperKitTranscriber.swift +++ b/Voxt/Transcription/WhisperKitTranscriber.swift @@ -282,6 +282,26 @@ final class WhisperKitTranscriber: ObservableObject, TranscriberProtocol { } } + func transcribeAudioFile( + _ fileURL: URL, + outputMode: SessionOutputMode = .transcription, + useBuiltInTranslationTask: Bool = false + ) async throws -> String { + let loaded = try DebugAudioClipIO.loadMonoSamples(from: fileURL) + preparedOutputMode = outputMode + preparedUseBuiltInTranslationTask = useBuiltInTranslationTask + let whisper = try await modelManager.loadWhisper() + let preparedSamples = prepareInputSamples(loaded.samples, sampleRate: loaded.sampleRate) + let results = try await whisper.transcribe( + audioArray: preparedSamples, + decodeOptions: buildDecodingOptions( + whisper: whisper, + includeWordTimings: false + ) + ) + return normalizeText(results.map(\.text).joined(separator: " ")) + } + func restartCaptureForPreferredInputDevice() throws { guard isRecording else { return } guard !whisperRealtimeEnabled else { diff --git a/Voxt/UI/ModelDebugCore.swift b/Voxt/UI/ModelDebugCore.swift new file mode 100644 index 0000000..809b6e0 --- /dev/null +++ b/Voxt/UI/ModelDebugCore.swift @@ -0,0 +1,819 @@ +import AppKit +import SwiftUI +import AVFoundation +import Combine + + +func modelDebugLocalized(_ key: String) -> String { + AppLocalization.localizedString(key) +} + +func modelDebugClipTimestamp(_ date: Date) -> String { + date.formatted(.dateTime.month(.defaultDigits).day(.defaultDigits).hour().minute().second()) +} + +func modelDebugClipTitlePreview(_ text: String) -> String? 
{ + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return nil } + let singleLine = trimmed.replacingOccurrences(of: "\n", with: " ") + return String(singleLine.prefix(28)) +} + +struct ASRDebugResult: Identifiable, Equatable { + enum Source: String { + case liveRecording + case reusedClip + } + + let id: UUID + let clipID: UUID + let clipTitle: String + let modelTitle: String + let source: Source + let audioDurationText: String + let runtimeText: String + let characterCount: Int + let createdAt: Date + let outputText: String + let errorText: String? + + var isError: Bool { errorText != nil } +} + +struct ASRDebugClipItem: Identifiable, Equatable { + let id: UUID + let clip: DebugAudioClip + let defaultTitle: String + var title: String + + var displayTitle: String { title } +} + +extension ASRDebugModelOption { + var selectorEntry: FeatureModelSelectorEntry { + let selectionID = FeatureModelSelectionID(rawValue: id) + let locationTag: String + switch selection { + case .remote: + locationTag = modelDebugLocalized("Remote") + case .mlx, .whisper: + locationTag = modelDebugLocalized("Local") + } + return FeatureModelSelectorEntry( + selectionID: selectionID, + title: title, + engine: subtitle, + sizeText: modelDebugLocalized("Ready"), + ratingText: "—", + filterTags: [locationTag, modelDebugLocalized("Installed"), modelDebugLocalized("Configured")], + displayTags: [locationTag, modelDebugLocalized("Installed")], + statusText: "", + usageLocations: [], + badgeText: nil, + isSelectable: true, + disabledReason: nil + ) + } +} + +extension LLMDebugModelOption { + var selectorEntry: FeatureModelSelectorEntry { + let selectionID = FeatureModelSelectionID(rawValue: id) + let locationTag: String + switch selection { + case .remote: + locationTag = modelDebugLocalized("Remote") + case .local: + locationTag = modelDebugLocalized("Local") + } + return FeatureModelSelectorEntry( + selectionID: selectionID, + title: title, + 
engine: subtitle, + sizeText: modelDebugLocalized("Ready"), + ratingText: "—", + filterTags: [locationTag, modelDebugLocalized("Installed"), modelDebugLocalized("Configured")], + displayTags: [locationTag, modelDebugLocalized("Installed")], + statusText: "", + usageLocations: [], + badgeText: nil, + isSelectable: true, + disabledReason: nil + ) + } +} + +enum ModelDebugWindowStyle { + static let width: CGFloat = 750 + static let height: CGFloat = 500 + static let minWidth: CGFloat = 650 + static let minHeight: CGFloat = 560 + static let selectorMinWidth: CGFloat = 150 + static let selectorIdealWidth: CGFloat = 220 + static let resultCardBodyHeight: CGFloat = 208 +} + +struct LLMDebugResult: Identifiable, Equatable { + let id: UUID + let modelTitle: String + let presetTitle: String + let inputSummary: String + let durationText: String + let createdAt: Date + let outputText: String + let errorText: String? + + var isError: Bool { errorText != nil } +} + +extension ASRDebugResult.Source { + var localizedTitle: String { + switch self { + case .liveRecording: + return modelDebugLocalized("Recorded") + case .reusedClip: + return modelDebugLocalized("Reused Audio") + } + } +} + +private final class DebugAudioRecorder: NSObject { + private var recorder: AVAudioRecorder? + private(set) var activeURL: URL? + + func start() throws { + let url = DebugAudioClipIO.temporaryClipURL() + let settings: [String: Any] = [ + AVFormatIDKey: kAudioFormatLinearPCM, + AVSampleRateKey: 16_000, + AVNumberOfChannelsKey: 1, + AVLinearPCMBitDepthKey: 16, + AVLinearPCMIsFloatKey: false, + AVLinearPCMIsBigEndianKey: false + ] + + let recorder = try AVAudioRecorder(url: url, settings: settings) + recorder.isMeteringEnabled = false + guard recorder.record() else { + throw NSError( + domain: "Voxt.ModelDebug", + code: -10, + userInfo: [NSLocalizedDescriptionKey: modelDebugLocalized("Failed to start recording.")] + ) + } + self.recorder = recorder + activeURL = url + } + + func stop() -> URL? 
{ + recorder?.stop() + recorder = nil + defer { activeURL = nil } + return activeURL + } + + func cancel() { + recorder?.stop() + recorder = nil + if let activeURL { + try? FileManager.default.removeItem(at: activeURL) + } + activeURL = nil + } +} + +@MainActor +final class ASRDebugViewModel: ObservableObject { + @Published private(set) var options: [ASRDebugModelOption] = [] + @Published var selectedModelID = "" + @Published private(set) var clips: [ASRDebugClipItem] = [] + @Published var selectedClipID: UUID? + @Published private(set) var results: [ASRDebugResult] = [] + @Published private(set) var isRecording = false + @Published private(set) var isRunning = false + @Published private(set) var isModelInitializing = false + @Published private(set) var statusMessage = "" + @Published private(set) var toastMessage = "" + + private let mlxModelManager: MLXModelManager + private let whisperModelManager: WhisperKitModelManager + private var remoteConfigurations: [String: RemoteProviderConfiguration] + private let recorder = DebugAudioRecorder() + private let mlxTranscriber: MLXTranscriber + private let whisperTranscriber: WhisperKitTranscriber + private let remoteTranscriber = RemoteASRTranscriber() + private var toastDismissTask: Task? + + init(appDelegate: AppDelegate) { + let useMirror = UserDefaults.standard.bool(forKey: AppPreferenceKey.useHfMirror) + let hubURL = useMirror ? 
MLXModelManager.mirrorHubBaseURL : MLXModelManager.defaultHubBaseURL + mlxModelManager = MLXModelManager( + modelRepo: appDelegate.mlxModelManager.currentModelRepo, + hubBaseURL: hubURL + ) + whisperModelManager = WhisperKitModelManager( + modelID: appDelegate.whisperModelManager.currentModelID, + hubBaseURL: hubURL + ) + mlxTranscriber = MLXTranscriber(modelManager: mlxModelManager) + whisperTranscriber = WhisperKitTranscriber(modelManager: whisperModelManager) + mlxTranscriber.dictionaryEntryProvider = { + appDelegate.dictionaryStore.activeEntriesForRemoteRequest( + activeGroupID: appDelegate.activeDictionaryGroupID() + ) + } + remoteTranscriber.doubaoDictionaryEntryProvider = { + appDelegate.dictionaryStore.activeEntriesForRemoteRequest( + activeGroupID: appDelegate.activeDictionaryGroupID() + ) + } + remoteConfigurations = RemoteModelConfigurationStore.loadConfigurations( + from: UserDefaults.standard.string(forKey: AppPreferenceKey.remoteASRProviderConfigurations) ?? "" + ) + refreshOptions() + selectedModelID = preferredModelID() + } + + func refreshOptions() { + remoteConfigurations = RemoteModelConfigurationStore.loadConfigurations( + from: UserDefaults.standard.string(forKey: AppPreferenceKey.remoteASRProviderConfigurations) ?? "" + ) + options = ModelDebugCatalog.availableASRModels( + mlxModelManager: mlxModelManager, + whisperModelManager: whisperModelManager, + remoteASRConfigurations: remoteConfigurations + ) + if !options.contains(where: { $0.id == selectedModelID }) { + selectedModelID = options.first?.id ?? "" + } + } + + var selectedModelTitle: String { + options.first(where: { $0.id == selectedModelID })?.title ?? modelDebugLocalized("Select Model") + } + + var selectedClipItem: ASRDebugClipItem? { + guard let selectedClipID else { return nil } + return clips.first(where: { $0.id == selectedClipID }) + } + + var selectedClipTitle: String { + selectedClipItem?.displayTitle ?? 
modelDebugLocalized("Select Audio") + } + + func toggleRecording() { + if isRecording { + stopRecordingAndRun() + } else { + startRecording() + } + } + + func generateSelectedClip() { + guard !options.isEmpty, options.contains(where: { $0.id == selectedModelID }) else { + showToast(modelDebugLocalized("No available model.")) + return + } + guard let clipItem = selectedClipItem else { + showToast(modelDebugLocalized("No audio selected.")) + return + } + Task { + await runCurrentSelection(with: clipItem, source: .reusedClip) + } + } + + func clearResults() { + results.removeAll() + } + + func removeResult(_ resultID: UUID) { + results.removeAll { $0.id == resultID } + } + + func deleteClip(_ clipID: UUID) { + guard let clip = clips.first(where: { $0.id == clipID }) else { return } + try? FileManager.default.removeItem(at: clip.clip.fileURL) + clips.removeAll { $0.id == clipID } + results.removeAll { $0.clipID == clipID } + if selectedClipID == clipID { + selectedClipID = clips.first?.id + } + statusMessage = "" + } + + func handleWindowClose() { + toastDismissTask?.cancel() + recorder.cancel() + for clip in clips { + try? FileManager.default.removeItem(at: clip.clip.fileURL) + } + clips.removeAll() + selectedClipID = nil + clearResults() + } + + private func preferredModelID() -> String { + let defaults = UserDefaults.standard + let engine = TranscriptionEngine(rawValue: defaults.string(forKey: AppPreferenceKey.transcriptionEngine) ?? "") + ?? .mlxAudio + switch engine { + case .mlxAudio: + return "mlx:\(MLXModelManager.canonicalModelRepo(defaults.string(forKey: AppPreferenceKey.mlxModelRepo) ?? MLXModelManager.defaultModelRepo))" + case .whisperKit: + return "whisper:\(WhisperKitModelManager.canonicalModelID(defaults.string(forKey: AppPreferenceKey.whisperModelID) ?? WhisperKitModelManager.defaultModelID))" + case .remote: + let provider = RemoteASRProvider(rawValue: defaults.string(forKey: AppPreferenceKey.remoteASRSelectedProvider) ?? "") + ?? 
.openAIWhisper + return "remote-asr:\(provider.rawValue)" + case .dictation: + return options.first?.id ?? "" + } + } + + private func startRecording() { + do { + try recorder.start() + isRecording = true + statusMessage = modelDebugLocalized("Recording…") + } catch { + statusMessage = error.localizedDescription + } + } + + private func stopRecordingAndRun() { + guard let url = recorder.stop() else { + isRecording = false + statusMessage = modelDebugLocalized("No recording was captured.") + return + } + isRecording = false + do { + let clip = try DebugAudioClipIO.clip(for: url) + let timestamp = modelDebugClipTimestamp(clip.createdAt) + let item = ASRDebugClipItem( + id: clip.id, + clip: clip, + defaultTitle: timestamp, + title: timestamp + ) + clips.insert(item, at: 0) + selectedClipID = item.id + statusMessage = AppLocalization.format("Recorded clip ready: %@", clip.summaryText) + Task { + await runCurrentSelection(with: item, source: .liveRecording) + } + } catch { + try? FileManager.default.removeItem(at: url) + statusMessage = error.localizedDescription + } + } + + private func runCurrentSelection( + with clipItem: ASRDebugClipItem, + source: ASRDebugResult.Source + ) async { + guard let option = options.first(where: { $0.id == selectedModelID }) else { return } + let needsInitialization = requiresInitialization(for: option) + isRunning = true + isModelInitializing = needsInitialization + defer { + isRunning = false + isModelInitializing = false + } + let startedAt = Date() + statusMessage = AppLocalization.format("Running %@", option.title) + + do { + let output = try await run(option: option, clip: clipItem.clip) + let elapsed = Date().timeIntervalSince(startedAt) + let result = ASRDebugResult( + id: UUID(), + clipID: clipItem.id, + clipTitle: clipItem.displayTitle, + modelTitle: option.title, + source: source, + audioDurationText: String(format: "%.1fs", clipItem.clip.durationSeconds), + runtimeText: String(format: "%.2fs", elapsed), + characterCount: 
output.count, + createdAt: Date(), + outputText: output, + errorText: nil + ) + results.insert(result, at: 0) + updateClipTitleIfNeeded(clipID: clipItem.id, transcript: output) + statusMessage = AppLocalization.format("Completed %@", option.title) + } catch { + let elapsed = Date().timeIntervalSince(startedAt) + let result = ASRDebugResult( + id: UUID(), + clipID: clipItem.id, + clipTitle: clipItem.displayTitle, + modelTitle: option.title, + source: source, + audioDurationText: String(format: "%.1fs", clipItem.clip.durationSeconds), + runtimeText: String(format: "%.2fs", elapsed), + characterCount: 0, + createdAt: Date(), + outputText: "", + errorText: error.localizedDescription + ) + results.insert(result, at: 0) + statusMessage = error.localizedDescription + } + } + + private func requiresInitialization(for option: ASRDebugModelOption) -> Bool { + switch option.selection { + case .mlx(let repo): + let canonicalRepo = MLXModelManager.canonicalModelRepo(repo) + return mlxModelManager.currentModelRepo != canonicalRepo || !mlxModelManager.isCurrentModelLoaded + case .whisper(let modelID): + let canonicalModelID = WhisperKitModelManager.canonicalModelID(modelID) + return whisperModelManager.currentModelID != canonicalModelID || !whisperModelManager.isCurrentModelLoaded + case .remote: + return false + } + } + + private func run(option: ASRDebugModelOption, clip: DebugAudioClip) async throws -> String { + switch option.selection { + case .mlx(let repo): + mlxModelManager.updateModel(repo: repo) + return try await mlxTranscriber.transcribeAudioFile(clip.fileURL) + case .whisper(let modelID): + whisperModelManager.updateModel(id: modelID) + return try await whisperTranscriber.transcribeAudioFile(clip.fileURL) + case .remote(let provider, let configuration): + return try await remoteTranscriber.transcribeDebugAudioFile( + clip.fileURL, + provider: provider, + configuration: configuration + ) + } + } + + private func updateClipTitleIfNeeded(clipID: UUID, transcript: 
String) { + guard let index = clips.firstIndex(where: { $0.id == clipID }), + let preview = modelDebugClipTitlePreview(transcript) + else { return } + guard clips[index].title == clips[index].defaultTitle else { return } + clips[index].title = "\(clips[index].defaultTitle) · \(preview)" + } + + func dismissToast() { + toastDismissTask?.cancel() + toastMessage = "" + } + + private func showToast(_ message: String, duration: TimeInterval = 2.2) { + toastDismissTask?.cancel() + toastMessage = message + toastDismissTask = Task { @MainActor in + try? await Task.sleep(nanoseconds: UInt64(duration * 1_000_000_000)) + guard !Task.isCancelled else { return } + self.toastMessage = "" + } + } +} + +@MainActor +final class LLMDebugViewModel: ObservableObject { + @Published private(set) var modelOptions: [LLMDebugModelOption] = [] + @Published private(set) var presetOptions: [LLMDebugPresetOption] = [] + @Published var selectedModelID = "" + @Published var selectedPresetID = "" + @Published var variableValues: [String: String] = [:] + @Published private(set) var results: [LLMDebugResult] = [] + @Published private(set) var isRunning = false + @Published private(set) var isModelInitializing = false + @Published private(set) var statusMessage = "" + + private let customLLMManager: CustomLLMModelManager + private var remoteConfigurations: [String: RemoteProviderConfiguration] + + init(appDelegate: AppDelegate) { + let useMirror = UserDefaults.standard.bool(forKey: AppPreferenceKey.useHfMirror) + let hubURL = useMirror ? CustomLLMModelManager.mirrorHubBaseURL : CustomLLMModelManager.defaultHubBaseURL + customLLMManager = CustomLLMModelManager( + modelRepo: appDelegate.customLLMManager.currentModelRepo, + hubBaseURL: hubURL + ) + remoteConfigurations = RemoteModelConfigurationStore.loadConfigurations( + from: UserDefaults.standard.string(forKey: AppPreferenceKey.remoteLLMProviderConfigurations) ?? 
"" + ) + refreshOptions() + selectedModelID = preferredModelID() + selectedPresetID = LLMDebugPresetStore.customPresetID + resetVariableValuesForPreset() + } + + var selectedPreset: LLMDebugPresetOption? { + presetOptions.first(where: { $0.id == selectedPresetID }) + } + + var selectedModelTitle: String { + modelOptions.first(where: { $0.id == selectedModelID })?.title ?? modelDebugLocalized("Select Model") + } + + var selectedPresetTitle: String { + selectedPreset?.title ?? modelDebugLocalized("Select Preset") + } + + var promptPreview: String { + guard let selectedPreset else { return "" } + return ModelDebugPromptResolver.resolve( + preset: selectedPreset, + values: variableValues + ).content + } + + func refreshOptions() { + remoteConfigurations = RemoteModelConfigurationStore.loadConfigurations( + from: UserDefaults.standard.string(forKey: AppPreferenceKey.remoteLLMProviderConfigurations) ?? "" + ) + modelOptions = ModelDebugCatalog.availableLLMModels( + customLLMManager: customLLMManager, + remoteLLMConfigurations: remoteConfigurations + ) + presetOptions = ModelDebugCatalog.availableLLMPresets() + if !modelOptions.contains(where: { $0.id == selectedModelID }) { + selectedModelID = modelOptions.first?.id ?? 
"" + } + if !presetOptions.contains(where: { $0.id == selectedPresetID }) { + selectedPresetID = LLMDebugPresetStore.customPresetID + } + } + + func presetDidChange() { + resetVariableValuesForPreset() + } + + func clearResults() { + results.removeAll() + } + + func removeResult(_ resultID: UUID) { + results.removeAll { $0.id == resultID } + } + + func savePromptTemplate(_ prompt: String) { + guard let preset = selectedPreset else { return } + let currentPresetID = selectedPresetID + let preservedVariableValues = variableValues + switch preset.kind { + case .custom: + LLMDebugPresetStore.saveCustomPrompt(prompt) + case .enhancement, .translation, .rewrite, .meetingSummary, .appGroup: + LLMDebugPresetStore.savePromptOverride(prompt, for: preset.id) + } + refreshOptions() + selectedPresetID = currentPresetID + variableValues = preservedVariableValues + } + + func applyPromptTemplate(_ prompt: String) { + guard let preset = selectedPreset else { return } + let defaults = UserDefaults.standard + switch preset.kind { + case .custom: + return + case .enhancement: + defaults.set( + AppPromptDefaults.canonicalStoredText(prompt, kind: .enhancement), + forKey: AppPreferenceKey.enhancementSystemPrompt + ) + case .translation: + defaults.set( + AppPromptDefaults.canonicalStoredText(prompt, kind: .translation), + forKey: AppPreferenceKey.translationSystemPrompt + ) + case .rewrite: + defaults.set( + AppPromptDefaults.canonicalStoredText(prompt, kind: .rewrite), + forKey: AppPreferenceKey.rewriteSystemPrompt + ) + case .meetingSummary: + defaults.set( + AppPromptDefaults.canonicalStoredText(prompt, kind: .meetingSummary), + forKey: AppPreferenceKey.meetingSummaryPromptTemplate + ) + case .appGroup(let groupID): + applyGroupPrompt(prompt, groupID: groupID, defaults: defaults) + } + } + + func handleWindowClose() { + clearResults() + statusMessage = "" + resetVariableValuesForPreset() + } + + func run() { + guard let preset = selectedPreset, + let model = modelOptions.first(where: 
{ $0.id == selectedModelID }) + else { return } + + let values = mergedVariableValues(for: preset) + let promptResolution = ModelDebugPromptResolver.resolve(preset: preset, values: values) + let needsInitialization = requiresInitialization(for: model) + isRunning = true + isModelInitializing = needsInitialization + statusMessage = AppLocalization.format("Running %@", model.title) + + Task { + let startedAt = Date() + do { + let output = try await run( + model: model, + preset: preset, + values: values, + promptResolution: promptResolution + ) + let result = LLMDebugResult( + id: UUID(), + modelTitle: model.title, + presetTitle: preset.title, + inputSummary: promptResolution.inputSummary, + durationText: String(format: "%.2fs", Date().timeIntervalSince(startedAt)), + createdAt: Date(), + outputText: output, + errorText: nil + ) + await MainActor.run { + self.results.insert(result, at: 0) + self.isRunning = false + self.isModelInitializing = false + self.statusMessage = AppLocalization.format("Completed %@", model.title) + } + } catch { + let result = LLMDebugResult( + id: UUID(), + modelTitle: model.title, + presetTitle: preset.title, + inputSummary: promptResolution.inputSummary, + durationText: String(format: "%.2fs", Date().timeIntervalSince(startedAt)), + createdAt: Date(), + outputText: "", + errorText: error.localizedDescription + ) + await MainActor.run { + self.results.insert(result, at: 0) + self.isRunning = false + self.isModelInitializing = false + self.statusMessage = error.localizedDescription + } + } + } + } + + private func preferredModelID() -> String { + let defaults = UserDefaults.standard + let enhancementMode = EnhancementMode(rawValue: defaults.string(forKey: AppPreferenceKey.enhancementMode) ?? "") + ?? .off + switch enhancementMode { + case .customLLM: + return "local-llm:\(CustomLLMModelCatalog.canonicalModelRepo(defaults.string(forKey: AppPreferenceKey.customLLMModelRepo) ?? 
CustomLLMModelManager.defaultModelRepo))" + case .remoteLLM: + let provider = RemoteLLMProvider(rawValue: defaults.string(forKey: AppPreferenceKey.remoteLLMSelectedProvider) ?? "") + ?? .openAI + return "remote-llm:\(provider.rawValue)" + case .off, .appleIntelligence: + return modelOptions.first?.id ?? "" + } + } + + private func resetVariableValuesForPreset() { + guard let preset = selectedPreset else { + variableValues = [:] + return + } + variableValues = preset.defaultValues + } + + private func mergedVariableValues(for preset: LLMDebugPresetOption) -> [String: String] { + preset.defaultValues.merging(variableValues) { _, rhs in rhs } + } + + private func applyGroupPrompt(_ prompt: String, groupID: UUID, defaults: UserDefaults) { + guard let data = defaults.data(forKey: AppPreferenceKey.appBranchGroups), + var groups = try? JSONDecoder().decode([AppBranchGroup].self, from: data), + let index = groups.firstIndex(where: { $0.id == groupID }) + else { return } + groups[index].prompt = prompt + guard let encoded = try? 
JSONEncoder().encode(groups) else { return } + defaults.set(encoded, forKey: AppPreferenceKey.appBranchGroups) + } + + private func requiresInitialization(for model: LLMDebugModelOption) -> Bool { + switch model.selection { + case .local(let repo): + return !customLLMManager.isModelLoaded(repo: repo) + case .remote: + return false + } + } + + private func run( + model: LLMDebugModelOption, + preset: LLMDebugPresetOption, + values: [String: String], + promptResolution: LLMDebugResolvedPrompt + ) async throws -> String { + switch preset.kind { + case .custom: + switch model.selection { + case .local(let repo): + return try await customLLMManager.enhance(userPrompt: promptResolution.content, repo: repo) + case .remote(let provider, let configuration): + return try await RemoteLLMRuntimeClient().enhance( + userPrompt: promptResolution.content, + provider: provider, + configuration: configuration + ) + } + case .enhancement, .appGroup: + let sourceText = values[AppDelegate.rawTranscriptionTemplateVariable] ?? "" + let prompt = promptResolution.content + switch model.selection { + case .local(let repo): + return try await customLLMManager.enhance( + sourceText, + systemPrompt: prompt, + modelRepo: repo + ) + case .remote(let provider, let configuration): + return try await RemoteLLMRuntimeClient().enhance( + text: sourceText, + systemPrompt: prompt, + provider: provider, + configuration: configuration + ) + } + case .translation: + let sourceText = values["{{SOURCE_TEXT}}"] ?? 
"" + let resolvedTarget = resolvedTranslationTargetLanguage(values: values) + let prompt = promptResolution.content + switch model.selection { + case .local(let repo): + return try await customLLMManager.translate( + sourceText, + targetLanguage: resolvedTarget, + systemPrompt: prompt, + modelRepo: repo + ) + case .remote(let provider, let configuration): + return try await RemoteLLMRuntimeClient().translate( + text: sourceText, + systemPrompt: prompt, + provider: provider, + configuration: configuration + ) + } + case .rewrite: + let dictatedPrompt = values["{{DICTATED_PROMPT}}"] ?? "" + let sourceText = values["{{SOURCE_TEXT}}"] ?? "" + let prompt = promptResolution.content + switch model.selection { + case .local(let repo): + return try await customLLMManager.rewrite( + sourceText: sourceText, + dictatedPrompt: dictatedPrompt, + systemPrompt: prompt, + modelRepo: repo + ) + case .remote(let provider, let configuration): + return try await RemoteLLMRuntimeClient().rewrite( + sourceText: sourceText, + dictatedPrompt: dictatedPrompt, + systemPrompt: prompt, + provider: provider, + configuration: configuration + ) + } + case .meetingSummary: + let prompt = promptResolution.content + switch model.selection { + case .local(let repo): + return try await customLLMManager.enhance(userPrompt: prompt, repo: repo) + case .remote(let provider, let configuration): + return try await RemoteLLMRuntimeClient().enhance( + userPrompt: prompt, + provider: provider, + configuration: configuration + ) + } + } + } + + private func resolvedTranslationTargetLanguage(values: [String: String]) -> TranslationTargetLanguage { + let requested = values["{{TARGET_LANGUAGE}}"]?.trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" + if let matched = TranslationTargetLanguage.allCases.first(where: { + $0.instructionName.caseInsensitiveCompare(requested) == .orderedSame + }) { + return matched + } + return TranslationTargetLanguage(rawValue: UserDefaults.standard.string(forKey: AppPreferenceKey.translationTargetLanguage) ?? "") + ?? .english + } +} diff --git a/Voxt/UI/ModelDebugWindowComponents.swift b/Voxt/UI/ModelDebugWindowComponents.swift new file mode 100644 index 0000000..2b5a558 --- /dev/null +++ b/Voxt/UI/ModelDebugWindowComponents.swift @@ -0,0 +1,615 @@ +import AppKit +import SwiftUI +import AVFoundation +import Combine + +struct ModelDebugWindowBackground: View { + var body: some View { + RoundedRectangle(cornerRadius: MeetingDetailUIStyle.windowCornerRadius, style: .continuous) + .fill(MeetingDetailUIStyle.windowFillColor) + .overlay( + RoundedRectangle(cornerRadius: MeetingDetailUIStyle.windowCornerRadius, style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + .ignoresSafeArea() + } +} + +struct ModelDebugToolbarSelectorLabel: View { + let title: String + let value: String + + var body: some View { + HStack(spacing: 10) { + VStack(alignment: .leading, spacing: 2) { + Text(title) + .font(.caption) + .foregroundStyle(.secondary) + Text(value) + .font(.system(size: 12, weight: .semibold)) + .lineLimit(1) + .truncationMode(.tail) + } + Spacer(minLength: 8) + Image(systemName: "chevron.up.chevron.down") + .font(.system(size: 10, weight: .semibold)) + .foregroundStyle(.secondary) + } + .padding(.horizontal, 12) + .frame(minWidth: ModelDebugWindowStyle.selectorMinWidth, idealWidth: ModelDebugWindowStyle.selectorIdealWidth, maxWidth: .infinity, minHeight: 44, maxHeight: 44) + .background( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } +} + +struct 
ModelDebugHeaderBadge: View { + var body: some View { + Text(modelDebugLocalized("Debug")) + .font(.system(size: 9, weight: .semibold)) + .foregroundStyle(.secondary) + .padding(.horizontal, 7) + .frame(height: 18) + .background( + Capsule(style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + Capsule(style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + .frame(width: 62, alignment: .leading) + .offset(x: 1) + } +} + +struct ModelDebugToast: View { + let message: String + let onClose: () -> Void + + var body: some View { + HStack(spacing: 10) { + Text(message) + .font(.system(size: 12, weight: .medium)) + .foregroundStyle(.primary) + .lineLimit(2) + Button(action: onClose) { + Image(systemName: "xmark") + .font(.system(size: 10, weight: .semibold)) + } + .buttonStyle(.plain) + .foregroundStyle(.secondary) + } + .padding(.horizontal, 12) + .frame(height: 34) + .background( + Capsule(style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + Capsule(style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + .shadow(color: Color.black.opacity(0.12), radius: 10, y: 4) + } +} + +struct ASRDebugAudioSelectorSheet: View { + @Environment(\.dismiss) private var dismiss + + let clips: [ASRDebugClipItem] + let selectedClipID: UUID? 
+ let onSelect: (UUID) -> Void + let onDelete: (UUID) -> Void + + var body: some View { + VStack(alignment: .leading, spacing: 16) { + HStack { + Text(modelDebugLocalized("Recorded Audio")) + .font(.title3.weight(.semibold)) + Spacer() + Button { + dismiss() + } label: { + Image(systemName: "xmark") + } + .buttonStyle(SettingsCompactIconButtonStyle()) + } + + if clips.isEmpty { + debugEmptyState( + title: modelDebugLocalized("No audio clips yet"), + detail: modelDebugLocalized("Record audio in this window first, then reuse the same clip across different models.") + ) + } else { + ScrollView { + LazyVStack(spacing: 10) { + ForEach(clips) { clip in + HStack(alignment: .center, spacing: 12) { + Button { + onSelect(clip.id) + dismiss() + } label: { + HStack(alignment: .center, spacing: 12) { + HStack(alignment: .firstTextBaseline, spacing: 8) { + Text(clip.displayTitle) + .font(.system(size: 13, weight: .semibold)) + .foregroundStyle(.primary) + .lineLimit(1) + Text(clip.clip.summaryText) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + } + Spacer() + if clip.id == selectedClipID { + Image(systemName: "checkmark.circle.fill") + .foregroundStyle(Color.accentColor) + } + } + .padding(12) + .frame(maxWidth: .infinity, alignment: .leading) + .background( + RoundedRectangle(cornerRadius: 14, style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 14, style: .continuous) + .strokeBorder( + clip.id == selectedClipID ? 
Color.accentColor.opacity(0.35) : MeetingDetailUIStyle.borderColor, + lineWidth: 1 + ) + ) + } + .buttonStyle(.plain) + + Button { + onDelete(clip.id) + } label: { + Image(systemName: "trash") + .font(.system(size: 13, weight: .semibold)) + .frame(width: 40, height: 40) + .background( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } + .buttonStyle(.plain) + } + } + } + } + } + } + .padding(18) + .frame(width: 580, height: 470) + .background(MeetingDetailUIStyle.windowFillColor) + } +} + +struct LLMDebugPresetSelectorSheet: View { + @Environment(\.dismiss) private var dismiss + + let presets: [LLMDebugPresetOption] + let selectedPresetID: String + let onSelect: (String) -> Void + + var body: some View { + VStack(alignment: .leading, spacing: 16) { + HStack { + Text(modelDebugLocalized("Choose Preset")) + .font(.title3.weight(.semibold)) + Spacer() + Button { + dismiss() + } label: { + Image(systemName: "xmark") + } + .buttonStyle(SettingsCompactIconButtonStyle()) + } + + ScrollView { + LazyVStack(spacing: 10) { + ForEach(presets) { preset in + Button { + onSelect(preset.id) + dismiss() + } label: { + HStack(alignment: .center, spacing: 12) { + HStack(alignment: .firstTextBaseline, spacing: 8) { + Text(preset.title) + .font(.system(size: 13, weight: .semibold)) + .foregroundStyle(.primary) + .lineLimit(1) + Text(preset.subtitle) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + } + Spacer() + if preset.id == selectedPresetID { + Image(systemName: "checkmark.circle.fill") + .foregroundStyle(Color.accentColor) + } + } + .padding(12) + .frame(maxWidth: .infinity, alignment: .leading) + .background( + RoundedRectangle(cornerRadius: 14, style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 14, 
style: .continuous) + .strokeBorder( + preset.id == selectedPresetID ? Color.accentColor.opacity(0.35) : MeetingDetailUIStyle.borderColor, + lineWidth: 1 + ) + ) + } + .buttonStyle(.plain) + } + } + } + } + .padding(18) + .frame(width: 520, height: 470) + .background(MeetingDetailUIStyle.windowFillColor) + } +} + +struct LLMDebugPromptSettingsSheet: View { + @Environment(\.dismiss) private var dismiss + + let preset: LLMDebugPresetOption + @Binding var variableValues: [String: String] + let onApply: (String) -> Void + let onSave: (String) -> Void + + @State private var draftPrompt = "" + + private var isCustomPreset: Bool { + if case .custom = preset.kind { + return true + } + return false + } + + var body: some View { + VStack(alignment: .leading, spacing: 16) { + HStack { + VStack(alignment: .leading, spacing: 4) { + Text(modelDebugLocalized("Preset Settings")) + .font(.title3.weight(.semibold)) + Text(preset.title) + .font(.caption) + .foregroundStyle(.secondary) + } + Spacer() + if !isCustomPreset { + Button(modelDebugLocalized("Apply")) { + onApply(draftPrompt) + } + .buttonStyle(SettingsPillButtonStyle(horizontalPadding: 10)) + } + Button(modelDebugLocalized("Save")) { + onSave(draftPrompt) + dismiss() + } + .buttonStyle(MeetingPrimaryButtonStyle()) + Button { + dismiss() + } label: { + Image(systemName: "xmark") + } + .buttonStyle(SettingsCompactIconButtonStyle()) + } + + if isCustomPreset { + VStack(alignment: .leading, spacing: 8) { + Text(modelDebugLocalized("Prompt")) + .font(.subheadline.weight(.medium)) + PromptEditorView( + text: $draftPrompt, + height: 310, + variables: [] + ) + } + } else { + GeometryReader { proxy in + let variableWidth = max(160, proxy.size.width * 0.3) + HStack(alignment: .top, spacing: 16) { + VStack(alignment: .leading, spacing: 8) { + Text(modelDebugLocalized("Variables")) + .font(.subheadline.weight(.medium)) + LLMDebugVariableEditor( + descriptors: preset.variables, + values: $variableValues + ) + Spacer(minLength: 0) + } + 
.frame(width: variableWidth, alignment: .topLeading) + + VStack(alignment: .leading, spacing: 8) { + Text(modelDebugLocalized("Prompt")) + .font(.subheadline.weight(.medium)) + PromptEditorView( + text: $draftPrompt, + height: 310, + variables: [] + ) + } + .frame(maxWidth: .infinity, alignment: .topLeading) + } + } + } + } + .padding(18) + .frame(width: 720, height: 450) + .background(MeetingDetailUIStyle.windowFillColor) + .onAppear { + draftPrompt = preset.promptTemplate + } + } +} + +struct LLMDebugVariableEditor: View { + let descriptors: [PromptTemplateVariableDescriptor] + @Binding var values: [String: String] + + private static let multilineTokens: Set = [ + "{{DICTATED_PROMPT}}", + "{{SOURCE_TEXT}}", + AppDelegate.rawTranscriptionTemplateVariable, + "{{MEETING_RECORD}}" + ] + + var body: some View { + ScrollView { + VStack(alignment: .leading, spacing: 10) { + ForEach(descriptors, id: \.id) { variable in + VStack(alignment: .leading, spacing: 6) { + HStack(alignment: .center, spacing: 8) { + Text(variable.token) + .font(.caption.monospaced()) + .foregroundStyle(.secondary) + Spacer(minLength: 8) + Button(modelDebugLocalized("Copy")) { + copy(variable.token) + } + .buttonStyle(SettingsPillButtonStyle(horizontalPadding: 8, height: 26)) + } + + if isMultiline(variable) { + TextEditor(text: binding(for: variable)) + .font(.system(size: 13)) + .scrollContentBackground(.hidden) + .padding(8) + .frame(minHeight: 96, maxHeight: 120, alignment: .topLeading) + .background( + RoundedRectangle(cornerRadius: 10, style: .continuous) + .fill(MeetingDetailUIStyle.panelFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 10, style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } else { + TextField( + AppLocalization.localizedString(variable.tipKey), + text: binding(for: variable) + ) + .textFieldStyle(.plain) + .font(.system(size: 13)) + .padding(.horizontal, 10) + .frame(height: 32) + .background( + 
RoundedRectangle(cornerRadius: 10, style: .continuous) + .fill(MeetingDetailUIStyle.panelFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 10, style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } + } + } + } + .padding(.vertical, 2) + } + } + + private func binding(for variable: PromptTemplateVariableDescriptor) -> Binding { + Binding( + get: { values[variable.token, default: ""] }, + set: { values[variable.token] = $0 } + ) + } + + private func isMultiline(_ variable: PromptTemplateVariableDescriptor) -> Bool { + Self.multilineTokens.contains(variable.token) + } + + private func copy(_ value: String) { + let pasteboard = NSPasteboard.general + pasteboard.clearContents() + pasteboard.setString(value, forType: .string) + } +} + +struct ASRDebugResultCard: View { + let result: ASRDebugResult + let onClose: () -> Void + + var body: some View { + VStack(alignment: .leading, spacing: 10) { + HStack(alignment: .center, spacing: 8) { + VStack(alignment: .leading, spacing: 2) { + Text(result.modelTitle) + .font(.headline) + .lineLimit(1) + Text(result.clipTitle) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + } + Spacer() + Button(action: onClose) { + Image(systemName: "xmark") + } + .buttonStyle(SettingsCompactIconButtonStyle()) + } + + ScrollView { + Text(result.isError ? (result.errorText ?? "") : (result.outputText.isEmpty ? modelDebugLocalized("No output.") : result.outputText)) + .font(.body) + .foregroundStyle(result.isError ? 
.red : .primary) + .frame(maxWidth: .infinity, alignment: .leading) + .textSelection(.enabled) + } + .frame(height: ModelDebugWindowStyle.resultCardBodyHeight) + + HStack(spacing: 10) { + Text(AppLocalization.format("%@ audio", result.audioDurationText)) + Text(AppLocalization.format("%@ run", result.runtimeText)) + Text(AppLocalization.format("%d chars", result.characterCount)) + Spacer() + } + .font(.caption) + .foregroundStyle(.secondary) + } + .padding(14) + .background( + RoundedRectangle(cornerRadius: 16, style: .continuous) + .fill(MeetingDetailUIStyle.panelFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 16, style: .continuous) + .strokeBorder(result.isError ? Color.red.opacity(0.22) : MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } +} + +struct LLMDebugResultCard: View { + let result: LLMDebugResult + let onClose: () -> Void + + var body: some View { + VStack(alignment: .leading, spacing: 10) { + HStack(alignment: .center, spacing: 8) { + VStack(alignment: .leading, spacing: 2) { + Text(result.modelTitle) + .font(.headline) + .lineLimit(1) + Text(result.presetTitle) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + } + Spacer() + Button(action: onClose) { + Image(systemName: "xmark") + } + .buttonStyle(SettingsCompactIconButtonStyle()) + } + + if !result.inputSummary.isEmpty { + Text(result.inputSummary) + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(3) + .frame(maxWidth: .infinity, alignment: .leading) + .textSelection(.enabled) + } + + ScrollView { + Text(result.isError ? (result.errorText ?? "") : (result.outputText.isEmpty ? modelDebugLocalized("No output.") : result.outputText)) + .font(.body) + .foregroundStyle(result.isError ? 
.red : .primary) + .frame(maxWidth: .infinity, alignment: .leading) + .textSelection(.enabled) + } + .frame(height: ModelDebugWindowStyle.resultCardBodyHeight) + + HStack(spacing: 10) { + Text(result.durationText) + Spacer() + } + .font(.caption) + .foregroundStyle(.secondary) + } + .padding(14) + .background( + RoundedRectangle(cornerRadius: 16, style: .continuous) + .fill(MeetingDetailUIStyle.panelFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 16, style: .continuous) + .strokeBorder(result.isError ? Color.red.opacity(0.22) : MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } +} + +func modelDebugConfigureWindowChrome(_ window: NSWindow) { + window.isMovableByWindowBackground = false + window.isOpaque = false + window.backgroundColor = .clear + window.hasShadow = true +} + +func modelDebugPositionWindowTrafficLightButtons(_ window: NSWindow) { + guard let closeButton = window.standardWindowButton(.closeButton), + let miniaturizeButton = window.standardWindowButton(.miniaturizeButton), + let zoomButton = window.standardWindowButton(.zoomButton), + let container = closeButton.superview + else { + return + } + + let leftInset: CGFloat = 15 + let topInset: CGFloat = 17 + let spacing: CGFloat = 6 + + let buttonSize = closeButton.frame.size + let y = container.bounds.height - topInset - buttonSize.height + let closeX = leftInset + let miniaturizeX = closeX + buttonSize.width + spacing + let zoomX = miniaturizeX + buttonSize.width + spacing + + closeButton.translatesAutoresizingMaskIntoConstraints = true + miniaturizeButton.translatesAutoresizingMaskIntoConstraints = true + zoomButton.translatesAutoresizingMaskIntoConstraints = true + + closeButton.setFrameOrigin(CGPoint(x: closeX, y: y)) + miniaturizeButton.setFrameOrigin(CGPoint(x: miniaturizeX, y: y)) + zoomButton.setFrameOrigin(CGPoint(x: zoomX, y: y)) +} + +func modelDebugScheduleTrafficLightButtonPositionUpdate(for window: NSWindow) { + DispatchQueue.main.async { [weak window] in + guard let 
window else { return } + modelDebugPositionWindowTrafficLightButtons(window) + } +} + +@ViewBuilder +func debugEmptyState(title: String, detail: String) -> some View { + VStack(spacing: 8) { + Text(title) + .font(.headline) + Text(detail) + .font(.subheadline) + .foregroundStyle(.secondary) + .multilineTextAlignment(.center) + } + .frame(maxWidth: .infinity) + .padding(.vertical, 48) +} diff --git a/Voxt/UI/ModelDebugWindowControllers.swift b/Voxt/UI/ModelDebugWindowControllers.swift new file mode 100644 index 0000000..ab6288d --- /dev/null +++ b/Voxt/UI/ModelDebugWindowControllers.swift @@ -0,0 +1,178 @@ +import AppKit +import SwiftUI +import AVFoundation +import Combine + +@MainActor +final class ASRDebugWindowManager { + static let shared = ASRDebugWindowManager() + + private var controller: ASRDebugWindowController? + + func present(appDelegate: AppDelegate) { + let controller = resolvedController(appDelegate: appDelegate) + controller.refresh() + controller.showWindow(nil) + controller.window?.makeKeyAndOrderFront(nil) + controller.window?.orderFrontRegardless() + NSApp.activate(ignoringOtherApps: true) + } + + private func resolvedController(appDelegate: AppDelegate) -> ASRDebugWindowController { + if let controller { + return controller + } + let controller = ASRDebugWindowController(viewModel: ASRDebugViewModel(appDelegate: appDelegate)) { [weak self] in + self?.controller = nil + } + self.controller = controller + return controller + } +} + +@MainActor +final class LLMDebugWindowManager { + static let shared = LLMDebugWindowManager() + + private var controller: LLMDebugWindowController? 
+ + func present(appDelegate: AppDelegate) { + let controller = resolvedController(appDelegate: appDelegate) + controller.refresh() + controller.showWindow(nil) + controller.window?.makeKeyAndOrderFront(nil) + controller.window?.orderFrontRegardless() + NSApp.activate(ignoringOtherApps: true) + } + + private func resolvedController(appDelegate: AppDelegate) -> LLMDebugWindowController { + if let controller { + return controller + } + let controller = LLMDebugWindowController(viewModel: LLMDebugViewModel(appDelegate: appDelegate)) { [weak self] in + self?.controller = nil + } + self.controller = controller + return controller + } +} + +@MainActor +private final class ASRDebugWindowController: NSWindowController, NSWindowDelegate { + private let viewModel: ASRDebugViewModel + private let onClose: () -> Void + + init(viewModel: ASRDebugViewModel, onClose: @escaping () -> Void) { + self.viewModel = viewModel + self.onClose = onClose + + let rootView = ASRDebugWindowView(viewModel: viewModel) + let hostingController = NSHostingController(rootView: rootView) + let window = NSWindow( + contentRect: NSRect(origin: .zero, size: NSSize(width: ModelDebugWindowStyle.width, height: ModelDebugWindowStyle.height)), + styleMask: [.titled, .closable, .miniaturizable, .resizable, .fullSizeContentView], + backing: .buffered, + defer: false + ) + window.contentViewController = hostingController + window.title = modelDebugLocalized("ASR Debug") + window.titleVisibility = .hidden + window.titlebarAppearsTransparent = true + window.toolbar = nil + window.collectionBehavior = [.moveToActiveSpace, .fullScreenAuxiliary] + window.isReleasedWhenClosed = false + window.minSize = NSSize(width: ModelDebugWindowStyle.minWidth, height: ModelDebugWindowStyle.minHeight) + window.setContentSize(NSSize(width: ModelDebugWindowStyle.width, height: ModelDebugWindowStyle.height)) + window.center() + modelDebugConfigureWindowChrome(window) + + super.init(window: window) + window.delegate = self + 
modelDebugPositionWindowTrafficLightButtons(window) + modelDebugScheduleTrafficLightButtonPositionUpdate(for: window) + } + + @available(*, unavailable) + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + func refresh() { + viewModel.refreshOptions() + } + + func windowWillClose(_ notification: Notification) { + viewModel.handleWindowClose() + onClose() + } + + func windowDidResize(_ notification: Notification) { + guard let window else { return } + modelDebugScheduleTrafficLightButtonPositionUpdate(for: window) + } + + func windowDidBecomeKey(_ notification: Notification) { + guard let window else { return } + modelDebugScheduleTrafficLightButtonPositionUpdate(for: window) + } +} + +@MainActor +private final class LLMDebugWindowController: NSWindowController, NSWindowDelegate { + private let viewModel: LLMDebugViewModel + private let onClose: () -> Void + + init(viewModel: LLMDebugViewModel, onClose: @escaping () -> Void) { + self.viewModel = viewModel + self.onClose = onClose + + let rootView = LLMDebugWindowView(viewModel: viewModel) + let hostingController = NSHostingController(rootView: rootView) + let window = NSWindow( + contentRect: NSRect(origin: .zero, size: NSSize(width: ModelDebugWindowStyle.width, height: ModelDebugWindowStyle.height)), + styleMask: [.titled, .closable, .miniaturizable, .resizable, .fullSizeContentView], + backing: .buffered, + defer: false + ) + window.contentViewController = hostingController + window.title = modelDebugLocalized("LLM Debug") + window.titleVisibility = .hidden + window.titlebarAppearsTransparent = true + window.toolbar = nil + window.collectionBehavior = [.moveToActiveSpace, .fullScreenAuxiliary] + window.isReleasedWhenClosed = false + window.minSize = NSSize(width: ModelDebugWindowStyle.minWidth, height: ModelDebugWindowStyle.minHeight) + window.setContentSize(NSSize(width: ModelDebugWindowStyle.width, height: ModelDebugWindowStyle.height)) + window.center() + 
modelDebugConfigureWindowChrome(window) + + super.init(window: window) + window.delegate = self + modelDebugPositionWindowTrafficLightButtons(window) + modelDebugScheduleTrafficLightButtonPositionUpdate(for: window) + } + + @available(*, unavailable) + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + func refresh() { + viewModel.refreshOptions() + } + + func windowWillClose(_ notification: Notification) { + viewModel.handleWindowClose() + onClose() + } + + func windowDidResize(_ notification: Notification) { + guard let window else { return } + modelDebugScheduleTrafficLightButtonPositionUpdate(for: window) + } + + func windowDidBecomeKey(_ notification: Notification) { + guard let window else { return } + modelDebugScheduleTrafficLightButtonPositionUpdate(for: window) + } +} diff --git a/Voxt/UI/ModelDebugWindowViews.swift b/Voxt/UI/ModelDebugWindowViews.swift new file mode 100644 index 0000000..8e3e228 --- /dev/null +++ b/Voxt/UI/ModelDebugWindowViews.swift @@ -0,0 +1,314 @@ +import AppKit +import SwiftUI +import AVFoundation +import Combine + +struct ASRDebugWindowView: View { + @ObservedObject var viewModel: ASRDebugViewModel + @State private var isModelSelectorPresented = false + @State private var isAudioSelectorPresented = false + + private let columns = [ + GridItem(.flexible(minimum: 280), spacing: 12), + GridItem(.flexible(minimum: 280), spacing: 12) + ] + + private var selectorEntries: [FeatureModelSelectorEntry] { + viewModel.options.map(\.selectorEntry) + } + + var body: some View { + VStack(spacing: 8) { + HStack(alignment: .bottom, spacing: 10) { + ModelDebugHeaderBadge() + + Button { + isModelSelectorPresented = true + } label: { + ModelDebugToolbarSelectorLabel( + title: modelDebugLocalized("Model"), + value: viewModel.selectedModelTitle + ) + } + .buttonStyle(.plain) + .layoutPriority(1) + + Button { + viewModel.toggleRecording() + } label: { + Image(systemName: viewModel.isRecording ? 
"stop.fill" : "mic.fill") + .font(.system(size: 16, weight: .semibold)) + .foregroundStyle(viewModel.isRecording ? Color.green : Color.primary) + .frame(width: 44, height: 44) + .background( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .strokeBorder( + viewModel.isRecording ? Color.green.opacity(0.28) : MeetingDetailUIStyle.borderColor, + lineWidth: 1 + ) + ) + } + .buttonStyle(.plain) + .disabled(viewModel.isRunning) + + Button { + isAudioSelectorPresented = true + } label: { + ModelDebugToolbarSelectorLabel( + title: modelDebugLocalized("Audio"), + value: viewModel.selectedClipTitle + ) + } + .buttonStyle(.plain) + .disabled(viewModel.clips.isEmpty) + .layoutPriority(1) + + Button { + viewModel.generateSelectedClip() + } label: { + HStack(spacing: 6) { + if viewModel.isRunning { + if viewModel.isModelInitializing { + ModelInitializingIconView() + .frame(width: 14, height: 14) + } else { + ProgressView() + .controlSize(.small) + .tint(.white) + .scaleEffect(0.75) + } + } + Text(modelDebugLocalized(viewModel.isModelInitializing ? 
"Initializing…" : "Generate")) + .font(.system(size: 12, weight: .semibold)) + .lineLimit(1) + } + .frame(width: 92, height: 30) + } + .buttonStyle(MeetingPrimaryButtonStyle()) + .disabled(viewModel.isRunning || viewModel.isRecording) + } + .frame(height: 54) + + ScrollView { + Group { + if viewModel.results.isEmpty { + VStack { + debugEmptyState( + title: modelDebugLocalized("No debug results yet"), + detail: modelDebugLocalized("Record audio once, then switch models or clips to compare transcription output.") + ) + } + .frame(maxWidth: .infinity, minHeight: 360, alignment: .center) + } else { + LazyVGrid(columns: columns, spacing: 12) { + ForEach(viewModel.results) { result in + ASRDebugResultCard( + result: result, + onClose: { viewModel.removeResult(result.id) } + ) + } + } + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.top, 2) + } + } + .frame(maxWidth: .infinity) + } + } + .padding(.horizontal, 10) + .padding(.top, 10) + .padding(.bottom, 8) + .frame(maxWidth: .infinity, maxHeight: .infinity) + .background(ModelDebugWindowBackground()) + .ignoresSafeArea(.container, edges: .top) + .overlay(alignment: .top) { + if !viewModel.toastMessage.isEmpty { + ModelDebugToast(message: viewModel.toastMessage) { + viewModel.dismissToast() + } + .padding(.top, 54) + .transition(.move(edge: .top).combined(with: .opacity)) + } + } + .sheet(isPresented: $isModelSelectorPresented) { + FeatureModelSelectorDialog( + title: modelDebugLocalized("Choose Transcription ASR"), + entries: selectorEntries, + selectedID: FeatureModelSelectionID(rawValue: viewModel.selectedModelID), + onSelect: { selectionID in + viewModel.selectedModelID = selectionID.rawValue + } + ) + } + .sheet(isPresented: $isAudioSelectorPresented) { + ASRDebugAudioSelectorSheet( + clips: viewModel.clips, + selectedClipID: viewModel.selectedClipID, + onSelect: { viewModel.selectedClipID = $0 }, + onDelete: { viewModel.deleteClip($0) } + ) + } + } +} + +struct LLMDebugWindowView: View { + 
@ObservedObject var viewModel: LLMDebugViewModel + @State private var isModelSelectorPresented = false + @State private var isPresetSelectorPresented = false + @State private var isPromptSettingsPresented = false + + private var columns: [GridItem] { + [ + GridItem(.flexible(minimum: 260, maximum: .infinity), spacing: 12, alignment: .top), + GridItem(.flexible(minimum: 260, maximum: .infinity), spacing: 12, alignment: .top) + ] + } + + private var selectorEntries: [FeatureModelSelectorEntry] { + viewModel.modelOptions.map(\.selectorEntry) + } + + var body: some View { + VStack(spacing: 12) { + HStack(alignment: .bottom, spacing: 10) { + ModelDebugHeaderBadge() + + Button { + isModelSelectorPresented = true + } label: { + ModelDebugToolbarSelectorLabel( + title: modelDebugLocalized("Model"), + value: viewModel.selectedModelTitle + ) + } + .buttonStyle(.plain) + .layoutPriority(1) + + Button { + isPresetSelectorPresented = true + } label: { + ModelDebugToolbarSelectorLabel( + title: modelDebugLocalized("Preset"), + value: viewModel.selectedPresetTitle + ) + } + .buttonStyle(.plain) + .layoutPriority(1) + + Button { + isPromptSettingsPresented = true + } label: { + Image(systemName: "slider.horizontal.3") + .font(.system(size: 14, weight: .semibold)) + .frame(width: 44, height: 44) + .background( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .fill(MeetingDetailUIStyle.controlFillColor) + ) + .overlay( + RoundedRectangle(cornerRadius: 12, style: .continuous) + .strokeBorder(MeetingDetailUIStyle.borderColor, lineWidth: 1) + ) + } + .buttonStyle(.plain) + .disabled(viewModel.selectedPreset == nil || viewModel.isRunning) + + Button { + viewModel.run() + } label: { + HStack(spacing: 6) { + if viewModel.isRunning { + if viewModel.isModelInitializing { + ModelInitializingIconView() + .frame(width: 14, height: 14) + } else { + ProgressView() + .controlSize(.small) + .tint(.white) + .scaleEffect(0.75) + } + } + 
Text(modelDebugLocalized(viewModel.isModelInitializing ? "Initializing…" : "Generate")) + .font(.system(size: 12, weight: .semibold)) + .lineLimit(1) + } + .frame(width: 92, height: 30) + } + .buttonStyle(MeetingPrimaryButtonStyle()) + .disabled(viewModel.isRunning || viewModel.selectedPreset == nil || viewModel.selectedModelID.isEmpty) + } + .frame(height: 54) + + ScrollView { + Group { + if viewModel.results.isEmpty { + VStack { + debugEmptyState( + title: modelDebugLocalized("No debug results yet"), + detail: modelDebugLocalized("Choose a preset, fill variables, and run different models to compare outputs.") + ) + } + .frame(maxWidth: .infinity, minHeight: 360, alignment: .center) + } else { + LazyVGrid(columns: columns, spacing: 12) { + ForEach(viewModel.results) { result in + LLMDebugResultCard( + result: result, + onClose: { viewModel.removeResult(result.id) } + ) + } + } + .frame(maxWidth: .infinity, alignment: .leading) + .padding(.top, 2) + } + } + .frame(maxWidth: .infinity) + } + } + .padding(.horizontal, 10) + .padding(.top, 10) + .padding(.bottom, 8) + .frame(maxWidth: .infinity, maxHeight: .infinity) + .background(ModelDebugWindowBackground()) + .ignoresSafeArea(.container, edges: .top) + .sheet(isPresented: $isModelSelectorPresented) { + FeatureModelSelectorDialog( + title: modelDebugLocalized("Choose Transcription LLM"), + entries: selectorEntries, + selectedID: FeatureModelSelectionID(rawValue: viewModel.selectedModelID), + onSelect: { selectionID in + viewModel.selectedModelID = selectionID.rawValue + } + ) + } + .sheet(isPresented: $isPresetSelectorPresented) { + LLMDebugPresetSelectorSheet( + presets: viewModel.presetOptions, + selectedPresetID: viewModel.selectedPresetID, + onSelect: { presetID in + viewModel.selectedPresetID = presetID + viewModel.presetDidChange() + } + ) + } + .sheet(isPresented: $isPromptSettingsPresented) { + if let selectedPreset = viewModel.selectedPreset { + LLMDebugPromptSettingsSheet( + preset: selectedPreset, + 
variableValues: $viewModel.variableValues, + onApply: { prompt in + viewModel.savePromptTemplate(prompt) + viewModel.applyPromptTemplate(prompt) + }, + onSave: { prompt in + viewModel.savePromptTemplate(prompt) + } + ) + } + } + } +} diff --git a/Voxt/en.lproj/Localizable.strings b/Voxt/en.lproj/Localizable.strings index da053e1..8cbe48c 100644 --- a/Voxt/en.lproj/Localizable.strings +++ b/Voxt/en.lproj/Localizable.strings @@ -231,6 +231,9 @@ "Download" = "Download"; "Unknown" = "Unknown"; "Loading…" = "Loading…"; +"Initializing…" = "Initializing…"; +"Voxt is using the macOS system proxy (%@), but that proxy is unreachable. Make sure Clash/your proxy app is running, or switch Voxt to Direct Connection if you don't need a proxy." = "Voxt is using the macOS system proxy (%@), but that proxy is unreachable. Make sure Clash/your proxy app is running, or switch Voxt to Direct Connection if you don't need a proxy."; +"Voxt is using the custom proxy (%@://%@:%d), but that proxy is unreachable. Check the proxy address, port, and whether the proxy app is running." = "Voxt is using the custom proxy (%@://%@:%d), but that proxy is unreachable. Check the proxy address, port, and whether the proxy app is running."; "Downloaded: %@ / %@" = "Downloaded: %@ / %@"; "Downloaded: %@" = "Downloaded: %@"; "%d/%d files" = "%d/%d files"; @@ -1362,3 +1365,90 @@ "Allow GLM to call its hosted web search tool for fresher answers. This may increase latency and usage." = "Allow GLM to call its hosted web search tool for fresher answers. This may increase latency and usage."; "Allow Qwen to use built-in web search. Enabled by default for Aliyun because it is available directly in the official compatible API." = "Allow Qwen to use built-in web search. Enabled by default for Aliyun because it is available directly in the official compatible API."; "Use provider-hosted web search when available. This may increase latency and usage." = "Use provider-hosted web search when available. 
This may increase latency and usage."; +"New" = "New"; +"即将下线" = "Retiring Soon"; +"Debug" = "Debug"; +"ASR Debug" = "ASR Debug"; +"LLM Debug" = "LLM Debug"; +"Local MLX Audio" = "Local MLX Audio"; +"Local Whisper" = "Local Whisper"; +"Configured Remote ASR" = "Configured Remote ASR"; +"Debug-only preset" = "Debug-only preset"; +"Transcription Enhancement" = "Transcription Enhancement"; +"Built-in preset" = "Built-in preset"; +"App Enhancement · %@" = "App Enhancement · %@"; +"Saved group preset" = "Saved group preset"; +"Failed to start recording." = "Failed to start recording."; +"No available model." = "No available model."; +"No audio selected." = "No audio selected."; +"Recording…" = "Recording…"; +"No recording was captured." = "No recording was captured."; +"Recorded clip ready: %@" = "Recorded clip ready: %@"; +"Running %@" = "Running %@"; +"Completed %@" = "Completed %@"; +"Select Model" = "Select Model"; +"Select Audio" = "Select Audio"; +"Select Preset" = "Select Preset"; +"No debug results yet" = "No debug results yet"; +"Record audio once, then switch models or clips to compare transcription output." = "Record audio once, then switch models or clips to compare transcription output."; +"Choose a preset, fill variables, and run different models to compare outputs." = "Choose a preset, fill variables, and run different models to compare outputs."; +"Recorded Audio" = "Recorded Audio"; +"No audio clips yet" = "No audio clips yet"; +"Record audio in this window first, then reuse the same clip across different models." = "Record audio in this window first, then reuse the same clip across different models."; +"Choose Preset" = "Choose Preset"; +"Preset Settings" = "Preset Settings"; +"Variables" = "Variables"; +"No output." = "No output."; +"Recorded" = "Recorded"; +"Reused Audio" = "Reused Audio"; +"%@ audio" = "%@ audio"; +"%@ run" = "%@ run"; +"%d chars" = "%d chars"; +"Unable to allocate audio buffer." 
= "Unable to allocate audio buffer."; +"Unable to decode audio samples." = "Unable to decode audio samples."; +"1 model needs setup" = "1 model needs setup"; +"%d models need setup" = "%d models need setup"; +"Generate" = "Generate"; +"Apply" = "Apply"; +"Debug" = "Debug"; +"ASR Debug" = "ASR Debug"; +"LLM Debug" = "LLM Debug"; +"Local MLX Audio" = "Local MLX Audio"; +"Configured Remote ASR" = "Configured Remote ASR"; +"Debug-only preset" = "Debug-only preset"; +"Transcription Enhancement" = "Transcription Enhancement"; +"Built-in preset" = "Built-in preset"; +"App Enhancement · %@" = "App Enhancement · %@"; +"Saved group preset" = "Saved group preset"; +"Failed to start recording." = "Failed to start recording."; +"No available model." = "No available model."; +"No audio selected." = "No audio selected."; +"Recording…" = "Recording…"; +"No recording was captured." = "No recording was captured."; +"Recorded clip ready: %@" = "Recorded clip ready: %@"; +"Running %@" = "Running %@"; +"Completed %@" = "Completed %@"; +"Select Model" = "Select Model"; +"Select Audio" = "Select Audio"; +"Select Preset" = "Select Preset"; +"No debug results yet" = "No debug results yet"; +"Record audio once, then switch models or clips to compare transcription output." = "Record audio once, then switch models or clips to compare transcription output."; +"Choose a preset, fill variables, and run different models to compare outputs." = "Choose a preset, fill variables, and run different models to compare outputs."; +"Recorded Audio" = "Recorded Audio"; +"No audio clips yet" = "No audio clips yet"; +"Record audio in this window first, then reuse the same clip across different models." = "Record audio in this window first, then reuse the same clip across different models."; +"Choose Preset" = "Choose Preset"; +"Preset Settings" = "Preset Settings"; +"Variables" = "Variables"; +"No output." 
= "No output."; +"Recorded" = "Recorded"; +"Reused Audio" = "Reused Audio"; +"Unable to allocate audio buffer." = "Unable to allocate audio buffer."; +"Unable to decode audio samples." = "Unable to decode audio samples."; +"1 model needs setup" = "1 model needs setup"; +"%d models need setup" = "%d models need setup"; +"Logging" = "Logging"; +"Enable hotkey debug logs" = "Enable hotkey debug logs"; +"Records hotkey detection, trigger routing, and shortcut handling details for debugging." = "Records hotkey detection, trigger routing, and shortcut handling details for debugging."; +"Enable model debug logs" = "Enable model debug logs"; +"Records local and remote model details, including LLM, ASR, model downloads, and model routing, for debugging." = "Records local and remote model details, including LLM, ASR, model downloads, and model routing, for debugging."; diff --git a/Voxt/ja.lproj/Localizable.strings b/Voxt/ja.lproj/Localizable.strings index 70f53ad..9d12987 100644 --- a/Voxt/ja.lproj/Localizable.strings +++ b/Voxt/ja.lproj/Localizable.strings @@ -229,6 +229,9 @@ "Download" = "ダウンロード"; "Unknown" = "不明"; "Loading…" = "読み込み中…"; +"Initializing…" = "初期化中…"; +"Voxt is using the macOS system proxy (%@), but that proxy is unreachable. Make sure Clash/your proxy app is running, or switch Voxt to Direct Connection if you don't need a proxy." = "Voxt は macOS のシステムプロキシ(%@)を使用していますが、そのプロキシに接続できません。Clash/プロキシアプリが起動しているか確認するか、プロキシが不要なら Voxt を直接接続に切り替えてください。"; +"Voxt is using the custom proxy (%@://%@:%d), but that proxy is unreachable. Check the proxy address, port, and whether the proxy app is running." = "Voxt はカスタムプロキシ(%@://%@:%d)を使用していますが、そのプロキシに接続できません。プロキシのアドレス、ポート、プロキシアプリが起動しているかを確認してください。"; "Downloaded: %@ / %@" = "ダウンロード済み: %@ / %@"; "Downloaded: %@" = "ダウンロード済み: %@"; "%d/%d files" = "%d/%d ファイル"; @@ -1287,3 +1290,90 @@ "Allow GLM to call its hosted web search tool for fresher answers. This may increase latency and usage." 
= "GLM がホスト型 Web 検索ツールを呼び出して、より新しい回答を返せるようにします。遅延や利用量が増える場合があります。"; "Allow Qwen to use built-in web search. Enabled by default for Aliyun because it is available directly in the official compatible API." = "Qwen が組み込み Web 検索を使えるようにします。公式互換 API で直接利用できるため、Alibaba Cloud ではデフォルトで有効です。"; "Use provider-hosted web search when available. This may increase latency and usage." = "利用可能な場合はプロバイダー提供の Web 検索を使います。遅延や利用量が増える場合があります。"; +"New" = "New"; +"即将下线" = "提供終了予定"; +"Debug" = "デバッグ"; +"ASR Debug" = "ASR デバッグ"; +"LLM Debug" = "LLM デバッグ"; +"Local MLX Audio" = "ローカル MLX Audio"; +"Local Whisper" = "ローカル Whisper"; +"Configured Remote ASR" = "設定済みリモート ASR"; +"Debug-only preset" = "デバッグ専用プリセット"; +"Transcription Enhancement" = "文字起こし強化"; +"Built-in preset" = "内蔵プリセット"; +"App Enhancement · %@" = "アプリ強化 · %@"; +"Saved group preset" = "保存済みグループプリセット"; +"Failed to start recording." = "録音を開始できませんでした。"; +"No available model." = "利用可能なモデルがありません。"; +"No audio selected." = "音声が選択されていません。"; +"Recording…" = "録音中…"; +"No recording was captured." = "録音が取得されませんでした。"; +"Recorded clip ready: %@" = "録音クリップの準備完了: %@"; +"Running %@" = "%@ を実行中"; +"Completed %@" = "%@ が完了しました"; +"Select Model" = "モデルを選択"; +"Select Audio" = "音声を選択"; +"Select Preset" = "プリセットを選択"; +"No debug results yet" = "デバッグ結果はまだありません"; +"Record audio once, then switch models or clips to compare transcription output." = "一度音声を録音してから、モデルまたはクリップを切り替えて文字起こし結果を比較してください。"; +"Choose a preset, fill variables, and run different models to compare outputs." = "プリセットを選び、変数を入力して、異なるモデルを実行し出力を比較してください。"; +"Recorded Audio" = "録音済み音声"; +"No audio clips yet" = "音声クリップはまだありません"; +"Record audio in this window first, then reuse the same clip across different models." = "まずこのウィンドウで音声を録音し、その同じクリップを異なるモデルで再利用してください。"; +"Choose Preset" = "プリセットを選択"; +"Preset Settings" = "プリセット設定"; +"Variables" = "変数"; +"No output." 
= "出力がありません。"; +"Recorded" = "録音"; +"Reused Audio" = "再利用音声"; +"%@ audio" = "%@ 音声"; +"%@ run" = "%@ 実行"; +"%d chars" = "%d 文字"; +"Unable to allocate audio buffer." = "音声バッファを確保できません。"; +"Unable to decode audio samples." = "音声サンプルをデコードできません。"; +"1 model needs setup" = "1 個のモデルで設定が必要です"; +"%d models need setup" = "%d 個のモデルで設定が必要です"; +"Generate" = "生成"; +"Apply" = "適用"; +"Debug" = "デバッグ"; +"ASR Debug" = "ASR デバッグ"; +"LLM Debug" = "LLM デバッグ"; +"Local MLX Audio" = "ローカル MLX Audio"; +"Configured Remote ASR" = "設定済みリモート ASR"; +"Debug-only preset" = "デバッグ専用プリセット"; +"Transcription Enhancement" = "文字起こし強化"; +"Built-in preset" = "内蔵プリセット"; +"App Enhancement · %@" = "アプリ強化 · %@"; +"Saved group preset" = "保存済みグループプリセット"; +"Failed to start recording." = "録音を開始できませんでした。"; +"No available model." = "利用可能なモデルがありません。"; +"No audio selected." = "音声が選択されていません。"; +"Recording…" = "録音中…"; +"No recording was captured." = "録音が取得されませんでした。"; +"Recorded clip ready: %@" = "録音クリップの準備完了: %@"; +"Running %@" = "%@ を実行中"; +"Completed %@" = "%@ が完了しました"; +"Select Model" = "モデルを選択"; +"Select Audio" = "音声を選択"; +"Select Preset" = "プリセットを選択"; +"No debug results yet" = "デバッグ結果はまだありません"; +"Record audio once, then switch models or clips to compare transcription output." = "一度音声を録音してから、モデルまたはクリップを切り替えて文字起こし結果を比較してください。"; +"Choose a preset, fill variables, and run different models to compare outputs." = "プリセットを選び、変数を入力して、異なるモデルを実行し出力を比較してください。"; +"Recorded Audio" = "録音済み音声"; +"No audio clips yet" = "音声クリップはまだありません"; +"Record audio in this window first, then reuse the same clip across different models." = "まずこのウィンドウで音声を録音し、その同じクリップを異なるモデルで再利用してください。"; +"Choose Preset" = "プリセットを選択"; +"Preset Settings" = "プリセット設定"; +"Variables" = "変数"; +"No output." = "出力がありません。"; +"Recorded" = "録音"; +"Reused Audio" = "再利用音声"; +"Unable to allocate audio buffer." = "音声バッファを確保できません。"; +"Unable to decode audio samples." 
= "音声サンプルをデコードできません。"; +"1 model needs setup" = "1 個のモデルで設定が必要です"; +"%d models need setup" = "%d 個のモデルで設定が必要です"; +"Logging" = "ログ"; +"Enable hotkey debug logs" = "ホットキーのデバッグログを有効にする"; +"Records hotkey detection, trigger routing, and shortcut handling details for debugging." = "ホットキーの検出、トリガールーティング、ショートカット処理の詳細を記録し、デバッグに役立てます。"; +"Enable model debug logs" = "モデルのデバッグログを有効にする"; +"Records local and remote model details, including LLM, ASR, model downloads, and model routing, for debugging." = "LLM、ASR、モデルのダウンロード、モデルルーティングを含む、ローカルおよびリモートモデルの詳細を記録し、デバッグに役立てます。"; diff --git a/Voxt/zh-Hans.lproj/Localizable.strings b/Voxt/zh-Hans.lproj/Localizable.strings index 65ae7fd..8274aff 100644 --- a/Voxt/zh-Hans.lproj/Localizable.strings +++ b/Voxt/zh-Hans.lproj/Localizable.strings @@ -231,6 +231,9 @@ "Download" = "下载"; "Unknown" = "未知"; "Loading…" = "加载中…"; +"Initializing…" = "初始化中…"; +"Voxt is using the macOS system proxy (%@), but that proxy is unreachable. Make sure Clash/your proxy app is running, or switch Voxt to Direct Connection if you don't need a proxy." = "Voxt 正在使用 macOS 系统代理(%@),但该代理当前不可达。请确认 Clash/你的代理应用正在运行;如果你不需要代理,也可以将 Voxt 切换为直连。"; +"Voxt is using the custom proxy (%@://%@:%d), but that proxy is unreachable. Check the proxy address, port, and whether the proxy app is running." = "Voxt 正在使用自定义代理(%@://%@:%d),但该代理当前不可达。请检查代理地址、端口,以及代理应用是否正在运行。"; "Downloaded: %@ / %@" = "已下载:%@ / %@"; "Downloaded: %@" = "已下载:%@"; "%d/%d files" = "%d/%d 个文件"; @@ -1364,3 +1367,90 @@ "Allow GLM to call its hosted web search tool for fresher answers. This may increase latency and usage." = "允许 GLM 调用其托管的网页搜索工具,以获得更新的回答。这可能会增加延迟和用量。"; "Allow Qwen to use built-in web search. Enabled by default for Aliyun because it is available directly in the official compatible API." = "允许 Qwen 使用内置网页搜索。阿里云默认开启,因为官方兼容 API 已直接支持该能力。"; "Use provider-hosted web search when available. This may increase latency and usage." 
= "在可用时使用提供商托管的网页搜索。这可能会增加延迟和用量。"; +"New" = "新"; +"即将下线" = "即将下线"; +"Debug" = "调试"; +"ASR Debug" = "ASR 调试"; +"LLM Debug" = "LLM 调试"; +"Local MLX Audio" = "本地 MLX Audio"; +"Local Whisper" = "本地 Whisper"; +"Configured Remote ASR" = "已配置远端 ASR"; +"Debug-only preset" = "仅用于调试的预设"; +"Transcription Enhancement" = "转录增强"; +"Built-in preset" = "内置预设"; +"App Enhancement · %@" = "应用增强 · %@"; +"Saved group preset" = "已保存的分组预设"; +"Failed to start recording." = "无法开始录音。"; +"No available model." = "没有可用模型。"; +"No audio selected." = "尚未选择音频。"; +"Recording…" = "录音中…"; +"No recording was captured." = "未捕获到录音。"; +"Recorded clip ready: %@" = "录音片段已就绪:%@"; +"Running %@" = "正在运行 %@"; +"Completed %@" = "已完成 %@"; +"Select Model" = "选择模型"; +"Select Audio" = "选择音频"; +"Select Preset" = "选择预设"; +"No debug results yet" = "暂无调试结果"; +"Record audio once, then switch models or clips to compare transcription output." = "先录制一次音频,再切换模型或音频片段以比较转录结果。"; +"Choose a preset, fill variables, and run different models to compare outputs." = "选择预设、填写变量,然后运行不同模型以比较输出结果。"; +"Recorded Audio" = "已录制音频"; +"No audio clips yet" = "暂无音频片段"; +"Record audio in this window first, then reuse the same clip across different models." = "请先在此窗口录音,然后复用同一段音频比较不同模型。"; +"Choose Preset" = "选择预设"; +"Preset Settings" = "预设设置"; +"Variables" = "变量"; +"No output." = "无输出。"; +"Recorded" = "录制"; +"Reused Audio" = "复用音频"; +"%@ audio" = "%@ 音频"; +"%@ run" = "%@ 运行"; +"%d chars" = "%d 字符"; +"Unable to allocate audio buffer." = "无法分配音频缓冲区。"; +"Unable to decode audio samples." 
= "无法解码音频采样。"; +"1 model needs setup" = "1 个模型需要补全配置"; +"%d models need setup" = "%d 个模型需要补全配置"; +"Generate" = "生成"; +"Apply" = "应用"; +"Debug" = "调试"; +"ASR Debug" = "ASR 调试"; +"LLM Debug" = "LLM 调试"; +"Local MLX Audio" = "本地 MLX Audio"; +"Configured Remote ASR" = "已配置远端 ASR"; +"Debug-only preset" = "仅用于调试的预设"; +"Transcription Enhancement" = "转录增强"; +"Built-in preset" = "内置预设"; +"App Enhancement · %@" = "应用增强 · %@"; +"Saved group preset" = "已保存的分组预设"; +"Failed to start recording." = "无法开始录音。"; +"No available model." = "没有可用模型。"; +"No audio selected." = "尚未选择音频。"; +"Recording…" = "录音中…"; +"No recording was captured." = "未捕获到录音。"; +"Recorded clip ready: %@" = "录音片段已就绪:%@"; +"Running %@" = "正在运行 %@"; +"Completed %@" = "已完成 %@"; +"Select Model" = "选择模型"; +"Select Audio" = "选择音频"; +"Select Preset" = "选择预设"; +"No debug results yet" = "暂无调试结果"; +"Record audio once, then switch models or clips to compare transcription output." = "先录制一次音频,再切换模型或音频片段以比较转录结果。"; +"Choose a preset, fill variables, and run different models to compare outputs." = "选择预设、填写变量,然后运行不同模型以比较输出结果。"; +"Recorded Audio" = "已录制音频"; +"No audio clips yet" = "暂无音频片段"; +"Record audio in this window first, then reuse the same clip across different models." = "请先在此窗口录音,然后复用同一段音频比较不同模型。"; +"Choose Preset" = "选择预设"; +"Preset Settings" = "预设设置"; +"Variables" = "变量"; +"No output." = "无输出。"; +"Recorded" = "录制"; +"Reused Audio" = "复用音频"; +"Unable to allocate audio buffer." = "无法分配音频缓冲区。"; +"Unable to decode audio samples." = "无法解码音频采样。"; +"1 model needs setup" = "1 个模型需要补全配置"; +"%d models need setup" = "%d 个模型需要补全配置"; +"Logging" = "日志"; +"Enable hotkey debug logs" = "开启快捷键调试日志"; +"Records hotkey detection, trigger routing, and shortcut handling details for debugging." = "记录快捷键检测、触发路由与快捷键处理细节,便于调试。"; +"Enable model debug logs" = "开启模型调试日志"; +"Records local and remote model details, including LLM, ASR, model downloads, and model routing, for debugging." 
= "记录本地与远端模型细节,包括 LLM、ASR、模型下载和模型路由,便于调试。"; diff --git a/VoxtTests/CustomLLMModelSupportTests.swift b/VoxtTests/CustomLLMModelSupportTests.swift index 28dd5ae..e3d49d7 100644 --- a/VoxtTests/CustomLLMModelSupportTests.swift +++ b/VoxtTests/CustomLLMModelSupportTests.swift @@ -4,22 +4,102 @@ import XCTest final class CustomLLMModelSupportTests: XCTestCase { func testCatalogRecognizesSupportedRepoAndFallbackTitle() { XCTAssertTrue(CustomLLMModelCatalog.isSupportedModelRepo("mlx-community/Qwen3-4B-4bit")) + XCTAssertTrue(CustomLLMModelCatalog.isSupportedModelRepo("Qwen/Qwen3-8B-4bit")) XCTAssertFalse(CustomLLMModelCatalog.isSupportedModelRepo("unsupported/repo")) XCTAssertEqual( CustomLLMModelCatalog.displayTitle(for: "mlx-community/Qwen3-4B-4bit"), "Qwen3 4B (4bit)" ) + XCTAssertEqual( + CustomLLMModelCatalog.canonicalModelRepo("Qwen/Qwen3-8B-4bit"), + "mlx-community/Qwen3-8B-4bit" + ) XCTAssertEqual( CustomLLMModelCatalog.displayTitle(for: "custom/repo"), "custom/repo" ) } - func testCatalogProvidesFallbackRemoteSizeTextForCuratedRepos() { - let missingRepos = CustomLLMModelCatalog.availableModels - .map(\.id) - .filter { CustomLLMModelCatalog.fallbackRemoteSizeText(repo: $0) == nil } - XCTAssertTrue(missingRepos.isEmpty, "Missing size fallbacks for repos: \(missingRepos)") + func testCatalogUsesKnownRemoteSizeFallbacksForLegacyCuratedRepos() { + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "Qwen/Qwen2-1.5B-Instruct")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/Qwen3-4B-4bit")) + XCTAssertNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/Qwen3.5-2B-4bit")) + } + + func testCatalogUsesKnownRemoteSizeFallbacksForNewRecommendedModels() { + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/Qwen3.5-0.8B-4bit-OptiQ")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/Qwen3.5-4B-4bit")) + 
XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/Qwen3.5-4B-OptiQ-4bit")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/Qwen3.5-9B-OptiQ-4bit")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/MiniCPM4-8B-4bit")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/internlm2_5-7b-chat-4bit")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/glm-4-9b-chat-1m-4bit")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/GLM-Z1-9B-0414-4bit")) + XCTAssertNotNil(CustomLLMModelCatalog.fallbackRemoteSizeText(repo: "mlx-community/GLM-4.7-Flash-4bit")) + } + + func testCatalogMarksNewAndHiddenCompatibilityModels() { + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/Qwen3.5-2B-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/Qwen3.5-0.8B-4bit-OptiQ"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/Qwen3.5-4B-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/Qwen3.5-4B-OptiQ-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/Qwen3.5-9B-OptiQ-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/MiniCPM4-8B-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/internlm2_5-7b-chat-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/glm-4-9b-chat-1m-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/GLM-Z1-9B-0414-4bit"), + .new + ) + XCTAssertEqual( + CustomLLMModelCatalog.releaseStatus(for: "mlx-community/GLM-4.7-Flash-4bit"), + .standard + ) + let compatibilityOnly = CustomLLMModelCatalog.displayModels(including: 
"Qwen/Qwen2.5-7B-Instruct") + XCTAssertTrue(compatibilityOnly.contains(where: { $0.id == "mlx-community/Qwen2.5-7B-Instruct-4bit" })) + let qwen30BCompatibility = CustomLLMModelCatalog.displayModels(including: "mlx-community/Qwen3-30B-A3B-4bit") + XCTAssertTrue(qwen30BCompatibility.contains(where: { $0.id == "mlx-community/Qwen3-30B-A3B-4bit" })) + let glm47Compatibility = CustomLLMModelCatalog.displayModels(including: "mlx-community/GLM-4.7-Flash-4bit") + XCTAssertTrue(glm47Compatibility.contains(where: { $0.id == "mlx-community/GLM-4.7-Flash-4bit" })) + } + + func testCatalogIncludesNewRecommendedHomeMacModels() { + let modelIDs = Set(CustomLLMModelCatalog.availableModels.map(\.id)) + + XCTAssertTrue(modelIDs.contains("mlx-community/Qwen3.5-0.8B-4bit-OptiQ")) + XCTAssertTrue(modelIDs.contains("mlx-community/Qwen3.5-4B-4bit")) + XCTAssertTrue(modelIDs.contains("mlx-community/Qwen3.5-4B-OptiQ-4bit")) + XCTAssertTrue(modelIDs.contains("mlx-community/Qwen3.5-9B-OptiQ-4bit")) + XCTAssertTrue(modelIDs.contains("mlx-community/MiniCPM4-8B-4bit")) + XCTAssertTrue(modelIDs.contains("mlx-community/internlm2_5-7b-chat-4bit")) + XCTAssertTrue(modelIDs.contains("mlx-community/glm-4-9b-chat-1m-4bit")) + XCTAssertTrue(modelIDs.contains("mlx-community/GLM-Z1-9B-0414-4bit")) + XCTAssertFalse(modelIDs.contains("mlx-community/Qwen3-30B-A3B-4bit")) + XCTAssertFalse(modelIDs.contains("mlx-community/GLM-4.7-Flash-4bit")) } func testStorageSupportBuildsExpectedCacheDirectory() { @@ -33,4 +113,37 @@ final class CustomLLMModelSupportTests: XCTestCase { "/tmp/voxt-tests/mlx-llm/mlx-community_Qwen3-4B-4bit" ) } + + func testChatTemplateDetectionRecognizesDownloadedTemplateSidecars() throws { + let root = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true) + defer { try? 
FileManager.default.removeItem(at: root) } + + let jinjaURL = root.appendingPathComponent("chat_template.jinja") + try "{% for m in messages %}{{ m.content }}{% endfor %}".write( + to: jinjaURL, + atomically: true, + encoding: .utf8 + ) + + XCTAssertTrue(CustomLLMModelDownloadSupport.hasUsableChatTemplate(in: root)) + } + + func testChatTemplateDetectionRecognizesInlineTokenizerTemplate() throws { + let root = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true) + defer { try? FileManager.default.removeItem(at: root) } + + let tokenizerConfigURL = root.appendingPathComponent("tokenizer_config.json") + let json = """ + { + "chat_template": "{{ bos_token }}{{ messages[0]['content'] }}" + } + """ + try json.write(to: tokenizerConfigURL, atomically: true, encoding: .utf8) + + XCTAssertTrue(CustomLLMModelDownloadSupport.hasUsableChatTemplate(in: root)) + } } diff --git a/VoxtTests/FeatureModelCatalogBuilderTests.swift b/VoxtTests/FeatureModelCatalogBuilderTests.swift index 2886203..c826ca3 100644 --- a/VoxtTests/FeatureModelCatalogBuilderTests.swift +++ b/VoxtTests/FeatureModelCatalogBuilderTests.swift @@ -148,6 +148,54 @@ final class FeatureModelCatalogBuilderTests: XCTestCase { XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Multilingual"))) } + func testLLMSelectorUsesCuratedRatingAndTags() throws { + let repo = "mlx-community/MiniCPM4-8B-4bit" + let builder = makeBuilder( + featureSettings: makeFeatureSettings(translationModel: .localLLM(repo)) + ) + + let entry = try XCTUnwrap( + builder.entries(for: .translationModel) + .first(where: { $0.selectionID == .localLLM(repo) }) + ) + + XCTAssertEqual(entry.ratingText, "4.8") + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Accurate"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Fast"))) + } + + func 
testMLXSelectorUsesCuratedRatingAndTags() throws { + let repo = "mlx-community/GLM-ASR-Nano-2512-4bit" + let builder = makeBuilder( + featureSettings: makeFeatureSettings(transcriptionASR: .mlx(repo)) + ) + + let entry = try XCTUnwrap( + builder.entries(for: .transcriptionASR) + .first(where: { $0.selectionID == .mlx(repo) }) + ) + + XCTAssertEqual(entry.ratingText, "4.1") + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Fast"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Accurate"))) + } + + func testWhisperSelectorUsesCuratedRatingAndTags() throws { + let modelID = "medium" + let builder = makeBuilder( + featureSettings: makeFeatureSettings(transcriptionASR: .whisper(modelID)) + ) + + let entry = try XCTUnwrap( + builder.entries(for: .transcriptionASR) + .first(where: { $0.selectionID == .whisper(modelID) }) + ) + + XCTAssertEqual(entry.ratingText, "4.7") + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Accurate"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Fast"))) + } + private func makeBuilder( featureSettings: FeatureSettings, remoteASRConfigurationsRaw: String = "", diff --git a/VoxtTests/FeatureModelSelectorFilteringTests.swift b/VoxtTests/FeatureModelSelectorFilteringTests.swift index 51379a5..26656cf 100644 --- a/VoxtTests/FeatureModelSelectorFilteringTests.swift +++ b/VoxtTests/FeatureModelSelectorFilteringTests.swift @@ -120,6 +120,7 @@ final class FeatureModelSelectorFilteringTests: XCTestCase { displayTags: filterTags, statusText: "", usageLocations: usageLocations, + badgeText: nil, isSelectable: true, disabledReason: nil ) diff --git a/VoxtTests/MLXModelManagerTests.swift b/VoxtTests/MLXModelManagerTests.swift index 85708b1..dd36b83 100644 --- a/VoxtTests/MLXModelManagerTests.swift +++ b/VoxtTests/MLXModelManagerTests.swift @@ -1,8 +1,57 @@ import XCTest @testable import Voxt +import HuggingFace @MainActor final class 
MLXModelManagerTests: XCTestCase { + func testMLXAudioActiveHubCacheUsesConfiguredModelStorageRoot() throws { + let defaults = UserDefaults.standard + let previousPath = defaults.string(forKey: AppPreferenceKey.modelStorageRootPath) + let previousBookmark = defaults.data(forKey: AppPreferenceKey.modelStorageRootBookmark) + let customRoot = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + + defaults.set(customRoot.path, forKey: AppPreferenceKey.modelStorageRootPath) + defaults.removeObject(forKey: AppPreferenceKey.modelStorageRootBookmark) + addTeardownBlock { + if let previousPath { + defaults.set(previousPath, forKey: AppPreferenceKey.modelStorageRootPath) + } else { + defaults.removeObject(forKey: AppPreferenceKey.modelStorageRootPath) + } + if let previousBookmark { + defaults.set(previousBookmark, forKey: AppPreferenceKey.modelStorageRootBookmark) + } else { + defaults.removeObject(forKey: AppPreferenceKey.modelStorageRootBookmark) + } + } + + XCTAssertEqual(MLXModelManager.activeHubCache().cacheDirectory, customRoot) + } + + func testMLXAudioClearHubCacheTargetsConfiguredModelStorageRoot() throws { + let repoID = try XCTUnwrap(Repo.ID(rawValue: "mlx-community/Qwen3-ASR-0.6B-8bit")) + let rootDirectory = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + let cache = MLXModelStorageSupport.hubCache(rootDirectory: rootDirectory) + let repoDirectory = cache.repoDirectory(repo: repoID, kind: .model) + let metadataDirectory = cache.metadataDirectory(repo: repoID, kind: .model) + + try FileManager.default.createDirectory(at: repoDirectory, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: metadataDirectory, withIntermediateDirectories: true) + addTeardownBlock { + try? 
FileManager.default.removeItem(at: rootDirectory) + } + + XCTAssertTrue(FileManager.default.fileExists(atPath: repoDirectory.path)) + XCTAssertTrue(FileManager.default.fileExists(atPath: metadataDirectory.path)) + + MLXModelStorageSupport.clearHubCache(for: repoID, rootDirectory: rootDirectory) + + XCTAssertFalse(FileManager.default.fileExists(atPath: repoDirectory.path)) + XCTAssertFalse(FileManager.default.fileExists(atPath: metadataDirectory.path)) + } + func testCanonicalModelRepoMapsLegacyReposToCurrentIdentifiers() { XCTAssertEqual( MLXModelManager.canonicalModelRepo("mlx-community/Parakeet-0.6B"), @@ -145,12 +194,9 @@ final class MLXModelManagerTests: XCTestCase { XCTAssertEqual(missingRepos, []) } - func testAllCuratedCustomLLMModelsHaveRemoteSizeFallbacks() { - let missingRepos = CustomLLMModelManager.availableModels - .map(\.id) - .filter { CustomLLMModelManager.fallbackRemoteSizeText(repo: $0) == nil } - - XCTAssertEqual(missingRepos, []) + func testKnownCustomLLMRemoteSizeFallbacksRemainAvailable() { + XCTAssertNotNil(CustomLLMModelManager.fallbackRemoteSizeText(repo: "Qwen/Qwen2-1.5B-Instruct")) + XCTAssertNotNil(CustomLLMModelManager.fallbackRemoteSizeText(repo: "mlx-community/Qwen3-8B-4bit")) } func testAllCuratedWhisperModelsHaveRemoteSizeFallbacks() { @@ -161,30 +207,41 @@ final class MLXModelManagerTests: XCTestCase { XCTAssertEqual(missingModelIDs, []) } - func testCustomLLMBehaviorDisablesThinkingForQwen3Family() { + func testCustomLLMBehaviorDisablesThinkingForThinkingModels() { XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3-4B-4bit").family, .qwen3) XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3-4B-4bit").disablesThinking) XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3-8B-4bit").disablesThinking) - XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3.5-4B-MLX-4bit").disablesThinking) + 
XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3.5-2B-4bit").disablesThinking) + XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3.5-0.8B-4bit-OptiQ").disablesThinking) + XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3.5-4B-4bit").disablesThinking) + XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3.5-4B-OptiQ-4bit").disablesThinking) + XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3.5-9B-OptiQ-4bit").disablesThinking) + XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/GLM-Z1-9B-0414-4bit").disablesThinking) + XCTAssertTrue(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/AceReason-Nemotron-7B-4bit").disablesThinking) } func testCustomLLMBehaviorLeavesOtherInstructionModelsUntouched() { XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "Qwen/Qwen2-1.5B-Instruct").family, .qwen2) XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/GLM-4-9B-0414-4bit").family, .glm4) + XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/glm-4-9b-chat-1m-4bit").family, .glm4) + XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/GLM-Z1-9B-0414-4bit").family, .glm4) XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Llama-3.2-3B-Instruct-4bit").family, .llama) XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Mistral-Nemo-Instruct-2407-4bit").family, .mistral) XCTAssertEqual(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/gemma-2-2b-it-4bit").family, .gemma) XCTAssertFalse(CustomLLMModelBehaviorResolver.behavior(for: "Qwen/Qwen2-1.5B-Instruct").disablesThinking) XCTAssertFalse(CustomLLMModelBehaviorResolver.behavior(for: "Qwen/Qwen2.5-3B-Instruct").disablesThinking) XCTAssertFalse(CustomLLMModelBehaviorResolver.behavior(for: 
"mlx-community/GLM-4-9B-0414-4bit").disablesThinking) + XCTAssertFalse(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/glm-4-9b-chat-1m-4bit").disablesThinking) XCTAssertFalse(CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Llama-3.2-3B-Instruct-4bit").disablesThinking) } func testCustomLLMBehaviorProvidesAdditionalContextOnlyForThinkingModels() { let qwen3Behavior = CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/Qwen3-4B-4bit") + let glmZ1Behavior = CustomLLMModelBehaviorResolver.behavior(for: "mlx-community/GLM-Z1-9B-0414-4bit") let qwen2Behavior = CustomLLMModelBehaviorResolver.behavior(for: "Qwen/Qwen2-1.5B-Instruct") XCTAssertEqual(qwen3Behavior.additionalContext?["enable_thinking"] as? Bool, false) + XCTAssertEqual(glmZ1Behavior.additionalContext?["enable_thinking"] as? Bool, false) XCTAssertNil(qwen2Behavior.additionalContext) } diff --git a/VoxtTests/ModelCatalogBuilderTests.swift b/VoxtTests/ModelCatalogBuilderTests.swift index 311133c..8cef68c 100644 --- a/VoxtTests/ModelCatalogBuilderTests.swift +++ b/VoxtTests/ModelCatalogBuilderTests.swift @@ -149,6 +149,53 @@ final class ModelCatalogBuilderTests: XCTestCase { XCTAssertEqual(entry.primaryAction?.title, AppLocalization.localizedString("Pause")) } + func testCustomLLMCatalogUsesCuratedRatingAndTags() throws { + let repo = "mlx-community/Qwen3.5-4B-OptiQ-4bit" + let builder = makeBuilder( + featureSettings: makeFeatureSettings(translationModel: .localLLM(repo)) + ) + + let entry = try XCTUnwrap( + builder.llmEntries().first(where: { $0.id == "local-llm:\(repo)" }) + ) + + XCTAssertEqual(entry.ratingText, "4.8") + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Balanced"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Accurate"))) + } + + func testMLXCatalogUsesCuratedRatingAndTags() throws { + let repo = "mlx-community/Voxtral-Mini-4B-Realtime-6bit" + let builder = makeBuilder( + featureSettings: 
makeFeatureSettings(transcriptionASR: .mlx(repo)) + ) + + let entry = try XCTUnwrap( + builder.asrEntries().first(where: { $0.id == "mlx:\(repo)" }) + ) + + XCTAssertEqual(entry.ratingText, "4.7") + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Realtime"))) + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Balanced"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Fast"))) + } + + func testWhisperCatalogUsesCuratedRatingAndTags() throws { + let modelID = "base" + let builder = makeBuilder( + featureSettings: makeFeatureSettings(transcriptionASR: .whisper(modelID)) + ) + + let entry = try XCTUnwrap( + builder.asrEntries().first(where: { $0.id == "whisper:\(modelID)" }) + ) + + XCTAssertEqual(entry.ratingText, "4.3") + XCTAssertTrue(entry.displayTags.contains(AppLocalization.localizedString("Balanced"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Fast"))) + XCTAssertFalse(entry.displayTags.contains(AppLocalization.localizedString("Accurate"))) + } + private func makeBuilder( featureSettings: FeatureSettings, remoteASRConfigurations: [String: RemoteProviderConfiguration] = [:], diff --git a/VoxtTests/ModelDebugSupportTests.swift b/VoxtTests/ModelDebugSupportTests.swift new file mode 100644 index 0000000..f49d6a8 --- /dev/null +++ b/VoxtTests/ModelDebugSupportTests.swift @@ -0,0 +1,202 @@ +import XCTest +@testable import Voxt + +@MainActor +final class ModelDebugSupportTests: XCTestCase { + func testLLMDebugPresetsIncludeBuiltinsAndSavedGroups() throws { + let defaults = UserDefaults.standard + let previousGroups = defaults.data(forKey: AppPreferenceKey.appBranchGroups) + let previousPrompt = defaults.string(forKey: AppPreferenceKey.enhancementSystemPrompt) + let previousLanguageCodes = defaults.string(forKey: AppPreferenceKey.userMainLanguageCodes) + + let groups = [ + AppBranchGroup( + id: UUID(uuidString: "AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE")!, + 
name: "Chrome", + prompt: "Clean {{RAW_TRANSCRIPTION}} for browser work", + appBundleIDs: ["com.google.Chrome"], + appRefs: [AppBranchAppRef(bundleID: "com.google.Chrome", displayName: "Chrome")], + urlPatternIDs: [], + isExpanded: true + ) + ] + defaults.set(try JSONEncoder().encode(groups), forKey: AppPreferenceKey.appBranchGroups) + defaults.set("Base {{RAW_TRANSCRIPTION}}", forKey: AppPreferenceKey.enhancementSystemPrompt) + defaults.set("en", forKey: AppPreferenceKey.userMainLanguageCodes) + addTeardownBlock { + if let previousGroups { + defaults.set(previousGroups, forKey: AppPreferenceKey.appBranchGroups) + } else { + defaults.removeObject(forKey: AppPreferenceKey.appBranchGroups) + } + if let previousPrompt { + defaults.set(previousPrompt, forKey: AppPreferenceKey.enhancementSystemPrompt) + } else { + defaults.removeObject(forKey: AppPreferenceKey.enhancementSystemPrompt) + } + if let previousLanguageCodes { + defaults.set(previousLanguageCodes, forKey: AppPreferenceKey.userMainLanguageCodes) + } else { + defaults.removeObject(forKey: AppPreferenceKey.userMainLanguageCodes) + } + } + + let presets = ModelDebugCatalog.availableLLMPresets(defaults: defaults) + + XCTAssertTrue(presets.contains(where: { $0.id == "builtin:enhancement" })) + XCTAssertTrue(presets.contains(where: { $0.id == "builtin:translation" })) + XCTAssertTrue(presets.contains(where: { $0.id == "builtin:rewrite" })) + XCTAssertTrue(presets.contains(where: { $0.id == "builtin:meeting-summary" })) + XCTAssertTrue(presets.contains(where: { $0.title.contains("Chrome") })) + } + + func testPromptResolverInjectsEnhancementVariables() { + let preset = LLMDebugPresetOption( + id: "builtin:enhancement", + title: "Enhancement", + subtitle: "Built-in preset", + kind: .enhancement, + promptTemplate: "Clean {{RAW_TRANSCRIPTION}} for {{USER_MAIN_LANGUAGE}}", + variables: ModelSettingsPromptVariables.enhancement, + defaultValues: [ + AppDelegate.rawTranscriptionTemplateVariable: "", + 
AppDelegate.userMainLanguageTemplateVariable: "English" + ] + ) + + let resolved = ModelDebugPromptResolver.resolve( + preset: preset, + values: [ + AppDelegate.rawTranscriptionTemplateVariable: "hello world", + AppDelegate.userMainLanguageTemplateVariable: "Chinese" + ] + ) + + XCTAssertTrue(resolved.content.contains("hello world")) + XCTAssertTrue(resolved.content.contains("Chinese")) + XCTAssertEqual(resolved.inputSummary, "hello world") + } + + func testPromptResolverInjectsMeetingSummaryVariables() { + let preset = LLMDebugPresetOption( + id: "builtin:meeting-summary", + title: "Meeting Summary", + subtitle: "Built-in preset", + kind: .meetingSummary, + promptTemplate: "Minutes: {{MEETING_RECORD}} | Lang: {{USER_MAIN_LANGUAGE}}", + variables: [], + defaultValues: [:] + ) + + let resolved = ModelDebugPromptResolver.resolve( + preset: preset, + values: [ + "{{MEETING_RECORD}}": "Discuss launch blockers", + AppPreferenceKey.asrUserMainLanguageTemplateVariable: "Japanese" + ] + ) + + XCTAssertTrue(resolved.content.contains("Discuss launch blockers")) + XCTAssertTrue(resolved.content.contains("Japanese")) + XCTAssertEqual(resolved.inputSummary, "Discuss launch blockers") + } + + func testPromptResolverUsesRequestedTranslationTargetLanguage() { + let preset = LLMDebugPresetOption( + id: "builtin:translation", + title: "Translation", + subtitle: "Built-in preset", + kind: .translation, + promptTemplate: "Translate {{SOURCE_TEXT}} into {{TARGET_LANGUAGE}} for {{USER_MAIN_LANGUAGE}}", + variables: ModelSettingsPromptVariables.translation, + defaultValues: [ + "{{TARGET_LANGUAGE}}": "English", + AppDelegate.userMainLanguageTemplateVariable: "Chinese", + "{{SOURCE_TEXT}}": "" + ] + ) + + let resolved = ModelDebugPromptResolver.resolve( + preset: preset, + values: [ + "{{TARGET_LANGUAGE}}": "Japanese", + AppDelegate.userMainLanguageTemplateVariable: "English", + "{{SOURCE_TEXT}}": "你好" + ] + ) + + XCTAssertTrue(resolved.content.contains("Japanese")) + 
XCTAssertTrue(resolved.content.contains("你好")) + XCTAssertEqual(resolved.inputSummary, "你好") + } + + func testPromptResolverUsesDictatedPromptSummaryWhenRewriteSourceIsEmpty() { + let preset = LLMDebugPresetOption( + id: "builtin:rewrite", + title: "Rewrite", + subtitle: "Built-in preset", + kind: .rewrite, + promptTemplate: "Rewrite {{SOURCE_TEXT}} with {{DICTATED_PROMPT}}", + variables: ModelSettingsPromptVariables.rewrite, + defaultValues: [:] + ) + + let resolved = ModelDebugPromptResolver.resolve( + preset: preset, + values: [ + "{{DICTATED_PROMPT}}": "write a short reply", + "{{SOURCE_TEXT}}": "" + ] + ) + + XCTAssertTrue(resolved.content.contains("write a short reply")) + XCTAssertEqual(resolved.inputSummary, "write a short reply") + } + + func testRemoteDebugModelCatalogFiltersUnavailableProviders() { + let remoteASRConfigurations = [ + RemoteASRProvider.openAIWhisper.rawValue: RemoteProviderConfiguration( + providerID: RemoteASRProvider.openAIWhisper.rawValue, + model: "gpt-4o-mini-transcribe", + endpoint: "", + apiKey: "key" + ), + RemoteASRProvider.doubaoASR.rawValue: RemoteProviderConfiguration( + providerID: RemoteASRProvider.doubaoASR.rawValue, + model: "", + endpoint: "", + apiKey: "" + ) + ] + let remoteLLMConfigurations = [ + RemoteLLMProvider.openAI.rawValue: RemoteProviderConfiguration( + providerID: RemoteLLMProvider.openAI.rawValue, + model: "gpt-4.1-mini", + endpoint: "", + apiKey: "key" + ), + RemoteLLMProvider.aliyunBailian.rawValue: RemoteProviderConfiguration( + providerID: RemoteLLMProvider.aliyunBailian.rawValue, + model: "", + endpoint: "", + apiKey: "" + ) + ] + + let asrOptions = ModelDebugCatalog.availableASRModels( + downloadedMLXRepos: [], + downloadedWhisperModelIDs: [], + remoteASRConfigurations: remoteASRConfigurations + ) + let llmOptions = ModelDebugCatalog.availableLLMModels( + downloadedLocalRepos: [], + currentLocalRepo: CustomLLMModelManager.defaultModelRepo, + remoteLLMConfigurations: remoteLLMConfigurations + ) + + 
XCTAssertTrue(asrOptions.contains(where: { $0.id == "remote-asr:\(RemoteASRProvider.openAIWhisper.rawValue)" })) + XCTAssertFalse(asrOptions.contains(where: { $0.id == "remote-asr:\(RemoteASRProvider.doubaoASR.rawValue)" })) + XCTAssertTrue(llmOptions.contains(where: { $0.id == "remote-llm:\(RemoteLLMProvider.openAI.rawValue)" })) + XCTAssertFalse(llmOptions.contains(where: { $0.id == "remote-llm:\(RemoteLLMProvider.aliyunBailian.rawValue)" })) + } +} diff --git a/docs/MLXAudioDependency.md b/docs/MLXAudioDependency.md index f131657..4e4ae36 100644 --- a/docs/MLXAudioDependency.md +++ b/docs/MLXAudioDependency.md @@ -6,7 +6,7 @@ The current Xcode package reference is: - URL: `https://github.com/hehehai/mlx-audio-swift.git` - Requirement: `revision` -- Revision: `c96fe7b8577fb1db5a9987a6582e706acb388a8e` +- Revision: `8ae0c745360b32c128c0ba6d4e46b27ee3214529` ## Version rules @@ -42,5 +42,5 @@ If Voxt needs to consume a synced fork commit before a new tag exists, pin the p - Fork: `hehehai/mlx-audio-swift` - Requirement: `revision` -- Commit: `c96fe7b8577fb1db5a9987a6582e706acb388a8e` -- Notes: includes `Cohere Transcribe` support from fork `main`; replace with a new Voxt tag once one is cut +- Commit: `8ae0c745360b32c128c0ba6d4e46b27ee3214529` +- Notes: pinned to the current fork `main` tip while Voxt tracks upstream 3.x-compatible STT updates; replace with a new Voxt tag once one is cut diff --git a/docs/README.zh-CN.md b/docs/README.zh-CN.md index 496ab6d..32b963e 100644 --- a/docs/README.zh-CN.md +++ b/docs/README.zh-CN.md @@ -117,7 +117,7 @@ Whisper 不是 `MLX Audio` 的子模式,而是在模型页里独立显示的 - Voxt 会把 MLX Audio 下载内容存放在自己的 `mlx-audio` 模型目录下,并先做 canonical repo 归一化,再判断模型是否已经下载。 - 老的模型 ID 会自动映射到当前 canonical ID,包括 `Parakeet`、`GLM-ASR Nano`、`Voxtral Realtime`、`FireRed ASR 2`,升级后一般不需要手工重选。 - 对齐专用仓库会被明确拒绝,例如 `Qwen3-ForcedAligner` 不会被当成可转录模型。 -- 当前工程里的依赖源是 Voxt 维护的镜像 fork `hehehai/mlx-audio-swift`,目前固定在 commit `c96fe7b8577fb1db5a9987a6582e706acb388a8e`。依赖策略见 
[docs/MLXAudioDependency.md](./MLXAudioDependency.md)。 +- 当前工程里的依赖源是 Voxt 维护的镜像 fork `hehehai/mlx-audio-swift`,目前固定在 commit `8ae0c745360b32c128c0ba6d4e46b27ee3214529`。依赖策略见 [docs/MLXAudioDependency.md](./MLXAudioDependency.md)。 #### Whisper(WhisperKit)