Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -665,12 +665,10 @@ The format is based on Keep a Changelog.

### Added
- App Branch source card now shows an Apps-tab drag hint in the header.
- Added custom LLM model options:
- `mlx-community/Qwen3.5-0.8B-MLX-4bit`
- `mlx-community/Qwen3.5-2B-MLX-4bit`
- Added an experimental custom LLM model path for early `Qwen3.5` evaluation.

### Changed
- Upgraded `mlx-swift-lm` to a newer revision with `qwen3_5` model-type support.
- Upgraded `mlx-swift-lm` to a newer revision for early `Qwen3.5` model-type support.
- Improved App Branch localization coverage for tab content and related sheets.

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ Notes for the current MLX Audio integration:
- Voxt stores MLX Audio downloads under its `mlx-audio` model storage root and checks canonical model identifiers before deciding whether a model is already installed.
- Older saved model IDs are auto-migrated to the current canonical IDs for `Parakeet`, `GLM-ASR Nano`, `Voxtral Realtime`, and `FireRed ASR 2`, so existing settings should continue working after upgrade.
- Alignment-only repositories are rejected explicitly; for example, `Qwen3-ForcedAligner` is not treated as a transcription model.
- The current package source is the Voxt mirror fork `hehehai/mlx-audio-swift` pinned to commit `c96fe7b8577fb1db5a9987a6582e706acb388a8e`. See [docs/MLXAudioDependency.md](docs/MLXAudioDependency.md) for the dependency policy.
- The current package source is the Voxt mirror fork `hehehai/mlx-audio-swift` pinned to commit `8ae0c745360b32c128c0ba6d4e46b27ee3214529`. See [docs/MLXAudioDependency.md](docs/MLXAudioDependency.md) for the dependency policy.

#### Whisper (WhisperKit)

Expand Down
14 changes: 11 additions & 3 deletions Voxt.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
A1B2C3D4E5F6078901234569 /* MLXAudioSTT in Frameworks */ = {isa = PBXBuildFile; productRef = A1B2C3D4E5F607890123456A /* MLXAudioSTT */; };
B1C2D3E4F5060708090A0B0C /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = B1C2D3E4F5060708090A0B0D /* Sparkle */; };
C1D2E3F4A5060708090A0B01 /* MLXLMCommon in Frameworks */ = {isa = PBXBuildFile; productRef = C1D2E3F4A5060708090A0B02 /* MLXLMCommon */; };
C1D2E3F4A5060708090A0B06 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = C1D2E3F4A5060708090A0B07 /* MLXLLM */; };
D1E2F3A4B5C60718293A4B5C /* WhisperKit in Frameworks */ = {isa = PBXBuildFile; productRef = D1E2F3A4B5C60718293A4B5D /* WhisperKit */; };
E2A1C3B4D5F60718293A4B6C /* PermissionFlow in Frameworks */ = {isa = PBXBuildFile; productRef = E2A1C3B4D5F60718293A4B6D /* PermissionFlow */; };
E2A1C3B4D5F60718293A4B6E /* SystemSettingsKit in Frameworks */ = {isa = PBXBuildFile; productRef = E2A1C3B4D5F60718293A4B6F /* SystemSettingsKit */; };
Expand Down Expand Up @@ -75,6 +76,7 @@
A1B2C3D4E5F6078901234569 /* MLXAudioSTT in Frameworks */,
B1C2D3E4F5060708090A0B0C /* Sparkle in Frameworks */,
C1D2E3F4A5060708090A0B01 /* MLXLMCommon in Frameworks */,
C1D2E3F4A5060708090A0B06 /* MLXLLM in Frameworks */,
D1E2F3A4B5C60718293A4B5C /* WhisperKit in Frameworks */,
E2A1C3B4D5F60718293A4B6C /* PermissionFlow in Frameworks */,
E2A1C3B4D5F60718293A4B6E /* SystemSettingsKit in Frameworks */,
Expand Down Expand Up @@ -172,6 +174,7 @@
A1B2C3D4E5F607890123456A /* MLXAudioSTT */,
B1C2D3E4F5060708090A0B0D /* Sparkle */,
C1D2E3F4A5060708090A0B02 /* MLXLMCommon */,
C1D2E3F4A5060708090A0B07 /* MLXLLM */,
D1E2F3A4B5C60718293A4B5D /* WhisperKit */,
E2A1C3B4D5F60718293A4B6D /* PermissionFlow */,
E2A1C3B4D5F60718293A4B6F /* SystemSettingsKit */,
Expand Down Expand Up @@ -879,7 +882,7 @@
repositoryURL = "https://github.com/hehehai/mlx-audio-swift.git";
requirement = {
kind = revision;
revision = c96fe7b8577fb1db5a9987a6582e706acb388a8e;
revision = 8ae0c745360b32c128c0ba6d4e46b27ee3214529;
};
};
B1C2D3E4F5060708090A0B0E /* XCRemoteSwiftPackageReference "Sparkle" */ = {
Expand All @@ -894,8 +897,8 @@
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/ml-explore/mlx-swift-lm.git";
requirement = {
kind = revision;
revision = e33eba8513595bde535719c48fedcb10ade5af57;
kind = exactVersion;
version = 3.31.3;
};
};
D1E2F3A4B5C60718293A4B5E /* XCRemoteSwiftPackageReference "WhisperKit" */ = {
Expand Down Expand Up @@ -945,6 +948,11 @@
package = C1D2E3F4A5060708090A0B03 /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXLMCommon;
};
C1D2E3F4A5060708090A0B07 /* MLXLLM */ = {
isa = XCSwiftPackageProductDependency;
package = C1D2E3F4A5060708090A0B03 /* XCRemoteSwiftPackageReference "mlx-swift-lm" */;
productName = MLXLLM;
};
D1E2F3A4B5C60718293A4B5D /* WhisperKit */ = {
isa = XCSwiftPackageProductDependency;
package = D1E2F3A4B5C60718293A4B5E /* XCRemoteSwiftPackageReference "WhisperKit" */;
Expand Down
40 changes: 26 additions & 14 deletions Voxt/App/AppDelegate+EnhancementBrowserContext.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,22 @@ extension AppDelegate {

func activeBrowserTabURL(frontmostBundleID: String?) -> String? {
guard let frontmostBundleID else { return nil }
if let deniedUntil = browserAutomationDeniedUntilByBundleID[frontmostBundleID],
deniedUntil > Date() {
return nil
}
guard NSRunningApplication.runningApplications(withBundleIdentifier: frontmostBundleID)
.contains(where: { !$0.isTerminated }) else {
VoxtLog.info("Browser process not running while resolving active tab URL. bundleID=\(frontmostBundleID)")
VoxtLog.model("Browser process not running while resolving active tab URL. bundleID=\(frontmostBundleID)")
return nil
}
guard let provider = browserScriptProvider(for: frontmostBundleID) else { return nil }
if let scriptedURL = runAppleScriptCandidates(provider.scripts, providerName: provider.name) {
browserAutomationDeniedUntilByBundleID.removeValue(forKey: frontmostBundleID)
return scriptedURL
}
if let axURL = activeBrowserTabURLFromAccessibility(frontmostBundleID: frontmostBundleID) {
VoxtLog.info("Browser active-tab URL read succeeded via AX fallback. provider=\(provider.name)")
VoxtLog.model("Browser active-tab URL read succeeded via AX fallback. provider=\(provider.name)")
return axURL
}
return nil
Expand Down Expand Up @@ -129,28 +134,35 @@ extension AppDelegate {
!output.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000)
if index > 0 {
VoxtLog.info("Browser active-tab URL read succeeded via fallback. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs)")
VoxtLog.model("Browser active-tab URL read succeeded via fallback. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs)")
}
return output
}
if let executionError {
let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000)
VoxtLog.info(
VoxtLog.model(
"Browser active-tab URL candidate failed. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs), error=\(executionError)"
)
lastError = executionError
if let errorNumber = executionError["NSAppleScriptErrorNumber"] as? Int, errorNumber == -600 {
break
if let errorNumber = executionError["NSAppleScriptErrorNumber"] as? Int {
if errorNumber == -1743 || errorNumber == -10004 {
if let frontmostBundleID = NSWorkspace.shared.frontmostApplication?.bundleIdentifier {
browserAutomationDeniedUntilByBundleID[frontmostBundleID] = Date().addingTimeInterval(300)
}
}
if errorNumber == -600 {
break
}
}
} else {
let elapsedMs = Int(Date().timeIntervalSince(startedAt) * 1000)
VoxtLog.info(
VoxtLog.model(
"Browser active-tab URL candidate returned empty/timed out. provider=\(providerName), candidate=\(index + 1), elapsedMs=\(elapsedMs)"
)
}
}
if let lastError {
VoxtLog.info("Browser active-tab URL read failed. provider=\(providerName), error=\(lastError)")
VoxtLog.model("Browser active-tab URL read failed. provider=\(providerName), error=\(lastError)")
}
return nil
}
Expand All @@ -176,7 +188,7 @@ extension AppDelegate {
guard let script = NSAppleScript(source: wrappedSource) else { return nil }
guard let output = script.executeAndReturnError(&error).stringValue else {
if logFailure, let error {
VoxtLog.info("Browser active-tab URL read failed: \(error)")
VoxtLog.model("Browser active-tab URL read failed: \(error)")
}
return nil
}
Expand All @@ -185,13 +197,13 @@ extension AppDelegate {

func activeBrowserTabURLFromAccessibility(frontmostBundleID: String) -> String? {
guard AccessibilityPermissionManager.isTrusted() else {
VoxtLog.info("Browser active-tab AX fallback unavailable: accessibility not trusted")
VoxtLog.model("Browser active-tab AX fallback unavailable: accessibility not trusted")
return nil
}
guard let app = NSWorkspace.shared.frontmostApplication,
app.bundleIdentifier == frontmostBundleID
else {
VoxtLog.info("Browser active-tab AX fallback skipped: frontmost app changed")
VoxtLog.model("Browser active-tab AX fallback skipped: frontmost app changed")
return nil
}

Expand All @@ -207,7 +219,7 @@ extension AppDelegate {
let url = axDocumentURL(from: focusedWindow) {
return url
} else if focusedStatus != .success {
VoxtLog.info("Browser active-tab AX fallback focused window unavailable: status=\(focusedStatus.rawValue)")
VoxtLog.model("Browser active-tab AX fallback focused window unavailable: status=\(focusedStatus.rawValue)")
}

var mainWindowValue: CFTypeRef?
Expand All @@ -220,7 +232,7 @@ extension AppDelegate {
let mainWindow = mainWindowValue {
return axDocumentURL(from: mainWindow)
}
VoxtLog.info("Browser active-tab AX fallback main window unavailable: status=\(mainStatus.rawValue)")
VoxtLog.model("Browser active-tab AX fallback main window unavailable: status=\(mainStatus.rawValue)")
return nil
}

Expand All @@ -234,7 +246,7 @@ extension AppDelegate {
&documentValue
)
guard status == .success, let documentValue else {
VoxtLog.info("Browser active-tab AX fallback document attribute unavailable: status=\(status.rawValue)")
VoxtLog.model("Browser active-tab AX fallback document attribute unavailable: status=\(status.rawValue)")
return nil
}
return documentValue as? String
Expand Down
2 changes: 1 addition & 1 deletion Voxt/App/AppDelegate+MeetingSummary.swift
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ extension AppDelegate {
)
}

let downloadedCustomOptions: [MeetingSummaryModelOption] = CustomLLMModelManager.availableModels.compactMap { model -> MeetingSummaryModelOption? in
let downloadedCustomOptions: [MeetingSummaryModelOption] = CustomLLMModelManager.displayModels(including: customLLMManager.currentModelRepo).compactMap { model -> MeetingSummaryModelOption? in
guard customLLMManager.isModelDownloaded(repo: model.id) else {
return nil
}
Expand Down
8 changes: 4 additions & 4 deletions Voxt/App/AppDelegate+RecordingSession.swift
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ extension AppDelegate {

VoxtLog.info("Transcription result received. characters=\(text.count), output=\(sessionOutputMode == .translation ? "translation" : "transcription")")
VoxtLog.info("Transcription result output mode resolved as \(RecordingSessionSupport.outputLabel(for: sessionOutputMode)).", verbose: true)
VoxtLog.info(
VoxtLog.model(
"Session text model routing: \(RecordingSessionSupport.textModelRoutingDescription(outputMode: sessionOutputMode, transcriptionSettings: transcriptionFeatureSettings, translationSettings: translationFeatureSettings, rewriteSettings: rewriteFeatureSettings))"
)

Expand All @@ -357,7 +357,7 @@ extension AppDelegate {
return
}

VoxtLog.info("Transcription flow dispatch: standard. characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)")
VoxtLog.info("Transcription flow dispatch: standard. characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)", verbose: true)
processStandardTranscription(text, sessionID: sessionID)
}

Expand All @@ -370,7 +370,7 @@ extension AppDelegate {

let resolvedDelay = delay ?? sessionFinishDelay
let finishingSessionID = activeRecordingSessionID
VoxtLog.info("Finish session scheduled. delayMs=\(Int(resolvedDelay * 1000)), displayMode=\(overlayState.displayMode), isRecording=\(overlayState.isRecording), isEnhancing=\(overlayState.isEnhancing), isRequesting=\(overlayState.isRequesting)")
VoxtLog.info("Finish session scheduled. delayMs=\(Int(resolvedDelay * 1000)), displayMode=\(overlayState.displayMode), isRecording=\(overlayState.isRecording), isEnhancing=\(overlayState.isEnhancing), isRequesting=\(overlayState.isRequesting)", verbose: true)
overlayState.isCompleting = resolvedDelay > 0
if overlayState.displayMode != .answer {
overlayState.isEnhancing = false
Expand All @@ -394,7 +394,7 @@ extension AppDelegate {
)
return
}
VoxtLog.info("Finish session executing now. displayMode=\(self.overlayState.displayMode)")
VoxtLog.info("Finish session executing now. displayMode=\(self.overlayState.displayMode)", verbose: true)
self.executeSessionEndPipeline(for: finishingSessionID, trigger: "finish")
}
}
Expand Down
10 changes: 5 additions & 5 deletions Voxt/App/AppDelegate+TranscriptionFlow.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ extension AppDelegate {

func processStandardTranscription(_ text: String, sessionID: UUID) {
guard shouldHandleCallbacks(for: sessionID) else { return }
VoxtLog.info("Standard transcription flow entered. characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)")
VoxtLog.info("Standard transcription flow entered. characters=\(text.count), enhancementMode=\(enhancementMode.rawValue)", verbose: true)
switch enhancementMode {
case .off:
setEnhancingState(false)
overlayState.transcribedText = text
VoxtLog.info("Standard transcription committing raw text immediately. characters=\(text.count)")
VoxtLog.info("Standard transcription committing raw text immediately. characters=\(text.count)", verbose: true)
commitTranscription(text, llmDurationSeconds: nil) { [weak self] in
self?.finishSession(after: 0)
}
Expand All @@ -36,7 +36,7 @@ extension AppDelegate {
)
setEnhancingState(false)
overlayState.transcribedText = text
VoxtLog.info("Standard transcription falling back to raw text because custom model is unavailable. characters=\(text.count)")
VoxtLog.info("Standard transcription falling back to raw text because custom model is unavailable. characters=\(text.count)", verbose: true)
commitTranscription(text, llmDurationSeconds: nil) { [weak self] in
self?.finishSession(after: 0)
}
Expand All @@ -59,9 +59,9 @@ extension AppDelegate {
let llmStartedAt = Date()
if let asrAt = self.transcriptionResultReceivedAt {
let handoffMs = Int(llmStartedAt.timeIntervalSince(asrAt) * 1000)
VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=\(max(handoffMs, 0)), inputChars=\(text.count)")
VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=\(max(handoffMs, 0)), inputChars=\(text.count)", verbose: true)
} else {
VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=unknown, inputChars=\(text.count)")
VoxtLog.info("Enhancement handoff. mode=\(self.enhancementMode.rawValue), handoffMs=unknown, inputChars=\(text.count)", verbose: true)
}
do {
let enhanced = try await self.runStandardTranscriptionPipeline(text: text)
Expand Down
1 change: 1 addition & 0 deletions Voxt/App/VoxtApp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ class AppDelegate: NSObject, NSApplicationDelegate {
var currentEndingSessionID: UUID?
var lastCompletedSessionEndSessionID: UUID?
var isSessionCancellationRequested = false
var browserAutomationDeniedUntilByBundleID: [String: Date] = [:]
var pendingCompletedHistoryAudioArchiveURL: URL?
var latestInjectableOutputText: String?
var sessionTargetApplicationPID: pid_t?
Expand Down
4 changes: 2 additions & 2 deletions Voxt/Settings/AboutSettingsView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ struct AboutSettingsView: View {
Text("Thanks")
.font(.headline)
Link(
"github.com/Blaizzy/mlx-audio-swift",
destination: URL(string: "https://github.com/Blaizzy/mlx-audio-swift")!
"github.com/hehehai/mlx-audio-swift",
destination: URL(string: "https://github.com/hehehai/mlx-audio-swift")!
)
.font(.caption)
Link(
Expand Down
2 changes: 2 additions & 0 deletions Voxt/Settings/AppPreferenceKey.swift
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ enum AppPreferenceKey {
static let autoCheckForUpdates = "autoCheckForUpdates"
static let hotkeyDebugLoggingEnabled = "hotkeyDebugLoggingEnabled"
static let llmDebugLoggingEnabled = "llmDebugLoggingEnabled"
static let llmDebugCustomPrompt = "llmDebugCustomPrompt"
static let llmDebugPresetPromptOverrides = "llmDebugPresetPromptOverrides"
static let useSystemProxy = "useSystemProxy"
static let networkProxyMode = "networkProxyMode"
static let customProxyScheme = "customProxyScheme"
Expand Down
Loading