Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Voxt.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,6 @@
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_TESTABILITY = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GENERATE_INFOPLIST_FILE = YES;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = (
Expand All @@ -696,6 +695,7 @@
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
GENERATE_INFOPLIST_FILE = YES;
INSTALL_PATH = "$(TEST_HOST)/../..";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
Expand Down Expand Up @@ -752,7 +752,6 @@
ENABLE_NS_ASSERTIONS = NO;
ENABLE_TESTABILITY = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GENERATE_INFOPLIST_FILE = YES;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
Expand All @@ -763,6 +762,7 @@
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
GENERATE_INFOPLIST_FILE = YES;
INSTALL_PATH = "$(TEST_HOST)/../..";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
Expand Down Expand Up @@ -815,7 +815,6 @@
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_TESTABILITY = YES;
ENABLE_USER_SCRIPT_SANDBOXING = YES;
GENERATE_INFOPLIST_FILE = YES;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_PREPROCESSOR_DEFINITIONS = (
Expand All @@ -830,6 +829,7 @@
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_LABEL = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
GENERATE_INFOPLIST_FILE = YES;
INSTALL_PATH = "$(TEST_HOST)/../..";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
Expand Down
44 changes: 40 additions & 4 deletions Voxt/App/AppDelegate+HotkeyLifecycle.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,23 @@ import Carbon

@MainActor
extension AppDelegate {
/// Outcome of checking whether a callback tied to a recording session may still be handled.
enum SessionCallbackHandlingDecision: Equatable {
    /// The callback belongs to the active session and cancellation was not requested.
    case accept
    /// The callback's session ID does not match the active session ID.
    case rejectStale
    /// The session IDs match, but cancellation of the session was requested.
    case rejectCancelled

    /// Short tag identifying this decision in log messages.
    var logDescription: String {
        let tag: String
        switch self {
        case .accept: tag = "accept"
        case .rejectStale: tag = "stale-session"
        case .rejectCancelled: tag = "cancelled-session"
        }
        return tag
    }
}

func setupHotkey() {
// Callback contract:
// - HotkeyManager only emits normalized events (transcriptionDown/up, translationDown/up, rewriteDown/up).
Expand Down Expand Up @@ -403,16 +420,35 @@ extension AppDelegate {
pendingTranscriptionStartTask = nil
}

/// Classifies a session-scoped callback as acceptable, stale, or cancelled.
///
/// Pure function of its arguments; it reads no instance state.
///
/// - Parameters:
///   - requestedSessionID: Session ID the callback was created for.
///   - activeSessionID: Session ID that is currently active.
///   - isSessionCancellationRequested: Whether cancellation of the session was requested.
/// - Returns: `.rejectStale` when the IDs differ, `.rejectCancelled` when they match
///   but cancellation was requested, otherwise `.accept`.
nonisolated static func sessionCallbackHandlingDecision(
    requestedSessionID: UUID,
    activeSessionID: UUID,
    isSessionCancellationRequested: Bool
) -> SessionCallbackHandlingDecision {
    // The identity check comes first, so a mismatched callback is reported as
    // stale even when the cancellation flag is also set.
    if requestedSessionID != activeSessionID {
        return .rejectStale
    }
    return isSessionCancellationRequested ? .rejectCancelled : .accept
}

func shouldHandleCallbacks(for sessionID: UUID) -> Bool {
guard sessionID == activeRecordingSessionID else {
switch Self.sessionCallbackHandlingDecision(
requestedSessionID: sessionID,
activeSessionID: activeRecordingSessionID,
isSessionCancellationRequested: isSessionCancellationRequested
) {
case .accept:
return true
case .rejectStale:
VoxtLog.info("Ignoring stale session callback. sessionID=\(sessionID.uuidString)", verbose: true)
return false
}
guard !isSessionCancellationRequested else {
case .rejectCancelled:
VoxtLog.info("Ignoring callback for cancelled session. sessionID=\(sessionID.uuidString)", verbose: true)
return false
}
return true
}

var sessionOutputModeLabel: String {
Expand Down
20 changes: 11 additions & 9 deletions Voxt/App/AppDelegate+PreferencesAndHistory.swift
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ extension AppDelegate {
}

var whisperRealtimeEnabled: Bool {
defaults.object(forKey: AppPreferenceKey.whisperRealtimeEnabled) as? Bool ?? true
defaults.object(forKey: AppPreferenceKey.whisperRealtimeEnabled) as? Bool ?? false
}

var whisperKeepResidentLoaded: Bool {
Expand Down Expand Up @@ -319,13 +319,15 @@ extension AppDelegate {

let historyKind = resolvedHistoryKind(for: outputMode)
VoxtLog.info(
"History append requested. kind=\(historyKind.rawValue), engine=\(transcriptionEngine.rawValue), historyEnabled=\(historyEnabled), audioStorageEnabled=\(historyAudioStorageEnabled), stashedAudio=\(pendingCompletedHistoryAudioArchiveURL != nil)"
"History append requested. kind=\(historyKind.rawValue), engine=\(transcriptionEngine.rawValue), historyEnabled=\(historyEnabled), audioStorageEnabled=\(historyAudioStorageEnabled), stashedAudio=\(pendingCompletedHistoryAudioArchiveURL != nil)",
verbose: true
)
let pendingAudioArchiveURL = consumePendingCompletedHistoryAudioURL()
if let pendingAudioArchiveURL {
let exists = FileManager.default.fileExists(atPath: pendingAudioArchiveURL.path)
VoxtLog.info(
"History append consumed pending audio archive. kind=\(historyKind.rawValue), file=\(pendingAudioArchiveURL.lastPathComponent), exists=\(exists)"
"History append consumed pending audio archive. kind=\(historyKind.rawValue), file=\(pendingAudioArchiveURL.lastPathComponent), exists=\(exists)",
verbose: true
)
} else {
VoxtLog.warning(
Expand Down Expand Up @@ -519,7 +521,7 @@ extension AppDelegate {
}
}
} else {
try? historyStore.replaceAudioArchive(for: activeEntryID, with: pendingAudioArchiveURL)
_ = try? historyStore.replaceAudioArchive(for: activeEntryID, with: pendingAudioArchiveURL)
}
}

Expand Down Expand Up @@ -674,7 +676,8 @@ extension AppDelegate {
self.pendingCompletedHistoryAudioArchiveURL = nil
let exists = FileManager.default.fileExists(atPath: pendingCompletedHistoryAudioArchiveURL.path)
VoxtLog.info(
"Consumed stashed history audio archive. file=\(pendingCompletedHistoryAudioArchiveURL.lastPathComponent), exists=\(exists)"
"Consumed stashed history audio archive. file=\(pendingCompletedHistoryAudioArchiveURL.lastPathComponent), exists=\(exists)",
verbose: true
)
return pendingCompletedHistoryAudioArchiveURL
}
Expand All @@ -692,7 +695,8 @@ extension AppDelegate {
if let consumedURL {
let exists = FileManager.default.fileExists(atPath: consumedURL.path)
VoxtLog.info(
"Consumed transcriber history audio archive. engine=\(transcriptionEngine.rawValue), file=\(consumedURL.lastPathComponent), exists=\(exists)"
"Consumed transcriber history audio archive. engine=\(transcriptionEngine.rawValue), file=\(consumedURL.lastPathComponent), exists=\(exists)",
verbose: true
)
} else {
VoxtLog.warning(
Expand Down Expand Up @@ -725,9 +729,7 @@ extension AppDelegate {
try? FileManager.default.removeItem(at: pendingCompletedHistoryAudioArchiveURL)
}
pendingCompletedHistoryAudioArchiveURL = url
let exists = FileManager.default.fileExists(atPath: url.path)
let fileSize = (try? url.resourceValues(forKeys: [.fileSizeKey]).fileSize) ?? 0
VoxtLog.info("Pending history audio archive stashed. file=\(url.lastPathComponent), exists=\(exists), size=\(fileSize)")
VoxtLog.info("Pending history audio archive stashed. file=\(url.lastPathComponent)")
}

private func importConsumedAudioArchiveIfNeeded(
Expand Down
86 changes: 70 additions & 16 deletions Voxt/App/AppDelegate+RecordingSession.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,74 @@ import AVFoundation
import Speech

extension AppDelegate {
/// What the stop-recording fallback timer should do once its timeout elapses.
enum StopRecordingFallbackDecision: Equatable {
/// Do not wait any longer; the caller proceeds to finish the session.
case finishNow
/// Wait for an additional grace period of `seconds` before deciding again.
case extendGrace(seconds: TimeInterval)
}

/// Decides whether an expired stop-recording fallback should finish the session
/// or grant one additional grace period.
///
/// Pure function of its arguments; it reads no instance state.
///
/// - Parameters:
///   - transcriptionEngine: Engine used by the session.
///   - isWhisperFinalizing: Whether Whisper finalization is still in progress.
///   - transcriptionResultReceived: Whether a transcription result has already arrived.
///   - isExtendedGrace: Whether the expired timer was itself an extended grace period.
/// - Returns: `.extendGrace(seconds: 12)` only when the engine is `.whisperKit`,
///   finalization is still running, no result has arrived, and no grace period
///   has been granted yet; `.finishNow` in every other case.
nonisolated static func stopRecordingFallbackDecision(
    transcriptionEngine: TranscriptionEngine,
    isWhisperFinalizing: Bool,
    transcriptionResultReceived: Bool,
    isExtendedGrace: Bool
) -> StopRecordingFallbackDecision {
    let qualifiesForGrace = transcriptionEngine == .whisperKit
        && isWhisperFinalizing
        && !transcriptionResultReceived
        && !isExtendedGrace
    if qualifiesForGrace {
        return .extendGrace(seconds: 12)
    }
    return .finishNow
}

/// Reports whether the stop-recording fallback should hold off for now.
///
/// True only when the engine is `.whisperKit`, the Whisper transcriber reports
/// that it is finalizing a transcription, and no transcription result has been
/// recorded yet (`transcriptionResultReceivedAt` is `nil`).
private func shouldDeferStopRecordingFallback() -> Bool {
    // `&&` short-circuits, so the conditions are evaluated in the same order
    // as a sequence of early-exit checks would evaluate them.
    transcriptionEngine == .whisperKit
        && whisperTranscriber?.isFinalizingTranscription == true
        && transcriptionResultReceivedAt == nil
}

/// Arms a fallback timer that forces the current session to finish if it is
/// still active once `timeoutSeconds` have elapsed.
///
/// The created task is stored in `stopRecordingFallbackTask`. When the timer
/// fires and `stopRecordingFallbackDecision` returns `.extendGrace`, the timer
/// re-arms itself once with `isExtendedGrace: true` instead of finishing.
///
/// - Parameters:
///   - timeoutSeconds: How long to wait before the fallback fires.
///   - isExtendedGrace: `true` when this timer is itself an extended grace
///     period; passed on to the decision function.
private func armStopRecordingFallback(
timeoutSeconds: TimeInterval,
isExtendedGrace: Bool = false
) {
// Remember which session this timer was armed for, so a timer that outlives
// its session can detect that and do nothing.
let armedSessionID = activeRecordingSessionID
stopRecordingFallbackTask = Task { [weak self] in
guard let self else { return }
do {
try await Task.sleep(for: .seconds(timeoutSeconds))
} catch {
// The sleep threw (e.g. the task was cancelled while waiting); do not fire.
return
}
guard !Task.isCancelled else { return }
// Fire only if a session is still active and it is the one this timer was armed for.
guard self.isSessionActive, self.activeRecordingSessionID == armedSessionID else { return }

// NOTE(review): `isWhisperFinalizing` receives shouldDeferStopRecordingFallback(),
// which also checks the engine and the missing-result condition, so those
// checks are applied again inside the decision function.
let fallbackDecision = Self.stopRecordingFallbackDecision(
transcriptionEngine: self.transcriptionEngine,
isWhisperFinalizing: self.shouldDeferStopRecordingFallback(),
transcriptionResultReceived: self.transcriptionResultReceivedAt != nil,
isExtendedGrace: isExtendedGrace
)
if case .extendGrace(let graceSeconds) = fallbackDecision {
VoxtLog.warning(
"""
Stop recording fallback reached while Whisper finalization is still running; extending grace. sessionID=\(armedSessionID.uuidString), engine=\(self.transcriptionEngine.rawValue), output=\(RecordingSessionSupport.outputLabel(for: self.sessionOutputMode))
"""
)
// Re-arm with the grace timeout; this replaces `stopRecordingFallbackTask`
// with a new task while the current one returns.
self.armStopRecordingFallback(timeoutSeconds: graceSeconds, isExtendedGrace: true)
return
}

VoxtLog.warning(
"""
Stop recording fallback triggered; forcing session finish. sessionID=\(self.activeRecordingSessionID.uuidString), engine=\(self.transcriptionEngine.rawValue), output=\(RecordingSessionSupport.outputLabel(for: self.sessionOutputMode)), resultReceived=\(self.transcriptionResultReceivedAt != nil), endingSessionID=\(self.currentEndingSessionID?.uuidString ?? "nil"), whisperFinalizing=\(self.whisperTranscriber?.isFinalizingTranscription == true)
"""
)
// For the remote engine, discard its pending session output before finishing.
if self.transcriptionEngine == .remote {
self.remoteASRTranscriber.discardPendingSessionOutput()
}
self.finishSession(after: 0)
}
}

func continueRewriteConversation() {
guard overlayState.canContinueRewriteAnswer else { return }
overlayState.beginRewriteConversationIfNeeded()
Expand Down Expand Up @@ -43,7 +111,7 @@ extension AppDelegate {
whisperTranscriber?.stopRecording()
remoteASRTranscriber.discardPendingSessionOutput()
if preservePendingHistoryAudio {
VoxtLog.info("Preserving pending history audio during residual resource release. reason=\(reason)")
VoxtLog.info("Preserving pending history audio during residual resource release. reason=\(reason)", verbose: true)
} else {
discardPendingCompletedHistoryAudio()
}
Expand Down Expand Up @@ -220,21 +288,7 @@ extension AppDelegate {
transcriptionEngine: transcriptionEngine,
remoteProvider: remoteASRSelectedProvider
)
stopRecordingFallbackTask = Task { [weak self] in
guard let self else { return }
do {
try await Task.sleep(for: .seconds(fallbackTimeoutSeconds))
} catch {
return
}
guard !Task.isCancelled else { return }
guard self.isSessionActive else { return }
VoxtLog.warning("Stop recording fallback triggered; forcing session finish.")
if self.transcriptionEngine == .remote {
self.remoteASRTranscriber.discardPendingSessionOutput()
}
self.finishSession(after: 0)
}
armStopRecordingFallback(timeoutSeconds: fallbackTimeoutSeconds)
}

func cancelActiveRecordingSession() {
Expand Down
11 changes: 7 additions & 4 deletions Voxt/App/AppDelegate+RecordingSessionCapture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,7 @@ extension AppDelegate {
whisper.transcribedText = ""
whisper.isModelInitializing = needsModelInitialization
whisper.setPreferredInputDevice(selectedInputDeviceID)
whisper.onPartialTranscription = { [weak self] text in
guard let self, self.shouldHandleCallbacks(for: sessionID) else { return }
self.overlayState.transcribedText = text
}
whisper.onPartialTranscription = nil
whisper.onTranscriptionFinished = { [weak self] text in
self?.stashPendingCompletedHistoryAudioArchive(self?.whisperTranscriber?.consumeCompletedAudioArchiveURL())
self?.processTranscription(text, sessionID: sessionID)
Expand Down Expand Up @@ -335,6 +332,12 @@ extension AppDelegate {
if transcriptionEngine == .mlxAudio, isMLXReady {
mlxTranscriber?.stopRecording()
} else if transcriptionEngine == .whisperKit, isWhisperReady {
if let whisperTranscriber {
VoxtLog.info(
"Issuing Whisper stop. \(whisperTranscriber.debugCaptureStopSummary())",
verbose: true
)
}
whisperTranscriber?.stopRecording()
} else if transcriptionEngine == .remote {
remoteASRTranscriber.stopRecording()
Expand Down
22 changes: 21 additions & 1 deletion Voxt/App/AppDelegate+RecordingSessionTextRouting.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,20 @@ extension AppDelegate {
}

func processTranscription(_ rawText: String, sessionID: UUID) {
guard shouldHandleCallbacks(for: sessionID) else { return }
let callbackDecision = Self.sessionCallbackHandlingDecision(
requestedSessionID: sessionID,
activeSessionID: activeRecordingSessionID,
isSessionCancellationRequested: isSessionCancellationRequested
)
guard callbackDecision == .accept else {
VoxtLog.info(
"""
Dropping transcription callback before processing. reason=\(callbackDecision.logDescription), callbackSessionID=\(sessionID.uuidString), activeSessionID=\(activeRecordingSessionID.uuidString), stopped=\(recordingStoppedAt != nil), endingSessionID=\(currentEndingSessionID?.uuidString ?? "nil"), rawChars=\(rawText.count)
""",
verbose: true
)
return
}
if didCommitSessionOutput {
VoxtLog.info("Ignoring transcription callback because current session output has already been committed.")
return
Expand All @@ -16,6 +29,13 @@ extension AppDelegate {
stopRecordingFallbackTask = nil

transcriptionResultReceivedAt = Date()
if let stoppedAt = recordingStoppedAt {
let stopToResultMs = max(Int(Date().timeIntervalSince(stoppedAt) * 1000), 0)
VoxtLog.info(
"Transcription callback accepted after stop. sessionID=\(sessionID.uuidString), stopToResultMs=\(stopToResultMs), rawChars=\(rawText.count)",
verbose: true
)
}
let displayText = RecordingSessionSupport.normalizedTranscriptionDisplayText(
rawText,
transcriptionEngine: transcriptionEngine,
Expand Down
14 changes: 13 additions & 1 deletion Voxt/App/AppDelegate+SessionTextIO.swift
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,19 @@ extension AppDelegate {
let sessionID = activeRecordingSessionID
let sessionOutputMode = sessionOutputMode
let userMainLanguage = userMainLanguage
guard shouldHandleCallbacks(for: sessionID) else { return }
let callbackDecision = Self.sessionCallbackHandlingDecision(
requestedSessionID: sessionID,
activeSessionID: activeRecordingSessionID,
isSessionCancellationRequested: isSessionCancellationRequested
)
guard callbackDecision == .accept else {
VoxtLog.warning(
"""
Commit transcription abandoned after session invalidation. reason=\(callbackDecision.logDescription), sessionID=\(sessionID.uuidString), activeSessionID=\(activeRecordingSessionID.uuidString), outputMode=\(RecordingSessionSupport.outputLabel(for: sessionOutputMode)), chars=\(text.count), stopped=\(recordingStoppedAt != nil)
"""
)
return
}

VoxtLog.info("Commit transcription entered. characters=\(text.count)")

Expand Down
6 changes: 3 additions & 3 deletions Voxt/App/MeetingStartPlanner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ enum MeetingStartBlockReason: Equatable {
var userMessage: String {
switch self {
case .dictationUnsupported:
return String(localized: "Meeting Notes currently supports Whisper, MLX Audio, and Remote ASR. Direct Dictation is not available for meetings.")
return String(localized: "Meeting Notes currently supports MLX Audio and Remote ASR. Direct Dictation is not available for meetings.")
case .recording(let reason):
return reason.userMessage
case .remoteASRUnavailable:
Expand Down Expand Up @@ -62,12 +62,12 @@ enum MeetingStartPlanner {
}
case .whisperKit:
switch RecordingStartPlanner.resolve(
selectedEngine: .whisperKit,
selectedEngine: .mlxAudio,
mlxModelState: mlxModelState,
whisperModelState: whisperModelState
) {
case .start:
return .start(.whisperKit)
return .start(.mlxAudio)
case .blocked(let reason):
return .blocked(.recording(reason))
}
Expand Down
Loading