Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions Voxt/App/AppDelegate+MeetingHistory.swift
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ extension AppDelegate {
}
VoxtLog.info("Meeting history saved. entryID=\(entry.id.uuidString), kind=\(entry.kind.rawValue)")
meetingDetailWindowManager.closeLiveWindow()
let audioURL = historyStore.meetingAudioURL(for: entry)
let audioURL = historyStore.audioURL(for: entry)
meetingOverlayWindow.hide { [weak self] in
guard let appDelegate = self else { return }
appDelegate.historyStore.reload()
Expand Down Expand Up @@ -102,11 +102,15 @@ extension AppDelegate {
return nil
}

let meetingAudioRelativePath: String?
if let archivedAudioURL = result.archivedAudioURL {
meetingAudioRelativePath = try? historyStore.importMeetingAudioArchive(from: archivedAudioURL)
let audioRelativePath: String?
if historyAudioStorageEnabled, let archivedAudioURL = result.archivedAudioURL {
audioRelativePath = try? historyStore.importAudioArchive(from: archivedAudioURL, kind: .meeting)
} else {
meetingAudioRelativePath = nil
if let archivedAudioURL = result.archivedAudioURL,
FileManager.default.fileExists(atPath: archivedAudioURL.path) {
try? FileManager.default.removeItem(at: archivedAudioURL)
}
audioRelativePath = nil
}

guard let entryID = historyStore.append(
Expand All @@ -132,9 +136,10 @@ extension AppDelegate {
remoteLLMProvider: nil,
remoteLLMModel: nil,
remoteLLMEndpoint: nil,
audioRelativePath: audioRelativePath,
whisperWordTimings: nil,
meetingSegments: persistedSegments,
meetingAudioRelativePath: meetingAudioRelativePath,
meetingAudioRelativePath: audioRelativePath,
dictionaryHitTerms: [],
dictionaryCorrectedTerms: [],
dictionarySuggestedTerms: []
Expand Down
166 changes: 163 additions & 3 deletions Voxt/App/AppDelegate+PreferencesAndHistory.swift
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ extension AppDelegate {
true
}

/// Whether audio archives should be kept alongside history entries.
///
/// Reads `AppPreferenceKey.historyAudioStorageEnabled` from `defaults` and reports `false`
/// whenever the stored object is missing or is not a `Bool`.
var historyAudioStorageEnabled: Bool {
    guard let storedFlag = defaults.object(forKey: AppPreferenceKey.historyAudioStorageEnabled) as? Bool else {
        return false
    }
    return storedFlag
}

/// Dictionary auto-learning flag; this implementation always reports `false`.
var dictionaryAutoLearningEnabled: Bool {
    return false
}
Expand All @@ -282,10 +286,16 @@ extension AppDelegate {
dictionaryCorrectedTerms: [String],
dictionarySuggestedTerms: [DictionarySuggestionSnapshot]
) -> UUID? {
guard historyEnabled else { return nil }
guard historyEnabled else {
discardPendingCompletedHistoryAudio()
return nil
}

let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty else { return nil }
guard !trimmed.isEmpty else {
discardPendingCompletedHistoryAudio()
return nil
}
let trimmedDisplayTitle = displayTitle?.trimmingCharacters(in: .whitespacesAndNewlines)

let transcriptionModel: String
Expand All @@ -308,6 +318,20 @@ extension AppDelegate {
}

let historyKind = resolvedHistoryKind(for: outputMode)
VoxtLog.info(
"History append requested. kind=\(historyKind.rawValue), engine=\(transcriptionEngine.rawValue), historyEnabled=\(historyEnabled), audioStorageEnabled=\(historyAudioStorageEnabled), stashedAudio=\(pendingCompletedHistoryAudioArchiveURL != nil)"
)
let pendingAudioArchiveURL = consumePendingCompletedHistoryAudioURL()
if let pendingAudioArchiveURL {
let exists = FileManager.default.fileExists(atPath: pendingAudioArchiveURL.path)
VoxtLog.info(
"History append consumed pending audio archive. kind=\(historyKind.rawValue), file=\(pendingAudioArchiveURL.lastPathComponent), exists=\(exists)"
)
} else {
VoxtLog.warning(
"History append found no pending audio archive. kind=\(historyKind.rawValue), engine=\(transcriptionEngine.rawValue)"
)
}
let textModelMetadata = resolvedHistoryTextModelMetadata(for: historyKind)

let now = Date()
Expand Down Expand Up @@ -343,6 +367,7 @@ extension AppDelegate {
audioDurationSeconds: audioDuration,
transcriptionProcessingDurationSeconds: processingDuration,
llmDurationSeconds: llmDurationSeconds,
pendingAudioArchiveURL: pendingAudioArchiveURL,
whisperWordTimings: transcriptionEngine == .whisperKit && whisperTimestampsEnabled
? whisperTranscriber?.latestWordTimings
: nil,
Expand All @@ -356,6 +381,11 @@ extension AppDelegate {
return continuedEntryID
}

let audioRelativePath = importConsumedAudioArchiveIfNeeded(
pendingAudioArchiveURL,
kind: historyKind
)

let entryID = historyStore.append(
text: trimmed,
transcriptionEngine: transcriptionEngine.title,
Expand All @@ -379,6 +409,7 @@ extension AppDelegate {
remoteLLMProvider: textModelMetadata.remoteProviderTitle,
remoteLLMModel: textModelMetadata.remoteModelTitle,
remoteLLMEndpoint: textModelMetadata.remoteEndpoint,
audioRelativePath: audioRelativePath,
whisperWordTimings: transcriptionEngine == .whisperKit && whisperTimestampsEnabled
? whisperTranscriber?.latestWordTimings
: nil,
Expand Down Expand Up @@ -410,6 +441,7 @@ extension AppDelegate {
audioDurationSeconds: TimeInterval?,
transcriptionProcessingDurationSeconds: TimeInterval?,
llmDurationSeconds: TimeInterval?,
pendingAudioArchiveURL: URL?,
whisperWordTimings: [WhisperHistoryWordTiming]?,
dictionaryHitTerms: [String],
dictionaryCorrectedTerms: [String],
Expand All @@ -433,6 +465,13 @@ extension AppDelegate {
createdAt: createdAt
)

defer {
if let pendingAudioArchiveURL,
FileManager.default.fileExists(atPath: pendingAudioArchiveURL.path) {
try? FileManager.default.removeItem(at: pendingAudioArchiveURL)
}
}

let mergedEntry = historyStore.updateTranscriptionEntry(
activeEntryID,
text: text,
Expand Down Expand Up @@ -464,7 +503,27 @@ extension AppDelegate {
dictionarySuggestedTerms: mergedSuggestedTerms
)

return mergedEntry == nil ? nil : activeEntryID
guard mergedEntry != nil else { return nil }

if let pendingAudioArchiveURL {
let existingAudioURL = historyStore.audioURL(for: existingEntry)
if let existingAudioURL, FileManager.default.fileExists(atPath: existingAudioURL.path) {
if let mergedAudioURL = try? HistoryAudioArchiveSupport.mergedRewriteArchive(
existingArchiveURL: existingAudioURL,
appendedArchiveURL: pendingAudioArchiveURL
) {
do {
_ = try historyStore.replaceAudioArchive(for: activeEntryID, with: mergedAudioURL)
} catch {
try? FileManager.default.removeItem(at: mergedAudioURL)
}
}
} else {
try? historyStore.replaceAudioArchive(for: activeEntryID, with: pendingAudioArchiveURL)
}
}

return activeEntryID
}

private func resolvedHistoryKind(for outputMode: SessionOutputMode) -> TranscriptionHistoryKind {
Expand Down Expand Up @@ -609,4 +668,105 @@ extension AppDelegate {
/// Resolves the string found for `key` into a string-backed `RawRepresentable` value.
///
/// - Parameters:
///   - key: Key handed to `stringValue(forKey:)`.
///   - defaultValue: Value returned when the looked-up string is not a valid raw value of `T`.
/// - Returns: The parsed value, or `defaultValue` when parsing fails.
private func enumValue<T: RawRepresentable>(forKey key: String, default defaultValue: T?) -> T? where T.RawValue == String {
    guard let parsedValue = T(rawValue: stringValue(forKey: key)) else {
        return defaultValue
    }
    return parsedValue
}

/// Hands out the audio archive that belongs to the history entry being written.
///
/// A URL held in `pendingCompletedHistoryAudioArchiveURL` takes precedence and is cleared
/// before it is returned. Otherwise the transcriber selected by `transcriptionEngine` is
/// asked for its completed archive.
///
/// - Returns: The archive URL, or `nil` when neither the stash nor the transcriber has one.
private func consumePendingCompletedHistoryAudioURL() -> URL? {
    if let stashedURL = pendingCompletedHistoryAudioArchiveURL {
        // Clear the stash before returning so the same URL is not handed out twice.
        pendingCompletedHistoryAudioArchiveURL = nil
        let stashedFileExists = FileManager.default.fileExists(atPath: stashedURL.path)
        VoxtLog.info(
            "Consumed stashed history audio archive. file=\(stashedURL.lastPathComponent), exists=\(stashedFileExists)"
        )
        return stashedURL
    }

    // Nothing stashed: fall back to the transcriber for the active engine.
    let transcriberURL: URL?
    switch transcriptionEngine {
    case .dictation:
        transcriberURL = speechTranscriber.consumeCompletedAudioArchiveURL()
    case .mlxAudio:
        transcriberURL = mlxTranscriber?.consumeCompletedAudioArchiveURL()
    case .whisperKit:
        transcriberURL = whisperTranscriber?.consumeCompletedAudioArchiveURL()
    case .remote:
        transcriberURL = remoteASRTranscriber.consumeCompletedAudioArchiveURL()
    }

    guard let archiveURL = transcriberURL else {
        VoxtLog.warning(
            "No transcriber history audio archive available. engine=\(transcriptionEngine.rawValue)"
        )
        return nil
    }

    let archiveFileExists = FileManager.default.fileExists(atPath: archiveURL.path)
    VoxtLog.info(
        "Consumed transcriber history audio archive. engine=\(transcriptionEngine.rawValue), file=\(archiveURL.lastPathComponent), exists=\(archiveFileExists)"
    )
    return archiveURL
}

/// Drops any audio archive that is still waiting to be attached to a history entry.
///
/// Deletes the stashed archive file when it is still on disk, clears
/// `pendingCompletedHistoryAudioArchiveURL`, and asks the speech, MLX, Whisper and remote
/// transcribers to discard their own completed archives.
func discardPendingCompletedHistoryAudio() {
    if let stashedURL = pendingCompletedHistoryAudioArchiveURL,
       FileManager.default.fileExists(atPath: stashedURL.path) {
        do {
            try FileManager.default.removeItem(at: stashedURL)
        } catch {
            // Deletion stays best-effort, but a recording left behind on disk should be
            // visible in the logs instead of being swallowed silently.
            VoxtLog.warning(
                "Failed to remove pending history audio archive. file=\(stashedURL.lastPathComponent), error=\(error.localizedDescription)"
            )
        }
    }
    // The stash is cleared even when deletion failed so a stale URL is never consumed later.
    pendingCompletedHistoryAudioArchiveURL = nil
    speechTranscriber.discardCompletedAudioArchive()
    mlxTranscriber?.discardCompletedAudioArchive()
    whisperTranscriber?.discardCompletedAudioArchive()
    remoteASRTranscriber.discardCompletedAudioArchive()
}

/// Remembers `url` as the audio archive awaiting the next history append.
///
/// A `nil` URL is logged and ignored, leaving any existing stash untouched. When a different
/// file is already stashed and still on disk, that file is removed before it is replaced.
///
/// - Parameter url: Archive URL to stash, if any.
func stashPendingCompletedHistoryAudioArchive(_ url: URL?) {
    guard let incomingURL = url else {
        VoxtLog.warning("Pending history audio archive stash skipped because URL was nil.")
        return
    }

    if let previousURL = pendingCompletedHistoryAudioArchiveURL {
        // Only touch the disk when the previous stash points at a different file.
        let pointsElsewhere = previousURL.path != incomingURL.path
        if pointsElsewhere && FileManager.default.fileExists(atPath: previousURL.path) {
            try? FileManager.default.removeItem(at: previousURL)
        }
    }

    pendingCompletedHistoryAudioArchiveURL = incomingURL

    let incomingFileExists = FileManager.default.fileExists(atPath: incomingURL.path)
    let incomingFileSize = (try? incomingURL.resourceValues(forKeys: [.fileSizeKey]).fileSize) ?? 0
    VoxtLog.info("Pending history audio archive stashed. file=\(incomingURL.lastPathComponent), exists=\(incomingFileExists), size=\(incomingFileSize)")
}

/// Imports a consumed audio archive into the history store when audio storage is enabled.
///
/// The source file is removed on every exit path once a non-nil URL has been supplied,
/// regardless of whether the import happened, was skipped, or failed.
///
/// - Parameters:
///   - sourceURL: Archive to import; `nil` is logged and skipped.
///   - kind: History kind forwarded to `historyStore.importAudioArchive(from:kind:)`.
/// - Returns: The relative path reported by the history store, or `nil` when the import was
///   skipped or threw.
private func importConsumedAudioArchiveIfNeeded(
    _ sourceURL: URL?,
    kind: TranscriptionHistoryKind
) -> String? {
    guard let archiveURL = sourceURL else {
        VoxtLog.warning("History audio import skipped because source URL was nil. kind=\(kind.rawValue)")
        return nil
    }

    // Whatever happens below, the source file must not outlive this call.
    defer {
        let fileManager = FileManager.default
        if fileManager.fileExists(atPath: archiveURL.path) {
            try? fileManager.removeItem(at: archiveURL)
        }
    }

    let archiveExists = FileManager.default.fileExists(atPath: archiveURL.path)

    if !historyAudioStorageEnabled {
        VoxtLog.info(
            "History audio import skipped because storage is disabled. kind=\(kind.rawValue), file=\(archiveURL.lastPathComponent), exists=\(archiveExists)"
        )
        return nil
    }

    if !archiveExists {
        VoxtLog.warning(
            "History audio import skipped because source file does not exist. kind=\(kind.rawValue), file=\(archiveURL.lastPathComponent)"
        )
        return nil
    }

    do {
        let importedPath = try historyStore.importAudioArchive(from: archiveURL, kind: kind)
        VoxtLog.info(
            "History audio import succeeded. kind=\(kind.rawValue), file=\(archiveURL.lastPathComponent), relativePath=\(importedPath)"
        )
        return importedPath
    } catch {
        VoxtLog.warning(
            "History audio import failed. kind=\(kind.rawValue), source=\(archiveURL.lastPathComponent), error=\(error.localizedDescription)"
        )
        return nil
    }
}
}
15 changes: 14 additions & 1 deletion Voxt/App/AppDelegate+RecordingSession.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ extension AppDelegate {
beginRecording(outputMode: .rewrite)
}

func releaseResidualRecordingResources(reason: String) {
func releaseResidualRecordingResources(
reason: String,
preservePendingHistoryAudio: Bool = false
) {
let speechWasRecording = speechTranscriber.isRecording
let mlxWasRecording = mlxTranscriber?.isRecording == true
let whisperWasRecording = whisperTranscriber?.isRecording == true
Expand Down Expand Up @@ -39,6 +42,11 @@ extension AppDelegate {
mlxTranscriber?.stopRecording()
whisperTranscriber?.stopRecording()
remoteASRTranscriber.discardPendingSessionOutput()
if preservePendingHistoryAudio {
VoxtLog.info("Preserving pending history audio during residual resource release. reason=\(reason)")
} else {
discardPendingCompletedHistoryAudio()
}

overlayState.isRecording = false
overlayState.audioLevel = 0
Expand Down Expand Up @@ -472,6 +480,7 @@ extension AppDelegate {
mlx.transcribedText = ""
mlx.setPreferredInputDevice(selectedInputDeviceID)
mlx.onTranscriptionFinished = { [weak self] text in
self?.stashPendingCompletedHistoryAudioArchive(self?.mlxTranscriber?.consumeCompletedAudioArchiveURL())
self?.processTranscription(text, sessionID: sessionID)
}
overlayState.bind(to: mlx)
Expand Down Expand Up @@ -503,6 +512,7 @@ extension AppDelegate {
let sessionID = self.activeRecordingSessionID
self.speechTranscriber.transcribedText = ""
self.speechTranscriber.onTranscriptionFinished = { [weak self] text in
self?.stashPendingCompletedHistoryAudioArchive(self?.speechTranscriber.consumeCompletedAudioArchiveURL())
self?.processTranscription(text, sessionID: sessionID)
}
self.speechTranscriber.startRecording()
Expand Down Expand Up @@ -540,6 +550,7 @@ extension AppDelegate {
self.overlayState.transcribedText = text
}
whisper.onTranscriptionFinished = { [weak self] text in
self?.stashPendingCompletedHistoryAudioArchive(self?.whisperTranscriber?.consumeCompletedAudioArchiveURL())
self?.processTranscription(text, sessionID: sessionID)
}
overlayState.bind(to: whisper)
Expand Down Expand Up @@ -612,6 +623,7 @@ extension AppDelegate {
let sessionID = self.activeRecordingSessionID
self.remoteASRTranscriber.transcribedText = ""
self.remoteASRTranscriber.onTranscriptionFinished = { [weak self] text in
self?.stashPendingCompletedHistoryAudioArchive(self?.remoteASRTranscriber.consumeCompletedAudioArchiveURL())
self?.processTranscription(text, sessionID: sessionID)
}
self.remoteASRTranscriber.onStartFailure = { [weak self] message in
Expand All @@ -638,6 +650,7 @@ extension AppDelegate {
if transcriptionEngine == .remote {
remoteASRTranscriber.discardPendingSessionOutput()
}
discardPendingCompletedHistoryAudio()
isSessionActive = false
isSessionCancellationRequested = false
didCommitSessionOutput = false
Expand Down
5 changes: 4 additions & 1 deletion Voxt/App/AppDelegate+SessionEndFlow.swift
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ extension AppDelegate {
/// Fixed name string reported by this type.
var name: String {
    return "releaseResidualCapture"
}

func run(delegate: AppDelegate) {
delegate.releaseResidualRecordingResources(reason: "session-end-pipeline")
delegate.releaseResidualRecordingResources(
reason: "session-end-pipeline",
preservePendingHistoryAudio: true
)
}
}

Expand Down
1 change: 1 addition & 0 deletions Voxt/App/AppDelegate+TranscriptionDetail.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ extension AppDelegate {
func showTranscriptionDetailWindow(for entry: TranscriptionHistoryEntry) {
TranscriptionDetailWindowManager.shared.present(
entry: entry,
audioURL: historyStore.audioURL(for: entry),
followUpStatusProvider: { @MainActor entry in
self.transcriptionFollowUpProviderStatus(for: entry)
},
Expand Down
2 changes: 2 additions & 0 deletions Voxt/App/VoxtApp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ class AppDelegate: NSObject, NSApplicationDelegate {
var currentEndingSessionID: UUID?
var lastCompletedSessionEndSessionID: UUID?
var isSessionCancellationRequested = false
var pendingCompletedHistoryAudioArchiveURL: URL?
var latestInjectableOutputText: String?
var sessionTargetApplicationPID: pid_t?
var sessionTargetApplicationBundleID: String?
Expand Down Expand Up @@ -278,6 +279,7 @@ class AppDelegate: NSObject, NSApplicationDelegate {
AppPreferenceKey.historyEnabled: true,
AppPreferenceKey.historyCleanupEnabled: true,
AppPreferenceKey.historyRetentionPeriod: HistoryRetentionPeriod.ninetyDays.rawValue,
AppPreferenceKey.historyAudioStorageEnabled: false,
AppPreferenceKey.dictionaryRecognitionEnabled: true,
AppPreferenceKey.dictionaryAutoLearningEnabled: false,
AppPreferenceKey.dictionaryHighConfidenceCorrectionEnabled: true,
Expand Down
Loading