diff --git a/Voxt/App/AppDelegate+MeetingHistory.swift b/Voxt/App/AppDelegate+MeetingHistory.swift index 43433cb..6610295 100644 --- a/Voxt/App/AppDelegate+MeetingHistory.swift +++ b/Voxt/App/AppDelegate+MeetingHistory.swift @@ -41,7 +41,7 @@ extension AppDelegate { } VoxtLog.info("Meeting history saved. entryID=\(entry.id.uuidString), kind=\(entry.kind.rawValue)") meetingDetailWindowManager.closeLiveWindow() - let audioURL = historyStore.meetingAudioURL(for: entry) + let audioURL = historyStore.audioURL(for: entry) meetingOverlayWindow.hide { [weak self] in guard let appDelegate = self else { return } appDelegate.historyStore.reload() @@ -102,11 +102,15 @@ extension AppDelegate { return nil } - let meetingAudioRelativePath: String? - if let archivedAudioURL = result.archivedAudioURL { - meetingAudioRelativePath = try? historyStore.importMeetingAudioArchive(from: archivedAudioURL) + let audioRelativePath: String? + if historyAudioStorageEnabled, let archivedAudioURL = result.archivedAudioURL { + audioRelativePath = try? historyStore.importAudioArchive(from: archivedAudioURL, kind: .meeting) } else { - meetingAudioRelativePath = nil + if let archivedAudioURL = result.archivedAudioURL, + FileManager.default.fileExists(atPath: archivedAudioURL.path) { + try? FileManager.default.removeItem(at: archivedAudioURL) + } + audioRelativePath = nil } guard let entryID = historyStore.append( @@ -132,9 +136,10 @@ extension AppDelegate { remoteLLMProvider: nil, remoteLLMModel: nil, remoteLLMEndpoint: nil, + audioRelativePath: audioRelativePath, whisperWordTimings: nil, meetingSegments: persistedSegments, - meetingAudioRelativePath: meetingAudioRelativePath, + meetingAudioRelativePath: audioRelativePath, dictionaryHitTerms: [], dictionaryCorrectedTerms: [], dictionarySuggestedTerms: [] diff --git a/Voxt/App/AppDelegate+PreferencesAndHistory.swift b/Voxt/App/AppDelegate+PreferencesAndHistory.swift index adc87a1..3a99931 100644 --- a/Voxt/App/AppDelegate+PreferencesAndHistory.swift +++ b/Voxt/App/AppDelegate+PreferencesAndHistory.swift @@ -265,6 +265,10 @@ extension AppDelegate { true } + var historyAudioStorageEnabled: Bool { + defaults.object(forKey: AppPreferenceKey.historyAudioStorageEnabled) as? Bool ?? false + } + var dictionaryAutoLearningEnabled: Bool { false } @@ -282,10 +286,16 @@ extension AppDelegate { dictionaryCorrectedTerms: [String], dictionarySuggestedTerms: [DictionarySuggestionSnapshot] ) -> UUID? { - guard historyEnabled else { return nil } + guard historyEnabled else { + discardPendingCompletedHistoryAudio() + return nil + } let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmed.isEmpty else { return nil } + guard !trimmed.isEmpty else { + discardPendingCompletedHistoryAudio() + return nil + } let trimmedDisplayTitle = displayTitle?.trimmingCharacters(in: .whitespacesAndNewlines) let transcriptionModel: String @@ -308,6 +318,20 @@ extension AppDelegate { } let historyKind = resolvedHistoryKind(for: outputMode) + VoxtLog.info( + "History append requested. kind=\(historyKind.rawValue), engine=\(transcriptionEngine.rawValue), historyEnabled=\(historyEnabled), audioStorageEnabled=\(historyAudioStorageEnabled), stashedAudio=\(pendingCompletedHistoryAudioArchiveURL != nil)" + ) + let pendingAudioArchiveURL = consumePendingCompletedHistoryAudioURL() + if let pendingAudioArchiveURL { + let exists = FileManager.default.fileExists(atPath: pendingAudioArchiveURL.path) + VoxtLog.info( + "History append consumed pending audio archive. kind=\(historyKind.rawValue), file=\(pendingAudioArchiveURL.lastPathComponent), exists=\(exists)" + ) + } else { + VoxtLog.warning( + "History append found no pending audio archive. kind=\(historyKind.rawValue), engine=\(transcriptionEngine.rawValue)" + ) + } let textModelMetadata = resolvedHistoryTextModelMetadata(for: historyKind) let now = Date() @@ -343,6 +367,7 @@ extension AppDelegate { audioDurationSeconds: audioDuration, transcriptionProcessingDurationSeconds: processingDuration, llmDurationSeconds: llmDurationSeconds, + pendingAudioArchiveURL: pendingAudioArchiveURL, whisperWordTimings: transcriptionEngine == .whisperKit && whisperTimestampsEnabled ? whisperTranscriber?.latestWordTimings : nil, @@ -356,6 +381,11 @@ extension AppDelegate { return continuedEntryID } + let audioRelativePath = importConsumedAudioArchiveIfNeeded( + pendingAudioArchiveURL, + kind: historyKind + ) + let entryID = historyStore.append( text: trimmed, transcriptionEngine: transcriptionEngine.title, @@ -379,6 +409,7 @@ extension AppDelegate { remoteLLMProvider: textModelMetadata.remoteProviderTitle, remoteLLMModel: textModelMetadata.remoteModelTitle, remoteLLMEndpoint: textModelMetadata.remoteEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: transcriptionEngine == .whisperKit && whisperTimestampsEnabled ? whisperTranscriber?.latestWordTimings : nil, @@ -410,6 +441,7 @@ extension AppDelegate { audioDurationSeconds: TimeInterval?, transcriptionProcessingDurationSeconds: TimeInterval?, llmDurationSeconds: TimeInterval?, + pendingAudioArchiveURL: URL?, whisperWordTimings: [WhisperHistoryWordTiming]?, dictionaryHitTerms: [String], dictionaryCorrectedTerms: [String], @@ -433,6 +465,13 @@ extension AppDelegate { createdAt: createdAt ) + defer { + if let pendingAudioArchiveURL, + FileManager.default.fileExists(atPath: pendingAudioArchiveURL.path) { + try? FileManager.default.removeItem(at: pendingAudioArchiveURL) + } + } + let mergedEntry = historyStore.updateTranscriptionEntry( activeEntryID, text: text, @@ -464,7 +503,27 @@ extension AppDelegate { dictionarySuggestedTerms: mergedSuggestedTerms ) - return mergedEntry == nil ? nil : activeEntryID + guard mergedEntry != nil else { return nil } + + if let pendingAudioArchiveURL { + let existingAudioURL = historyStore.audioURL(for: existingEntry) + if let existingAudioURL, FileManager.default.fileExists(atPath: existingAudioURL.path) { + if let mergedAudioURL = try? HistoryAudioArchiveSupport.mergedRewriteArchive( + existingArchiveURL: existingAudioURL, + appendedArchiveURL: pendingAudioArchiveURL + ) { + do { + _ = try historyStore.replaceAudioArchive(for: activeEntryID, with: mergedAudioURL) + } catch { + try? FileManager.default.removeItem(at: mergedAudioURL) + } + } + } else { + try? historyStore.replaceAudioArchive(for: activeEntryID, with: pendingAudioArchiveURL) + } + } + + return activeEntryID } private func resolvedHistoryKind(for outputMode: SessionOutputMode) -> TranscriptionHistoryKind { @@ -609,4 +668,105 @@ extension AppDelegate { private func enumValue(forKey key: String, default defaultValue: T?) -> T? where T.RawValue == String { T(rawValue: stringValue(forKey: key)) ?? defaultValue } + + private func consumePendingCompletedHistoryAudioURL() -> URL? { + if let pendingCompletedHistoryAudioArchiveURL { + self.pendingCompletedHistoryAudioArchiveURL = nil + let exists = FileManager.default.fileExists(atPath: pendingCompletedHistoryAudioArchiveURL.path) + VoxtLog.info( + "Consumed stashed history audio archive. file=\(pendingCompletedHistoryAudioArchiveURL.lastPathComponent), exists=\(exists)" + ) + return pendingCompletedHistoryAudioArchiveURL + } + let consumedURL: URL? + switch transcriptionEngine { + case .dictation: + consumedURL = speechTranscriber.consumeCompletedAudioArchiveURL() + case .mlxAudio: + consumedURL = mlxTranscriber?.consumeCompletedAudioArchiveURL() + case .whisperKit: + consumedURL = whisperTranscriber?.consumeCompletedAudioArchiveURL() + case .remote: + consumedURL = remoteASRTranscriber.consumeCompletedAudioArchiveURL() + } + if let consumedURL { + let exists = FileManager.default.fileExists(atPath: consumedURL.path) + VoxtLog.info( + "Consumed transcriber history audio archive. engine=\(transcriptionEngine.rawValue), file=\(consumedURL.lastPathComponent), exists=\(exists)" + ) + } else { + VoxtLog.warning( + "No transcriber history audio archive available. engine=\(transcriptionEngine.rawValue)" + ) + } + return consumedURL + } + + func discardPendingCompletedHistoryAudio() { + if let pendingCompletedHistoryAudioArchiveURL, + FileManager.default.fileExists(atPath: pendingCompletedHistoryAudioArchiveURL.path) { + try? FileManager.default.removeItem(at: pendingCompletedHistoryAudioArchiveURL) + } + pendingCompletedHistoryAudioArchiveURL = nil + speechTranscriber.discardCompletedAudioArchive() + mlxTranscriber?.discardCompletedAudioArchive() + whisperTranscriber?.discardCompletedAudioArchive() + remoteASRTranscriber.discardCompletedAudioArchive() + } + + func stashPendingCompletedHistoryAudioArchive(_ url: URL?) { + guard let url else { + VoxtLog.warning("Pending history audio archive stash skipped because URL was nil.") + return + } + if let pendingCompletedHistoryAudioArchiveURL, + pendingCompletedHistoryAudioArchiveURL.path != url.path, + FileManager.default.fileExists(atPath: pendingCompletedHistoryAudioArchiveURL.path) { + try? FileManager.default.removeItem(at: pendingCompletedHistoryAudioArchiveURL) + } + pendingCompletedHistoryAudioArchiveURL = url + let exists = FileManager.default.fileExists(atPath: url.path) + let fileSize = (try? url.resourceValues(forKeys: [.fileSizeKey]).fileSize) ?? 0 + VoxtLog.info("Pending history audio archive stashed. file=\(url.lastPathComponent), exists=\(exists), size=\(fileSize)") + } + + private func importConsumedAudioArchiveIfNeeded( + _ sourceURL: URL?, + kind: TranscriptionHistoryKind + ) -> String? { + guard let sourceURL else { + VoxtLog.warning("History audio import skipped because source URL was nil. kind=\(kind.rawValue)") + return nil + } + defer { + if FileManager.default.fileExists(atPath: sourceURL.path) { + try? FileManager.default.removeItem(at: sourceURL) + } + } + let exists = FileManager.default.fileExists(atPath: sourceURL.path) + guard historyAudioStorageEnabled else { + VoxtLog.info( + "History audio import skipped because storage is disabled. kind=\(kind.rawValue), file=\(sourceURL.lastPathComponent), exists=\(exists)" + ) + return nil + } + guard exists else { + VoxtLog.warning( + "History audio import skipped because source file does not exist. kind=\(kind.rawValue), file=\(sourceURL.lastPathComponent)" + ) + return nil + } + do { + let relativePath = try historyStore.importAudioArchive(from: sourceURL, kind: kind) + VoxtLog.info( + "History audio import succeeded. kind=\(kind.rawValue), file=\(sourceURL.lastPathComponent), relativePath=\(relativePath)" + ) + return relativePath + } catch { + VoxtLog.warning( + "History audio import failed. kind=\(kind.rawValue), source=\(sourceURL.lastPathComponent), error=\(error.localizedDescription)" + ) + return nil + } + } } diff --git a/Voxt/App/AppDelegate+RecordingSession.swift b/Voxt/App/AppDelegate+RecordingSession.swift index b29ceff..60842dc 100644 --- a/Voxt/App/AppDelegate+RecordingSession.swift +++ b/Voxt/App/AppDelegate+RecordingSession.swift @@ -11,7 +11,10 @@ extension AppDelegate { beginRecording(outputMode: .rewrite) } - func releaseResidualRecordingResources(reason: String) { + func releaseResidualRecordingResources( + reason: String, + preservePendingHistoryAudio: Bool = false + ) { let speechWasRecording = speechTranscriber.isRecording let mlxWasRecording = mlxTranscriber?.isRecording == true let whisperWasRecording = whisperTranscriber?.isRecording == true @@ -39,6 +42,11 @@ extension AppDelegate { mlxTranscriber?.stopRecording() whisperTranscriber?.stopRecording() remoteASRTranscriber.discardPendingSessionOutput() + if preservePendingHistoryAudio { + VoxtLog.info("Preserving pending history audio during residual resource release. reason=\(reason)") + } else { + discardPendingCompletedHistoryAudio() + } overlayState.isRecording = false overlayState.audioLevel = 0 @@ -472,6 +480,7 @@ extension AppDelegate { mlx.transcribedText = "" mlx.setPreferredInputDevice(selectedInputDeviceID) mlx.onTranscriptionFinished = { [weak self] text in + self?.stashPendingCompletedHistoryAudioArchive(self?.mlxTranscriber?.consumeCompletedAudioArchiveURL()) self?.processTranscription(text, sessionID: sessionID) } overlayState.bind(to: mlx) @@ -503,6 +512,7 @@ extension AppDelegate { let sessionID = self.activeRecordingSessionID self.speechTranscriber.transcribedText = "" self.speechTranscriber.onTranscriptionFinished = { [weak self] text in + self?.stashPendingCompletedHistoryAudioArchive(self?.speechTranscriber.consumeCompletedAudioArchiveURL()) self?.processTranscription(text, sessionID: sessionID) } self.speechTranscriber.startRecording() @@ -540,6 +550,7 @@ extension AppDelegate { self.overlayState.transcribedText = text } whisper.onTranscriptionFinished = { [weak self] text in + self?.stashPendingCompletedHistoryAudioArchive(self?.whisperTranscriber?.consumeCompletedAudioArchiveURL()) self?.processTranscription(text, sessionID: sessionID) } overlayState.bind(to: whisper) @@ -612,6 +623,7 @@ extension AppDelegate { let sessionID = self.activeRecordingSessionID self.remoteASRTranscriber.transcribedText = "" self.remoteASRTranscriber.onTranscriptionFinished = { [weak self] text in + self?.stashPendingCompletedHistoryAudioArchive(self?.remoteASRTranscriber.consumeCompletedAudioArchiveURL()) self?.processTranscription(text, sessionID: sessionID) } self.remoteASRTranscriber.onStartFailure = { [weak self] message in @@ -638,6 +650,7 @@ extension AppDelegate { if transcriptionEngine == .remote { remoteASRTranscriber.discardPendingSessionOutput() } + discardPendingCompletedHistoryAudio() isSessionActive = false isSessionCancellationRequested = false didCommitSessionOutput = false diff --git a/Voxt/App/AppDelegate+SessionEndFlow.swift b/Voxt/App/AppDelegate+SessionEndFlow.swift index 2278989..f2100cf 100644 --- a/Voxt/App/AppDelegate+SessionEndFlow.swift +++ b/Voxt/App/AppDelegate+SessionEndFlow.swift @@ -82,7 +82,10 @@ extension AppDelegate { var name: String { "releaseResidualCapture" } func run(delegate: AppDelegate) { - delegate.releaseResidualRecordingResources(reason: "session-end-pipeline") + delegate.releaseResidualRecordingResources( + reason: "session-end-pipeline", + preservePendingHistoryAudio: true + ) } } diff --git a/Voxt/App/AppDelegate+TranscriptionDetail.swift b/Voxt/App/AppDelegate+TranscriptionDetail.swift index 7586836..18e795b 100644 --- a/Voxt/App/AppDelegate+TranscriptionDetail.swift +++ b/Voxt/App/AppDelegate+TranscriptionDetail.swift @@ -18,6 +18,7 @@ extension AppDelegate { func showTranscriptionDetailWindow(for entry: TranscriptionHistoryEntry) { TranscriptionDetailWindowManager.shared.present( entry: entry, + audioURL: historyStore.audioURL(for: entry), followUpStatusProvider: { @MainActor entry in self.transcriptionFollowUpProviderStatus(for: entry) }, diff --git a/Voxt/App/VoxtApp.swift b/Voxt/App/VoxtApp.swift index a53b421..d64ff49 100644 --- a/Voxt/App/VoxtApp.swift +++ b/Voxt/App/VoxtApp.swift @@ -186,6 +186,7 @@ class AppDelegate: NSObject, NSApplicationDelegate { var currentEndingSessionID: UUID? var lastCompletedSessionEndSessionID: UUID? var isSessionCancellationRequested = false + var pendingCompletedHistoryAudioArchiveURL: URL? var latestInjectableOutputText: String? var sessionTargetApplicationPID: pid_t? var sessionTargetApplicationBundleID: String? @@ -278,6 +279,7 @@ class AppDelegate: NSObject, NSApplicationDelegate { AppPreferenceKey.historyEnabled: true, AppPreferenceKey.historyCleanupEnabled: true, AppPreferenceKey.historyRetentionPeriod: HistoryRetentionPeriod.ninetyDays.rawValue, + AppPreferenceKey.historyAudioStorageEnabled: false, AppPreferenceKey.dictionaryRecognitionEnabled: true, AppPreferenceKey.dictionaryAutoLearningEnabled: false, AppPreferenceKey.dictionaryHighConfidenceCorrectionEnabled: true, diff --git a/Voxt/Meeting/MeetingAudioArchive.swift b/Voxt/Meeting/MeetingAudioArchive.swift index fbd132a..0339dee 100644 --- a/Voxt/Meeting/MeetingAudioArchive.swift +++ b/Voxt/Meeting/MeetingAudioArchive.swift @@ -2,7 +2,7 @@ import Foundation import WhisperKit actor MeetingAudioArchive { - private let targetSampleRate: Double = Double(WhisperKit.sampleRate) + private let targetSampleRate: Double = HistoryAudioArchiveSupport.targetSampleRate private var meSamples: [Float] = [] private var themSamples: [Float] = [] @@ -27,12 +27,11 @@ actor MeetingAudioArchive { func exportWAV(to destinationURL: URL) throws -> Bool { let mixed = mixedSamples() - guard !mixed.isEmpty else { return false } - let data = Self.wavData(for: mixed, sampleRate: Int(targetSampleRate)) - let directory = destinationURL.deletingLastPathComponent() - try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) - try data.write(to: destinationURL, options: .atomic) - return true + return try HistoryAudioArchiveSupport.exportWAV( + samples: mixed, + sampleRate: targetSampleRate, + to: destinationURL + ) } func reset() { @@ -54,45 +53,6 @@ actor MeetingAudioArchive { return output } - private static func wavData(for samples: [Float], sampleRate: Int) -> Data { - let channelCount: UInt16 = 1 - let bitsPerSample: UInt16 = 16 - let byteRate = UInt32(sampleRate) * UInt32(channelCount) * UInt32(bitsPerSample / 8) - let blockAlign = channelCount * (bitsPerSample / 8) - - var pcmData = Data(capacity: samples.count * 2) - for sample in samples { - let clamped = max(-1, min(1, sample)) - var value = Int16((clamped * Float(Int16.max)).rounded()) - pcmData.append(Data(bytes: &value, count: MemoryLayout.size)) - } - - let riffChunkSize = UInt32(36 + pcmData.count) - let dataChunkSize = UInt32(pcmData.count) - - var data = Data() - data.append("RIFF".data(using: .ascii)!) - data.append(Self.bytes(of: riffChunkSize)) - data.append("WAVE".data(using: .ascii)!) - data.append("fmt ".data(using: .ascii)!) - data.append(Self.bytes(of: UInt32(16))) - data.append(Self.bytes(of: UInt16(1))) - data.append(Self.bytes(of: channelCount)) - data.append(Self.bytes(of: UInt32(sampleRate))) - data.append(Self.bytes(of: byteRate)) - data.append(Self.bytes(of: blockAlign)) - data.append(Self.bytes(of: bitsPerSample)) - data.append("data".data(using: .ascii)!) - data.append(Self.bytes(of: dataChunkSize)) - data.append(pcmData) - return data - } - - private static func bytes(of value: T) -> Data { - var mutableValue = value - return withUnsafeBytes(of: &mutableValue) { Data($0) } - } - private static func write(_ samples: [Float], at startIndex: Int, to track: inout [Float]) { guard !samples.isEmpty else { return } diff --git a/Voxt/Settings/AppPreferenceKey.swift b/Voxt/Settings/AppPreferenceKey.swift index dcb4899..d4dad7a 100644 --- a/Voxt/Settings/AppPreferenceKey.swift +++ b/Voxt/Settings/AppPreferenceKey.swift @@ -95,6 +95,9 @@ enum AppPreferenceKey { static let historyEnabled = "historyEnabled" static let historyCleanupEnabled = "historyCleanupEnabled" static let historyRetentionPeriod = "historyRetentionPeriod" + static let historyAudioStorageEnabled = "historyAudioStorageEnabled" + static let historyAudioStorageRootPath = "historyAudioStorageRootPath" + static let historyAudioStorageRootBookmark = "historyAudioStorageRootBookmark" static let dictionaryRecognitionEnabled = "dictionaryRecognitionEnabled" static let dictionaryAutoLearningEnabled = "dictionaryAutoLearningEnabled" static let dictionaryHighConfidenceCorrectionEnabled = "dictionaryHighConfidenceCorrectionEnabled" diff --git a/Voxt/Settings/HistorySettingsComponents.swift b/Voxt/Settings/HistorySettingsComponents.swift index c96ef67..ccf8a5a 100644 --- a/Voxt/Settings/HistorySettingsComponents.swift +++ b/Voxt/Settings/HistorySettingsComponents.swift @@ -66,7 +66,7 @@ struct HistoryRow: View { @Environment(\.locale) private var locale let entry: TranscriptionHistoryEntry - let meetingAudioURL: URL? + let audioURL: URL? let isCopied: Bool let onCopy: () -> Void let onDelete: () -> Void @@ -128,7 +128,7 @@ struct HistoryRow: View { } .buttonStyle(.plain) .popover(isPresented: $showModelInfo, arrowEdge: .trailing) { - HistoryInfoPopover(entry: entry, locale: locale) + HistoryInfoPopover(entry: entry, audioURL: audioURL, locale: locale) } if supportsDetail { @@ -268,7 +268,7 @@ struct HistoryRow: View { } MeetingDetailWindowManager.shared.presentHistoryMeeting( entry: entry, - audioURL: meetingAudioURL, + audioURL: audioURL, initialSummarySettings: appDelegate.currentMeetingSummarySettingsSnapshot(), summaryModelOptionsProvider: { @MainActor in appDelegate.meetingSummaryModelOptions() @@ -314,11 +314,13 @@ struct HistoryRow: View { private struct HistoryInfoPopover: View { let entry: TranscriptionHistoryEntry + let audioURL: URL? let locale: Locale var body: some View { TranscriptionDetailContentView( entry: entry, + audioURL: audioURL, locale: locale, style: .popover ) diff --git a/Voxt/Settings/HistorySettingsView.swift b/Voxt/Settings/HistorySettingsView.swift index 4e82782..164514a 100644 --- a/Voxt/Settings/HistorySettingsView.swift +++ b/Voxt/Settings/HistorySettingsView.swift @@ -1,10 +1,12 @@ import SwiftUI +import AppKit struct HistorySettingsView: View { private static let pageSize = 40 @AppStorage(AppPreferenceKey.historyCleanupEnabled) private var historyCleanupEnabled = true @AppStorage(AppPreferenceKey.historyRetentionPeriod) private var historyRetentionPeriodRaw = HistoryRetentionPeriod.ninetyDays.rawValue + @AppStorage(AppPreferenceKey.historyAudioStorageEnabled) private var historyAudioStorageEnabled = false @ObservedObject var historyStore: TranscriptionHistoryStore @ObservedObject var noteStore: VoxtNoteStore @@ -15,6 +17,11 @@ struct HistorySettingsView: View { @State private var copiedNoteID: UUID? @State private var selectedFilter: HistoryFilterTab = .transcription @State private var visibleItemLimit = pageSize + @State private var isHistoryAudioSettingsPresented = false + @State private var historyAudioStorageDisplayPath = "" + @State private var historyAudioStorageSelectionError: String? + @State private var historyAudioExportResultMessage: String? + @State private var historyAudioStorageStats = HistoryAudioStorageStats(storedFileCount: 0, totalBytes: 0) private var historyRetentionPeriod: HistoryRetentionPeriod { HistoryRetentionPeriod(rawValue: historyRetentionPeriodRaw) ?? .ninetyDays @@ -86,6 +93,14 @@ struct HistorySettingsView: View { HStack(alignment: .center, spacing: 12) { HistoryFilterTabPicker(selectedTab: $selectedFilter) Spacer(minLength: 12) + Button { + historyAudioStorageSelectionError = nil + historyAudioExportResultMessage = nil + isHistoryAudioSettingsPresented = true + } label: { + Image(systemName: "gearshape") + } + .buttonStyle(SettingsCompactIconButtonStyle()) Button(String(localized: "Clean All"), role: .destructive) { copiedEntryID = nil copiedNoteID = nil @@ -160,7 +175,7 @@ struct HistorySettingsView: View { ForEach(visibleEntries) { entry in HistoryRow( entry: entry, - meetingAudioURL: historyStore.meetingAudioURL(for: entry), + audioURL: historyStore.audioURL(for: entry), isCopied: copiedEntryID == entry.id, onCopy: { copyStringToPasteboard(entry.text) @@ -212,11 +227,16 @@ struct HistorySettingsView: View { } } .frame(maxHeight: .infinity, alignment: .top) + .sheet(isPresented: $isHistoryAudioSettingsPresented) { + historyAudioSettingsSheet + } .onAppear { if !HistoryRetentionPeriod.allCases.contains(where: { $0.rawValue == historyRetentionPeriodRaw }) { historyRetentionPeriodRaw = HistoryRetentionPeriod.ninetyDays.rawValue } resetVisibleItemLimit() + refreshHistoryAudioStorageDisplayPath() + refreshHistoryAudioStorageStats() historyStore.reloadAsync() } .onChange(of: historyCleanupEnabled) { _, _ in @@ -235,6 +255,7 @@ struct HistorySettingsView: View { } .onReceive(historyStore.$entries) { _ in visibleItemLimit = min(max(visibleItemLimit, Self.pageSize), max(filteredEntries.count, Self.pageSize)) + refreshHistoryAudioStorageStats() } .onReceive(noteStore.$items) { _ in visibleItemLimit = min(max(visibleItemLimit, Self.pageSize), max(allNotes.count, Self.pageSize)) @@ -270,4 +291,175 @@ struct HistorySettingsView: View { guard hasMoreFilteredEntries else { return } visibleItemLimit = min(visibleItemLimit + Self.pageSize, filteredEntries.count) } + + private var historyAudioSettingsSheet: some View { + VStack(alignment: .leading, spacing: 16) { + Text(String(localized: "History Audio Settings")) + .font(.title3.weight(.semibold)) + + GeneralSettingsCard(title: "Audio Storage") { + Toggle(String(localized: "Save history audio"), isOn: $historyAudioStorageEnabled) + + if historyAudioStorageEnabled { + HStack(alignment: .firstTextBaseline, spacing: 10) { + Text(String(localized: "Storage Path")) + .foregroundStyle(.secondary) + Spacer() + Button(action: openHistoryAudioStorageInFinder) { + HStack(spacing: 6) { + Image(systemName: "folder") + .font(.caption) + Text( + historyAudioStorageDisplayPath.isEmpty + ? HistoryAudioStorageDirectoryManager.defaultRootURL.path + : historyAudioStorageDisplayPath + ) + .underline() + .lineLimit(1) + .truncationMode(.middle) + .multilineTextAlignment(.trailing) + Image(systemName: "arrow.up.forward.square") + .font(.caption) + } + } + .buttonStyle(SettingsInlineSelectorButtonStyle()) + .help(String(localized: "Open folder")) + + Button(String(localized: "Choose")) { + chooseHistoryAudioStorageDirectory() + } + .buttonStyle(SettingsPillButtonStyle()) + } + + Text(String(localized: "New history audio is stored here. Switching the path will not move existing audio files.")) + .font(.caption) + .foregroundStyle(.secondary) + + if let historyAudioStorageSelectionError, !historyAudioStorageSelectionError.isEmpty { + Text(historyAudioStorageSelectionError) + .font(.caption) + .foregroundStyle(.red) + } + } else { + Text(String(localized: "When disabled, history items will not keep audio files.")) + .font(.caption) + .foregroundStyle(.secondary) + } + } + + if historyAudioStorageEnabled { + GeneralSettingsCard(title: "Export") { + HStack(spacing: 10) { + Button(String(localized: "Export Audio")) { + exportAllHistoryAudio() + } + .buttonStyle(SettingsPillButtonStyle()) + + VStack(alignment: .leading, spacing: 4) { + Text(historyAudioStorageStatsSummary) + .font(.caption) + .foregroundStyle(.secondary) + + Text(String(localized: "Copies every saved history audio file into a folder you choose.")) + .font(.caption) + .foregroundStyle(.secondary) + } + } + + if let historyAudioExportResultMessage, !historyAudioExportResultMessage.isEmpty { + Text(historyAudioExportResultMessage) + .font(.caption) + .foregroundStyle(.secondary) + .fixedSize(horizontal: false, vertical: true) + } + } + } + + SettingsDialogActionRow { + Button(String(localized: "Done")) { + isHistoryAudioSettingsPresented = false + } + .buttonStyle(SettingsPrimaryButtonStyle()) + .keyboardShortcut(.defaultAction) + } + } + .padding(20) + .frame(width: 560) + } + + private func openHistoryAudioStorageInFinder() { + HistoryAudioStorageDirectoryManager.openRootInFinder() + } + + private func chooseHistoryAudioStorageDirectory() { + let panel = NSOpenPanel() + panel.canChooseDirectories = true + panel.canChooseFiles = false + panel.allowsMultipleSelection = false + panel.directoryURL = HistoryAudioStorageDirectoryManager.resolvedRootURL() + + guard panel.runModal() == .OK, let selectedURL = panel.url else { return } + + do { + try HistoryAudioStorageDirectoryManager.saveUserSelectedRootURL(selectedURL) + historyAudioStorageSelectionError = nil + refreshHistoryAudioStorageDisplayPath() + } catch { + historyAudioStorageSelectionError = AppLocalization.format( + "Failed to update history audio storage path: %@", + error.localizedDescription + ) + } + } + + private func refreshHistoryAudioStorageDisplayPath() { + historyAudioStorageDisplayPath = HistoryAudioStorageDirectoryManager.resolvedRootURL().path + } + + private func refreshHistoryAudioStorageStats() { + historyAudioStorageStats = historyStore.currentAudioArchiveStorageStats() + } + + private var historyAudioStorageStatsSummary: String { + AppLocalization.format( + "Saved audio: %d files · %@", + historyAudioStorageStats.storedFileCount, + formattedByteCount(historyAudioStorageStats.totalBytes) + ) + } + + private func formattedByteCount(_ bytes: Int64) -> String { + let formatter = ByteCountFormatter() + formatter.allowedUnits = [.useBytes, .useKB, .useMB, .useGB] + formatter.countStyle = .file + formatter.includesUnit = true + formatter.isAdaptive = true + return formatter.string(fromByteCount: bytes) + } + + private func exportAllHistoryAudio() { + let panel = NSOpenPanel() + panel.canChooseDirectories = true + panel.canChooseFiles = false + panel.allowsMultipleSelection = false + panel.directoryURL = FileManager.default.homeDirectoryForCurrentUser + + guard panel.runModal() == .OK, let destinationURL = panel.url else { return } + + do { + let summary = try historyStore.exportAllAudioArchives(to: destinationURL) + historyAudioExportResultMessage = AppLocalization.format( + "Exported %d audio files. Skipped %d. Failed %d.", + summary.exportedCount, + summary.skippedCount, + summary.failedCount + ) + } catch { + historyAudioExportResultMessage = AppLocalization.format( + "Audio export failed: %@", + error.localizedDescription + ) + } + refreshHistoryAudioStorageStats() + } } diff --git a/Voxt/Support/HistoryAudioArchiveSupport.swift b/Voxt/Support/HistoryAudioArchiveSupport.swift new file mode 100644 index 0000000..23472c1 --- /dev/null +++ b/Voxt/Support/HistoryAudioArchiveSupport.swift @@ -0,0 +1,199 @@ +import Foundation + +enum HistoryAudioArchiveSupport { + static let targetSampleRate: Double = 16_000 + static let rewriteJoinGapSeconds: Double = 0.3 + + static func exportWAV( + samples: [Float], + sampleRate: Double, + to destinationURL: URL + ) throws -> Bool { + guard !samples.isEmpty else { return false } + let preparedSamples = resample(samples: samples, from: sampleRate, to: targetSampleRate) + guard !preparedSamples.isEmpty else { return false } + + let data = wavData(for: preparedSamples, sampleRate: Int(targetSampleRate)) + let directory = destinationURL.deletingLastPathComponent() + try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true) + try data.write(to: destinationURL, options: .atomic) + return true + } + + static func mergedRewriteArchive( + existingArchiveURL: URL?, + appendedArchiveURL: URL + ) throws -> URL { + let appendedSamples = try readWAVSamples(from: appendedArchiveURL) + guard !appendedSamples.isEmpty else { + throw NSError( + domain: "Voxt.HistoryAudio", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "The appended rewrite audio archive was empty."] + ) + } + + var mergedSamples: [Float] = [] + if let existingArchiveURL { + mergedSamples = try readWAVSamples(from: existingArchiveURL) + if !mergedSamples.isEmpty { + mergedSamples.append(contentsOf: silenceSamples(durationSeconds: rewriteJoinGapSeconds)) + } + } + mergedSamples.append(contentsOf: appendedSamples) + + let tempURL = FileManager.default.temporaryDirectory + .appendingPathComponent("voxt-history-rewrite-\(UUID().uuidString)") + .appendingPathExtension("wav") + _ = try exportWAV(samples: mergedSamples, sampleRate: targetSampleRate, to: tempURL) + return tempURL + } + + static func readWAVSamples(from fileURL: URL) throws -> [Float] { + let data = try Data(contentsOf: fileURL) + guard data.count >= 44 else { + throw NSError( + domain: "Voxt.HistoryAudio", + code: 2, + userInfo: [NSLocalizedDescriptionKey: "The WAV file was too small to parse."] + ) + } + + guard String(data: data[0..<4], encoding: .ascii) == "RIFF", + String(data: data[8..<12], encoding: .ascii) == "WAVE" else { + throw NSError( + domain: "Voxt.HistoryAudio", + code: 3, + userInfo: [NSLocalizedDescriptionKey: "The audio archive was not a WAV file."] + ) + } + + var offset = 12 + var bitsPerSample: UInt16? + var channelCount: UInt16? + var dataChunkRange: Range? + + while offset + 8 <= data.count { + let chunkIDData = data[offset..<(offset + 4)] + guard let chunkID = String(data: chunkIDData, encoding: .ascii) else { break } + let chunkSize = littleEndianUInt32(from: data, at: offset + 4) + let chunkBodyStart = offset + 8 + let chunkBodyEnd = chunkBodyStart + Int(chunkSize) + guard chunkBodyEnd <= data.count else { break } + + if chunkID == "fmt ", chunkSize >= 16 { + channelCount = littleEndianUInt16(from: data, at: chunkBodyStart + 2) + bitsPerSample = littleEndianUInt16(from: data, at: chunkBodyStart + 14) + } else if chunkID == "data" { + dataChunkRange = chunkBodyStart.. [Float] { + let count = max(Int((durationSeconds * targetSampleRate).rounded()), 0) + return [Float](repeating: 0, count: count) + } + + static func temporaryArchiveURL(prefix: String) -> URL { + FileManager.default.temporaryDirectory + .appendingPathComponent("\(prefix)-\(UUID().uuidString)") + .appendingPathExtension("wav") + } + + private static func wavData(for samples: [Float], sampleRate: Int) -> Data { + let channelCount: UInt16 = 1 + let bitsPerSample: UInt16 = 16 + let byteRate = UInt32(sampleRate) * UInt32(channelCount) * UInt32(bitsPerSample / 8) + let blockAlign = channelCount * (bitsPerSample / 8) + + var pcmData = Data(capacity: samples.count * MemoryLayout.size) + for sample in samples { + let clamped = max(-1, min(1, sample)) + var value = Int16((clamped * Float(Int16.max)).rounded()) + pcmData.append(Data(bytes: &value, count: MemoryLayout.size)) + } + + let riffChunkSize = UInt32(36 + pcmData.count) + let dataChunkSize = UInt32(pcmData.count) + + var data = Data() + data.append("RIFF".data(using: .ascii)!) + data.append(bytes(of: riffChunkSize)) + data.append("WAVE".data(using: .ascii)!) + data.append("fmt ".data(using: .ascii)!) + data.append(bytes(of: UInt32(16))) + data.append(bytes(of: UInt16(1))) + data.append(bytes(of: channelCount)) + data.append(bytes(of: UInt32(sampleRate))) + data.append(bytes(of: byteRate)) + data.append(bytes(of: blockAlign)) + data.append(bytes(of: bitsPerSample)) + data.append("data".data(using: .ascii)!) + data.append(bytes(of: dataChunkSize)) + data.append(pcmData) + return data + } + + private static func bytes(of value: T) -> Data { + var mutableValue = value + return withUnsafeBytes(of: &mutableValue) { Data($0) } + } + + private static func resample(samples: [Float], from inputRate: Double, to outputRate: Double) -> [Float] { + guard !samples.isEmpty, inputRate > 0, outputRate > 0 else { return samples } + if abs(inputRate - outputRate) <= 1 { + return samples + } + + let ratio = outputRate / inputRate + let outputCount = max(Int(Double(samples.count) * ratio), 1) + var output = [Float](repeating: 0, count: outputCount) + + for index in 0.. UInt16 { + data.subdata(in: offset..<(offset + 2)).withUnsafeBytes { rawBuffer in + rawBuffer.load(as: UInt16.self).littleEndian + } + } + + private static func littleEndianUInt32(from data: Data, at offset: Int) -> UInt32 { + data.subdata(in: offset..<(offset + 4)).withUnsafeBytes { rawBuffer in + rawBuffer.load(as: UInt32.self).littleEndian + } + } +} diff --git a/Voxt/Support/HistoryAudioStorageDirectoryManager.swift b/Voxt/Support/HistoryAudioStorageDirectoryManager.swift new file mode 100644 index 0000000..ded3907 --- /dev/null +++ b/Voxt/Support/HistoryAudioStorageDirectoryManager.swift @@ -0,0 +1,86 @@ +import Foundation +import AppKit + +enum HistoryAudioStorageDirectoryManager { + private static var securityScopedURL: URL? + private static let fileManager = FileManager.default + + static var defaultRootURL: URL { + let appSupport = fileManager.urls(for: .applicationSupportDirectory, in: .userDomainMask).first + ?? fileManager.homeDirectoryForCurrentUser.appendingPathComponent("Library/Application Support", isDirectory: true) + return appSupport + .appendingPathComponent("Voxt", isDirectory: true) + .appendingPathComponent("transcription-history-audio", isDirectory: true) + } + + static func resolvedRootURL() -> URL { + let defaults = UserDefaults.standard + if let bookmarkData = defaults.data(forKey: AppPreferenceKey.historyAudioStorageRootBookmark), + let bookmarkedURL = resolveSecurityScopedURL(from: bookmarkData) { + return bookmarkedURL + } + + if let path = defaults.string(forKey: AppPreferenceKey.historyAudioStorageRootPath), !path.isEmpty { + return URL(fileURLWithPath: path, isDirectory: true) + } + + return defaultRootURL + } + + static func saveUserSelectedRootURL(_ url: URL) throws { + let normalized = url.standardizedFileURL + let bookmark = try normalized.bookmarkData( + options: [.withSecurityScope], + includingResourceValuesForKeys: nil, + relativeTo: nil + ) + + let defaults = UserDefaults.standard + defaults.set(normalized.path, forKey: AppPreferenceKey.historyAudioStorageRootPath) + defaults.set(bookmark, forKey: AppPreferenceKey.historyAudioStorageRootBookmark) + + _ = resolveSecurityScopedURL(from: bookmark) + } + + static func ensureRootDirectoryExists() throws -> URL { + let url = resolvedRootURL() + try fileManager.createDirectory(at: url, withIntermediateDirectories: true) + return url + } + + static func openRootInFinder() { + if let url = try? ensureRootDirectoryExists() { + NSWorkspace.shared.activateFileViewerSelecting([url]) + } + } + + private static func resolveSecurityScopedURL(from bookmarkData: Data) -> URL? { + var isStale = false + guard let resolved = try? URL( + resolvingBookmarkData: bookmarkData, + options: [.withSecurityScope, .withoutUI], + relativeTo: nil, + bookmarkDataIsStale: &isStale + ) else { + return nil + } + + if securityScopedURL?.path != resolved.path { + securityScopedURL?.stopAccessingSecurityScopedResource() + if resolved.startAccessingSecurityScopedResource() { + securityScopedURL = resolved + } + } + + if isStale, + let refreshed = try? resolved.bookmarkData( + options: [.withSecurityScope], + includingResourceValuesForKeys: nil, + relativeTo: nil + ) { + UserDefaults.standard.set(refreshed, forKey: AppPreferenceKey.historyAudioStorageRootBookmark) + } + + return resolved + } +} diff --git a/Voxt/Support/TranscriptionHistoryStore.swift b/Voxt/Support/TranscriptionHistoryStore.swift index 93dde58..5e3bb76 100644 --- a/Voxt/Support/TranscriptionHistoryStore.swift +++ b/Voxt/Support/TranscriptionHistoryStore.swift @@ -40,6 +40,7 @@ struct TranscriptionHistoryEntry: Identifiable, Codable, Hashable { let remoteLLMProvider: String? let remoteLLMModel: String? let remoteLLMEndpoint: String? + let audioRelativePath: String? let whisperWordTimings: [WhisperHistoryWordTiming]? let meetingSegments: [MeetingTranscriptSegment]? let meetingAudioRelativePath: String? @@ -76,6 +77,7 @@ struct TranscriptionHistoryEntry: Identifiable, Codable, Hashable { case remoteLLMProvider case remoteLLMModel case remoteLLMEndpoint + case audioRelativePath case whisperWordTimings case meetingSegments case meetingAudioRelativePath @@ -113,6 +115,7 @@ struct TranscriptionHistoryEntry: Identifiable, Codable, Hashable { remoteLLMProvider: String?, remoteLLMModel: String?, remoteLLMEndpoint: String?, + audioRelativePath: String? = nil, whisperWordTimings: [WhisperHistoryWordTiming]?, meetingSegments: [MeetingTranscriptSegment]? = nil, meetingAudioRelativePath: String? = nil, @@ -148,6 +151,7 @@ struct TranscriptionHistoryEntry: Identifiable, Codable, Hashable { self.remoteLLMProvider = remoteLLMProvider self.remoteLLMModel = remoteLLMModel self.remoteLLMEndpoint = remoteLLMEndpoint + self.audioRelativePath = audioRelativePath ?? meetingAudioRelativePath self.whisperWordTimings = whisperWordTimings self.meetingSegments = meetingSegments self.meetingAudioRelativePath = meetingAudioRelativePath @@ -192,9 +196,11 @@ struct TranscriptionHistoryEntry: Identifiable, Codable, Hashable { remoteLLMProvider = try container.decodeIfPresent(String.self, forKey: .remoteLLMProvider) remoteLLMModel = try container.decodeIfPresent(String.self, forKey: .remoteLLMModel) remoteLLMEndpoint = try container.decodeIfPresent(String.self, forKey: .remoteLLMEndpoint) + let decodedAudioRelativePath = try container.decodeIfPresent(String.self, forKey: .audioRelativePath) whisperWordTimings = try container.decodeIfPresent([WhisperHistoryWordTiming].self, forKey: .whisperWordTimings) meetingSegments = try container.decodeIfPresent([MeetingTranscriptSegment].self, forKey: .meetingSegments) meetingAudioRelativePath = try container.decodeIfPresent(String.self, forKey: .meetingAudioRelativePath) + audioRelativePath = decodedAudioRelativePath ?? meetingAudioRelativePath meetingSummary = try container.decodeIfPresent(MeetingSummarySnapshot.self, forKey: .meetingSummary) meetingSummaryChatMessages = try container.decodeIfPresent([MeetingSummaryChatMessage].self, forKey: .meetingSummaryChatMessages) displayTitle = try container.decodeIfPresent(String.self, forKey: .displayTitle) @@ -322,6 +328,7 @@ final class TranscriptionHistoryStore: ObservableObject { remoteLLMProvider: String?, remoteLLMModel: String?, remoteLLMEndpoint: String?, + audioRelativePath: String? = nil, whisperWordTimings: [WhisperHistoryWordTiming]?, meetingSegments: [MeetingTranscriptSegment]? = nil, meetingAudioRelativePath: String? = nil, @@ -360,6 +367,7 @@ final class TranscriptionHistoryStore: ObservableObject { remoteLLMProvider: remoteLLMProvider, remoteLLMModel: remoteLLMModel, remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: whisperWordTimings, meetingSegments: meetingSegments, meetingAudioRelativePath: meetingAudioRelativePath, @@ -388,20 +396,28 @@ final class TranscriptionHistoryStore: ObservableObject { allEntries.removeAll { $0.id == id } loadedCount = min(loadedCount, allEntries.count) entries = Array(allEntries.prefix(loadedCount)) - removed.forEach(removeMeetingAudioIfNeeded(for:)) + removed.forEach(removeAudioIfNeeded(for:)) persist() } func clearAll() { - allEntries.forEach(removeMeetingAudioIfNeeded(for:)) + allEntries.forEach(removeAudioIfNeeded(for:)) allEntries = [] entries = [] loadedCount = 0 persist() } - func importMeetingAudioArchive(from sourceURL: URL) throws -> String { - let relativePath = "meeting/\(UUID().uuidString).wav" + func importAudioArchive( + from sourceURL: URL, + kind: TranscriptionHistoryKind, + preferredFileName: String? = nil + ) throws -> String { + let resolvedFileName = sanitizedAudioFileName( + preferredFileName?.trimmingCharacters(in: .whitespacesAndNewlines), + fallbackKind: kind + ) + let relativePath = "\(audioFolderName(for: kind))/\(resolvedFileName)" let destinationURL = try historyAssetsDirectoryURL().appendingPathComponent(relativePath) try fileManager.createDirectory(at: destinationURL.deletingLastPathComponent(), withIntermediateDirectories: true) if fileManager.fileExists(atPath: destinationURL.path) { @@ -411,8 +427,30 @@ final class TranscriptionHistoryStore: ObservableObject { return relativePath } - func meetingAudioURL(for entry: TranscriptionHistoryEntry) -> URL? { - guard let relativePath = entry.meetingAudioRelativePath, !relativePath.isEmpty else { + func replaceAudioArchive(for entryID: UUID, with sourceURL: URL) throws -> TranscriptionHistoryEntry? { + guard let index = allEntries.firstIndex(where: { $0.id == entryID }) else { return nil } + + let existingEntry = allEntries[index] + let relativePath = existingEntry.audioRelativePath ?? existingEntry.meetingAudioRelativePath + let resolvedRelativePath = relativePath?.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty == false + ? relativePath! + : "\(audioFolderName(for: existingEntry.kind))/\(sanitizedAudioFileName(nil, fallbackKind: existingEntry.kind))" + let destinationURL = try historyAssetsDirectoryURL().appendingPathComponent(resolvedRelativePath) + try fileManager.createDirectory(at: destinationURL.deletingLastPathComponent(), withIntermediateDirectories: true) + if fileManager.fileExists(atPath: destinationURL.path) { + try fileManager.removeItem(at: destinationURL) + } + try fileManager.moveItem(at: sourceURL, to: destinationURL) + + allEntries[index] = existingEntry.updatingAudioRelativePath(resolvedRelativePath) + entries = Array(allEntries.prefix(loadedCount)) + persist() + return allEntries[index] + } + + func audioURL(for entry: TranscriptionHistoryEntry) -> URL? { + let relativePath = entry.audioRelativePath ?? entry.meetingAudioRelativePath + guard let relativePath, !relativePath.isEmpty else { return nil } do { @@ -422,6 +460,66 @@ final class TranscriptionHistoryStore: ObservableObject { } } + func exportAllAudioArchives(to destinationDirectoryURL: URL) throws -> HistoryAudioExportSummary { + try fileManager.createDirectory(at: destinationDirectoryURL, withIntermediateDirectories: true) + + var exportedCount = 0 + var skippedCount = 0 + var failedCount = 0 + + for entry in allEntries { + guard let sourceURL = audioURL(for: entry) else { + skippedCount += 1 + continue + } + guard fileManager.fileExists(atPath: sourceURL.path) else { + skippedCount += 1 + continue + } + + do { + let folderURL = destinationDirectoryURL.appendingPathComponent(audioFolderName(for: entry.kind), isDirectory: true) + try fileManager.createDirectory(at: folderURL, withIntermediateDirectories: true) + let destinationURL = folderURL.appendingPathComponent(exportFileName(for: entry)) + if fileManager.fileExists(atPath: destinationURL.path) { + try fileManager.removeItem(at: destinationURL) + } + try fileManager.copyItem(at: sourceURL, to: destinationURL) + exportedCount += 1 + } catch { + failedCount += 1 + } + } + + return HistoryAudioExportSummary( + exportedCount: exportedCount, + skippedCount: skippedCount, + failedCount: failedCount + ) + } + + func currentAudioArchiveStorageStats() -> HistoryAudioStorageStats { + var storedFileCount = 0 + var totalBytes: Int64 = 0 + + for entry in allEntries { + guard let sourceURL = audioURL(for: entry), + fileManager.fileExists(atPath: sourceURL.path) + else { + continue + } + + storedFileCount += 1 + let fileSize = (try? sourceURL.resourceValues(forKeys: [.fileSizeKey]).fileSize) ?? 0 + totalBytes += Int64(fileSize) + } + + return HistoryAudioStorageStats( + storedFileCount: storedFileCount, + totalBytes: totalBytes + ) + } + func applyDictionarySuggestedTerms(_ snapshotsByHistoryID: [UUID: [DictionarySuggestionSnapshot]]) { guard !snapshotsByHistoryID.isEmpty else { return } @@ -556,7 +654,7 @@ final class TranscriptionHistoryStore: ObservableObject { let originalCount = allEntries.count let removedEntries = allEntries.filter { $0.createdAt < cutoff } allEntries.removeAll { $0.createdAt < cutoff } - removedEntries.forEach(removeMeetingAudioIfNeeded(for:)) + removedEntries.forEach(removeAudioIfNeeded(for:)) return allEntries.count != originalCount } @@ -573,19 +671,12 @@ final class TranscriptionHistoryStore: ObservableObject { } private func historyAssetsDirectoryURL() throws -> URL { - let appSupport = try fileManager.url( - for: .applicationSupportDirectory, - in: .userDomainMask, - appropriateFor: nil, - create: true - ) - return appSupport - .appendingPathComponent("Voxt", isDirectory: true) - .appendingPathComponent("transcription-history-assets", isDirectory: true) + try HistoryAudioStorageDirectoryManager.ensureRootDirectoryExists() } - private func removeMeetingAudioIfNeeded(for entry: TranscriptionHistoryEntry) { - guard let relativePath = entry.meetingAudioRelativePath, !relativePath.isEmpty else { return } + private func removeAudioIfNeeded(for entry: TranscriptionHistoryEntry) { + let relativePath = entry.audioRelativePath ?? entry.meetingAudioRelativePath + guard let relativePath, !relativePath.isEmpty else { return } do { let url = try historyAssetsDirectoryURL().appendingPathComponent(relativePath) if fileManager.fileExists(atPath: url.path) { @@ -608,6 +699,51 @@ final class TranscriptionHistoryStore: ObservableObject { } return merged } + + private func audioFolderName(for kind: TranscriptionHistoryKind) -> String { + switch kind { + case .normal: + return "transcription" + case .translation: + return "translation" + case .rewrite: + return "rewrite" + case .meeting: + return "meeting" + } + } + + private func sanitizedAudioFileName(_ preferredFileName: String?, fallbackKind: TranscriptionHistoryKind) -> String { + let trimmedPreferred = preferredFileName?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + let baseName = trimmedPreferred.isEmpty ? "\(audioFolderName(for: fallbackKind))-\(UUID().uuidString)" : trimmedPreferred + let filtered = baseName.map { character -> Character in + if character.isLetter || character.isNumber || character == "-" || character == "_" { + return character + } + return "-" + } + let normalized = String(filtered).trimmingCharacters(in: CharacterSet(charactersIn: "-_")) + let resolved = normalized.isEmpty ? "\(audioFolderName(for: fallbackKind))-\(UUID().uuidString)" : normalized + return resolved.hasSuffix(".wav") ? resolved : "\(resolved).wav" + } + + private func exportFileName(for entry: TranscriptionHistoryEntry) -> String { + let formatter = DateFormatter() + formatter.locale = Locale(identifier: "en_US_POSIX") + formatter.dateFormat = "yyyyMMdd-HHmmss" + return "\(formatter.string(from: entry.createdAt))-\(audioFolderName(for: entry.kind))-\(entry.id.uuidString).wav" + } +} + +struct HistoryAudioExportSummary: Equatable { + let exportedCount: Int + let skippedCount: Int + let failedCount: Int +} + +struct HistoryAudioStorageStats: Equatable { + let storedFileCount: Int + let totalBytes: Int64 } private extension TranscriptionHistoryEntry { @@ -637,6 +773,7 @@ private extension TranscriptionHistoryEntry { remoteLLMProvider: remoteLLMProvider, remoteLLMModel: remoteLLMModel, remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: whisperWordTimings, meetingSegments: meetingSegments, meetingAudioRelativePath: meetingAudioRelativePath, @@ -676,6 +813,7 @@ private extension TranscriptionHistoryEntry { remoteLLMProvider: remoteLLMProvider, remoteLLMModel: remoteLLMModel, remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: whisperWordTimings, meetingSegments: meetingSegments, meetingAudioRelativePath: meetingAudioRelativePath, @@ -715,6 +853,7 @@ private extension TranscriptionHistoryEntry { remoteLLMProvider: remoteLLMProvider, remoteLLMModel: remoteLLMModel, remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: whisperWordTimings, meetingSegments: meetingSegments, meetingAudioRelativePath: meetingAudioRelativePath, @@ -754,6 +893,7 @@ private extension TranscriptionHistoryEntry { remoteLLMProvider: remoteLLMProvider, remoteLLMModel: remoteLLMModel, remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: whisperWordTimings, meetingSegments: meetingSegments, meetingAudioRelativePath: meetingAudioRelativePath, @@ -804,6 +944,47 @@ private extension TranscriptionHistoryEntry { remoteLLMProvider: remoteLLMProvider, remoteLLMModel: remoteLLMModel, remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, + whisperWordTimings: whisperWordTimings, + meetingSegments: meetingSegments, + meetingAudioRelativePath: meetingAudioRelativePath, + meetingSummary: meetingSummary, + meetingSummaryChatMessages: meetingSummaryChatMessages, + displayTitle: displayTitle, + transcriptionChatMessages: transcriptionChatMessages, + dictionaryHitTerms: dictionaryHitTerms, + dictionaryCorrectedTerms: dictionaryCorrectedTerms, + dictionarySuggestedTerms: dictionarySuggestedTerms + ) + } + + func updatingAudioRelativePath(_ audioRelativePath: String?) -> TranscriptionHistoryEntry { + TranscriptionHistoryEntry( + id: id, + text: text, + createdAt: createdAt, + transcriptionEngine: transcriptionEngine, + transcriptionModel: transcriptionModel, + enhancementMode: enhancementMode, + enhancementModel: enhancementModel, + kind: kind, + isTranslation: isTranslation, + audioDurationSeconds: audioDurationSeconds, + transcriptionProcessingDurationSeconds: transcriptionProcessingDurationSeconds, + llmDurationSeconds: llmDurationSeconds, + focusedAppName: focusedAppName, + focusedAppBundleID: focusedAppBundleID, + matchedGroupID: matchedGroupID, + matchedGroupName: matchedGroupName, + matchedAppGroupName: matchedAppGroupName, + matchedURLGroupName: matchedURLGroupName, + remoteASRProvider: remoteASRProvider, + remoteASRModel: remoteASRModel, + remoteASREndpoint: remoteASREndpoint, + remoteLLMProvider: remoteLLMProvider, + remoteLLMModel: remoteLLMModel, + remoteLLMEndpoint: remoteLLMEndpoint, + audioRelativePath: audioRelativePath, whisperWordTimings: whisperWordTimings, meetingSegments: meetingSegments, meetingAudioRelativePath: meetingAudioRelativePath, diff --git a/Voxt/Transcription/MLXTranscriber.swift b/Voxt/Transcription/MLXTranscriber.swift index d5cbf63..968b383 100644 --- a/Voxt/Transcription/MLXTranscriber.swift +++ b/Voxt/Transcription/MLXTranscriber.swift @@ -165,6 +165,7 @@ class MLXTranscriber: ObservableObject, TranscriberProtocol { private let audioEngine = AVAudioEngine() private let sampleStore = AudioSampleStore() private var inputSampleRate: Double = 16000 + private var completedAudioArchiveURL: URL? private let modelManager: MLXModelManager private var preferredInputDeviceID: AudioDeviceID? private let targetSampleRate = 16000 @@ -206,10 +207,21 @@ class MLXTranscriber: ObservableObject, TranscriberProtocol { return micStatus } + func consumeCompletedAudioArchiveURL() -> URL? { + let url = completedAudioArchiveURL + completedAudioArchiveURL = nil + return url + } + + func discardCompletedAudioArchive() { + removeCompletedAudioArchiveIfNeeded() + } + func startRecording() { guard !isRecording else { return } cancelActiveTasks() + removeCompletedAudioArchiveIfNeeded() resetTransientState() sessionRevision += 1 let revision = sessionRevision @@ -384,6 +396,7 @@ class MLXTranscriber: ObservableObject, TranscriberProtocol { ) guard revision == sessionRevision else { return } + stageCompletedAudioArchive(samples: snapshot, sampleRate: sampleRate) let resolved = normalizeText(finalText ?? quickText ?? transcribedText) transcribedText = resolved publishPartial(resolved) @@ -538,6 +551,26 @@ class MLXTranscriber: ObservableObject, TranscriberProtocol { captureWatchdogTask = nil } + private func stageCompletedAudioArchive(samples: [Float], sampleRate: Double) { + removeCompletedAudioArchiveIfNeeded() + guard !samples.isEmpty else { return } + let tempURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "voxt-mlx-history") + do { + if try HistoryAudioArchiveSupport.exportWAV(samples: samples, sampleRate: sampleRate, to: tempURL) { + completedAudioArchiveURL = tempURL + } + } catch { + try? FileManager.default.removeItem(at: tempURL) + VoxtLog.warning("MLX completed audio archive export failed: \(error.localizedDescription)") + } + } + + private func removeCompletedAudioArchiveIfNeeded() { + guard let completedAudioArchiveURL else { return } + try? FileManager.default.removeItem(at: completedAudioArchiveURL) + self.completedAudioArchiveURL = nil + } + private func safeSampleRate(_ value: Double) -> Double { max(value, 1) } diff --git a/Voxt/Transcription/RemoteASRTranscriber.swift b/Voxt/Transcription/RemoteASRTranscriber.swift index 4fa0887..17a12c9 100644 --- a/Voxt/Transcription/RemoteASRTranscriber.swift +++ b/Voxt/Transcription/RemoteASRTranscriber.swift @@ -6,6 +6,29 @@ import zlib @MainActor class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { + private final class AudioSampleStore { + private let lock = NSLock() + private var samples: [Float] = [] + + func append(_ newSamples: [Float]) { + lock.lock() + defer { lock.unlock() } + samples.append(contentsOf: newSamples) + } + + func snapshot() -> [Float] { + lock.lock() + defer { lock.unlock() } + return samples + } + + func clear() { + lock.lock() + defer { lock.unlock() } + samples.removeAll(keepingCapacity: false) + } + } + @Published var isRecording = false @Published var audioLevel: Float = 0.0 @Published var transcribedText = "" @@ -28,6 +51,9 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { private var openAIPreviewInFlight = false private var openAIPreviewLastText = "" private var recordingFileURL: URL? + private var completedAudioArchiveURL: URL? + private let sampleStore = AudioSampleStore() + private var streamingInputSampleRate: Double = HistoryAudioArchiveSupport.targetSampleRate private var transcribeTask: Task? private var stopRequested = false private var activeProvider: RemoteASRProvider? @@ -61,12 +87,25 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { await AVCaptureDevice.requestAccess(for: .audio) } + func consumeCompletedAudioArchiveURL() -> URL? { + let url = completedAudioArchiveURL + completedAudioArchiveURL = nil + return url + } + + func discardCompletedAudioArchive() { + removeCompletedAudioArchiveIfNeeded() + } + func startRecording() { guard !isRecording else { return } recordingGenerationID = UUID() + removeCompletedAudioArchiveIfNeeded() cleanupActiveUploadTask() cleanupDoubaoStreamingState() cleanupAliyunStreamingState() + sampleStore.clear() + streamingInputSampleRate = HistoryAudioArchiveSupport.targetSampleRate transcribedText = "" audioLevel = 0 isRequesting = false @@ -189,6 +228,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { await MainActor.run { guard self.isCurrentGeneration(generationID) else { return } self.transcribedText = result + self.completedAudioArchiveURL = fileURL self.finish(with: result, generationID: generationID) } } catch { @@ -196,10 +236,10 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { guard self.isCurrentGeneration(generationID) else { return } VoxtLog.error("Remote ASR transcription failed: \(error.localizedDescription)") self.notifyRuntimeFailure(error) + self.completedAudioArchiveURL = fileURL self.finish(with: self.transcribedText, generationID: generationID) } } - try? FileManager.default.removeItem(at: fileURL) } } @@ -320,6 +360,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { let finalText = await result() await MainActor.run { guard self.isCurrentGeneration(generationID) else { return } + self.stageCompletedStreamingAudioArchive() self.transcribedText = finalText self.finish(with: finalText, generationID: generationID) } @@ -926,10 +967,14 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { let inputNode = audioEngine.inputNode applyPreferredInputDeviceIfNeeded(inputNode: inputNode) let inputFormat = inputNode.outputFormat(forBus: 0) + streamingInputSampleRate = inputFormat.sampleRate inputNode.removeTap(onBus: 0) inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { [weak self] buffer, _ in guard let self else { return } guard let pcmData = Self.makeDoubaoPCM16MonoData(from: buffer) else { return } + if let samples = AudioLevelMeter.monoSamples(from: buffer), !samples.isEmpty { + self.sampleStore.append(samples) + } Task { @MainActor in guard self.isRecording, let ctx = self.aliyunStreamingContext, @@ -1117,10 +1162,14 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { let inputNode = audioEngine.inputNode applyPreferredInputDeviceIfNeeded(inputNode: inputNode) let inputFormat = inputNode.outputFormat(forBus: 0) + streamingInputSampleRate = inputFormat.sampleRate inputNode.removeTap(onBus: 0) inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { [weak self] buffer, _ in guard let self else { return } guard let pcmData = Self.makeDoubaoPCM16MonoData(from: buffer) else { return } + if let samples = AudioLevelMeter.monoSamples(from: buffer), !samples.isEmpty { + self.sampleStore.append(samples) + } Task { @MainActor in guard self.isRecording, let ctx = self.aliyunQwenStreamingContext, @@ -1426,10 +1475,14 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { applyPreferredInputDeviceIfNeeded(inputNode: inputNode) } let inputFormat = inputNode.outputFormat(forBus: 0) + streamingInputSampleRate = inputFormat.sampleRate inputNode.removeTap(onBus: 0) inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { [weak self] buffer, _ in guard let self else { return } guard let pcmData = Self.makeDoubaoPCM16MonoData(from: buffer) else { return } + if let samples = AudioLevelMeter.monoSamples(from: buffer), !samples.isEmpty { + self.sampleStore.append(samples) + } Task { @MainActor in guard self.isRecording, let context = self.doubaoStreamingContext, @@ -2347,6 +2400,8 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { recorder?.stop() recorder = nil recordingFileURL = nil + sampleStore.clear() + streamingInputSampleRate = HistoryAudioArchiveSupport.targetSampleRate isRecording = false stopRequested = false stopOpenAIPreviewLoop() @@ -2358,6 +2413,8 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { recorder?.stop() recorder = nil recordingFileURL = nil + sampleStore.clear() + streamingInputSampleRate = HistoryAudioArchiveSupport.targetSampleRate isRecording = false stopOpenAIPreviewLoop() stopMeteringTimer() @@ -2403,6 +2460,7 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { func discardPendingSessionOutput() { recordingGenerationID = UUID() + removeCompletedAudioArchiveIfNeeded() cleanupActiveUploadTask() cleanupRecorderState() cleanupDoubaoStreamingState() @@ -2435,6 +2493,40 @@ class RemoteASRTranscriber: NSObject, ObservableObject, TranscriberProtocol { openAIPreviewInFlight = false } + private func removeCompletedAudioArchiveIfNeeded() { + guard let completedAudioArchiveURL else { return } + try? FileManager.default.removeItem(at: completedAudioArchiveURL) + self.completedAudioArchiveURL = nil + } + + private func stageCompletedStreamingAudioArchive() { + removeCompletedAudioArchiveIfNeeded() + let samples = sampleStore.snapshot() + let realtimeSummary = activeRealtimeDebugSummary() ?? "none" + guard !samples.isEmpty else { + VoxtLog.warning( + "Remote streaming audio archive export skipped because no local samples were captured. realtime=\(realtimeSummary)" + ) + return + } + let tempURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "voxt-remote-stream-history") + do { + if try HistoryAudioArchiveSupport.exportWAV( + samples: samples, + sampleRate: streamingInputSampleRate, + to: tempURL + ) { + completedAudioArchiveURL = tempURL + VoxtLog.info( + "Remote streaming audio archive staged. samples=\(samples.count), sampleRate=\(Int(streamingInputSampleRate)), file=\(tempURL.lastPathComponent), realtime=\(realtimeSummary)" + ) + } + } catch { + try? FileManager.default.removeItem(at: tempURL) + VoxtLog.warning("Remote streaming completed audio archive export failed: \(error.localizedDescription)") + } + } + private func runOpenAIPreviewPass(configuration: RemoteProviderConfiguration) async { guard isRecording else { return } guard selectedProvider == .openAIWhisper else { return } diff --git a/Voxt/Transcription/SpeechTranscriber.swift b/Voxt/Transcription/SpeechTranscriber.swift index ffa8b99..e1b0590 100644 --- a/Voxt/Transcription/SpeechTranscriber.swift +++ b/Voxt/Transcription/SpeechTranscriber.swift @@ -6,6 +6,29 @@ import AudioToolbox @MainActor class SpeechTranscriber: ObservableObject, TranscriberProtocol { + private final class AudioSampleStore { + private let lock = NSLock() + private var samples: [Float] = [] + + func append(_ newSamples: [Float]) { + lock.lock() + defer { lock.unlock() } + samples.append(contentsOf: newSamples) + } + + func snapshot() -> [Float] { + lock.lock() + defer { lock.unlock() } + return samples + } + + func clear() { + lock.lock() + defer { lock.unlock() } + samples.removeAll(keepingCapacity: false) + } + } + @Published var isRecording = false @Published var audioLevel: Float = 0.0 @Published var transcribedText = "" @@ -16,7 +39,10 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest? private var recognitionTask: SFSpeechRecognitionTask? private let audioEngine = AVAudioEngine() + private let sampleStore = AudioSampleStore() private var preferredInputDeviceID: AudioDeviceID? + private var inputSampleRate: Double = 16000 + private var completedAudioArchiveURL: URL? private var finalizeTimeoutTask: Task? private var hasDeliveredFinalResult = false @@ -44,9 +70,20 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { return micStatus } + func consumeCompletedAudioArchiveURL() -> URL? { + let url = completedAudioArchiveURL + completedAudioArchiveURL = nil + return url + } + + func discardCompletedAudioArchive() { + removeCompletedAudioArchiveIfNeeded() + } + func startRecording() { guard !isRecording else { return } lastStartFailureMessage = nil + removeCompletedAudioArchiveIfNeeded() let settings = resolvedDictationSettings() refreshSpeechRecognizer(localeIdentifier: settings.localeIdentifier) @@ -73,6 +110,7 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { } cleanupSessionState() + sampleStore.clear() transcribedText = "" audioLevel = 0 hasDeliveredFinalResult = false @@ -130,6 +168,7 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { finalizeTimeoutTask = nil isRecording = false clearRecognitionPipeline(cancelTask: true) + sampleStore.clear() } private func stopAudioCapture() { @@ -152,6 +191,7 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { finalizeTimeoutTask?.cancel() finalizeTimeoutTask = nil clearRecognitionPipeline(cancelTask: true) + stageCompletedAudioArchive() onTranscriptionFinished?(text.trimmingCharacters(in: .whitespacesAndNewlines)) } @@ -179,10 +219,15 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { let inputNode = audioEngine.inputNode applyPreferredInputDeviceIfNeeded(inputNode: inputNode) + inputSampleRate = inputNode.outputFormat(forBus: 0).sampleRate inputNode.installTap(onBus: 0, bufferSize: 1024, format: nil) { [weak self] buffer, _ in guard let self else { return } self.recognitionRequest?.append(buffer) + if let samples = AudioLevelMeter.monoSamples(from: buffer), !samples.isEmpty { + self.sampleStore.append(samples) + } + guard let channelData = buffer.floatChannelData?[0] else { return } let frameLength = Int(buffer.frameLength) if frameLength == 0 { return } @@ -281,4 +326,25 @@ class SpeechTranscriber: ObservableObject, TranscriberProtocol { VoxtLog.warning("Speech recognizer initialization failed for locale=\(locale.identifier).") } } + + private func stageCompletedAudioArchive() { + removeCompletedAudioArchiveIfNeeded() + let samples = sampleStore.snapshot() + guard !samples.isEmpty else { return } + let tempURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "voxt-speech-history") + do { + if try HistoryAudioArchiveSupport.exportWAV(samples: samples, sampleRate: inputSampleRate, to: tempURL) { + completedAudioArchiveURL = tempURL + } + } catch { + try? FileManager.default.removeItem(at: tempURL) + VoxtLog.warning("Speech completed audio archive export failed: \(error.localizedDescription)") + } + } + + private func removeCompletedAudioArchiveIfNeeded() { + guard let completedAudioArchiveURL else { return } + try? FileManager.default.removeItem(at: completedAudioArchiveURL) + self.completedAudioArchiveURL = nil + } } diff --git a/Voxt/Transcription/WhisperKitTranscriber.swift b/Voxt/Transcription/WhisperKitTranscriber.swift index df1e2d2..8b7b70e 100644 --- a/Voxt/Transcription/WhisperKitTranscriber.swift +++ b/Voxt/Transcription/WhisperKitTranscriber.swift @@ -110,6 +110,7 @@ final class WhisperKitTranscriber: ObservableObject, TranscriberProtocol { private let modelManager: WhisperKitModelManager private var preferredInputDeviceID: AudioDeviceID? private var inputSampleRate: Double = 16000 + private var completedAudioArchiveURL: URL? private var preparedWhisper: WhisperKit? private var preparedOutputMode: SessionOutputMode = .transcription private var preparedUseBuiltInTranslationTask = false @@ -138,12 +139,23 @@ final class WhisperKitTranscriber: ObservableObject, TranscriberProtocol { await AVCaptureDevice.requestAccess(for: .audio) } + func consumeCompletedAudioArchiveURL() -> URL? { + let url = completedAudioArchiveURL + completedAudioArchiveURL = nil + return url + } + + func discardCompletedAudioArchive() { + removeCompletedAudioArchiveIfNeeded() + } + func prepareSession( outputMode: SessionOutputMode, useBuiltInTranslationTask: Bool = false ) async -> String? { cancelActiveTasks() cleanupPreparedWhisperIfNeeded() + removeCompletedAudioArchiveIfNeeded() resetTransientState() preparedOutputMode = outputMode preparedUseBuiltInTranslationTask = useBuiltInTranslationTask @@ -418,6 +430,7 @@ final class WhisperKitTranscriber: ObservableObject, TranscriberProtocol { let text = normalizeText(results.map(\.text).joined(separator: " ")) if publishFinalResult { + stageCompletedAudioArchive(samples: preparedSamples, sampleRate: targetSampleRate) latestWordTimings = includeWordTimings ? buildWordTimings(from: results) : [] transcribedText = text onPartialTranscription?(text) @@ -429,6 +442,8 @@ final class WhisperKitTranscriber: ObservableObject, TranscriberProtocol { } catch { VoxtLog.error("Whisper inference failed: \(error)") if publishFinalResult { + let preparedSamples = prepareInputSamples(samples, sampleRate: sampleRate) + stageCompletedAudioArchive(samples: preparedSamples, sampleRate: targetSampleRate) latestWordTimings = [] onTranscriptionFinished?(transcribedText.trimmingCharacters(in: .whitespacesAndNewlines)) } @@ -539,6 +554,26 @@ final class WhisperKitTranscriber: ObservableObject, TranscriberProtocol { preparedWhisper?.audioProcessor.purgeAudioSamples(keepingLast: 0) } + private func stageCompletedAudioArchive(samples: [Float], sampleRate: Double) { + removeCompletedAudioArchiveIfNeeded() + guard !samples.isEmpty else { return } + let tempURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "voxt-whisper-history") + do { + if try HistoryAudioArchiveSupport.exportWAV(samples: samples, sampleRate: sampleRate, to: tempURL) { + completedAudioArchiveURL = tempURL + } + } catch { + try? FileManager.default.removeItem(at: tempURL) + VoxtLog.warning("Whisper completed audio archive export failed: \(error.localizedDescription)") + } + } + + private func removeCompletedAudioArchiveIfNeeded() { + guard let completedAudioArchiveURL else { return } + try? FileManager.default.removeItem(at: completedAudioArchiveURL) + self.completedAudioArchiveURL = nil + } + private func cancelActiveTasks() { partialLoopTask?.cancel() partialLoopTask = nil diff --git a/Voxt/UI/HistoryAudioPlaybackSupport.swift b/Voxt/UI/HistoryAudioPlaybackSupport.swift new file mode 100644 index 0000000..bca69fd --- /dev/null +++ b/Voxt/UI/HistoryAudioPlaybackSupport.swift @@ -0,0 +1,128 @@ +import AVFoundation +import Combine +import SwiftUI + +@MainActor +final class HistoryAudioPlaybackController: ObservableObject { + @Published var currentTime: TimeInterval = 0 + @Published var duration: TimeInterval = 0 + @Published var isPlaying = false + + private var player: AVAudioPlayer? + private var timer: Timer? + + init(audioURL: URL?) { + loadAudio(audioURL) + } + + deinit { + timer?.invalidate() + } + + var isAvailable: Bool { + player != nil && duration > 0 + } + + func loadAudio(_ audioURL: URL?) { + stopTimer() + isPlaying = false + currentTime = 0 + duration = 0 + player = nil + + guard let audioURL else { return } + player = try? AVAudioPlayer(contentsOf: audioURL) + player?.prepareToPlay() + duration = player?.duration ?? 0 + } + + func togglePlayPause() { + guard let player else { return } + if player.isPlaying { + player.pause() + isPlaying = false + stopTimer() + } else { + player.play() + isPlaying = true + startTimer() + } + } + + func seek(to time: TimeInterval) { + guard let player else { return } + let clamped = max(0, min(time, duration)) + player.currentTime = clamped + currentTime = clamped + } + + private func startTimer() { + stopTimer() + timer = Timer.scheduledTimer(withTimeInterval: 0.15, repeats: true) { [weak self] _ in + Task { @MainActor [weak self] in + guard let self, let player = self.player else { return } + self.currentTime = player.currentTime + if !player.isPlaying { + self.isPlaying = false + self.stopTimer() + } + } + } + } + + private func stopTimer() { + timer?.invalidate() + timer = nil + } +} + +struct HistoryAudioPlayerView: View { + @ObservedObject var controller: HistoryAudioPlaybackController + let compact: Bool + + var body: some View { + VStack(alignment: .leading, spacing: compact ? 8 : 10) { + HStack(spacing: 10) { + Button { + controller.togglePlayPause() + } label: { + Image(systemName: controller.isPlaying ? "pause.fill" : "play.fill") + .font(.system(size: compact ? 11 : 12, weight: .semibold)) + .frame(width: compact ? 26 : 30, height: compact ? 26 : 30) + .background( + Circle() + .fill(Color.accentColor.opacity(0.16)) + ) + } + .buttonStyle(.plain) + + Text(controller.isPlaying ? String(localized: "Playing") : String(localized: "Ready to play")) + .font(.system(size: compact ? 11 : 12, weight: .medium)) + .foregroundStyle(.secondary) + + Spacer(minLength: 8) + + Text("\(formattedTime(controller.currentTime)) / \(formattedTime(controller.duration))") + .font(.system(size: compact ? 10 : 11, weight: .medium, design: .monospaced)) + .foregroundStyle(.secondary) + } + + Slider( + value: Binding( + get: { controller.currentTime }, + set: { controller.seek(to: $0) } + ), + in: 0...max(controller.duration, 0.1) + ) + .controlSize(compact ? .small : .regular) + .disabled(!controller.isAvailable) + } + } + + private func formattedTime(_ seconds: TimeInterval) -> String { + let totalSeconds = max(Int(seconds.rounded()), 0) + let minutes = totalSeconds / 60 + let remainingSeconds = totalSeconds % 60 + return String(format: "%02d:%02d", minutes, remainingSeconds) + } +} diff --git a/Voxt/UI/TranscriptionDetailConversationViews.swift b/Voxt/UI/TranscriptionDetailConversationViews.swift index 86929b2..52a24c1 100644 --- a/Voxt/UI/TranscriptionDetailConversationViews.swift +++ b/Voxt/UI/TranscriptionDetailConversationViews.swift @@ -11,6 +11,7 @@ private struct TranscriptionDetailBottomVisibilityPreferenceKey: PreferenceKey { struct TranscriptionDetailConversationView: View { @ObservedObject var viewModel: TranscriptionDetailViewModel + @StateObject private var playbackController: HistoryAudioPlaybackController @State private var isScrolledToBottom = true @State private var wasScrolledToBottom = true @@ -19,15 +20,26 @@ struct TranscriptionDetailConversationView: View { private let bottomAnchorID = "transcription-detail-bottom-anchor" + init(viewModel: TranscriptionDetailViewModel) { + self.viewModel = viewModel + _playbackController = StateObject(wrappedValue: HistoryAudioPlaybackController(audioURL: viewModel.audioURL)) + } + var body: some View { VStack(spacing: 0) { header + if playbackController.isAvailable { + audioPlayer + } Divider() .overlay(MeetingDetailUIStyle.dividerColor) conversationBody composer } .frame(maxWidth: .infinity, maxHeight: .infinity, alignment: .top) + .onChange(of: viewModel.audioURL?.path) { _, _ in + playbackController.loadAudio(viewModel.audioURL) + } } private var header: some View { @@ -54,6 +66,14 @@ struct TranscriptionDetailConversationView: View { .padding(.bottom, 10) } + private var audioPlayer: some View { + VStack(alignment: .leading, spacing: 8) { + HistoryAudioPlayerView(controller: playbackController, compact: false) + } + .padding(.horizontal, 20) + .padding(.bottom, 12) + } + private var conversationBody: some View { GeometryReader { outerProxy in ScrollViewReader { proxy in diff --git a/Voxt/UI/TranscriptionDetailViewModel.swift b/Voxt/UI/TranscriptionDetailViewModel.swift index 7928b04..9608fef 100644 --- a/Voxt/UI/TranscriptionDetailViewModel.swift +++ b/Voxt/UI/TranscriptionDetailViewModel.swift @@ -8,6 +8,7 @@ final class TranscriptionDetailViewModel: ObservableObject { typealias FollowUpPersistence = @MainActor (UUID, [MeetingSummaryChatMessage]) -> TranscriptionHistoryEntry? @Published private(set) var entry: TranscriptionHistoryEntry + @Published private(set) var audioURL: URL? @Published private(set) var chatMessages: [MeetingSummaryChatMessage] @Published private(set) var providerStatus: TranscriptionFollowUpProviderStatus @Published private(set) var isLoading = false @@ -22,11 +23,13 @@ final class TranscriptionDetailViewModel: ObservableObject { init( entry: TranscriptionHistoryEntry, + audioURL: URL?, followUpStatusProvider: @escaping FollowUpStatusProvider, followUpAnswerer: @escaping FollowUpAnswerer, followUpPersistence: @escaping FollowUpPersistence ) { self.entry = entry + self.audioURL = audioURL self.chatMessages = entry.transcriptionChatMessages ?? [] self.followUpStatusProvider = followUpStatusProvider self.followUpAnswerer = followUpAnswerer @@ -87,6 +90,13 @@ final class TranscriptionDetailViewModel: ObservableObject { self.providerStatus = followUpStatusProvider(entry) } + func refresh(entry: TranscriptionHistoryEntry, audioURL: URL?) { + self.entry = entry + self.audioURL = audioURL + self.chatMessages = entry.transcriptionChatMessages ?? [] + self.providerStatus = followUpStatusProvider(entry) + } + func refreshProviderStatus() { providerStatus = followUpStatusProvider(entry) } diff --git a/Voxt/UI/TranscriptionDetailViews.swift b/Voxt/UI/TranscriptionDetailViews.swift index d0d7bb6..63fc812 100644 --- a/Voxt/UI/TranscriptionDetailViews.swift +++ b/Voxt/UI/TranscriptionDetailViews.swift @@ -7,9 +7,25 @@ enum TranscriptionDetailPresentationStyle { struct TranscriptionDetailContentView: View { let entry: TranscriptionHistoryEntry + let audioURL: URL? let locale: Locale let style: TranscriptionDetailPresentationStyle + @StateObject private var playbackController: HistoryAudioPlaybackController + + init( + entry: TranscriptionHistoryEntry, + audioURL: URL?, + locale: Locale, + style: TranscriptionDetailPresentationStyle + ) { + self.entry = entry + self.audioURL = audioURL + self.locale = locale + self.style = style + _playbackController = StateObject(wrappedValue: HistoryAudioPlaybackController(audioURL: audioURL)) + } + private var preferredContentWidth: CGFloat? { style == .popover ? 360 : nil } @@ -78,6 +94,15 @@ struct TranscriptionDetailContentView: View { optionalDetailLine(label: String(localized: "URL Group"), value: entry.matchedURLGroupName) } + if playbackController.isAvailable { + detailSection(title: String(localized: "Audio")) { + HistoryAudioPlayerView( + controller: playbackController, + compact: style == .popover + ) + } + } + if let whisperWordTimings = entry.whisperWordTimings, !whisperWordTimings.isEmpty { detailSection(title: String(localized: "Whisper Timestamps")) { @@ -130,6 +155,9 @@ struct TranscriptionDetailContentView: View { .frame(width: preferredContentWidth, alignment: .leading) .frame(maxWidth: .infinity, alignment: .leading) } + .onChange(of: audioURL?.path) { _, _ in + playbackController.loadAudio(audioURL) + } } @ViewBuilder diff --git a/Voxt/UI/TranscriptionDetailWindow.swift b/Voxt/UI/TranscriptionDetailWindow.swift index e36b6df..971b7bc 100644 --- a/Voxt/UI/TranscriptionDetailWindow.swift +++ b/Voxt/UI/TranscriptionDetailWindow.swift @@ -13,6 +13,7 @@ final class TranscriptionDetailWindowManager { func present( entry: TranscriptionHistoryEntry, + audioURL: URL?, followUpStatusProvider: @escaping FollowUpStatusProvider, followUpAnswerer: @escaping FollowUpAnswerer, followUpPersistence: @escaping FollowUpPersistence @@ -20,7 +21,7 @@ final class TranscriptionDetailWindowManager { VoxtLog.info("Transcription detail open requested. entryID=\(entry.id), kind=\(entry.kind.rawValue)") if let controller = historyControllers[entry.id] { - controller.refresh(entry: entry) + controller.refresh(entry: entry, audioURL: audioURL) controller.showWindow(nil) controller.window?.makeKeyAndOrderFront(nil) controller.window?.orderFrontRegardless() @@ -36,6 +37,7 @@ final class TranscriptionDetailWindowManager { let viewModel = TranscriptionDetailViewModel( entry: entry, + audioURL: audioURL, followUpStatusProvider: followUpStatusProvider, followUpAnswerer: followUpAnswerer, followUpPersistence: followUpPersistence @@ -109,9 +111,9 @@ private final class TranscriptionDetailWindowController: NSWindowController, NSW fatalError("init(coder:) has not been implemented") } - func refresh(entry: TranscriptionHistoryEntry) { + func refresh(entry: TranscriptionHistoryEntry, audioURL: URL?) { VoxtLog.info("Transcription detail refresh requested. entryID=\(entry.id)") - viewModel.refresh(entry: entry) + viewModel.refresh(entry: entry, audioURL: audioURL) window?.title = viewModel.title } diff --git a/Voxt/en.lproj/Localizable.strings b/Voxt/en.lproj/Localizable.strings index 2917f22..da053e1 100644 --- a/Voxt/en.lproj/Localizable.strings +++ b/Voxt/en.lproj/Localizable.strings @@ -438,10 +438,24 @@ "When disabled, Voxt uses direct network connections and bypasses system HTTP/HTTPS/SOCKS proxy settings." = "When disabled, Voxt uses direct network connections and bypasses system HTTP/HTTPS/SOCKS proxy settings."; "Tip: Drag resources into groups below." = "Tip: Drag resources into groups below."; "Model Storage" = "Model Storage"; +"Audio Storage" = "Audio Storage"; "Storage Path" = "Storage Path"; "Choose" = "Choose"; "New model downloads in Model settings are stored in this folder." = "New model downloads in Model settings are stored in this folder."; "Failed to update model storage path: %@" = "Failed to update model storage path: %@"; +"History Audio Settings" = "History Audio Settings"; +"Save history audio" = "Save history audio"; +"New history audio is stored here. Switching the path will not move existing audio files." = "New history audio is stored here. Switching the path will not move existing audio files."; +"When disabled, history items will not keep audio files." = "When disabled, history items will not keep audio files."; +"Export Audio" = "Export Audio"; +"Saved audio: %d files · %@" = "Saved audio: %d files · %@"; +"Copies every saved history audio file into a folder you choose." = "Copies every saved history audio file into a folder you choose."; +"Failed to update history audio storage path: %@" = "Failed to update history audio storage path: %@"; +"Exported %d audio files. Skipped %d. Failed %d." = "Exported %d audio files. Skipped %d. Failed %d."; +"Audio export failed: %@" = "Audio export failed: %@"; +"Playing" = "Playing"; +"Ready to play" = "Ready to play"; +"Audio Duration" = "Audio Duration"; "After switching to a new path, previously downloaded models won't be detected and must be downloaded again." = "After switching to a new path, previously downloaded models won't be detected and must be downloaded again."; "Add Browser" = "Add Browser"; "Selected app is not a valid browser (missing bundle id)." = "Selected app is not a valid browser (missing bundle id)."; diff --git a/Voxt/ja.lproj/Localizable.strings b/Voxt/ja.lproj/Localizable.strings index fb06356..70f53ad 100644 --- a/Voxt/ja.lproj/Localizable.strings +++ b/Voxt/ja.lproj/Localizable.strings @@ -435,10 +435,24 @@ "When disabled, Voxt uses direct network connections and bypasses system HTTP/HTTPS/SOCKS proxy settings." = "無効にすると、Voxt は直接接続を使用し、システムの HTTP/HTTPS/SOCKS プロキシ設定をバイパスします。"; "Tip: Drag resources into groups below." = "ヒント: 下のグループへリソースをドラッグできます。"; "Model Storage" = "モデル保存先"; +"Audio Storage" = "音声保存先"; "Storage Path" = "保存パス"; "Choose" = "選択"; "New model downloads in Model settings are stored in this folder." = "Model 設定で新しくダウンロードするモデルは、このフォルダに保存されます。"; "Failed to update model storage path: %@" = "モデル保存先の更新に失敗しました: %@"; +"History Audio Settings" = "履歴音声設定"; +"Save history audio" = "履歴音声を保存"; +"New history audio is stored here. Switching the path will not move existing audio files." = "新しい履歴音声はここに保存されます。保存先を切り替えても、既存の音声ファイルは移動しません。"; +"When disabled, history items will not keep audio files." = "無効にすると、履歴項目に音声ファイルは保存されません。"; +"Export Audio" = "音声を書き出す"; +"Saved audio: %d files · %@" = "保存済み音声: %d ファイル · %@"; +"Copies every saved history audio file into a folder you choose." = "保存済みの履歴音声を、選択したフォルダへすべてコピーします。"; +"Failed to update history audio storage path: %@" = "履歴音声の保存先更新に失敗しました: %@"; +"Exported %d audio files. Skipped %d. Failed %d." = "%d 件の音声を出力しました。%d 件をスキップし、%d 件が失敗しました。"; +"Audio export failed: %@" = "音声の書き出しに失敗しました: %@"; +"Playing" = "再生中"; +"Ready to play" = "再生可能"; +"Audio Duration" = "音声の長さ"; "After switching to a new path, previously downloaded models won't be detected and must be downloaded again." = "新しい保存先に切り替えると、以前にダウンロード済みのモデルは検出されず、再ダウンロードが必要になります。"; "Add Browser" = "ブラウザを追加"; "Selected app is not a valid browser (missing bundle id)." = "選択したアプリは有効なブラウザではありません(bundle id がありません)。"; diff --git a/Voxt/zh-Hans.lproj/Localizable.strings b/Voxt/zh-Hans.lproj/Localizable.strings index e36fe37..65ae7fd 100644 --- a/Voxt/zh-Hans.lproj/Localizable.strings +++ b/Voxt/zh-Hans.lproj/Localizable.strings @@ -438,10 +438,24 @@ "When disabled, Voxt uses direct network connections and bypasses system HTTP/HTTPS/SOCKS proxy settings." = "关闭后,Voxt 将使用直连网络,并绕过系统的 HTTP/HTTPS/SOCKS 代理设置。"; "Tip: Drag resources into groups below." = "提示:可将资源拖拽到下方分组中。"; "Model Storage" = "模型存储"; +"Audio Storage" = "音频存储"; "Storage Path" = "存储路径"; "Choose" = "选择"; "New model downloads in Model settings are stored in this folder." = "Model 设置中新下载的模型会存储到此目录。"; "Failed to update model storage path: %@" = "更新模型存储路径失败:%@"; +"History Audio Settings" = "历史音频设置"; +"Save history audio" = "保存历史音频"; +"New history audio is stored here. Switching the path will not move existing audio files." = "新的历史音频会保存到这里。切换路径不会迁移已有音频文件。"; +"When disabled, history items will not keep audio files." = "关闭后,历史记录条目将不再保留音频文件。"; +"Export Audio" = "导出音频"; +"Saved audio: %d files · %@" = "已存储音频:%d 个文件 · %@"; +"Copies every saved history audio file into a folder you choose." = "将所有已保存的历史音频复制到你选择的文件夹中。"; +"Failed to update history audio storage path: %@" = "更新历史音频存储路径失败:%@"; +"Exported %d audio files. Skipped %d. Failed %d." = "已导出 %d 个音频文件,跳过 %d 个,失败 %d 个。"; +"Audio export failed: %@" = "导出音频失败:%@"; +"Playing" = "播放中"; +"Ready to play" = "可播放"; +"Audio Duration" = "音频时长"; "After switching to a new path, previously downloaded models won't be detected and must be downloaded again." = "切换到新路径后,之前已下载的模型将无法被检测到,需要重新下载。"; "Add Browser" = "添加浏览器"; "Selected app is not a valid browser (missing bundle id)." = "所选应用不是有效浏览器(缺少 bundle id)。"; diff --git a/VoxtTests/HistoryAudioArchiveSupportTests.swift b/VoxtTests/HistoryAudioArchiveSupportTests.swift new file mode 100644 index 0000000..122ccb1 --- /dev/null +++ b/VoxtTests/HistoryAudioArchiveSupportTests.swift @@ -0,0 +1,64 @@ +import XCTest +@testable import Voxt + +final class HistoryAudioArchiveSupportTests: XCTestCase { + func testExportWAVRoundTripsAsMono16BitSamples() throws { + let samples: [Float] = [0, 0.25, -0.25, 0.75, -0.75] + let destinationURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "history-audio-roundtrip") + + defer { + try? FileManager.default.removeItem(at: destinationURL) + } + + let didExport = try HistoryAudioArchiveSupport.exportWAV( + samples: samples, + sampleRate: HistoryAudioArchiveSupport.targetSampleRate, + to: destinationURL + ) + + XCTAssertTrue(didExport) + let decodedSamples = try HistoryAudioArchiveSupport.readWAVSamples(from: destinationURL) + XCTAssertEqual(decodedSamples.count, samples.count) + for (expected, actual) in zip(samples, decodedSamples) { + XCTAssertEqual(actual, expected, accuracy: 0.02) + } + } + + func testMergedRewriteArchiveAddsSilenceGapBeforeAppendedAudio() throws { + let existingURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "history-audio-existing") + let appendedURL = HistoryAudioArchiveSupport.temporaryArchiveURL(prefix: "history-audio-appended") + + defer { + try? FileManager.default.removeItem(at: existingURL) + try? FileManager.default.removeItem(at: appendedURL) + } + + _ = try HistoryAudioArchiveSupport.exportWAV( + samples: [Float](repeating: 0.4, count: 1_600), + sampleRate: HistoryAudioArchiveSupport.targetSampleRate, + to: existingURL + ) + _ = try HistoryAudioArchiveSupport.exportWAV( + samples: [Float](repeating: 0.8, count: 1_600), + sampleRate: HistoryAudioArchiveSupport.targetSampleRate, + to: appendedURL + ) + + let mergedURL = try HistoryAudioArchiveSupport.mergedRewriteArchive( + existingArchiveURL: existingURL, + appendedArchiveURL: appendedURL + ) + + defer { + try? FileManager.default.removeItem(at: mergedURL) + } + + let mergedSamples = try HistoryAudioArchiveSupport.readWAVSamples(from: mergedURL) + let expectedGapCount = Int((HistoryAudioArchiveSupport.rewriteJoinGapSeconds * HistoryAudioArchiveSupport.targetSampleRate).rounded()) + + XCTAssertEqual(mergedSamples.count, 1_600 + expectedGapCount + 1_600) + XCTAssertEqual(mergedSamples[100], 0.4, accuracy: 0.02) + XCTAssertEqual(mergedSamples[1_600 + (expectedGapCount / 2)], 0, accuracy: 0.001) + XCTAssertEqual(mergedSamples.last ?? 0, 0.8, accuracy: 0.02) + } +} diff --git a/VoxtTests/TranscriptionHistoryConversationSupportTests.swift b/VoxtTests/TranscriptionHistoryConversationSupportTests.swift index 069df5b..37f1d0e 100644 --- a/VoxtTests/TranscriptionHistoryConversationSupportTests.swift +++ b/VoxtTests/TranscriptionHistoryConversationSupportTests.swift @@ -98,6 +98,7 @@ final class TranscriptionHistoryConversationSupportTests: XCTestCase { ) let viewModel = TranscriptionDetailViewModel( entry: entry, + audioURL: nil, followUpStatusProvider: { _ in TranscriptionFollowUpProviderStatus(isAvailable: true, message: "") }, @@ -119,6 +120,7 @@ final class TranscriptionHistoryConversationSupportTests: XCTestCase { ) let viewModel = TranscriptionDetailViewModel( entry: entry, + audioURL: nil, followUpStatusProvider: { _ in TranscriptionFollowUpProviderStatus(isAvailable: true, message: "") }, diff --git a/VoxtTests/TranscriptionHistoryEntryAudioTests.swift b/VoxtTests/TranscriptionHistoryEntryAudioTests.swift new file mode 100644 index 0000000..0520f4a --- /dev/null +++ b/VoxtTests/TranscriptionHistoryEntryAudioTests.swift @@ -0,0 +1,72 @@ +import XCTest +@testable import Voxt + +final class TranscriptionHistoryEntryAudioTests: XCTestCase { + func testDecodingLegacyMeetingAudioPathPopulatesGenericAudioPath() throws { + let createdAt = Date(timeIntervalSinceReferenceDate: 321) + let payload: [String: Any] = [ + "id": UUID().uuidString, + "text": "Meeting transcript", + "createdAt": createdAt.timeIntervalSince1970, + "transcriptionEngine": "WhisperKit", + "transcriptionModel": "base", + "enhancementMode": "Off", + "enhancementModel": "None", + "kind": "meeting", + "isTranslation": false, + "meetingAudioRelativePath": "meeting/legacy.wav", + "dictionaryHitTerms": [], + "dictionaryCorrectedTerms": [], + "dictionarySuggestedTerms": [] + ] + + let data = try JSONSerialization.data(withJSONObject: payload) + let decoder = JSONDecoder() + decoder.dateDecodingStrategy = .secondsSince1970 + + let entry = try decoder.decode(TranscriptionHistoryEntry.self, from: data) + + XCTAssertEqual(entry.meetingAudioRelativePath, "meeting/legacy.wav") + XCTAssertEqual(entry.audioRelativePath, "meeting/legacy.wav") + } + + func testEncodingGenericAudioPathOmitsLegacyFieldWhenUnset() throws { + let entry = TranscriptionHistoryEntry( + id: UUID(), + text: "Transcript", + createdAt: Date(timeIntervalSinceReferenceDate: 456), + transcriptionEngine: "WhisperKit", + transcriptionModel: "large-v3", + enhancementMode: "Off", + enhancementModel: "None", + kind: .normal, + isTranslation: false, + audioDurationSeconds: 2, + transcriptionProcessingDurationSeconds: 1, + llmDurationSeconds: nil, + focusedAppName: nil, + focusedAppBundleID: nil, + matchedGroupID: nil, + matchedGroupName: nil, + matchedAppGroupName: nil, + matchedURLGroupName: nil, + remoteASRProvider: nil, + remoteASRModel: nil, + remoteASREndpoint: nil, + remoteLLMProvider: nil, + remoteLLMModel: nil, + remoteLLMEndpoint: nil, + audioRelativePath: "transcription/sample.wav", + whisperWordTimings: nil, + dictionaryHitTerms: [], + dictionaryCorrectedTerms: [], + dictionarySuggestedTerms: [] + ) + + let data = try JSONEncoder().encode(entry) + let object = try XCTUnwrap(try JSONSerialization.jsonObject(with: data) as? [String: Any]) + + XCTAssertEqual(object["audioRelativePath"] as? String, "transcription/sample.wav") + XCTAssertNil(object["meetingAudioRelativePath"]) + } +}