From 003493459db2d876b122c7ff9a3e71cc1790e4b6 Mon Sep 17 00:00:00 2001
From: eu <ekuleshov@gmail.com>
Date: Thu, 16 Apr 2026 13:59:02 -0400
Subject: [PATCH] fix(extract_audio): refactored to use sync file write to
 improve stability

---
 .../src/features/audio/ExtractAudio.swift     | 421 ++++++++--------
 .../src/features/audio/ExtractAudio.swift     | 454 ++++++++----------
 2 files changed, 389 insertions(+), 486 deletions(-)

diff --git a/ios/Classes/src/features/audio/ExtractAudio.swift b/ios/Classes/src/features/audio/ExtractAudio.swift
index 090a490..ef1f141 100644
--- a/ios/Classes/src/features/audio/ExtractAudio.swift
+++ b/ios/Classes/src/features/audio/ExtractAudio.swift
@@ -30,7 +30,7 @@ class ExtractAudio {
     /// Extracts audio from a video file asynchronously.
     ///
     /// This method uses AVAssetExportSession for fast Passthrough export,
-    /// or AVAssetReader/AVAssetWriter for WAV transcoding.
+    /// or AVAssetReader + FileHandle for WAV transcoding.
     ///
     /// - Parameters:
     ///   - config: Complete extraction configuration
@@ -77,35 +77,25 @@ class ExtractAudio {
         var progressTimer: Timer?
         var isCancelled = false
 
-        // Execute extraction on background queue
-        DispatchQueue.global(qos: .userInitiated).async {
+        // Execute extraction on background task
+        let task = Task.detached(priority: .userInitiated) {
             do {
                 // Load source video asset
                 let sourceURL = URL(fileURLWithPath: config.inputPath)
                 let asset = AVURLAsset(url: sourceURL)
 
                 // Wait for tracks to be loaded
-                let loadSemaphore = DispatchSemaphore(value: 0)
-                var loadError: Error?
+                try await asset.loadValues(forKeys: ["tracks", "duration"])
 
-                asset.loadValuesAsynchronously(forKeys: ["tracks", "duration"]) {
-                    let tracksStatus = asset.statusOfValue(forKey: "tracks", error: nil)
-                    let durationStatus = asset.statusOfValue(forKey: "duration", error: nil)
+                let tracksStatus = asset.statusOfValue(forKey: "tracks", error: nil)
+                let durationStatus = asset.statusOfValue(forKey: "duration", error: nil)
 
-                    if tracksStatus == .failed || durationStatus == .failed {
-                        loadError = NSError(
-                            domain: "ExtractAudio",
-                            code: -10,
-                            userInfo: [NSLocalizedDescriptionKey: "Failed to load asset properties"]
-                        )
-                    }
-                    loadSemaphore.signal()
-                }
-
-                loadSemaphore.wait()
-
-                if let error = loadError {
-                    throw error
+                if tracksStatus == .failed || durationStatus == .failed {
+                    throw NSError(
+                        domain: "ExtractAudio",
+                        code: -10,
+                        userInfo: [NSLocalizedDescriptionKey: "Failed to load asset properties"]
+                    )
                 }
 
                 // Determine output file location
@@ -312,6 +302,7 @@ class ExtractAudio {
         // Return cancellation handle
         return {
             isCancelled = true
+            task.cancel()
             exportSession?.cancelExport()
             DispatchQueue.main.async {
                 progressTimer?.invalidate()
@@ -319,48 +310,41 @@ class ExtractAudio {
         }
     }
     
-    /// Extracts audio to WAV format using AVAssetReader/AVAssetWriter for PCM transcoding.
+    /// Extracts audio to WAV format by streaming raw PCM into a RIFF/WAV file.
+    /// Uses AVAssetReader + FileHandle to stream PCM chunks directly to disk with a manually-built WAV header.
     private static func extractToWav(
         config: AudioExtractConfig,
         onProgress: @escaping (Double) -> Void,
         onComplete: @escaping (FlutterStandardTypedData?) -> Void,
         onError: @escaping (Error) -> Void
     ) -> AudioExtractJobHandle {
-        
+
+        /// Maximum PCM data allowed in a WAV file (~4 GB - 36 bytes).
+        let maxWavDataSize: Int64 = 0xFFFF_FFFF - 36
+
         var assetReader: AVAssetReader?
-        var assetWriter: AVAssetWriter?
         var isCancelled = false
 
-        DispatchQueue.global(qos: .userInitiated).async {
+        let task = Task.detached(priority: .userInitiated) {
             do {
                 // Load source video asset
                 let sourceURL = URL(fileURLWithPath: config.inputPath)
                 let asset = AVURLAsset(url: sourceURL)
-                
+
                 // Wait for tracks to be loaded
-                let loadSemaphore = DispatchSemaphore(value: 0)
-                var loadError: Error?
-                
-                asset.loadValuesAsynchronously(forKeys: ["tracks", "duration"]) {
-                    let tracksStatus = asset.statusOfValue(forKey: "tracks", error: nil)
-                    let durationStatus = asset.statusOfValue(forKey: "duration", error: nil)
-                    
-                    if tracksStatus == .failed || durationStatus == .failed {
-                        loadError = NSError(
-                            domain: "ExtractAudio",
-                            code: -10,
-                            userInfo: [NSLocalizedDescriptionKey: "Failed to load asset properties"]
-                        )
-                    }
-                    loadSemaphore.signal()
-                }
-                
-                loadSemaphore.wait()
-                
-                if let error = loadError {
-                    throw error
+                try await asset.loadValues(forKeys: ["tracks", "duration"])
+
+                let tracksStatus = asset.statusOfValue(forKey: "tracks", error: nil)
+                let durationStatus = asset.statusOfValue(forKey: "duration", error: nil)
+
+                if tracksStatus == .failed || durationStatus == .failed {
+                    throw NSError(
+                        domain: "ExtractAudio",
+                        code: -10,
+                        userInfo: [NSLocalizedDescriptionKey: "Failed to load asset properties"]
+                    )
                 }
-                
+
                 // Determine output file location
                 let outputURL: URL
                 if let outputPath = config.outputPath {
@@ -370,16 +354,31 @@ class ExtractAudio {
                     let filename = "audio_\(Date().timeIntervalSince1970).wav"
                     outputURL = tempDir.appendingPathComponent(filename)
                 }
-                
+
                 // Remove existing file if present
                 try? FileManager.default.removeItem(at: outputURL)
-                
+
                 // Get audio track
                 let audioTracks = asset.tracks(withMediaType: .audio)
                 guard let audioTrack = audioTracks.first else {
                     throw NoAudioTrackException()
                 }
-                
+
+                // Get audio format (sample rate, channels)
+                guard let formatDescription = (audioTrack.formatDescriptions as [AnyObject]).first
+                        as! CMAudioFormatDescription? else {
+                    throw NSError(
+                        domain: "ExtractAudio",
+                        code: -8,
+                        userInfo: [NSLocalizedDescriptionKey: "No audio format description found"]
+                    )
+                }
+
+                let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)!.pointee
+                let sampleRate = Int(asbd.mSampleRate)
+                let channels = Int(asbd.mChannelsPerFrame)
+                let bitsPerSample = 16
+
                 // Calculate time range
                 // Use the audio track's actual timeRange for full extraction
                 // Audio tracks may not start at zero due to encoding delays or sync adjustments
@@ -398,24 +397,24 @@ class ExtractAudio {
                     // Use the audio track's actual time range to capture all audio data
                     timeRange = audioTrack.timeRange
                 }
-                
+
                 // Create asset reader
                 let reader = try AVAssetReader(asset: asset)
                 assetReader = reader
                 reader.timeRange = timeRange
-                
+
                 // Configure reader output for PCM
                 let readerOutputSettings: [String: Any] = [
                     AVFormatIDKey: kAudioFormatLinearPCM,
-                    AVLinearPCMBitDepthKey: 16,
+                    AVLinearPCMBitDepthKey: bitsPerSample,
                     AVLinearPCMIsFloatKey: false,
                     AVLinearPCMIsBigEndianKey: false,
                     AVLinearPCMIsNonInterleaved: false,
                 ]
-                
+
                 let readerOutput = AVAssetReaderTrackOutput(track: audioTrack, outputSettings: readerOutputSettings)
                 readerOutput.alwaysCopiesSampleData = false
-                
+
                 guard reader.canAdd(readerOutput) else {
                     throw NSError(
                         domain: "ExtractAudio",
@@ -424,66 +423,34 @@ class ExtractAudio {
                     )
                 }
                 reader.add(readerOutput)
-                
-                // Create asset writer
-                let writer = try AVAssetWriter(outputURL: outputURL, fileType: .wav)
-                assetWriter = writer
-                
-                // Get audio format description for writer input
-                let formatDescriptions = audioTrack.formatDescriptions as! [CMFormatDescription]
-                guard let formatDescription = formatDescriptions.first else {
-                    throw NSError(
-                        domain: "ExtractAudio",
-                        code: -8,
-                        userInfo: [NSLocalizedDescriptionKey: "No audio format description found"]
-                    )
-                }
-                
-                let audioStreamBasicDescription = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)?.pointee
-                let sampleRate = audioStreamBasicDescription?.mSampleRate ?? 44100
-                let channels = audioStreamBasicDescription?.mChannelsPerFrame ?? 2
-                
-                // Create audio channel layout based on number of channels
-                var channelLayout = AudioChannelLayout()
-                channelLayout.mChannelBitmap = AudioChannelBitmap(rawValue: 0)
-                channelLayout.mNumberChannelDescriptions = 0
-                channelLayout.mChannelLayoutTag = switch channels {
-                    case 1: kAudioChannelLayoutTag_Mono
-                    case 2: kAudioChannelLayoutTag_Stereo
-                    case 3: kAudioChannelLayoutTag_MPEG_3_0_A
-                    case 4: kAudioChannelLayoutTag_Quadraphonic
-                    case 5: kAudioChannelLayoutTag_MPEG_5_0_A
-                    case 6: kAudioChannelLayoutTag_MPEG_5_1_A
-                    case 7: kAudioChannelLayoutTag_MPEG_6_1_A
-                    case 8: kAudioChannelLayoutTag_MPEG_7_1_A
-                    default: kAudioChannelLayoutTag_DiscreteInOrder | UInt32(channels)
-                }
-                
-                // Configure writer input for PCM WAV
-                let writerInputSettings: [String: Any] = [
-                    AVFormatIDKey: kAudioFormatLinearPCM,
-                    AVSampleRateKey: sampleRate,
-                    AVNumberOfChannelsKey: channels,
-                    AVLinearPCMBitDepthKey: 16,
-                    AVLinearPCMIsFloatKey: false,
-                    AVLinearPCMIsBigEndianKey: false,
-                    AVLinearPCMIsNonInterleaved: false,
-                    AVChannelLayoutKey: Data(bytes: &channelLayout, count: MemoryLayout<AudioChannelLayout>.size)
-                ]
-                
-                let writerInput = AVAssetWriterInput(mediaType: .audio, outputSettings: writerInputSettings)
-                writerInput.expectsMediaDataInRealTime = false
-                
-                guard writer.canAdd(writerInput) else {
+
+                // Create output file and write a placeholder WAV header.
+                // The header will be patched with the correct data size after all PCM is written.
+                let fm = FileManager.default
+                guard fm.createFile(atPath: outputURL.path, contents: nil) else {
                     throw NSError(
                         domain: "ExtractAudio",
                         code: -9,
-                        userInfo: [NSLocalizedDescriptionKey: "Cannot add writer input"]
+                        userInfo: [NSLocalizedDescriptionKey: "Cannot create output file at \(outputURL.path)"]
                     )
                 }
-                writer.add(writerInput)
-                
-                // Start reading and writing
+                let fileHandle = try FileHandle(forWritingTo: outputURL)
+                var writeSuccess = false
+                defer {
+                    try? fileHandle.close()
+                    if !writeSuccess {
+                        try? fm.removeItem(at: outputURL)
+                    }
+                }
+
+                // Write placeholder header (data size = 0, will be patched later)
+                fileHandle.write(buildWavHeader(
+                    pcmDataSize: 0,
+                    sampleRate: sampleRate,
+                    channels: channels,
+                    bitsPerSample: bitsPerSample
+                ))
+
                 guard reader.startReading() else {
                     throw reader.error ?? NSError(
                         domain: "ExtractAudio",
@@ -491,93 +458,58 @@ class ExtractAudio {
                         userInfo: [NSLocalizedDescriptionKey: "Failed to start reading"]
                     )
                 }
-                
-                guard writer.startWriting() else {
-                    throw writer.error ?? NSError(
-                        domain: "ExtractAudio",
-                        code: -11,
-                        userInfo: [NSLocalizedDescriptionKey: "Failed to start writing"]
-                    )
-                }
 
-                // AVAssetWriter.startSession must be called BEFORE requestMediaDataWhenReady
-                // fires — calling it lazily inside the callback is too late on some
-                // AVFoundation versions/media types and causes a crash:
-                //   "Cannot append sample buffer: Must start a session first"
-                //
-                // To correctly handle audio tracks that don't start at zero (encoding delays,
-                // sync offsets), we read the first sample synchronously here to obtain its
-                // exact PTS, then anchor the session to that timestamp. This avoids any
-                // silent gap at the beginning of the output file that would occur if we
-                // unconditionally used timeRange.start when the first sample's PTS differs.
-                let firstSampleBuffer = readerOutput.copyNextSampleBuffer()
-                let sessionStartTime: CMTime
-                if let first = firstSampleBuffer {
-                    sessionStartTime = CMSampleBufferGetPresentationTimeStamp(first)
-                } else {
-                    // No samples available — anchor to timeRange.start as fallback
-                    sessionStartTime = timeRange.start
-                }
-                writer.startSession(atSourceTime: sessionStartTime)
+                DispatchQueue.main.async { onProgress(0.0) }
 
-                // Calculate total duration for progress
                 let totalDuration = CMTimeGetSeconds(timeRange.duration)
-                
-                DispatchQueue.main.async {
-                    onProgress(0.0)
-                }
-                
-                // Process samples
-                let processingQueue = DispatchQueue(label: "com.provideo.wav.processing")
-                let semaphore = DispatchSemaphore(value: 0)
-                var processingError: Error?
-                
-                writerInput.requestMediaDataWhenReady(on: processingQueue) {
-                    // Drain any sample that was pre-read before the session was started
-                    var pendingBuffer: CMSampleBuffer? = firstSampleBuffer
-
-                    while writerInput.isReadyForMoreMediaData && !isCancelled {
-                        let sampleBuffer: CMSampleBuffer?
-                        if let pending = pendingBuffer {
-                            sampleBuffer = pending
-                            pendingBuffer = nil
-                        } else {
-                            sampleBuffer = readerOutput.copyNextSampleBuffer()
+                var totalPcmBytes: Int64 = 0
+
+                // Stream PCM chunks directly to the file handle
+                while let sampleBuffer = readerOutput.copyNextSampleBuffer() {
+                    if isCancelled { break }
+
+                    if let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) {
+                        let length = CMBlockBufferGetDataLength(blockBuffer)
+
+                        // Guard against WAV 4 GB size limit
+                        totalPcmBytes += Int64(length)
+                        if totalPcmBytes > maxWavDataSize {
+                            reader.cancelReading()
+                            throw NSError(
+                                domain: "ExtractAudio",
+                                code: -13,
+                                userInfo: [NSLocalizedDescriptionKey:
+                                    "WAV output exceeds maximum size (~4 GB). Consider splitting the audio into shorter segments."]
+                            )
                         }
 
-                        if let sampleBuffer = sampleBuffer {
-                            // Update progress
-                            let currentTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
-                            let elapsed = CMTimeGetSeconds(currentTime) - CMTimeGetSeconds(timeRange.start)
-                            let progress = min(max(elapsed / totalDuration, 0.0), 1.0)
-                            DispatchQueue.main.async {
-                                onProgress(progress)
-                            }
-                            
-                            if !writerInput.append(sampleBuffer) {
-                                processingError = writer.error
-                                break
-                            }
-                        } else {
-                            // No more samples
-                            writerInput.markAsFinished()
-                            break
+                        var chunk = Data(count: length)
+                        _ = chunk.withUnsafeMutableBytes { ptr in
+                            CMBlockBufferCopyDataBytes(
+                                blockBuffer, atOffset: 0, dataLength: length,
+                                destination: ptr.baseAddress!)
                         }
+                        fileHandle.write(chunk)
                     }
-                    
-                    if isCancelled {
-                        reader.cancelReading()
-                        writer.cancelWriting()
-                    }
-                    
-                    semaphore.signal()
+
+                    // Update progress based on presentation timestamp
+                    let currentTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
+                    let elapsed = CMTimeGetSeconds(currentTime) - CMTimeGetSeconds(timeRange.start)
+                    let progress = totalDuration > 0 ? min(max(elapsed / totalDuration, 0.0), 0.99) : 0.0
+                    DispatchQueue.main.async { onProgress(progress) }
                 }
-                
-                // Wait for processing to complete
-                semaphore.wait()
-                
+
+                // Check if the reader failed (as opposed to naturally finishing)
+                if reader.status == .failed {
+                    throw reader.error ?? NSError(
+                        domain: "ExtractAudio",
+                        code: -11,
+                        userInfo: [NSLocalizedDescriptionKey: "AVAssetReader failed during reading"]
+                    )
+                }
+
                 if isCancelled {
-                    try? FileManager.default.removeItem(at: outputURL)
+                    reader.cancelReading()
                     DispatchQueue.main.async {
                         onError(NSError(
                             domain: "ExtractAudio",
@@ -587,61 +519,92 @@ class ExtractAudio {
                     }
                     return
                 }
-                
-                if let error = processingError {
-                    try? FileManager.default.removeItem(at: outputURL)
+
+                // Seek back to the start and patch the WAV header with the real data size.
+                // Int cast is safe: totalPcmBytes is validated against maxWavDataSize (< 2^32).
+                fileHandle.seek(toFileOffset: 0)
+                fileHandle.write(buildWavHeader(
+                    pcmDataSize: Int(totalPcmBytes),
+                    sampleRate: sampleRate,
+                    channels: channels,
+                    bitsPerSample: bitsPerSample
+                ))
+
+                writeSuccess = true
+
+                if config.outputPath != nil {
+                    // File output — return nil data
                     DispatchQueue.main.async {
-                        onError(error)
-                    }
-                    return
-                }
-                
-                // Finish writing
-                let finishSemaphore = DispatchSemaphore(value: 0)
-                writer.finishWriting {
-                    finishSemaphore.signal()
-                }
-                finishSemaphore.wait()
-                
-                if writer.status == .completed {
-                    if config.outputPath != nil {
-                        DispatchQueue.main.async {
-                            onProgress(1.0)
-                            onComplete(nil)
-                        }
-                    } else {
-                        let data = try Data(contentsOf: outputURL)
-                        let flutterData = FlutterStandardTypedData(bytes: data)
-                        try? FileManager.default.removeItem(at: outputURL)
-                        DispatchQueue.main.async {
-                            onProgress(1.0)
-                            onComplete(flutterData)
-                        }
+                        onProgress(1.0)
+                        onComplete(nil)
                     }
                 } else {
-                    try? FileManager.default.removeItem(at: outputURL)
-                    let error = writer.error ?? NSError(
-                        domain: "ExtractAudio",
-                        code: -12,
-                        userInfo: [NSLocalizedDescriptionKey: "WAV export failed"]
-                    )
+                    // Memory output — read file back and clean up
+                    let data = try Data(contentsOf: outputURL)
+                    let flutterData = FlutterStandardTypedData(bytes: data)
+                    try? fm.removeItem(at: outputURL)
                     DispatchQueue.main.async {
-                        onError(error)
+                        onProgress(1.0)
+                        onComplete(flutterData)
                     }
                 }
-                
+
             } catch {
                 DispatchQueue.main.async {
                     onError(error)
                 }
             }
         }
-        
+
         // Return cancellation handle
         return {
             isCancelled = true
+            task.cancel()
             assetReader?.cancelReading()
-            assetWriter?.cancelWriting()
         }
     }
+
+    /// Builds the 44-byte RIFF/WAVE header (RIFF descriptor, "fmt " chunk, "data" chunk header) for PCM.
+    ///
+    /// The caller writes it twice: first as a placeholder (`pcmDataSize` = 0), then again at file offset 0
+    /// with the final size once all PCM data is written. Each `UIntN(...)` conversion traps if out of range.
+    private static func buildWavHeader(
+        pcmDataSize: Int, // size of the PCM payload; stored in the "data" size field and added to the RIFF size
+        sampleRate: Int, // stored in the "fmt " chunk as a 32-bit value
+        channels: Int, // stored in the "fmt " chunk as a 16-bit value
+        bitsPerSample: Int // 16 at the call sites visible in this patch
+    ) -> Data {
+        let byteRate = sampleRate * channels * (bitsPerSample / 8) // bytes of audio per second
+        let blockAlign = channels * (bitsPerSample / 8) // bytes per sample frame (one sample for every channel)
+
+        var header = Data()
+        header.append(contentsOf: [UInt8]("RIFF".utf8))
+        header.append(UInt32(36 + pcmDataSize).littleEndianBytes) // RIFF size: 36 remaining header bytes + PCM data
+        header.append(contentsOf: [UInt8]("WAVE".utf8))
+        header.append(contentsOf: [UInt8]("fmt ".utf8)) // chunk IDs are 4 bytes, hence the trailing space
+        header.append(UInt32(16).littleEndianBytes) // size of the "fmt " chunk body that follows (16 bytes)
+        header.append(UInt16(1).littleEndianBytes) // AudioFormat = 1 (PCM)
+        header.append(UInt16(channels).littleEndianBytes)
+        header.append(UInt32(sampleRate).littleEndianBytes)
+        header.append(UInt32(byteRate).littleEndianBytes)
+        header.append(UInt16(blockAlign).littleEndianBytes)
+        header.append(UInt16(bitsPerSample).littleEndianBytes)
+        header.append(contentsOf: [UInt8]("data".utf8))
+        header.append(UInt32(pcmDataSize).littleEndianBytes) // "data" chunk size = PCM byte count
+        return header
+    }
+}
+
+private extension UInt32 { // Byte-serialization helper; used for the 32-bit WAV header fields.
+    var littleEndianBytes: Data { // The value's 4 bytes, least-significant byte first.
+        var value = self.littleEndian // mutable copy so `&value` can be passed as a pointer
+        return Data(bytes: &value, count: MemoryLayout<UInt32>.size)
+    }
+}
+
+private extension UInt16 { // Byte-serialization helper; used for the 16-bit WAV header fields.
+    var littleEndianBytes: Data { // The value's 2 bytes, least-significant byte first.
+        var value = self.littleEndian // mutable copy so `&value` can be passed as a pointer
+        return Data(bytes: &value, count: MemoryLayout<UInt16>.size)
+    }
 }
diff --git a/macos/Classes/src/features/audio/ExtractAudio.swift b/macos/Classes/src/features/audio/ExtractAudio.swift
index 0a5e0d2..82cf6b2 100644
--- a/macos/Classes/src/features/audio/ExtractAudio.swift
+++ b/macos/Classes/src/features/audio/ExtractAudio.swift
@@ -30,7 +30,7 @@ class ExtractAudio {
     /// Extracts audio from a video file asynchronously.
     ///
     /// This method uses AVAssetExportSession for fast Passthrough export,
-    /// or AVAssetReader/AVAssetWriter for WAV transcoding.
+    /// or AVAssetReader + FileHandle for WAV transcoding.
     ///
     /// - Parameters:
     ///   - config: Complete extraction configuration
@@ -77,36 +77,26 @@ class ExtractAudio {
         var progressTimer: Timer?
         var isCancelled = false
 
-        // Execute extraction on background queue
-        DispatchQueue.global(qos: .userInitiated).async {
+        // Execute extraction on background task
+        let task = Task.detached(priority: .userInitiated) {
             do {
                 // Load source video asset
                 let sourceURL = URL(fileURLWithPath: config.inputPath)
                 let asset = AVURLAsset(url: sourceURL)
 
                 // Wait for tracks to be loaded
-                let loadSemaphore = DispatchSemaphore(value: 0)
-                var loadError: Error?
+                try await asset.loadValues(forKeys: ["tracks", "duration"])
 
-                asset.loadValuesAsynchronously(forKeys: ["tracks", "duration"]) {
                     let tracksStatus = asset.statusOfValue(forKey: "tracks", error: nil)
                     let durationStatus = asset.statusOfValue(forKey: "duration", error: nil)
 
                     if tracksStatus == .failed || durationStatus == .failed {
-                        loadError = NSError(
+                    throw NSError(
                             domain: "ExtractAudio",
                             code: -10,
                             userInfo: [NSLocalizedDescriptionKey: "Failed to load asset properties"]
                         )
                     }
-                    loadSemaphore.signal()
-                }
-
-                loadSemaphore.wait()
-
-                if let error = loadError {
-                    throw error
-                }
 
                 // Determine output file location
                 let outputURL: URL
@@ -136,16 +126,16 @@ class ExtractAudio {
                 default:
                     outputFileType = .m4a
                 }
-                
+
                 // Configure to export only audio tracks
                 let audioTracks = asset.tracks(withMediaType: .audio)
                 guard !audioTracks.isEmpty else {
                     throw NoAudioTrackException()
                 }
-                
+
                 // Get the actual audio track to extract
                 let audioTrack = audioTracks[0]
-                
+
                 // Determine the time range to extract
                 // IMPORTANT: Use the audio track's actual timeRange, not asset.duration
                 // Audio tracks may not start at zero due to encoding delays or sync adjustments
@@ -166,7 +156,7 @@ class ExtractAudio {
                     // Use the audio track's actual time range to capture all audio data
                     sourceTimeRange = audioTrack.timeRange
                 }
-                
+
                 // Create composition to remap timestamps to start at zero
                 // This ensures the extracted audio timeline starts at 0, not at the original offset
                 let composition = AVMutableComposition()
@@ -180,14 +170,14 @@ class ExtractAudio {
                         userInfo: [NSLocalizedDescriptionKey: "Failed to create composition audio track"]
                     )
                 }
-                
+
                 // Insert the audio track at time zero (remapping the timeline)
                 try compositionAudioTrack.insertTimeRange(
                     sourceTimeRange,
                     of: audioTrack,
                     at: .zero
                 )
-                
+
                 // Create export session with the composition (not the original asset)
                 guard let session = AVAssetExportSession(
                     asset: composition,
@@ -203,15 +193,14 @@ class ExtractAudio {
                 exportSession = session
                 session.outputURL = outputURL
                 session.outputFileType = outputFileType
-                
+
                 // No need to set timeRange on the session since the composition already handles it
-                
+
                 // Start progress tracking on main thread
                 DispatchQueue.main.async {
                     onProgress(0.0)
 
-                    progressTimer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) {
-                        _ in
+                    progressTimer = Timer.scheduledTimer(withTimeInterval: 0.1, repeats: true) { _ in
                         guard !isCancelled else { return }
                         let progress = Double(session.progress)
                         onProgress(progress)
@@ -229,14 +218,11 @@ class ExtractAudio {
                     if isCancelled {
                         try? FileManager.default.removeItem(at: outputURL)
                         DispatchQueue.main.async {
-                            onError(
-                                NSError(
-                                    domain: "ExtractAudio",
-                                    code: -3,
-                                    userInfo: [
-                                        NSLocalizedDescriptionKey: "Extraction was cancelled"
-                                    ]
-                                ))
+                            onError(NSError(
+                                domain: "ExtractAudio",
+                                code: -3,
+                                userInfo: [NSLocalizedDescriptionKey: "Extraction was cancelled"]
+                            ))
                         }
                         return
                     }
@@ -274,16 +260,11 @@ class ExtractAudio {
 
                         case .failed:
                             try? FileManager.default.removeItem(at: outputURL)
-                            let error =
-                                session.error
-                                ?? NSError(
-                                    domain: "ExtractAudio",
-                                    code: -4,
-                                    userInfo: [
-                                        NSLocalizedDescriptionKey:
-                                            "Export failed with unknown error"
-                                    ]
-                                )
+                            let error = session.error ?? NSError(
+                                domain: "ExtractAudio",
+                                code: -4,
+                                userInfo: [NSLocalizedDescriptionKey: "Export failed with unknown error"]
+                            )
                             DispatchQueue.main.async {
                                 onError(error)
                             }
@@ -291,28 +272,24 @@ class ExtractAudio {
                         case .cancelled:
                             try? FileManager.default.removeItem(at: outputURL)
                             DispatchQueue.main.async {
-                                onError(
-                                    NSError(
-                                        domain: "ExtractAudio",
-                                        code: -5,
-                                        userInfo: [
-                                            NSLocalizedDescriptionKey: "Export was cancelled"
-                                        ]
-                                    ))
+                                onError(NSError(
+                                    domain: "ExtractAudio",
+                                    code: -5,
+                                    userInfo: [NSLocalizedDescriptionKey: "Export was cancelled"]
+                                ))
                             }
 
                         default:
                             try? FileManager.default.removeItem(at: outputURL)
                             DispatchQueue.main.async {
-                                onError(
-                                    NSError(
-                                        domain: "ExtractAudio",
-                                        code: -6,
-                                        userInfo: [
-                                            NSLocalizedDescriptionKey:
-                                                "Export ended with unexpected status: \(session.status.rawValue)"
-                                        ]
-                                    ))
+                                onError(NSError(
+                                    domain: "ExtractAudio",
+                                    code: -6,
+                                    userInfo: [
+                                        NSLocalizedDescriptionKey:
+                                            "Export ended with unexpected status: \(session.status.rawValue)"
+                                    ]
+                                ))
                             }
                         }
                     }
@@ -329,6 +306,7 @@ class ExtractAudio {
         // Return cancellation handle
         return {
             isCancelled = true
+            task.cancel()
             exportSession?.cancelExport()
             DispatchQueue.main.async {
                 progressTimer?.invalidate()
@@ -336,7 +314,8 @@ class ExtractAudio {
         }
     }
 
-    /// Extracts audio to WAV format using AVAssetReader/AVAssetWriter for PCM transcoding.
+    /// Extracts audio to WAV format by streaming raw PCM into a RIFF/WAV file.
+    /// Uses AVAssetReader + FileHandle to stream PCM chunks directly to disk with a manually-built WAV header.
     private static func extractToWav(
         config: AudioExtractConfig,
         onProgress: @escaping (Double) -> Void,
@@ -344,39 +323,31 @@ class ExtractAudio {
         onError: @escaping (Error) -> Void
     ) -> AudioExtractJobHandle {
 
+        /// Maximum PCM data allowed in a WAV file (~4 GB - 36 bytes).
+        let maxWavDataSize: Int64 = 0xFFFF_FFFF - 36
+
         var assetReader: AVAssetReader?
-        var assetWriter: AVAssetWriter?
         var isCancelled = false
 
-        DispatchQueue.global(qos: .userInitiated).async {
+        let task = Task.detached(priority: .userInitiated) {
             do {
                 // Load source video asset
                 let sourceURL = URL(fileURLWithPath: config.inputPath)
                 let asset = AVURLAsset(url: sourceURL)
 
                 // Wait for tracks to be loaded
-                let loadSemaphore = DispatchSemaphore(value: 0)
-                var loadError: Error?
+                try await asset.loadValues(forKeys: ["tracks", "duration"])
 
-                asset.loadValuesAsynchronously(forKeys: ["tracks", "duration"]) {
                     let tracksStatus = asset.statusOfValue(forKey: "tracks", error: nil)
                     let durationStatus = asset.statusOfValue(forKey: "duration", error: nil)
 
                     if tracksStatus == .failed || durationStatus == .failed {
-                        loadError = NSError(
+                    throw NSError(
                             domain: "ExtractAudio",
                             code: -10,
                             userInfo: [NSLocalizedDescriptionKey: "Failed to load asset properties"]
                         )
                     }
-                    loadSemaphore.signal()
-                }
-
-                loadSemaphore.wait()
-
-                if let error = loadError {
-                    throw error
-                }
 
                 // Determine output file location
                 let outputURL: URL
@@ -397,6 +368,21 @@ class ExtractAudio {
                     throw NoAudioTrackException()
                 }
 
+                // Get audio format (sample rate, channels)
+                guard let formatDescription = (audioTrack.formatDescriptions as [AnyObject]).first
+                        as! CMAudioFormatDescription? else {
+                    throw NSError(
+                        domain: "ExtractAudio",
+                        code: -8,
+                        userInfo: [NSLocalizedDescriptionKey: "No audio format description found"]
+                    )
+                }
+
+                let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)!.pointee
+                let sampleRate = Int(asbd.mSampleRate)
+                let channels = Int(asbd.mChannelsPerFrame)
+                let bitsPerSample = 16
+
                 // Calculate time range
                 // Use the audio track's actual timeRange for full extraction
                 // Audio tracks may not start at zero due to encoding delays or sync adjustments
@@ -404,12 +390,10 @@ class ExtractAudio {
                 if let startUs = config.startUs, let endUs = config.endUs {
                     let startTime = CMTime(value: startUs, timescale: 1_000_000)
                     let endTime = CMTime(value: endUs, timescale: 1_000_000)
-                    timeRange = CMTimeRange(
-                        start: startTime, duration: CMTimeSubtract(endTime, startTime))
+                    timeRange = CMTimeRange(start: startTime, duration: CMTimeSubtract(endTime, startTime))
                 } else if let startUs = config.startUs {
                     let startTime = CMTime(value: startUs, timescale: 1_000_000)
-                    timeRange = CMTimeRange(
-                        start: startTime, duration: CMTimeSubtract(asset.duration, startTime))
+                    timeRange = CMTimeRange(start: startTime, duration: CMTimeSubtract(asset.duration, startTime))
                 } else if let endUs = config.endUs {
                     let endTime = CMTime(value: endUs, timescale: 1_000_000)
                     timeRange = CMTimeRange(start: .zero, duration: endTime)
@@ -426,7 +410,7 @@ class ExtractAudio {
                 // Configure reader output for PCM
                 let readerOutputSettings: [String: Any] = [
                     AVFormatIDKey: kAudioFormatLinearPCM,
-                    AVLinearPCMBitDepthKey: 16,
+                    AVLinearPCMBitDepthKey: bitsPerSample,
                     AVLinearPCMIsFloatKey: false,
                     AVLinearPCMIsBigEndianKey: false,
                     AVLinearPCMIsNonInterleaved: false,
@@ -445,216 +429,128 @@ class ExtractAudio {
                 }
                 reader.add(readerOutput)
 
-                // Create asset writer
-                let writer = try AVAssetWriter(outputURL: outputURL, fileType: .wav)
-                assetWriter = writer
-
-                // Get audio format description for writer input
-                let formatDescriptions = audioTrack.formatDescriptions as! [CMFormatDescription]
-                guard let formatDescription = formatDescriptions.first else {
+                // Create output file and write a placeholder WAV header.
+                // The header will be patched with the correct data size after all PCM is written.
+                let fm = FileManager.default
+                guard fm.createFile(atPath: outputURL.path, contents: nil) else {
                     throw NSError(
                         domain: "ExtractAudio",
-                        code: -8,
-                        userInfo: [NSLocalizedDescriptionKey: "No audio format description found"]
+                        code: -9,
+                        userInfo: [NSLocalizedDescriptionKey: "Cannot create output file at \(outputURL.path)"]
                     )
                 }
-
-                let audioStreamBasicDescription = CMAudioFormatDescriptionGetStreamBasicDescription(
-                    formatDescription)?.pointee
-                let sampleRate = audioStreamBasicDescription?.mSampleRate ?? 44100
-                let channels = audioStreamBasicDescription?.mChannelsPerFrame ?? 2
-                
-                // Create audio channel layout based on number of channels
-                var channelLayout = AudioChannelLayout()
-                channelLayout.mChannelBitmap = AudioChannelBitmap(rawValue: 0)
-                channelLayout.mNumberChannelDescriptions = 0
-                channelLayout.mChannelLayoutTag = switch channels {
-                    case 1: kAudioChannelLayoutTag_Mono
-                    case 2: kAudioChannelLayoutTag_Stereo
-                    case 3: kAudioChannelLayoutTag_MPEG_3_0_A
-                    case 4: kAudioChannelLayoutTag_Quadraphonic
-                    case 5: kAudioChannelLayoutTag_MPEG_5_0_A
-                    case 6: kAudioChannelLayoutTag_MPEG_5_1_A
-                    case 7: kAudioChannelLayoutTag_MPEG_6_1_A
-                    case 8: kAudioChannelLayoutTag_MPEG_7_1_A
-                    default: kAudioChannelLayoutTag_DiscreteInOrder | UInt32(channels)
+                let fileHandle = try FileHandle(forWritingTo: outputURL)
+                var writeSuccess = false
+                defer {
+                    try? fileHandle.close()
+                    if !writeSuccess {
+                        try? fm.removeItem(at: outputURL)
+                    }
                 }
 
-                // Configure writer input for PCM WAV
-                let writerInputSettings: [String: Any] = [
-                    AVFormatIDKey: kAudioFormatLinearPCM,
-                    AVSampleRateKey: sampleRate,
-                    AVNumberOfChannelsKey: channels,
-                    AVLinearPCMBitDepthKey: 16,
-                    AVLinearPCMIsFloatKey: false,
-                    AVLinearPCMIsBigEndianKey: false,
-                    AVLinearPCMIsNonInterleaved: false,
-                    AVChannelLayoutKey: Data(bytes: &channelLayout, count: MemoryLayout<AudioChannelLayout>.size)
-                ]
-
-                let writerInput = AVAssetWriterInput(
-                    mediaType: .audio, outputSettings: writerInputSettings)
-                writerInput.expectsMediaDataInRealTime = false
+                // Write placeholder header (data size = 0, will be patched later)
+                fileHandle.write(buildWavHeader(
+                    pcmDataSize: 0,
+                    sampleRate: sampleRate,
+                    channels: channels,
+                    bitsPerSample: bitsPerSample
+                ))
 
-                guard writer.canAdd(writerInput) else {
-                    throw NSError(
+                guard reader.startReading() else {
+                    throw reader.error ?? NSError(
                         domain: "ExtractAudio",
-                        code: -9,
-                        userInfo: [NSLocalizedDescriptionKey: "Cannot add writer input"]
+                        code: -10,
+                        userInfo: [NSLocalizedDescriptionKey: "Failed to start reading"]
                     )
                 }
-                writer.add(writerInput)
 
-                // Start reading and writing
-                guard reader.startReading() else {
-                    throw reader.error
-                        ?? NSError(
-                            domain: "ExtractAudio",
-                            code: -10,
-                            userInfo: [NSLocalizedDescriptionKey: "Failed to start reading"]
-                        )
-                }
+                DispatchQueue.main.async { onProgress(0.0) }
 
-                guard writer.startWriting() else {
-                    throw writer.error
-                        ?? NSError(
-                            domain: "ExtractAudio",
-                            code: -11,
-                            userInfo: [NSLocalizedDescriptionKey: "Failed to start writing"]
-                        )
-                }
-
-                // AVAssetWriter.startSession must be called BEFORE requestMediaDataWhenReady
-                // fires — calling it lazily inside the callback is too late on some macOS
-                // versions/media types and causes a crash:
-                //   "Cannot append sample buffer: Must start a session first"
-                //
-                // To correctly handle audio tracks that don't start at zero (encoding delays,
-                // sync offsets), we read the first sample synchronously here to obtain its
-                // exact PTS, then anchor the session to that timestamp. This avoids any
-                // silent gap at the beginning of the output file that would occur if we
-                // unconditionally used timeRange.start when the first sample's PTS differs.
-                let firstSampleBuffer = readerOutput.copyNextSampleBuffer()
-                let sessionStartTime: CMTime
-                if let first = firstSampleBuffer {
-                    sessionStartTime = CMSampleBufferGetPresentationTimeStamp(first)
-                } else {
-                    // No samples available — anchor to timeRange.start as fallback
-                    sessionStartTime = timeRange.start
-                }
-                writer.startSession(atSourceTime: sessionStartTime)
-
-                // Calculate total duration for progress
                 let totalDuration = CMTimeGetSeconds(timeRange.duration)
+                var totalPcmBytes: Int64 = 0
 
-                DispatchQueue.main.async {
-                    onProgress(0.0)
-                }
+                // Stream PCM chunks directly to the file handle
+                while let sampleBuffer = readerOutput.copyNextSampleBuffer() {
+                    if isCancelled { break }
 
-                // Process samples
-                let processingQueue = DispatchQueue(label: "com.provideo.wav.processing")
-                let semaphore = DispatchSemaphore(value: 0)
-                var processingError: Error?
-
-                writerInput.requestMediaDataWhenReady(on: processingQueue) {
-                    // Drain any sample that was pre-read before the session was started
-                    var pendingBuffer: CMSampleBuffer? = firstSampleBuffer
-
-                    while writerInput.isReadyForMoreMediaData && !isCancelled {
-                        let sampleBuffer: CMSampleBuffer?
-                        if let pending = pendingBuffer {
-                            sampleBuffer = pending
-                            pendingBuffer = nil
-                        } else {
-                            sampleBuffer = readerOutput.copyNextSampleBuffer()
-                        }
-
-                        if let sampleBuffer = sampleBuffer {
-                            // Update progress
-                            let currentTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
-                            let elapsed =
-                                CMTimeGetSeconds(currentTime) - CMTimeGetSeconds(timeRange.start)
-                            let progress = min(max(elapsed / totalDuration, 0.0), 1.0)
-                            DispatchQueue.main.async {
-                                onProgress(progress)
-                            }
+                    if let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) {
+                        let length = CMBlockBufferGetDataLength(blockBuffer)
 
-                            if !writerInput.append(sampleBuffer) {
-                                processingError = writer.error
-                                break
-                            }
-                        } else {
-                            // No more samples
-                            writerInput.markAsFinished()
-                            break
+                        // Guard against WAV 4 GB size limit
+                        totalPcmBytes += Int64(length)
+                        if totalPcmBytes > maxWavDataSize {
+                            reader.cancelReading()
+                            throw NSError(
+                                domain: "ExtractAudio",
+                                code: -13,
+                                userInfo: [NSLocalizedDescriptionKey:
+                                    "WAV output exceeds maximum size (~4 GB). Consider splitting the audio into shorter segments."]
+                            )
                         }
-                    }
 
-                    if isCancelled {
-                        reader.cancelReading()
-                        writer.cancelWriting()
+                        var chunk = Data(count: length)
+                        _ = chunk.withUnsafeMutableBytes { ptr in
+                            CMBlockBufferCopyDataBytes(
+                                blockBuffer, atOffset: 0, dataLength: length,
+                                destination: ptr.baseAddress!)
+                        }
+                        fileHandle.write(chunk)
                     }
 
-                    semaphore.signal()
+                    // Update progress based on presentation timestamp
+                    let currentTime = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
+                    let elapsed = CMTimeGetSeconds(currentTime) - CMTimeGetSeconds(timeRange.start)
+                    let progress = totalDuration > 0 ? min(max(elapsed / totalDuration, 0.0), 0.99) : 0.0
+                    DispatchQueue.main.async { onProgress(progress) }
                 }
 
-                // Wait for processing to complete
-                semaphore.wait()
+                // Check if the reader failed (as opposed to naturally finishing)
+                if reader.status == .failed {
+                    throw reader.error ?? NSError(
+                        domain: "ExtractAudio",
+                        code: -11,
+                        userInfo: [NSLocalizedDescriptionKey: "AVAssetReader failed during reading"]
+                    )
+                }
 
                 if isCancelled {
-                    try? FileManager.default.removeItem(at: outputURL)
+                    reader.cancelReading()
                     DispatchQueue.main.async {
-                        onError(
-                            NSError(
-                                domain: "ExtractAudio",
-                                code: -3,
-                                userInfo: [NSLocalizedDescriptionKey: "Extraction was cancelled"]
-                            ))
+                        onError(NSError(
+                            domain: "ExtractAudio",
+                            code: -3,
+                            userInfo: [NSLocalizedDescriptionKey: "Extraction was cancelled"]
+                        ))
                     }
                     return
                 }
 
-                if let error = processingError {
-                    try? FileManager.default.removeItem(at: outputURL)
-                    DispatchQueue.main.async {
-                        onError(error)
-                    }
-                    return
-                }
+                // Seek back to the start and patch the WAV header with the real data size.
+                // Int cast is safe: totalPcmBytes is validated against maxWavDataSize (< 2^32).
+                fileHandle.seek(toFileOffset: 0)
+                fileHandle.write(buildWavHeader(
+                    pcmDataSize: Int(totalPcmBytes),
+                    sampleRate: sampleRate,
+                    channels: channels,
+                    bitsPerSample: bitsPerSample
+                ))
 
-                // Finish writing
-                let finishSemaphore = DispatchSemaphore(value: 0)
-                writer.finishWriting {
-                    finishSemaphore.signal()
-                }
-                finishSemaphore.wait()
+                writeSuccess = true
 
-                if writer.status == .completed {
-                    if config.outputPath != nil {
-                        DispatchQueue.main.async {
-                            onProgress(1.0)
-                            onComplete(nil)
-                        }
-                    } else {
-                        let data = try Data(contentsOf: outputURL)
-                        let flutterData = FlutterStandardTypedData(bytes: data)
-                        try? FileManager.default.removeItem(at: outputURL)
-                        DispatchQueue.main.async {
-                            onProgress(1.0)
-                            onComplete(flutterData)
-                        }
+                if config.outputPath != nil {
+                    // File output — return nil data
+                    DispatchQueue.main.async {
+                        onProgress(1.0)
+                        onComplete(nil)
                     }
                 } else {
-                    try? FileManager.default.removeItem(at: outputURL)
-                    let error =
-                        writer.error
-                        ?? NSError(
-                            domain: "ExtractAudio",
-                            code: -12,
-                            userInfo: [NSLocalizedDescriptionKey: "WAV export failed"]
-                        )
+                    // Memory output — read file back and clean up
+                    let data = try Data(contentsOf: outputURL)
+                    let flutterData = FlutterStandardTypedData(bytes: data)
+                    try? fm.removeItem(at: outputURL)
                     DispatchQueue.main.async {
-                        onError(error)
+                        onProgress(1.0)
+                        onComplete(flutterData)
                     }
                 }
 
@@ -668,8 +564,52 @@ class ExtractAudio {
         // Return cancellation handle
         return {
             isCancelled = true
+            task.cancel()
             assetReader?.cancelReading()
-            assetWriter?.cancelWriting()
         }
     }
+
+    /// Produces the canonical 44-byte RIFF/WAV header for an integer PCM stream.
+    ///
+    /// The WAV writer in this file emits it twice: a placeholder with `pcmDataSize` 0,
+    /// then again at file offset 0 once the final PCM byte count is known.
+    private static func buildWavHeader(
+        pcmDataSize: Int,
+        sampleRate: Int,
+        channels: Int,
+        bitsPerSample: Int
+    ) -> Data {
+        let bytesPerSample = bitsPerSample / 8
+        let bytesPerSecond = sampleRate * channels * bytesPerSample
+        let frameSize = channels * bytesPerSample
+        let fourCC = { (tag: String) in Data(tag.utf8) }
+        let fields: [Data] = [
+            fourCC("RIFF"),
+            UInt32(36 + pcmDataSize).littleEndianBytes,  // bytes following this field
+            fourCC("WAVE"),
+            fourCC("fmt "),
+            UInt32(16).littleEndianBytes,                // PCM sub-chunk size
+            UInt16(1).littleEndianBytes,                 // AudioFormat = PCM
+            UInt16(channels).littleEndianBytes, UInt32(sampleRate).littleEndianBytes,
+            UInt32(bytesPerSecond).littleEndianBytes, UInt16(frameSize).littleEndianBytes,
+            UInt16(bitsPerSample).littleEndianBytes,
+            fourCC("data"),
+            UInt32(pcmDataSize).littleEndianBytes,
+        ]
+        return fields.reduce(into: Data()) { $0.append($1) }
+    }
+}
+
+private extension UInt32 {
+    /// The value's four bytes in little-endian order, ready to append to a binary header.
+    var littleEndianBytes: Data {
+        withUnsafeBytes(of: littleEndian) { Data($0) }
+    }
+}
+
+private extension UInt16 {
+    /// The value's two bytes in little-endian order, ready to append to a binary header.
+    var littleEndianBytes: Data {
+        withUnsafeBytes(of: littleEndian) { Data($0) }
+    }
 }