Socket.IO streaming works, and falls back to POST uploads if it doesn't

jsalsman · jsalsman · commit d8a77f6cf656 · 2023-11-26T20:22:37.000Z
diff --git a/README.md b/README.md
@@ -15,7 +15,8 @@ for comments and code.
 MIT license
 
 # TODO
-- streaming upload with WebSockets (in progress)
+- link, size, and duration on the playback page (in progress)
 - update screenshot and cover image with the noise supression checkbox
-- maybe companding with sox?
 - production WSGI server? Replit deployments might not be cross-platform
+- maybe companding with sox?
+- handle 2 channel stereo?
diff --git a/main.py b/main.py
@@ -9,17 +9,18 @@
 # github: https://github.com/jsalsman/webrec
 
 from flask import Flask, request, render_template,  redirect, send_from_directory
-from flask_socketio import SocketIO
+from flask_socketio import SocketIO, emit  # Fails over to POST submissions
 import sox                     # needs command line sox and the pysox package
+import lameenc                 # to produce .mp3 files for playback
 from datetime import datetime  # for audio file timestamps
 import os                      # to delete old audio files
 from time import time          # to keep files less than 10 minutes old
 
-from sys import stderr  # best for Replit; you may want to import logging
+from sys import stderr  # best for Replit; you may want to 'import logging'
 log = lambda message: stderr.write(message + '\n')  # ...and connect this
 
 app = Flask(__name__)
-socketio = SocketIO(app)
+socketio = SocketIO(app)  # Websocket
 
 @app.route('/')  # redirect from / to /record
 def index():
@@ -40,48 +41,73 @@ def upload_audio():
 
     timestamp = datetime.now().strftime("%M%S%f")[:8]  # MMSSssss
     raw_filename = f"audio-{timestamp}.raw"
-    wav_filename = f"audio-{timestamp}.wav"
 
     audio_file.save('static/' + raw_filename)
 
-    # Convert format, trim silence
-    tfm = sox.Transformer()
-    tfm.set_input_format(file_type='raw', rate=16000, bits=16, 
-               channels=1, encoding='signed-integer')
-
-    tfm.silence(min_silence_duration=0.25,  # remove lengthy silence 
-      buffer_around_silence=True)  # replace removals with 1/4 second
-    # https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer.silence
-         
-    tfm.build('static/' + raw_filename, 'static/' + wav_filename)
-    duration = sox.file_info.duration('static/' + wav_filename)
-
-    # Clean up older files; maximum 40 MB will remain
-    files = [os.path.join('static', f) for f in
-       os.listdir('static') if f.startswith('audio-')]
-    # Sort files by last modified time, oldest first
-    files.sort(key=lambda x: os.path.getmtime(x))
-    current_time = time()
-    # Remove all but the 10 most recent audio files
-    for file in files[:-10]:
-      # Get the modification time of the file
-      mod_time = os.path.getmtime(file)
-      # Calculate the age of the file in seconds
-      file_age = current_time - mod_time
-      # Check if the file is older than 10 minutes
-      if file_age > 600:
-        os.remove(file)
-    audio_space = sum([os.path.getsize('static/' + f) 
-                       for f in os.listdir('static')
-                       if f.startswith('audio-')]) / (1024 ** 2)
-    
-    log(f'Built {wav_filename} ({duration:.1f} seconds.) ' +
-        f'All audio using {audio_space:.2f} MB.')
-  
-    return redirect(f'/playback/{wav_filename}')
+    return redirect(f'/playback/' + process_file(raw_filename))
 
   return "No audio file", 400
 
+def process_file(raw_filename):
+  """Trim silence from a raw recording in static/ and encode it as MP3.
+
+  raw_filename names headerless 16 kHz, 16-bit signed, mono PCM in static/.
+  Returns the MP3's filename (same stem, also written to static/). As a
+  side effect, prunes old audio-* files and logs the space still in use.
+  """
+  raw_path = 'static/' + raw_filename
+  tmp_path = 'static/tmp-' + raw_filename  # silence-trimmed PCM, short-lived
+  mp3_fn = raw_filename.replace('.raw', '.mp3')
+  mp3_path = 'static/' + mp3_fn
+  # Convert format, trim silence
+  tfm = sox.Transformer()
+  tfm.set_input_format(file_type='raw', rate=16000, bits=16,
+                       channels=1, encoding='signed-integer')
+  tfm.silence(min_silence_duration=0.25,  # remove lengthy silence
+              buffer_around_silence=True)  # replace removals with 1/4 second
+  # https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer.silence
+
+  # Set up the MP3 encoder
+  encoder = lameenc.Encoder()
+  encoder.set_in_sample_rate(16000)
+  encoder.set_channels(1)
+  encoder.set_bit_rate(64)  # https://github.com/chrisstaite/lameenc/blob/main/lameenc.c
+  encoder.set_quality(2)  # https://github.com/gypified/libmp3lame/blob/master/include/lame.h
+
+  # tfm.build_array('static/' + raw_filename) would skip the temporary file,
+  # but FAILS in sox/transform.py line 793 (build_array, encoding_out = [...])
+  # with "IndexError: list index out of range", so go through a file instead.
+  try:
+    tfm.build(raw_path, tmp_path)
+    # Encode the PCM data to MP3
+    with open(tmp_path, 'rb') as f:
+      mp3_data = encoder.encode(f.read())
+    mp3_data += encoder.flush()
+  finally:
+    # 'tmp-' files are never matched by the audio-* cleanup, so remove it now
+    if os.path.exists(tmp_path):
+      os.remove(tmp_path)
+
+  with open(mp3_path, 'wb') as f:
+    f.write(mp3_data)
+  duration = sox.file_info.duration(mp3_path)
+
+  # Clean up: beyond the 10 newest audio files, delete those over 10 minutes old
+  files = [os.path.join('static', f) for f in
+           os.listdir('static') if f.startswith('audio-')]
+  files.sort(key=os.path.getmtime)  # oldest first
+  current_time = time()
+  for file in files[:-10]:
+    if current_time - os.path.getmtime(file) > 600:
+      os.remove(file)
+  audio_space = sum(os.path.getsize('static/' + f)
+                    for f in os.listdir('static')
+                    if f.startswith('audio-')) / (1024 ** 2)
+
+  log(f'Built {mp3_fn} ({duration:.1f} seconds.) ' +
+      f'All audio using {audio_space:.2f} MB.')
+  return mp3_fn
+
 @app.route('/playback/<filename>')
 def playback(filename):
   return render_template('playback.html', audio=filename)
@@ -98,7 +124,43 @@ def send_js(path):
 for file in [os.path.join('static', f) for f in os.listdir('static') 
              if f.startswith('audio-')]:
   os.remove(file)
-  
-app.run(host='0.0.0.0', port=81)
-# TODO: production WSGI server
+
+# WebSocket implementation
+active_streams = {}
+sid_to_filename = {}
+
+@socketio.on('connect')
+def websocket_connect():  # reserve a raw filename for this client's stream
+  stamp = f"{datetime.now():%H%M%S%f}"[:8]  # HHMMSS + hundredths of a second
+  sid_to_filename[request.sid] = "audio-" + stamp + ".raw"
+
+@socketio.on('audio_chunk')
+def websocket_chunk(data):  # append one streamed chunk to this client's raw file
+  try:
+    sid = request.sid
+    if sid not in active_streams:  # first chunk: open the file lazily
+      active_streams[sid] = open('static/' + sid_to_filename[sid], 'wb')
+    active_streams[sid].write(data)
+  except Exception as e:  # log and hand ('fail', repr) back as the return value
+    log(f"Error writing audio data: {e}")
+    return 'fail', repr(e)
+
+@socketio.on('end_recording')
+def websocket_end():
+  """Close this client's stream, convert it, and return its playback URL."""
+  try:
+    if request.sid in active_streams:
+      # pop() before converting so a failed conversion cannot strand the
+      # closed handle and reserved filename in the module-level dicts
+      active_streams.pop(request.sid).close()
+      filename = sid_to_filename.pop(request.sid)
+      return '/playback/' + process_file(filename)  # See above
+  except Exception as e:
+    log(f"Error ending websocket: {e}")
+    return 'fail', repr(e)
+
+#app.run(host='0.0.0.0', port=81)
+socketio.run(app, host='0.0.0.0', port=81)
+
+# TODO? production WSGI server
 # see https://replit.com/talk/learn/How-to-set-up-production-environment-for-your-Flask-project-on-Replit/139169
diff --git a/static/recording-processor.js b/static/recording-processor.js
@@ -1,5 +1,6 @@
-// origin: https://googlechromelabs.github.io/web-audio-samples/audio-worklet/migration/worklet-recorder/
-
+// adapted from:
+// https://googlechromelabs.github.io/web-audio-samples/audio-worklet/migration/worklet-recorder/
+//
 // Copyright (c) 2022 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
@@ -25,15 +26,16 @@ class RecordingProcessor extends AudioWorkletProcessor {
     }
 
     // Initialize _recordingBuffer as a Uint8Array
-    this._recordingBuffer = new Uint8Array(this.maxRecordingFrames * 2);
+    this.recordingBuffer = new Uint8Array(this.maxRecordingFrames * 2);
 
     this.recordedFrames = 0;
     this.isRecording = false;
     this.lastSentFrame = 0;
 
     // We will use a timer to gate our messages; this one will publish at 30hz
-    this.framesSinceLastPublish = 0;
     this.publishInterval = this.sampleRate / 30;
+    this.framesSinceLastPublish = 0;
+    this.bufferSlice = null;  // streaming slices
 
     // We will keep a live sum for rendering the visualizer.
     this.sampleSum = 0;
@@ -43,20 +45,23 @@ class RecordingProcessor extends AudioWorkletProcessor {
         this.isRecording = event.data.setRecording;
 
         if (this.isRecording === false) {
+          this.bufferSlice = this.recordingBuffer.slice(
+            this.lastSentFrame * 2, this.recordedFrames * 2);
           this.port.postMessage({
             message: 'SHARE_RECORDING_BUFFER',
-            buffer: this._recordingBuffer,
-            recordingLength: this.recordedFrames, // ADDED
+            buffer: this.recordingBuffer,
+            recordingLength: this.recordedFrames,
+            bufferSlice: this.bufferSlice,
           });
         } else {
-          this.recordedFrames = 0; // RESET ON START to handle multiple sessions ADDED
+          this.recordedFrames = 0; // RESET ON START to handle multiple sessions
         }
       }
     };
   }
 
   process(inputs, outputs, params) {
-    // Assuming we are only interested in the first channel 0  // TODO: convert to mono properly
+    // Assuming we are only interested in the first channel 0  // TODO? convert to mono properly
     let inputBuffer = inputs[0][0];
     for (let sample = 0; sample < inputBuffer.length; ++sample) {
       let currentSample = inputBuffer[sample];
@@ -66,8 +71,8 @@ class RecordingProcessor extends AudioWorkletProcessor {
         let signed16bits = Math.max(-32768, 
                                     Math.min(32767, currentSample * 32768.0));
         let index = (sample + this.recordedFrames) * 2;
-        this._recordingBuffer[index] = signed16bits & 255; // low byte, little endian
-        this._recordingBuffer[index + 1] = (signed16bits >> 8) & 255; // high
+        this.recordingBuffer[index] = signed16bits & 255; // low byte, little endian
+        this.recordingBuffer[index + 1] = (signed16bits >> 8) & 255; // high
       }
   
       // Sum values for visualizer
@@ -83,13 +88,13 @@ class RecordingProcessor extends AudioWorkletProcessor {
 
         // Post a recording recording length update on the clock's schedule
         if (shouldPublish) {
-          let bufferSlice = this._recordingBuffer.slice(
+          this.bufferSlice = this.recordingBuffer.slice(
             this.lastSentFrame * 2, this.recordedFrames * 2);
 
           this.port.postMessage({
             message: 'UPDATE_RECORDING',
             recordingLength: this.recordedFrames,
-            bufferSlice: bufferSlice,
+            bufferSlice: this.bufferSlice,
           });
 
           this.lastSentFrame = this.recordedFrames;
@@ -99,7 +104,8 @@ class RecordingProcessor extends AudioWorkletProcessor {
         this.isRecording = false;
         this.port.postMessage({
           message: 'MAX_RECORDING_LENGTH_REACHED',
-          buffer: this._recordingBuffer,
+          buffer: this.recordingBuffer,
+          bufferSlice: this.bufferSlice,
         });
 
         this.recordedFrames += 128;
diff --git a/templates/record.html b/templates/record.html