Skip to content

Commit 3adfce7

Browse files
committed
Batch-process separate mic and speaker audio queues for more real-time transcription
1 parent 19f4b3c commit 3adfce7

File tree

3 files changed

+78
-33
lines changed

3 files changed

+78
-33
lines changed

AudioRecorder.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
DYNAMIC_ENERGY_THRESHOLD = False
88

99
class BaseRecorder:
10-
def __init__(self, source, source_name):
10+
def __init__(self, source):
1111
self.recorder = sr.Recognizer()
1212
self.recorder.energy_threshold = ENERGY_THRESHOLD
1313
self.recorder.dynamic_energy_threshold = DYNAMIC_ENERGY_THRESHOLD
@@ -16,7 +16,6 @@ def __init__(self, source, source_name):
1616
raise ValueError("audio source can't be None")
1717

1818
self.source = source
19-
self.source_name = source_name
2019

2120
def adjust_for_noise(self, device_name, msg):
2221
print(f"[INFO] Adjusting for ambient noise from {device_name}. " + msg)
@@ -27,13 +26,13 @@ def adjust_for_noise(self, device_name, msg):
2726
def record_into_queue(self, audio_queue):
2827
def record_callback(_, audio:sr.AudioData) -> None:
2928
data = audio.get_raw_data()
30-
audio_queue.put((self.source_name, data, datetime.utcnow()))
29+
audio_queue.put((data, datetime.utcnow()))
3130

3231
self.recorder.listen_in_background(self.source, record_callback, phrase_time_limit=RECORD_TIMEOUT)
3332

3433
class DefaultMicRecorder(BaseRecorder):
3534
def __init__(self):
36-
super().__init__(source=sr.Microphone(sample_rate=16000), source_name="You")
35+
super().__init__(source=sr.Microphone(sample_rate=16000))
3736
self.adjust_for_noise("Default Mic", "Please make some noise from the Default Mic...")
3837

3938
class DefaultSpeakerRecorder(BaseRecorder):
@@ -55,5 +54,5 @@ def __init__(self):
5554
sample_rate=int(default_speakers["defaultSampleRate"]),
5655
chunk_size=pyaudio.get_sample_size(pyaudio.paInt16),
5756
channels=default_speakers["maxInputChannels"])
58-
super().__init__(source=source, source_name="Speaker")
57+
super().__init__(source=source)
5958
self.adjust_for_noise("Default Speaker", "Please make or play some noise from the Default Speaker...")

AudioTranscriber.py

Lines changed: 60 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,26 +37,68 @@ def __init__(self, mic_source, speaker_source, model):
3737
}
3838
}
3939

40-
def transcribe_audio_queue(self, audio_queue):
40+
def transcribe_audio_queue(self, speaker_queue, mic_queue):
41+
import queue
42+
4143
while True:
42-
who_spoke, data, time_spoken = audio_queue.get()
43-
self.update_last_sample_and_phrase_status(who_spoke, data, time_spoken)
44-
source_info = self.audio_sources[who_spoke]
45-
46-
text = ''
47-
try:
48-
fd, path = tempfile.mkstemp(suffix=".wav")
49-
os.close(fd)
50-
source_info["process_data_func"](source_info["last_sample"], path)
51-
text = self.audio_model.get_transcription(path)
52-
except Exception as e:
53-
print(e)
54-
finally:
55-
os.unlink(path)
56-
57-
if text != '' and text.lower() != 'you':
58-
self.update_transcript(who_spoke, text, time_spoken)
44+
pending_transcriptions = []
45+
46+
mic_data = []
47+
while True:
48+
try:
49+
data, time_spoken = mic_queue.get_nowait()
50+
self.update_last_sample_and_phrase_status("You", data, time_spoken)
51+
mic_data.append((data, time_spoken))
52+
except queue.Empty:
53+
break
54+
55+
speaker_data = []
56+
while True:
57+
try:
58+
data, time_spoken = speaker_queue.get_nowait()
59+
self.update_last_sample_and_phrase_status("Speaker", data, time_spoken)
60+
speaker_data.append((data, time_spoken))
61+
except queue.Empty:
62+
break
63+
64+
if mic_data:
65+
source_info = self.audio_sources["You"]
66+
try:
67+
fd, path = tempfile.mkstemp(suffix=".wav")
68+
os.close(fd)
69+
source_info["process_data_func"](source_info["last_sample"], path)
70+
text = self.audio_model.get_transcription(path)
71+
if text != '' and text.lower() != 'you':
72+
latest_time = max(time for _, time in mic_data)
73+
pending_transcriptions.append(("You", text, latest_time))
74+
except Exception as e:
75+
print(f"Transcription error for You: {e}")
76+
finally:
77+
os.unlink(path)
78+
79+
if speaker_data:
80+
source_info = self.audio_sources["Speaker"]
81+
try:
82+
fd, path = tempfile.mkstemp(suffix=".wav")
83+
os.close(fd)
84+
source_info["process_data_func"](source_info["last_sample"], path)
85+
text = self.audio_model.get_transcription(path)
86+
if text != '' and text.lower() != 'you':
87+
latest_time = max(time for _, time in speaker_data)
88+
pending_transcriptions.append(("Speaker", text, latest_time))
89+
except Exception as e:
90+
print(f"Transcription error for Speaker: {e}")
91+
finally:
92+
os.unlink(path)
93+
94+
if pending_transcriptions:
95+
pending_transcriptions.sort(key=lambda x: x[2])
96+
for who_spoke, text, time_spoken in pending_transcriptions:
97+
self.update_transcript(who_spoke, text, time_spoken)
98+
5999
self.transcript_changed_event.set()
100+
101+
threading.Event().wait(0.1)
60102

61103
def update_last_sample_and_phrase_status(self, who_spoke, data, time_spoken):
62104
source_info = self.audio_sources[who_spoke]

main.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,15 @@ def update_transcript_UI(transcriber, textbox):
1717
write_in_textbox(textbox, transcript_string)
1818
textbox.after(300, update_transcript_UI, transcriber, textbox)
1919

20-
def clear_context(transcriber, audio_queue):
20+
def clear_context(transcriber, speaker_queue, mic_queue):
2121
transcriber.clear_transcript_data()
22-
with audio_queue.mutex:
23-
audio_queue.queue.clear()
2422

25-
def create_ui_components(root, transcriber, audio_queue):
23+
with speaker_queue.mutex:
24+
speaker_queue.queue.clear()
25+
with mic_queue.mutex:
26+
mic_queue.queue.clear()
27+
28+
def create_ui_components(root, transcriber, speaker_queue, mic_queue):
2629
ctk.set_appearance_mode("dark")
2730
ctk.set_default_color_theme("dark-blue")
2831
root.title("Ecoute")
@@ -49,7 +52,7 @@ def create_ui_components(root, transcriber, audio_queue):
4952
clear_button = ctk.CTkButton(
5053
main_frame,
5154
text="Clear Transcript",
52-
command=lambda: clear_context(transcriber, audio_queue)
55+
command=lambda: clear_context(transcriber, speaker_queue, mic_queue)
5356
)
5457
clear_button.grid(row=1, column=0, sticky="ew", padx=10, pady=(0, 10))
5558

@@ -63,24 +66,25 @@ def main():
6366
return
6467

6568
root = ctk.CTk()
66-
audio_queue = queue.Queue()
69+
speaker_queue = queue.Queue()
70+
mic_queue = queue.Queue()
6771

6872
user_audio_recorder = AudioRecorder.DefaultMicRecorder()
69-
user_audio_recorder.record_into_queue(audio_queue)
73+
user_audio_recorder.record_into_queue(mic_queue)
7074

7175
time.sleep(2)
7276

7377
speaker_audio_recorder = AudioRecorder.DefaultSpeakerRecorder()
74-
speaker_audio_recorder.record_into_queue(audio_queue)
78+
speaker_audio_recorder.record_into_queue(speaker_queue)
7579

7680
model = TranscriberModels.get_model('--api' in sys.argv)
7781

7882
transcriber = AudioTranscriber(user_audio_recorder.source, speaker_audio_recorder.source, model)
79-
transcribe = threading.Thread(target=transcriber.transcribe_audio_queue, args=(audio_queue,))
83+
transcribe = threading.Thread(target=transcriber.transcribe_audio_queue, args=(speaker_queue, mic_queue))
8084
transcribe.daemon = True
8185
transcribe.start()
8286

83-
transcript_textbox = create_ui_components(root, transcriber, audio_queue)
87+
transcript_textbox = create_ui_components(root, transcriber, speaker_queue, mic_queue)
8488

8589
print("READY")
8690

0 commit comments

Comments
 (0)