-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathvoice_activity_detector.py
More file actions
34 lines (26 loc) · 1.17 KB
/
voice_activity_detector.py
File metadata and controls
34 lines (26 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Check out https://arxiv.org/abs/2104.04045
# Also check out https://huggingface.co/philschmid/pyannote-segmentation
# 1. visit hf.co/pyannote/segmentation and accept user conditions
# 2. visit hf.co/settings/tokens to create an access token and expose it via
#    the HUGGING_FACE_ACCESS_TOKEN environment variable
# 3. instantiate pretrained voice activity detection pipeline
import os
import sys
import time
from pyannote.audio import Pipeline
# Hugging Face access token, read once at import time from the
# HUGGING_FACE_ACCESS_TOKEN environment variable (None when it is unset).
HUGGING_FACE_TOKEN = os.getenv("HUGGING_FACE_ACCESS_TOKEN")


class VoiceActivityDetector:
    """Thin wrapper around pyannote's pretrained voice-activity pipeline.

    The pipeline is loaded once in ``__init__`` and reused for every call
    to ``get_segments``.
    """

    def __init__(self, auth_token=HUGGING_FACE_TOKEN):
        """Load the ``pyannote/voice-activity-detection`` pipeline.

        Args:
            auth_token: Value forwarded as ``use_auth_token`` to
                ``Pipeline.from_pretrained``. Defaults to the module-level
                ``HUGGING_FACE_TOKEN``, which was captured when this module
                was imported.
        """
        self.pipeline = Pipeline.from_pretrained(
            "pyannote/voice-activity-detection",
            use_auth_token=auth_token,
        )

    def get_segments(self, audio_file_path, debug_mode=False):
        """Return the voice segments detected in an audio file.

        Args:
            audio_file_path: Audio input handed unchanged to the pipeline.
            debug_mode: When true, print the processed path and the time
                the pipeline call took.

        Returns:
            A list of ``[start, end]`` pairs, one per segment yielded by
            ``output.get_timeline().support()``, in iteration order.
            NOTE(review): start/end are presumably seconds, as in
            pyannote.core ``Segment`` -- confirm against the library docs.
        """
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed (or go negative) if the system clock is adjusted mid-run.
        started_at = time.perf_counter()
        output = self.pipeline(audio_file_path)
        if debug_mode:
            elapsed = time.perf_counter() - started_at
            print("Processing for audio file", audio_file_path)
            print("--- %s seconds taken ---" % elapsed)
        return [
            [speech.start, speech.end]
            for speech in output.get_timeline().support()
        ]