forked from pnnbao97/VieNeu-TTS
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
95 lines (77 loc) · 4.14 KB
/
main.py
File metadata and controls
95 lines (77 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
VieNeu-TTS SDK Example: Standard Mode (Local Inference)
This example demonstrates how to run VieNeu-TTS locally on your machine.
Ideal for offline apps, local development, or private deployments.
"""
from vieneu import Vieneu
import os
def main():
print("🚀 Initializing local VieNeu engine...")
os.makedirs("outputs", exist_ok=True)
# ---------------------------------------------------------
# PART 1: INITIALIZATION
# ---------------------------------------------------------
# Mode="standard" (default) runs locally.
# By default, it uses "pnnbao-ump/VieNeu-TTS-0.3B-q4-gguf" (Backbone)
# and "neuphonic/distill-neucodec" (Codec) for maximum speed.
tts = Vieneu()
# Optional: If you want to force use a specific PyTorch model:
# tts = Vieneu(backbone_repo="pnnbao-ump/VieNeu-TTS-0.3B", codec_repo="neuphonic/distill-neucodec", backbone_device="cuda", codec_device="cuda")
# ---------------------------------------------------------
# PART 2: LIST PRESET VOICES
# ---------------------------------------------------------
# The SDK returns (Description, ID) tuples
available_voices = tts.list_preset_voices()
print(f"📋 Found {len(available_voices)} preset voices.")
if available_voices:
print(" Showing all voices:")
for desc, name in available_voices:
print(f" - {desc} (ID: {name})")
# ---------------------------------------------------------
# PART 3: USE SPECIFIC VOICE ID
# ---------------------------------------------------------
if available_voices:
print("\n--- PART 3: Using Specific Voice ID ---")
# Example: Select Tuyên (nam miền Bắc) - usually ID is 'Tuyen'
_, my_voice_id = available_voices[1] if len(available_voices) > 1 else available_voices[0]
print(f"👤 Selecting voice: {my_voice_id}")
# Get reference data for this specific voice
voice_data = tts.get_preset_voice(my_voice_id)
test_text = f"Chào bạn, tôi đang nói bằng giọng của bác sĩ Tuyên."
audio_spec = tts.infer(text=test_text, voice=voice_data)
tts.save(audio_spec, f"outputs/standard_{my_voice_id}.wav")
print(f"💾 Saved {my_voice_id} synthesis to: outputs/standard_{my_voice_id}.wav")
# ---------------------------------------------------------
# PART 4: STANDARD SPEECH SYNTHESIS (DEFAULT)
# ---------------------------------------------------------
print("\n--- PART 4: Standard Synthesis (Default) ---")
text = "Xin chào, tôi là VieNeu. Tôi có thể giúp bạn đọc sách, làm chatbot thời gian thực, hoặc thậm chí clone giọng nói của bạn."
print("🎧 Synthesizing speech...")
# By default, it uses the model's 'default_voice'
audio = tts.infer(text=text)
tts.save(audio, "outputs/standard_output.wav")
print("💾 Saved synthesized speech to: outputs/standard_output.wav")
# ---------------------------------------------------------
# PART 5: ZERO-SHOT VOICE CLONING (LOCAL)
# ---------------------------------------------------------
# You can clone any voice using a short audio sample (3-5s) and its transcript
ref_audio = "examples/audio_ref/example_ngoc_huyen.wav"
ref_text = "Tác phẩm dự thi bảo đảm tính khoa học, tính đảng, tính chiến đấu, tính định hướng."
if os.path.exists(ref_audio):
print("\n--- PART 5: Voice Cloning ---")
print(f"🦜 Cloning voice from: {ref_audio}")
cloned_audio = tts.infer(
text="Đây là giọng nói đã được clone thành công từ file mẫu.",
ref_audio=ref_audio,
ref_text=ref_text
)
tts.save(cloned_audio, "outputs/standard_cloned_output.wav")
print("💾 Saved cloned voice to: outputs/standard_cloned_output.wav")
# ---------------------------------------------------------
# PART 6: CLEANUP
# ---------------------------------------------------------
# Explicitly release resources
tts.close()
print("\n✅ All tasks completed!")
if __name__ == "__main__":
main()