From 158da0e52948109078e722ff485f2d165bc78e03 Mon Sep 17 00:00:00 2001
From: Yanqing Sun <sunyq@rd.netease.com>
Date: Fri, 17 Nov 2023 10:30:53 +0800
Subject: [PATCH] Improve handling of some Chinese characters and add support
 for mixed Chinese/English text, with thanks to KimigaiiWuyi in #17

---
 demo_page.py | 40 ++++++++++++++++++---------------------
 frontend.py  | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 68 insertions(+), 25 deletions(-)

diff --git a/demo_page.py b/demo_page.py
index 127dcd8..9c847f8 100644
--- a/demo_page.py
+++ b/demo_page.py
@@ -19,7 +19,7 @@
 import torch
 import re
 
-from frontend import g2p
+from frontend import g2p, re_digits
 from frontend_en import preprocess_english
 from config.joint.config import Config
 from models.prompt_tts_modified.jets import JETSGenerator
@@ -164,9 +164,7 @@ def tts(name, text, prompt, content, speaker, models):
 speakers = config.speakers
 models = get_models()
 
-
-
-
+re_english_word = re.compile(r'([a-z\d\-\.\']+)', re.I)  # runs of letters, digits, - . '
 def new_line(i):
     col1, col2, col3, col4 = st.columns([1.5, 1.5, 3.5, 1.3])
     with col1:
@@ -175,28 +173,26 @@ def new_line(i):
         prompt=st.text_input("Prompt (开心/悲伤)", "", key=f"{i}_prompt")
     with col3:
         content=st.text_input("Text to be synthesized into speech (合成文本)", "合成文本", key=f"{i}_text")
-    
     with col4:
-        lang=st.selectbox("Language (语言)", ["ch", "us"], key=f"{i}_lang")
-    
+        lang=st.selectbox("Language (语言)", ["zh_us"], key=f"{i}_lang")
 
     flag = st.button(f"Synthesize (合成)", key=f"{i}_button1")
     if flag:
-        if lang=="us":
-            if contains_chinese(content):
-                st.info("文本含有中文/input texts contain chinese")
-            else:
-                text = g2p_en(content)
-                path = tts(i, text, prompt, content, speaker, models)
-                st.audio(path, sample_rate=config.sampling_rate)
-        else:
-            if not contains_chinese(content):
-                st.info("文本含有英文/input texts contain english")
-            else:            
-                text = g2p_cn(content)
-                path = tts(i, text, prompt, content, speaker, models)
-                st.audio(path, sample_rate=config.sampling_rate)
-
+        parts = re_english_word.split(content)
+        tts_text = ["<sos/eos>"]
+        chartype = ''
+        for part in parts:
+            if part == ' ': continue
+            if re_digits.match(part) and chartype == 'cn' or contains_chinese(part):
+                tts_text.append( g2p_cn(part) )
+                chartype = 'cn'
+            elif re_english_word.match(part):
+                tts_text.append( g2p_en(part).replace("<sos/eos>", "") )
+                chartype = 'en'
+        tts_text.append("<sos/eos>")
+        text =  " ".join(tts_text)
+        path = tts(i, text, prompt, content, speaker, models)
+        st.audio(path, sample_rate=config.sampling_rate)
 
 
 
diff --git a/frontend.py b/frontend.py
index 4852f89..7be599b 100644
--- a/frontend.py
+++ b/frontend.py
@@ -69,7 +69,7 @@ def split_py(py):
     return sm, ym
 
 
-chinese_punctuation_pattern = r'[\u3002\uff0c\uff1f\uff01\uff1b\uff1a\u201c\u201d\u2018\u2019\u300a\u300b\u3008\u3009\u3010\u3011\u300e\u300f\u2014\u2026]'
+chinese_punctuation_pattern = r'[\u3002\uff0c\uff1f\uff01\uff1b\uff1a\u201c\u201d\u2018\u2019\u300a\u300b\u3008\u3009\u3010\u3011\u300e\u300f\u2014\u2026\u3001\uff08\uff09]'
 
 
 def has_chinese_punctuation(text):
@@ -78,12 +78,59 @@ def has_chinese_punctuation(text):
 def has_english_punctuation(text):
     return text in string.punctuation
 
+# with thanks to KimigaiiWuyi in https://github.com/netease-youdao/EmotiVoice/pull/17.
+re_digits = re.compile(r'(\d[\d\.]*)')  # a digit followed by any mix of digits and dots
+re_decimals = re.compile(r'\d+\.\d+')  # digits, a dot, digits
+def number_to_chinese(char: str):
+    """Spell a decimal digit string as a Chinese numeral, e.g. '101' -> '一百零一'.
+
+    Non-digit input is returned unchanged. A single digit, a leading zero or
+    more than 12 digits is read digit by digit instead ('007' -> '零零七').
+    """
+    chinese_digits = ['零', '一', '二', '三', '四', '五', '六', '七', '八', '九']
+    chinese_units = ['', '十', '百', '千']
+    section_units = {4: '万', 8: '亿'}  # closes each non-empty 4-digit group
+    char_str = str(char)
+    length = len(char_str)
+    if not char_str.isdecimal():
+        return char
+    if length == 1 or char_str[0] == '0' or length > 12:
+        return ''.join(chinese_digits[int(d)] for d in char_str)
+    result, pending_zero, group_used = '', False, False
+    for i, d in enumerate(char_str):
+        unit, digit = length - i - 1, int(d)
+        if digit:  # a single '零' stands for any run of zeros skipped before it
+            result += ('零' if pending_zero else '') + chinese_digits[digit] + chinese_units[unit % 4]
+        pending_zero, group_used = not digit, group_used or bool(digit)
+        if unit in section_units:
+            result += section_units[unit] if group_used else ''
+            group_used = False
+    return result[1:] if result.startswith('一十') else result
+
+def tn_chinese(text):
+    """Replace the digit runs in text with their Chinese reading.
+
+    The integer part is read as a number and each digit after a '.' one by one
+    ('3.14' -> '三点一四'); a '.' not followed by a digit is kept as written.
+    """
+    words = []
+    # split() with a capturing group puts the numeric runs at the odd indices
+    for index, part in enumerate(re_digits.split(text)):
+        if index % 2 == 0:
+            words.append(part)
+            continue
+        integer, *fractions = part.split('.')
+        words.append(number_to_chinese(integer))
+        words.extend('点' + ''.join(map(number_to_chinese, f)) if f else '.' for f in fractions)
+    return ''.join(words)
+
 def g2p(text):
     res_text=["<sos/eos>"]
     seg_list = jieba.cut(text)
     for seg in seg_list:
-        
-        py =[_py[0] for _py in pinyin(seg, style=Style.TONE3,neutral_tone_with_five=True)]
+        if seg == " ": continue
+        seg_tn = tn_chinese(seg)
+        py =[_py[0] for _py in pinyin(seg_tn, style=Style.TONE3,neutral_tone_with_five=True)]
 
         if any([has_chinese_punctuation(_py) for _py in py])  or any([has_english_punctuation(_py) for _py in py]):
             res_text.pop()