#pragma once
-
#include <string>
#include <vector>
#include <fstream>
#include <cassert>
#include <iostream>
#include "../../../../../SDK/components/utilities/include/sample_log.h"
-
+// Debug logging switch - set to true to enable debug logs
+static bool DEBUG_LOGGING = false;
+// Macro for debug logging
+#define DEBUG_LOG(fmt, ...)            \
+    do {                               \
+        if (DEBUG_LOGGING) {           \
+            SLOGI(fmt, ##__VA_ARGS__); \
+        }                              \
+    } while (0)
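A quick usage sketch for the new switch and macro (an editor's illustration, not part of the diff; it assumes SLOGI accepts a printf-style format string, as the existing calls suggest). The ##__VA_ARGS__ form relies on the GCC/Clang extension that swallows the trailing comma when no variadic arguments are passed:

    // Hypothetical call site:
    DEBUG_LOGGING = true;                     // or Lexicon::setDebugLogging(true), added further down
    DEBUG_LOG("loaded %zu entries", count);   // expands to: do { if (DEBUG_LOGGING) { SLOGI("loaded %zu entries", count); } } while (0)
    DEBUG_LOG("plain message");               // also valid thanks to ##__VA_ARGS__
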
std::vector<std::string> split(const std::string& s, char delim)
{
    std::vector<std::string> result;
@@ -30,9 +37,16 @@ class Lexicon {
    std::unordered_map<int, std::string> reverse_tokens;

public:
+    // Setter for debug logging
+    static void setDebugLogging(bool enable)
+    {
+        DEBUG_LOGGING = enable;
+    }
    Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename) : max_phrase_length(0)
    {
-        SLOGI("词典加载: %zu 发音表加载: %zu", tokens_filename, lexicon_filename);
+        DEBUG_LOG("Dictionary loading: %s Pronunciation table loading: %s", tokens_filename.c_str(),
+                  lexicon_filename.c_str());
+
        std::unordered_map<std::string, int> tokens;
        std::ifstream ifs(tokens_filename);
        assert(ifs.is_open());
@@ -83,8 +97,10 @@ class Lexicon {
        lexicon["。"] = lexicon["."];
        lexicon["！"] = lexicon["!"];
        lexicon["？"] = lexicon["?"];
-        SLOGI("词典加载完成,包含 %zu 个条目,最长词组长度: %zu", lexicon.size(), max_phrase_length);
+        DEBUG_LOG("Dictionary loading complete, containing %zu entries, longest phrase length: %zu", lexicon.size(),
+                  max_phrase_length);
    }
+
    std::vector<std::string> splitEachChar(const std::string& text)
    {
        std::vector<std::string> words;
@@ -95,93 +111,77 @@ class Lexicon {
            if ((text[i] & 0x80) == 0x00) {
                // ASCII
            } else if ((text[i] & 0xE0) == 0xC0) {
-                next = 2;  // 2字节UTF-8
+                next = 2;  // 2-byte UTF-8
            } else if ((text[i] & 0xF0) == 0xE0) {
-                next = 3;  // 3字节UTF-8
+                next = 3;  // 3-byte UTF-8
            } else if ((text[i] & 0xF8) == 0xF0) {
-                next = 4;  // 4字节UTF-8
+                next = 4;  // 4-byte UTF-8
            }
            words.push_back(text.substr(i, next));
            i += next;
        }
        return words;
    }
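As a concrete illustration of the lead-byte checks above (an editor's sketch, not part of the diff; the constructor arguments are placeholder file names), ASCII bytes advance one at a time while CJK characters occupy three bytes in UTF-8:

    Lexicon lex("lexicon.txt", "tokens.txt");   // placeholder paths
    auto chars = lex.splitEachChar("OK你好");
    // chars == {"O", "K", "你", "好"}: 'O' and 'K' take the ASCII branch,
    // "你" (0xE4 0xBD 0xA0) and "好" (0xE5 0xA5 0xBD) hit the 0xE0 lead-byte case, so next = 3.
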
+
    bool is_english(const std::string& s)
    {
        return s.size() == 1 && ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z'));
    }
-
    bool is_english_token_char(const std::string& s)
    {
        if (s.size() != 1) return false;
        char c = s[0];
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_';
    }
-
    void process_unknown_english(const std::string& word, std::vector<int>& phones, std::vector<int>& tones)
    {
-        SLOGI("Processing unknown term: %s", word.c_str());
-
+        DEBUG_LOG("Processing unknown term: %s", word.c_str());
        std::string orig_word = word;
        std::vector<std::string> parts;
        std::vector<std::string> phonetic_parts;
-
        size_t start = 0;
        while (start < word.size()) {
            bool matched = false;
-
            for (size_t len = std::min(word.size() - start, (size_t)10); len > 0 && !matched; --len) {
                std::string sub_word = word.substr(start, len);
                std::string lower_sub_word = sub_word;
                std::transform(lower_sub_word.begin(), lower_sub_word.end(), lower_sub_word.begin(),
                               [](unsigned char c) { return std::tolower(c); });
-
                if (lexicon.find(lower_sub_word) != lexicon.end()) {
                    // Substring found in lexicon
                    auto& [sub_phones, sub_tones] = lexicon[lower_sub_word];
                    phones.insert(phones.end(), sub_phones.begin(), sub_phones.end());
                    tones.insert(tones.end(), sub_tones.begin(), sub_tones.end());
-
                    parts.push_back(sub_word);
                    phonetic_parts.push_back(phonesToString(sub_phones));
-
-                    SLOGI("Matched: '%s' -> %s", sub_word.c_str(), phonesToString(sub_phones).c_str());
-
+                    DEBUG_LOG("Matched: '%s' -> %s", sub_word.c_str(), phonesToString(sub_phones).c_str());
                    start += len;
                    matched = true;
                    break;
                }
            }
-
            if (!matched) {
                std::string single_char = word.substr(start, 1);
                std::string lower_char = single_char;
                std::transform(lower_char.begin(), lower_char.end(), lower_char.begin(),
                               [](unsigned char c) { return std::tolower(c); });
-
                if (lexicon.find(lower_char) != lexicon.end()) {
                    auto& [char_phones, char_tones] = lexicon[lower_char];
                    phones.insert(phones.end(), char_phones.begin(), char_phones.end());
                    tones.insert(tones.end(), char_tones.begin(), char_tones.end());
-
                    parts.push_back(single_char);
                    phonetic_parts.push_back(phonesToString(char_phones));
-
-                    SLOGI("Single char: '%s' -> %s", single_char.c_str(), phonesToString(char_phones).c_str());
+                    DEBUG_LOG("Single char: '%s' -> %s", single_char.c_str(), phonesToString(char_phones).c_str());
                } else {
                    phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end());
                    tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end());
-
                    parts.push_back(single_char);
                    phonetic_parts.push_back("_unknown_");
-
-                    SLOGI("Unknown: '%s'", single_char.c_str());
+                    DEBUG_LOG("Unknown: '%s'", single_char.c_str());
                }
-
                start++;
            }
        }
-
        std::string parts_str, phonetic_str;
        for (size_t i = 0; i < parts.size(); i++) {
            if (i > 0) {
@@ -191,20 +191,20 @@ class Lexicon {
            parts_str += parts[i];
            phonetic_str += phonetic_parts[i];
        }
-
-        SLOGI("%s\t|\tDecomposed: %s\t|\tPhonetics: %s", orig_word.c_str(), parts_str.c_str(), phonetic_str.c_str());
+        DEBUG_LOG("%s\t|\tDecomposed: %s\t|\tPhonetics: %s", orig_word.c_str(), parts_str.c_str(),
+                  phonetic_str.c_str());
    }
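To make the greedy matching above concrete (an editor's sketch; "play" and "station" are assumed lexicon entries, while "playstation" is not): the loop repeatedly tries the longest lower-cased substring starting at the current position (capped at 10 characters), falls back to a single character, and emits the unknown token only when even that fails.

    // Hypothetical trace for process_unknown_english("playstation", phones, tones):
    //   start = 0: tries "playstatio", "playstati", ... until "play" matches -> appends lexicon["play"] phones/tones
    //   start = 4: tries "station", which matches immediately                -> appends lexicon["station"] phones/tones
    // The summary DEBUG_LOG then reports the decomposition and the concatenated phonetics for the whole word.
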
+
    void convert(const std::string& text, std::vector<int>& phones, std::vector<int>& tones)
    {
-        SLOGI("\n开始处理文本: \"%s\"", text.c_str());
-        SLOGI("=======匹配结果=======");
-        SLOGI("单元\t|\t音素\t|\t声调");
-        SLOGI("-----------------------------");
+        DEBUG_LOG("\nStarting text processing: \"%s\"", text.c_str());
+        DEBUG_LOG("=======Matching Results=======");
+        DEBUG_LOG("Unit\t|\tPhonemes\t|\tTones");
+        DEBUG_LOG("-----------------------------");
        phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end());
        tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end());
-
-        SLOGI("<BOS>\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(),
-              tonesToString(unknown_token.second).c_str());
+        DEBUG_LOG("<BOS>\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(),
+                  tonesToString(unknown_token.second).c_str());
        auto chars = splitEachChar(text);
        int i = 0;
        while (i < chars.size()) {
@@ -221,8 +221,8 @@ class Lexicon {
                auto& [eng_phones, eng_tones] = lexicon[eng_word];
                phones.insert(phones.end(), eng_phones.begin(), eng_phones.end());
                tones.insert(tones.end(), eng_tones.begin(), eng_tones.end());
-                SLOGI("%s\t|\t%s\t|\t%s", orig_word.c_str(), phonesToString(eng_phones).c_str(),
-                      tonesToString(eng_tones).c_str());
+                DEBUG_LOG("%s\t|\t%s\t|\t%s", orig_word.c_str(), phonesToString(eng_phones).c_str(),
+                          tonesToString(eng_tones).c_str());
            } else {
                process_unknown_english(orig_word, phones, tones);
            }
@@ -241,8 +241,8 @@ class Lexicon {
                    auto& [phrase_phones, phrase_tones] = lexicon[phrase];
                    phones.insert(phones.end(), phrase_phones.begin(), phrase_phones.end());
                    tones.insert(tones.end(), phrase_tones.begin(), phrase_tones.end());
-                    SLOGI("%s\t|\t%s\t|\t%s", phrase.c_str(), phonesToString(phrase_phones).c_str(),
-                          tonesToString(phrase_tones).c_str());
+                    DEBUG_LOG("%s\t|\t%s\t|\t%s", phrase.c_str(), phonesToString(phrase_phones).c_str(),
+                              tonesToString(phrase_tones).c_str());
                    i += len;
                    matched = true;
                    break;
@@ -264,25 +264,25 @@ class Lexicon {
                    auto& [char_phones, char_tones] = lexicon[s];
                    phones.insert(phones.end(), char_phones.begin(), char_phones.end());
                    tones.insert(tones.end(), char_tones.begin(), char_tones.end());
-                    SLOGI("%s\t|\t%s\t|\t%s", orig_char.c_str(), phonesToString(char_phones).c_str(),
-                          tonesToString(char_tones).c_str());
+                    DEBUG_LOG("%s\t|\t%s\t|\t%s", orig_char.c_str(), phonesToString(char_phones).c_str(),
+                              tonesToString(char_tones).c_str());
                } else {
                    phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end());
                    tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end());
-                    SLOGI("%s\t|\t%s (未匹配)\t|\t%s", orig_char.c_str(), phonesToString(unknown_token.first).c_str(),
-                          tonesToString(unknown_token.second).c_str());
+                    DEBUG_LOG("%s\t|\t%s (Not matched)\t|\t%s", orig_char.c_str(),
+                              phonesToString(unknown_token.first).c_str(), tonesToString(unknown_token.second).c_str());
                }
            }
        }
        phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end());
        tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end());
-        SLOGI("<EOS>\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(),
-              tonesToString(unknown_token.second).c_str());
-        SLOGI("\n处理结果汇总:");
-        SLOGI("原文: %s", text.c_str());
-        SLOGI("音素: %s", phonesToString(phones).c_str());
-        SLOGI("声调: %s", tonesToString(tones).c_str());
-        SLOGI("====================");
+        DEBUG_LOG("<EOS>\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(),
+                  tonesToString(unknown_token.second).c_str());
+        DEBUG_LOG("\nProcessing Summary:");
+        DEBUG_LOG("Original text: %s", text.c_str());
+        DEBUG_LOG("Phonemes: %s", phonesToString(phones).c_str());
+        DEBUG_LOG("Tones: %s", tonesToString(tones).c_str());
+        DEBUG_LOG("====================");
    }
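End to end, the class can be exercised roughly like this (an editor's sketch, not part of the diff; the file names are placeholders and the resulting IDs depend entirely on the loaded lexicon and token table):

    Lexicon::setDebugLogging(true);                      // surface the per-unit match table via SLOGI
    Lexicon lex("melo_lexicon.txt", "melo_tokens.txt");  // placeholder paths
    std::vector<int> phones, tones;
    lex.convert("Hello, 世界!", phones, tones);
    // phones/tones now hold the <BOS>/<EOS> markers plus one phoneme/tone ID sequence per matched unit.
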
private: