1
1
#pragma once
2
-
3
2
#include < string>
4
3
#include < vector>
5
4
#include < fstream>
9
8
#include < cassert>
10
9
#include < iostream>
11
10
#include " ../../../../../SDK/components/utilities/include/sample_log.h"
12
-
11
// Runtime switch for verbose lexicon tracing; off by default.
// `inline` (C++17) gives the whole program a single shared flag. With `static`,
// every translation unit that includes this header would get its own private
// copy, so Lexicon::setDebugLogging() called from one .cpp would not change
// what another .cpp logs.
inline bool DEBUG_LOGGING = false;

// Forwards a printf-style message to SLOGI only while DEBUG_LOGGING is true.
// There must be no whitespace between the macro name and '(': otherwise the
// preprocessor defines an object-like macro and every call site fails to compile.
// The do { ... } while (0) wrapper makes the expansion a single statement, so
// the macro stays safe inside an unbraced if/else. The arguments are only
// evaluated when logging is enabled.
#define DEBUG_LOG(fmt, ...)            \
    do {                               \
        if (DEBUG_LOGGING) {           \
            SLOGI(fmt, ##__VA_ARGS__); \
        }                              \
    } while (0)
13
20
std::vector<std::string> split (const std::string& s, char delim)
14
21
{
15
22
std::vector<std::string> result;
@@ -30,8 +37,16 @@ class Lexicon {
30
37
std::unordered_map<int , std::string> reverse_tokens;
31
38
32
39
public:
40
+ // Setter for debug logging
41
+ static void setDebugLogging (bool enable)
42
+ {
43
+ DEBUG_LOGGING = enable;
44
+ }
33
45
Lexicon (const std::string& lexicon_filename, const std::string& tokens_filename) : max_phrase_length(0 )
34
46
{
47
+ DEBUG_LOG (" Dictionary loading: %s Pronunciation table loading: %s" , tokens_filename.c_str (),
48
+ lexicon_filename.c_str ());
49
+
35
50
std::unordered_map<std::string, int > tokens;
36
51
std::ifstream ifs (tokens_filename);
37
52
assert (ifs.is_open ());
@@ -82,8 +97,10 @@ class Lexicon {
82
97
lexicon[" 。" ] = lexicon[" ." ];
83
98
lexicon[" !" ] = lexicon[" !" ];
84
99
lexicon[" ?" ] = lexicon[" ?" ];
85
- SLOGI (" 词典加载完成,包含 %zu 个条目,最长词组长度: %zu" , lexicon.size (), max_phrase_length);
100
+ DEBUG_LOG (" Dictionary loading complete, containing %zu entries, longest phrase length: %zu" , lexicon.size (),
101
+ max_phrase_length);
86
102
}
103
+
87
104
std::vector<std::string> splitEachChar (const std::string& text)
88
105
{
89
106
std::vector<std::string> words;
@@ -94,93 +111,77 @@ class Lexicon {
94
111
if ((text[i] & 0x80 ) == 0x00 ) {
95
112
// ASCII
96
113
} else if ((text[i] & 0xE0 ) == 0xC0 ) {
97
- next = 2 ; // 2字节UTF -8
114
+ next = 2 ; // 2-byte UTF -8
98
115
} else if ((text[i] & 0xF0 ) == 0xE0 ) {
99
- next = 3 ; // 3字节UTF -8
116
+ next = 3 ; // 3-byte UTF -8
100
117
} else if ((text[i] & 0xF8 ) == 0xF0 ) {
101
- next = 4 ; // 4字节UTF -8
118
+ next = 4 ; // 4-byte UTF -8
102
119
}
103
120
words.push_back (text.substr (i, next));
104
121
i += next;
105
122
}
106
123
return words;
107
124
}
125
+
108
126
// Returns true only when `s` is exactly one byte long and that byte is an ASCII
// letter (A-Z or a-z). The empty string, digits, punctuation and multi-byte
// UTF-8 characters all yield false.
bool is_english(const std::string& s)
{
    if (s.size() != 1) {
        return false;
    }
    // Explicit ASCII ranges keep the check independent of the current locale.
    const char c = s[0];
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
112
-
113
130
// Returns true when `s` is a single byte that is an ASCII letter, an ASCII
// digit, '-' or '_'. Anything else, including the empty string and multi-byte
// UTF-8 characters, yields false.
bool is_english_token_char(const std::string& s)
{
    if (s.size() != 1) {
        return false;
    }
    const char c = s[0];
    const bool is_letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    const bool is_digit = (c >= '0' && c <= '9');
    return is_letter || is_digit || c == '-' || c == '_';
}
119
-
120
136
void process_unknown_english (const std::string& word, std::vector<int >& phones, std::vector<int >& tones)
121
137
{
122
- SLOGI (" Processing unknown term: %s" , word.c_str ());
123
-
138
+ DEBUG_LOG (" Processing unknown term: %s" , word.c_str ());
124
139
std::string orig_word = word;
125
140
std::vector<std::string> parts;
126
141
std::vector<std::string> phonetic_parts;
127
-
128
142
size_t start = 0 ;
129
143
while (start < word.size ()) {
130
144
bool matched = false ;
131
-
132
145
for (size_t len = std::min (word.size () - start, (size_t )10 ); len > 0 && !matched; --len) {
133
146
std::string sub_word = word.substr (start, len);
134
147
std::string lower_sub_word = sub_word;
135
148
std::transform (lower_sub_word.begin (), lower_sub_word.end (), lower_sub_word.begin (),
136
149
[](unsigned char c) { return std::tolower (c); });
137
-
138
150
if (lexicon.find (lower_sub_word) != lexicon.end ()) {
139
151
// Substring found in lexicon
140
152
auto & [sub_phones, sub_tones] = lexicon[lower_sub_word];
141
153
phones.insert (phones.end (), sub_phones.begin (), sub_phones.end ());
142
154
tones.insert (tones.end (), sub_tones.begin (), sub_tones.end ());
143
-
144
155
parts.push_back (sub_word);
145
156
phonetic_parts.push_back (phonesToString (sub_phones));
146
-
147
- SLOGI (" Matched: '%s' -> %s" , sub_word.c_str (), phonesToString (sub_phones).c_str ());
148
-
157
+ DEBUG_LOG (" Matched: '%s' -> %s" , sub_word.c_str (), phonesToString (sub_phones).c_str ());
149
158
start += len;
150
159
matched = true ;
151
160
break ;
152
161
}
153
162
}
154
-
155
163
if (!matched) {
156
164
std::string single_char = word.substr (start, 1 );
157
165
std::string lower_char = single_char;
158
166
std::transform (lower_char.begin (), lower_char.end (), lower_char.begin (),
159
167
[](unsigned char c) { return std::tolower (c); });
160
-
161
168
if (lexicon.find (lower_char) != lexicon.end ()) {
162
169
auto & [char_phones, char_tones] = lexicon[lower_char];
163
170
phones.insert (phones.end (), char_phones.begin (), char_phones.end ());
164
171
tones.insert (tones.end (), char_tones.begin (), char_tones.end ());
165
-
166
172
parts.push_back (single_char);
167
173
phonetic_parts.push_back (phonesToString (char_phones));
168
-
169
- SLOGI (" Single char: '%s' -> %s" , single_char.c_str (), phonesToString (char_phones).c_str ());
174
+ DEBUG_LOG (" Single char: '%s' -> %s" , single_char.c_str (), phonesToString (char_phones).c_str ());
170
175
} else {
171
176
phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
172
177
tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
173
-
174
178
parts.push_back (single_char);
175
179
phonetic_parts.push_back (" _unknown_" );
176
-
177
- SLOGI (" Unknown: '%s'" , single_char.c_str ());
180
+ DEBUG_LOG (" Unknown: '%s'" , single_char.c_str ());
178
181
}
179
-
180
182
start++;
181
183
}
182
184
}
183
-
184
185
std::string parts_str, phonetic_str;
185
186
for (size_t i = 0 ; i < parts.size (); i++) {
186
187
if (i > 0 ) {
@@ -190,20 +191,20 @@ class Lexicon {
190
191
parts_str += parts[i];
191
192
phonetic_str += phonetic_parts[i];
192
193
}
193
-
194
- SLOGI ( " %s \t | \t Decomposed: %s \t | \t Phonetics: %s " , orig_word. c_str (), parts_str. c_str (), phonetic_str.c_str ());
194
+ DEBUG_LOG ( " %s \t | \t Decomposed: %s \t | \t Phonetics: %s " , orig_word. c_str (), parts_str. c_str (),
195
+ phonetic_str.c_str ());
195
196
}
197
+
196
198
void convert (const std::string& text, std::vector<int >& phones, std::vector<int >& tones)
197
199
{
198
- SLOGI (" \n 开始处理文本 : \" %s\" " , text.c_str ());
199
- SLOGI (" =======匹配结果 =======" );
200
- SLOGI ( " 单元 \t |\t 音素 \t |\t 声调 " );
201
- SLOGI (" -----------------------------" );
200
+ DEBUG_LOG (" \n Starting text processing : \" %s\" " , text.c_str ());
201
+ DEBUG_LOG (" =======Matching Results =======" );
202
+ DEBUG_LOG ( " Unit \t |\t Phonemes \t |\t Tones " );
203
+ DEBUG_LOG (" -----------------------------" );
202
204
phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
203
205
tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
204
-
205
- SLOGI (" <BOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
206
- tonesToString (unknown_token.second ).c_str ());
206
+ DEBUG_LOG (" <BOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
207
+ tonesToString (unknown_token.second ).c_str ());
207
208
auto chars = splitEachChar (text);
208
209
int i = 0 ;
209
210
while (i < chars.size ()) {
@@ -220,8 +221,8 @@ class Lexicon {
220
221
auto & [eng_phones, eng_tones] = lexicon[eng_word];
221
222
phones.insert (phones.end (), eng_phones.begin (), eng_phones.end ());
222
223
tones.insert (tones.end (), eng_tones.begin (), eng_tones.end ());
223
- SLOGI (" %s\t |\t %s\t |\t %s" , orig_word.c_str (), phonesToString (eng_phones).c_str (),
224
- tonesToString (eng_tones).c_str ());
224
+ DEBUG_LOG (" %s\t |\t %s\t |\t %s" , orig_word.c_str (), phonesToString (eng_phones).c_str (),
225
+ tonesToString (eng_tones).c_str ());
225
226
} else {
226
227
process_unknown_english (orig_word, phones, tones);
227
228
}
@@ -240,8 +241,8 @@ class Lexicon {
240
241
auto & [phrase_phones, phrase_tones] = lexicon[phrase];
241
242
phones.insert (phones.end (), phrase_phones.begin (), phrase_phones.end ());
242
243
tones.insert (tones.end (), phrase_tones.begin (), phrase_tones.end ());
243
- SLOGI (" %s\t |\t %s\t |\t %s" , phrase.c_str (), phonesToString (phrase_phones).c_str (),
244
- tonesToString (phrase_tones).c_str ());
244
+ DEBUG_LOG (" %s\t |\t %s\t |\t %s" , phrase.c_str (), phonesToString (phrase_phones).c_str (),
245
+ tonesToString (phrase_tones).c_str ());
245
246
i += len;
246
247
matched = true ;
247
248
break ;
@@ -263,25 +264,25 @@ class Lexicon {
263
264
auto & [char_phones, char_tones] = lexicon[s];
264
265
phones.insert (phones.end (), char_phones.begin (), char_phones.end ());
265
266
tones.insert (tones.end (), char_tones.begin (), char_tones.end ());
266
- SLOGI (" %s\t |\t %s\t |\t %s" , orig_char.c_str (), phonesToString (char_phones).c_str (),
267
- tonesToString (char_tones).c_str ());
267
+ DEBUG_LOG (" %s\t |\t %s\t |\t %s" , orig_char.c_str (), phonesToString (char_phones).c_str (),
268
+ tonesToString (char_tones).c_str ());
268
269
} else {
269
270
phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
270
271
tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
271
- SLOGI (" %s\t |\t %s (未匹配 )\t |\t %s" , orig_char. c_str (), phonesToString (unknown_token. first ) .c_str (),
272
- tonesToString (unknown_token.second ).c_str ());
272
+ DEBUG_LOG (" %s\t |\t %s (Not matched )\t |\t %s" , orig_char.c_str (),
273
+ phonesToString (unknown_token. first ). c_str (), tonesToString (unknown_token.second ).c_str ());
273
274
}
274
275
}
275
276
}
276
277
phones.insert (phones.end (), unknown_token.first .begin (), unknown_token.first .end ());
277
278
tones.insert (tones.end (), unknown_token.second .begin (), unknown_token.second .end ());
278
- SLOGI (" <EOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
279
- tonesToString (unknown_token.second ).c_str ());
280
- SLOGI (" \n 处理结果汇总 :" );
281
- SLOGI ( " 原文 : %s" , text.c_str ());
282
- SLOGI ( " 音素 : %s" , phonesToString (phones).c_str ());
283
- SLOGI ( " 声调 : %s" , tonesToString (tones).c_str ());
284
- SLOGI (" ====================" );
279
+ DEBUG_LOG (" <EOS>\t |\t %s\t |\t %s" , phonesToString (unknown_token.first ).c_str (),
280
+ tonesToString (unknown_token.second ).c_str ());
281
+ DEBUG_LOG (" \n Processing Summary :" );
282
+ DEBUG_LOG ( " Original text : %s" , text.c_str ());
283
+ DEBUG_LOG ( " Phonemes : %s" , phonesToString (phones).c_str ());
284
+ DEBUG_LOG ( " Tones : %s" , tonesToString (tones).c_str ());
285
+ DEBUG_LOG (" ====================" );
285
286
}
286
287
287
288
private:
0 commit comments