@@ -76,6 +76,66 @@ def get_codepoints(f):
7676 for c in range (prev_codepoint + 1 , NUM_CODEPOINTS ):
7777 yield Codepoint (c , None )
7878
def compress_singletons(singletons):
    """Split each value into a high part and a low byte, run-length style.

    Every value ``v`` in ``singletons`` contributes ``v & 0xff`` to the
    returned ``lowers`` list, in input order. Consecutive values that share
    the same ``v >> 8`` are grouped into a single ``(high, run_length)``
    tuple in the returned ``uppers`` list, where ``run_length`` is the
    number of entries of ``lowers`` that belong to that group.

    Returns the pair ``(uppers, lowers)``.
    """
    high_runs = []  # one (high part, number of low bytes it owns) per run
    low_bytes = []

    for value in singletons:
        high = value >> 8
        if high_runs and high_runs[-1][0] == high:
            # Same high part as the previous value: extend the current run.
            high_runs[-1] = (high, high_runs[-1][1] + 1)
        else:
            # First value, or the high part changed: open a new run.
            high_runs.append((high, 1))
        low_bytes.append(value & 0xff)

    return high_runs, low_bytes
94+
def _encode_length(length):
    """Encode one run length as a list of one or two byte values.

    Lengths 0x00..0x7f are encoded as the single byte 00..7f.
    Lengths 0x80..0x7fff are encoded as two bytes, high byte first with its
    top bit set: 80 80, 80 81, ..., ff fe, ff ff.
    """
    # A negative length would otherwise be masked into a plausible-looking
    # byte, so reject it here together with lengths that do not fit.
    assert 0 <= length < 0x8000, "run length out of range: {}".format(length)
    if length > 0x7f:
        return [0x80 | (length >> 8), length & 0xff]
    return [length]


def compress_normal(normal):
    """Turn ``(start, count)`` ranges into variable-length encoded run pairs.

    ``normal`` is an iterable of ``(start, count)`` pairs. For each pair two
    lengths are encoded back to back:

    * ``truelen``  - the gap between the end of the previous range (0 for
      the first pair) and ``start``;
    * ``falselen`` - ``count`` itself.

    Each length takes one or two bytes (see ``_encode_length``), so every
    entry of the returned list is a list of two to four byte values:
    ``[truelen, (truelenaux), falselen, (falselenaux)]``.

    Raises ``AssertionError`` if any length is negative (ranges out of order
    or overlapping) or is 0x8000 or larger.
    """
    compressed = []

    prev_end = 0
    for start, count in normal:
        entry = _encode_length(start - prev_end) + _encode_length(count)
        prev_end = start + count
        compressed.append(entry)

    return compressed
122+
def print_singletons(uppers, lowers, uppersname, lowersname):
    """Print two Rust ``const`` slice definitions to standard output.

    ``uppers`` is printed as a ``&'static [(u8, u8)]`` named ``uppersname``,
    one ``(first, second)`` tuple per line with the first element in hex.
    ``lowers`` is printed as a ``&'static [u8]`` named ``lowersname``, in
    hex, eight values per line.
    """
    print("const {}: &'static [(u8, u8)] = &[".format(uppersname))
    for high, run_length in uppers:
        print(" ({:#04x}, {}),".format(high, run_length))
    print("];")

    print("const {}: &'static [u8] = &[".format(lowersname))
    for offset in range(0, len(lowers), 8):
        # One output row holds at most eight consecutive values.
        row = lowers[offset:offset + 8]
        cells = ["{:#04x},".format(byte) for byte in row]
        print(" {}".format(" ".join(cells)))
    print("];")
132+
def print_normal(normal, normalname):
    """Print a Rust ``const`` ``&'static [u8]`` definition to standard output.

    ``normal`` is an iterable of sequences of integers; each sequence is
    printed on its own line as comma-terminated hex values. ``normalname``
    is the name given to the Rust constant.
    """
    print("const {}: &'static [u8] = &[".format(normalname))
    for entry in normal:
        cells = ["{:#04x},".format(byte) for byte in entry]
        print(" {}".format(" ".join(cells)))
    print("];")
138+
79139def main ():
80140 file = get_file ("http://www.unicode.org/Public/UNIDATA/UnicodeData.txt" )
81141
@@ -111,6 +171,11 @@ def main():
111171 else :
112172 normal0 .append ((a , b - a ))
113173
174+ singletons0u , singletons0l = compress_singletons (singletons0 )
175+ singletons1u , singletons1l = compress_singletons (singletons1 )
176+ normal0 = compress_normal (normal0 )
177+ normal1 = compress_normal (normal1 )
178+
114179 print ("""\
115180 // Copyright 2012-2016 The Rust Project Developers. See the COPYRIGHT
116181// file at the top-level directory of this distribution and at
@@ -125,38 +190,49 @@ def main():
125190// NOTE: The following code was generated by "src/etc/char_private.py",
126191// do not edit directly!
127192
128- use slice::SliceExt;
129-
130- fn check(x: u16, singletons: &[u16], normal: &[u16]) -> bool {
131- for &s in singletons {
132- if x == s {
133- return false;
134- } else if x < s {
193+ fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8],
194+ normal: &[u8]) -> bool {
195+ let xupper = (x >> 8) as u8;
196+ let mut lowerstart = 0;
197+ for &(upper, lowercount) in singletonuppers {
198+ let lowerend = lowerstart + lowercount as usize;
199+ if xupper == upper {
200+ for &lower in &singletonlowers[lowerstart..lowerend] {
201+ if lower == x as u8 {
202+ return false;
203+ }
204+ }
205+ } else if xupper < upper {
135206 break;
136207 }
208+ lowerstart = lowerend;
137209 }
138- for w in normal.chunks(2) {
139- let start = w[0];
140- let len = w[1];
141- let difference = (x as i32) - (start as i32);
142- if 0 <= difference {
143- if difference < len as i32 {
144- return false;
145- }
210+
211+ let mut x = x as i32;
212+ let mut normal = normal.iter().cloned();
213+ let mut current = true;
214+ while let Some(v) = normal.next() {
215+ let len = if v & 0x80 != 0 {
216+ ((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
146217 } else {
218+ v as i32
219+ };
220+ x -= len;
221+ if x < 0 {
147222 break;
148223 }
224+ current = !current;
149225 }
150- true
226+ current
151227}
152228
153229pub fn is_printable(x: char) -> bool {
154230 let x = x as u32;
155231 let lower = x as u16;
156232 if x < 0x10000 {
157- check(lower, SINGLETONS0 , NORMAL0)
233+ check(lower, SINGLETONS0U, SINGLETONS0L , NORMAL0)
158234 } else if x < 0x20000 {
159- check(lower, SINGLETONS1 , NORMAL1)
235+ check(lower, SINGLETONS1U, SINGLETONS1L , NORMAL1)
160236 } else {\
161237 """ )
162238 for a , b in extra :
@@ -169,22 +245,10 @@ def main():
169245}\
170246 """ )
171247 print ()
172- print ("const SINGLETONS0: &'static [u16] = &[" )
173- for s in singletons0 :
174- print (" 0x{:x}," .format (s ))
175- print ("];" )
176- print ("const SINGLETONS1: &'static [u16] = &[" )
177- for s in singletons1 :
178- print (" 0x{:x}," .format (s ))
179- print ("];" )
180- print ("const NORMAL0: &'static [u16] = &[" )
181- for a , b in normal0 :
182- print (" 0x{:x}, 0x{:x}," .format (a , b ))
183- print ("];" )
184- print ("const NORMAL1: &'static [u16] = &[" )
185- for a , b in normal1 :
186- print (" 0x{:x}, 0x{:x}," .format (a , b ))
187- print ("];" )
248+ print_singletons (singletons0u , singletons0l , 'SINGLETONS0U' , 'SINGLETONS0L' )
249+ print_singletons (singletons1u , singletons1l , 'SINGLETONS1U' , 'SINGLETONS1L' )
250+ print_normal (normal0 , 'NORMAL0' )
251+ print_normal (normal1 , 'NORMAL1' )
188252
189253if __name__ == '__main__' :
190254 main ()
0 commit comments