@@ -108,7 +108,7 @@ pub fn build_table(
108
108
}
109
109
110
110
if double_literal {
111
- for len1 in 1 ..( length - 1 ) {
111
+ for len1 in 1 ..length {
112
112
let len2 = length - len1;
113
113
for sym1_index in offsets[ len1] ..next_index[ len1] {
114
114
for sym2_index in offsets[ len2] ..next_index[ len2] {
@@ -142,6 +142,7 @@ pub fn build_table(
142
142
let mut subtable_prefix = !0 ;
143
143
for length in ( primary_table_bits + 1 ) ..=max_length {
144
144
let subtable_size = 1 << ( length - primary_table_bits) ;
145
+ let overflow_bits_mask = subtable_size as u32 - 1 ;
145
146
for _ in 0 ..histogram[ length] {
146
147
// If the codeword's prefix doesn't match the current subtable, create a new
147
148
// subtable.
@@ -151,7 +152,7 @@ pub fn build_table(
151
152
primary_table[ subtable_prefix as usize ] = ( ( subtable_start as u32 ) << 16 )
152
153
| EXCEPTIONAL_ENTRY
153
154
| SECONDARY_TABLE_ENTRY
154
- | ( subtable_size as u32 - 1 ) ;
155
+ | overflow_bits_mask ;
155
156
secondary_table. resize ( subtable_start + subtable_size, 0 ) ;
156
157
}
157
158
@@ -170,13 +171,311 @@ pub fn build_table(
170
171
if length < max_length && codeword & primary_table_mask == subtable_prefix {
171
172
secondary_table. extend_from_within ( subtable_start..) ;
172
173
let subtable_size = secondary_table. len ( ) - subtable_start;
174
+ let overflow_bits_mask = subtable_size as u32 - 1 ;
173
175
primary_table[ subtable_prefix as usize ] = ( ( subtable_start as u32 ) << 16 )
174
176
| EXCEPTIONAL_ENTRY
175
177
| SECONDARY_TABLE_ENTRY
176
- | ( subtable_size as u32 - 1 ) ;
178
+ | overflow_bits_mask ;
177
179
}
178
180
}
179
181
}
180
182
181
183
true
182
184
}
185
+
186
+ #[ cfg( test) ]
187
+ mod test {
188
+ use super :: { LITERAL_ENTRY , SECONDARY_TABLE_ENTRY } ;
189
+ use crate :: tables:: LITLEN_TABLE_ENTRIES ;
190
+
191
+ fn validate_tables (
192
+ primary_table_bits : usize ,
193
+ lengths : & [ u8 ] ,
194
+ primary_table : & [ u32 ] ,
195
+ secondary_table : & [ u16 ] ,
196
+ ) {
197
+ let expecting_only_double_literals =
198
+ ( * lengths. iter ( ) . max ( ) . unwrap ( ) as usize ) * 2 <= primary_table_bits;
199
+ for ( i, entry) in primary_table. into_iter ( ) . enumerate ( ) {
200
+ if 0 != entry & LITERAL_ENTRY {
201
+ // Expected format: aaaaaaaa_bbbbbbbb_100000yy_0000xxxx
202
+ match entry >> 8 & 0x7f {
203
+ 1 => {
204
+ if expecting_only_double_literals {
205
+ panic ! (
206
+ "Unexpected single literal: index={i} ({i:b}); entry=0b{entry:b}"
207
+ ) ;
208
+ }
209
+ }
210
+ 2 => ( ) ,
211
+ other => panic ! ( "Unexpected output_advance_bytes={other}: index={i} ({i:b})" ) ,
212
+ }
213
+
214
+ let input_bits = entry & 0xff ;
215
+ if input_bits == 0 {
216
+ panic ! ( "input_advance_bits unexpectedly equal to 0" ) ;
217
+ } else if input_bits > 15 {
218
+ panic ! ( "Unexpectedly big input_advance_bits: {}" , input_bits) ;
219
+ }
220
+
221
+ let symbol_mask = ( 1 << lengths. len ( ) . min ( 256 ) . ilog2 ( ) + 1 ) - 1 ;
222
+ let s1 = entry >> 16 & 0xff ;
223
+ if 0 != s1 & !symbol_mask {
224
+ panic ! ( "Unexpectedly big symbol: {}" , s1) ;
225
+ }
226
+ let s2 = entry >> 24 & 0xff ;
227
+ if 0 != s2 & !symbol_mask {
228
+ panic ! ( "Unexpectedly big symbol: {}" , s2) ;
229
+ }
230
+ } else if 0 != entry & SECONDARY_TABLE_ENTRY {
231
+ // Expected format: 0000xxxx_xxxxxxxx_01100000_mmmmmmmm
232
+ let overflow_bits_mask = ( entry & 0xff ) as usize ;
233
+ let overflow_bits = overflow_bits_mask. trailing_ones ( ) as usize ;
234
+ if overflow_bits == 0 {
235
+ panic ! ( "Unexpectedly missing mask: index={i} ({i:b}), entry={entry:b}" ) ;
236
+ }
237
+ if overflow_bits + primary_table_bits > 15 {
238
+ // Section 3.2.7 of https://www.ietf.org/rfc/rfc1951.txt implies
239
+ // that codeword lengths are at most 15.
240
+ panic ! ( "Unexpectedly long symbol: index={i} ({i:b}), entry={entry:b}" ) ;
241
+ }
242
+ let index2_base = ( entry >> 16 ) as usize ;
243
+ assert ! ( index2_base + overflow_bits_mask <= secondary_table. len( ) ) ;
244
+ } else {
245
+ // TODO: Provide test coverage/support for EOF symbol (257th symbol - 256)
246
+ // and distance codes (even bigger symbols).
247
+ assert ! ( lengths. len( ) > 256 ) ;
248
+ }
249
+ }
250
+ }
251
+
252
/// Outcome of decoding one table lookup in these tests.
///
/// In every variant `input_bits` is the bit count read out of the table entry that
/// produced the result.
#[derive(PartialEq, Eq, Debug)]
enum LitlenResult {
    /// A primary-table literal entry that reports one output symbol.
    SingleLiteral { symbol: u8, input_bits: usize },
    /// A primary-table literal entry that reports two output symbols, `s1` then `s2`.
    DoubleLiteral { s1: u8, s2: u8, input_bits: usize },
    /// A symbol that was resolved through the secondary table.
    SecondaryTableLiteral { symbol: u16, input_bits: usize },
}
258
+
259
/// Decoding tables under test, bundled with the parameters needed to index them.
struct LitlenTables {
    /// Number of low input bits used to index `primary_table`.
    primary_table_bits: usize,
    /// Bit mask that selects those low bits from an input word.
    primary_table_mask: u64,
    /// First-level lookup table, one `u32` entry per primary index.
    primary_table: Vec<u32>,
    /// Second-level lookup table, addressed through secondary-table primary entries.
    secondary_table: Vec<u16>,
}
265
+
266
+ impl LitlenTables {
267
+ fn new ( primary_table_bits : usize , lengths : & [ u8 ] ) -> Option < Self > {
268
+ let primary_table_size = 1 << primary_table_bits;
269
+ let primary_table_mask = ( primary_table_size - 1 ) . try_into ( ) . unwrap ( ) ;
270
+ let mut primary_table = vec ! [ 0 ; primary_table_size] ;
271
+ let mut secondary_table = Vec :: new ( ) ;
272
+ let mut codes = [ 0 ; 288 ] ;
273
+
274
+ const IS_DISTANCE_TABLE : bool = false ;
275
+ const DOUBLE_LITERAL : bool = true ;
276
+
277
+ let success = super :: build_table (
278
+ lengths,
279
+ & LITLEN_TABLE_ENTRIES ,
280
+ & mut codes,
281
+ & mut primary_table,
282
+ & mut secondary_table,
283
+ IS_DISTANCE_TABLE ,
284
+ DOUBLE_LITERAL ,
285
+ ) ;
286
+
287
+ if success {
288
+ validate_tables (
289
+ primary_table_bits,
290
+ lengths,
291
+ & primary_table,
292
+ & secondary_table,
293
+ ) ;
294
+ Some ( Self {
295
+ primary_table_bits,
296
+ primary_table_mask,
297
+ primary_table,
298
+ secondary_table,
299
+ } )
300
+ } else {
301
+ None
302
+ }
303
+ }
304
+
305
+ fn decode ( & self , input : u64 ) -> LitlenResult {
306
+ let index = ( input & self . primary_table_mask ) as usize ;
307
+ let entry = self . primary_table [ index] ;
308
+ if entry & LITERAL_ENTRY != 0 {
309
+ let input_bits = ( entry & 0xf ) as usize ;
310
+ let s1 = ( entry >> 16 ) as u8 ;
311
+ let s2 = ( entry >> 24 ) as u8 ;
312
+
313
+ let symbol_count = ( entry & 0xf00 ) >> 8 ;
314
+ match symbol_count {
315
+ 1 => LitlenResult :: SingleLiteral {
316
+ symbol : s1,
317
+ input_bits,
318
+ } ,
319
+ 2 => LitlenResult :: DoubleLiteral { s1, s2, input_bits } ,
320
+ _ => unreachable ! ( ) ,
321
+ }
322
+ } else if entry & SECONDARY_TABLE_ENTRY != 0 {
323
+ let input2 = input >> self . primary_table_bits ;
324
+ let index2 = ( entry >> 16 ) + ( ( input2 as u32 ) & ( entry & 0xff ) ) ;
325
+ let entry2 = self . secondary_table [ index2 as usize ] ;
326
+ let input_bits = ( entry2 & 0xf ) as usize ;
327
+ let symbol = entry2 >> 4 ;
328
+ LitlenResult :: SecondaryTableLiteral { symbol, input_bits }
329
+ } else {
330
+ unreachable ! ( "TODO: implement test covereage for this case" )
331
+ }
332
+ }
333
+ }
334
+
335
#[test]
fn test_rfc1951_example1() {
    // https://datatracker.ietf.org/doc/html/rfc1951 gives the following example
    // on page 8:
    //
    //     Symbol  Code
    //     ------  ----
    //     A       10
    //     B       0
    //     C       110
    //     D       111
    //
    // The code is completely defined by the sequence of bit lengths (2, 1, 3, 3).
    let t = LitlenTables::new(12, &[2, 1, 3, 3]).unwrap();

    // Literals below are written with the first codeword in the most significant bits;
    // reversing them puts that codeword in the low bits, where `decode` reads first.
    let decode_msb_first = |bits: u8| t.decode(bits.reverse_bits() as u64);

    // B, B
    assert_eq!(
        decode_msb_first(0b_0_0_0000000_u8),
        LitlenResult::DoubleLiteral {
            s1: 1,
            s2: 1,
            input_bits: 2
        },
    );
    // C, C
    assert_eq!(
        decode_msb_first(0b_110_110_00_u8),
        LitlenResult::DoubleLiteral {
            s1: 2,
            s2: 2,
            input_bits: 6
        },
    );
    // D, D
    assert_eq!(
        decode_msb_first(0b_111_111_00_u8),
        LitlenResult::DoubleLiteral {
            s1: 3,
            s2: 3,
            input_bits: 6
        },
    );
    // B, A
    assert_eq!(
        decode_msb_first(0b_0_10_00000_u8),
        LitlenResult::DoubleLiteral {
            s1: 1,
            s2: 0,
            input_bits: 3
        },
    );
}
382
+
383
#[test]
fn test_rfc1951_example2() {
    // https://datatracker.ietf.org/doc/html/rfc1951 gives the following example
    // on page 9:
    //
    //     Symbol Length   Code
    //     ------ ------   ----
    //     A       3        010
    //     B       3        011
    //     C       3        100
    //     D       3        101
    //     E       3        110
    //     F       2         00
    //     G       4       1110
    //     H       4       1111
    let t = LitlenTables::new(12, &[3, 3, 3, 3, 3, 2, 4, 4]).unwrap();

    // Literals below are written with the first codeword in the most significant bits;
    // reversing them puts that codeword in the low bits, where `decode` reads first.
    let decode_msb_first = |bits: u8| t.decode(bits.reverse_bits() as u64);

    // A, B
    assert_eq!(
        decode_msb_first(0b_010_011_00_u8),
        LitlenResult::DoubleLiteral {
            s1: 0,
            s2: 1,
            input_bits: 6
        },
    );
    // F, F
    assert_eq!(
        decode_msb_first(0b_00_00_0000_u8),
        LitlenResult::DoubleLiteral {
            s1: 5,
            s2: 5,
            input_bits: 4
        },
    );
    // H, G
    assert_eq!(
        decode_msb_first(0b_1111_1110_u8),
        LitlenResult::DoubleLiteral {
            s1: 7,
            s2: 6,
            input_bits: 8
        },
    );
}
424
+
425
#[test]
fn test_secondary_table() {
    // To smoke test the secondary table usage, we use a lopsided
    // tree that results in codes that are up to 15 bits long:
    //
    //     Symbol Length   Code
    //     ------ ------   ------------------
    //      0      1                        0
    //      1      2                       10
    //      2      3                      110
    //      3      4                     1110
    //      4      5                   1_1110
    //      5      6                  11_1110
    //      6      7                 111_1110
    //      7      8                1111_1110
    //      8      9              1_1111_1110
    //      9     10             11_1111_1110
    //     10     11            111_1111_1110
    //     11     12           1111_1111_1110
    //     12     13         1_1111_1111_1110
    //     13     14        11_1111_1111_1110
    //     14     15       111_1111_1111_1110
    //     15     15       111_1111_1111_1111
    let t = LitlenTables::new(12, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15])
        .unwrap();

    // Literals are written with the first codeword in the most significant bits;
    // reversing them puts that codeword in the low bits, where `decode` reads first.
    let decode_u8 = |bits: u8| t.decode(bits.reverse_bits() as u64);
    let decode_u16 = |bits: u16| t.decode(bits.reverse_bits() as u64);

    assert_eq!(
        decode_u8(0b_0_0_000000_u8),
        LitlenResult::DoubleLiteral {
            s1: 0,
            s2: 0,
            input_bits: 2
        },
    );
    assert_eq!(
        decode_u8(0b_1110_1110_u8),
        LitlenResult::DoubleLiteral {
            s1: 3,
            s2: 3,
            input_bits: 8
        },
    );

    // Symbol 14: its 15-bit codeword followed by one padding bit. Without this case both
    // 16-bit inputs below decode to symbol 15 and the other 15-bit code goes untested.
    assert_eq!(
        decode_u16(0b_1111_1111_1111_1100u16),
        LitlenResult::SecondaryTableLiteral {
            symbol: 14,
            input_bits: 15
        },
    );

    // Only the leading 15 bits form the codeword; here they are all ones (symbol 15) and
    // the trailing 0 is padding.
    assert_eq!(
        decode_u16(0b_1111_1111_1111_1110u16),
        LitlenResult::SecondaryTableLiteral {
            symbol: 15,
            input_bits: 15
        },
    );
    assert_eq!(
        decode_u16(0b_1111_1111_1111_1111u16),
        LitlenResult::SecondaryTableLiteral {
            symbol: 15,
            input_bits: 15
        },
    );
}
481
+ }
0 commit comments