Skip to content

Commit 85196be

Browse files
committed
Squashed commit of the following:
Author: Yuri Astrakhan <[email protected]>
Date: Sat May 11 03:53:23 2024 -0400

Clean up bit_cost

* remove unused arrays and other dead code
* make `ShannonEntropy` return a tuple rather than an output param (output params are discouraged)
* use `if cfg!(...)` for conditional compilation
1 parent 8950401 commit 85196be

File tree

2 files changed

+79
-111
lines changed

2 files changed

+79
-111
lines changed

src/enc/bit_cost.rs

Lines changed: 73 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use alloc::SliceWrapperMut;
2+
13
use core::cmp::{max, min};
24

35
use super::super::alloc::SliceWrapper;
@@ -6,53 +8,41 @@ use super::util::{FastLog2, FastLog2u16};
68
use super::vectorization::Mem256i;
79
use crate::enc::floatX;
810

9-
pub fn ShannonEntropy(mut population: &[u32], size: usize, total: &mut usize) -> floatX {
10-
let mut sum: usize = 0usize;
11+
12+
const BROTLI_REPEAT_ZERO_CODE_LENGTH: usize = 17;
13+
const BROTLI_CODE_LENGTH_CODES: usize = BROTLI_REPEAT_ZERO_CODE_LENGTH + 1;
14+
15+
pub fn ShannonEntropy(mut population: &[u32], size: usize) -> (floatX, usize) {
16+
let mut sum: usize = 0;
1117
let mut retval: floatX = 0.0;
12-
let mut p: usize;
13-
if size & 1 != 0 && !population.is_empty() {
14-
p = population[0] as usize;
18+
19+
if (size & 1) != 0 && !population.is_empty() {
20+
let p = population[0] as usize;
1521
population = population.split_at(1).1;
1622
sum = sum.wrapping_add(p);
17-
retval -= (p as floatX) * FastLog2u16(p as u16);
23+
retval -= p as floatX * FastLog2u16(p as u16);
1824
}
1925
for pop_iter in population.split_at((size >> 1) << 1).0 {
20-
p = *pop_iter as usize;
26+
let p = *pop_iter as usize;
2127
sum = sum.wrapping_add(p);
22-
retval -= (p as floatX) * FastLog2u16(p as u16);
28+
retval -= p as floatX * FastLog2u16(p as u16);
2329
}
2430
if sum != 0 {
25-
retval += (sum as floatX) * FastLog2(sum as u64); // not sure it's 16 bit
31+
retval += sum as floatX * FastLog2(sum as u64); // not sure it's 16 bit
2632
}
27-
*total = sum;
28-
retval
33+
34+
(retval, sum)
2935
}
3036

3137
#[inline(always)]
3238
pub fn BitsEntropy(population: &[u32], size: usize) -> floatX {
33-
let mut sum: usize = 0;
34-
let retval = ShannonEntropy(population, size, &mut sum);
35-
floatX::max(retval, sum as floatX)
39+
let (mut retval, sum) = ShannonEntropy(population, size);
40+
if retval < sum as floatX {
41+
retval = sum as floatX;
42+
}
43+
retval
3644
}
3745

38-
const BROTLI_REPEAT_ZERO_CODE_LENGTH: usize = 17;
39-
const BROTLI_CODE_LENGTH_CODES: usize = BROTLI_REPEAT_ZERO_CODE_LENGTH + 1;
40-
/*
41-
use std::io::{self, Error, ErrorKind, Read, Write};
42-
43-
macro_rules! println_stderr(
44-
($($val:tt)*) => { {
45-
writeln!(&mut ::std::io::stderr(), $($val)*).unwrap();
46-
} }
47-
);
48-
*/
49-
50-
#[cfg(feature = "vector_scratch_space")]
51-
const vectorize_population_cost: bool = true;
52-
53-
#[cfg(not(feature = "vector_scratch_space"))]
54-
const vectorize_population_cost: bool = false;
55-
5646
#[allow(clippy::excessive_precision)]
5747
fn CostComputation<T: SliceWrapper<Mem256i>>(
5848
depth_histo: &mut [u32; BROTLI_CODE_LENGTH_CODES],
@@ -85,8 +75,6 @@ fn CostComputation<T: SliceWrapper<Mem256i>>(
8575
bits
8676
}
8777

88-
use alloc::SliceWrapperMut;
89-
9078
pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
9179
histogram: &HistogramType,
9280
nnz_data: &mut HistogramType::i32vec,
@@ -95,72 +83,68 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
9583
static kTwoSymbolHistogramCost: floatX = 20.0;
9684
static kThreeSymbolHistogramCost: floatX = 28.0;
9785
static kFourSymbolHistogramCost: floatX = 37.0;
86+
9887
let data_size: usize = histogram.slice().len();
99-
let mut count: i32 = 0i32;
88+
let mut count = 0;
10089
let mut s: [usize; 5] = [0; 5];
101-
10290
let mut bits: floatX = 0.0;
103-
let mut i: usize;
104-
if histogram.total_count() == 0usize {
91+
92+
if histogram.total_count() == 0 {
10593
return kOneSymbolHistogramCost;
10694
}
107-
i = 0usize;
108-
'break1: while i < data_size {
109-
{
110-
if histogram.slice()[i] > 0u32 {
111-
s[count as usize] = i;
112-
count += 1;
113-
if count > 4i32 {
114-
break 'break1;
115-
}
95+
for i in 0..data_size {
96+
if histogram.slice()[i] > 0 {
97+
s[count] = i;
98+
count += 1;
99+
if count > 4 {
100+
break;
116101
}
117102
}
118-
i = i.wrapping_add(1);
119-
}
120-
if count == 1i32 {
121-
return kOneSymbolHistogramCost;
122103
}
123-
if count == 2i32 {
124-
return kTwoSymbolHistogramCost + (histogram.total_count() as floatX);
125-
}
126-
if count == 3i32 {
127-
let histo0: u32 = histogram.slice()[s[0]];
128-
let histo1: u32 = histogram.slice()[s[1]];
129-
let histo2: u32 = histogram.slice()[s[2]];
130-
let histomax: u32 = max(histo0, max(histo1, histo2));
131-
return kThreeSymbolHistogramCost
132-
+ ((2u32).wrapping_mul(histo0.wrapping_add(histo1).wrapping_add(histo2)) as floatX)
133-
- (histomax as floatX);
134-
}
135-
if count == 4i32 {
136-
let mut histo: [u32; 4] = [0; 4];
137-
138-
for i in 0usize..4usize {
139-
histo[i] = histogram.slice()[s[i]];
104+
match count {
105+
1 => return kOneSymbolHistogramCost,
106+
2 => return kTwoSymbolHistogramCost + histogram.total_count() as floatX,
107+
3 => {
108+
let histo0: u32 = histogram.slice()[s[0]];
109+
let histo1: u32 = histogram.slice()[s[1]];
110+
let histo2: u32 = histogram.slice()[s[2]];
111+
let histomax: u32 = max(histo0, max(histo1, histo2));
112+
return kThreeSymbolHistogramCost
113+
+ (2u32).wrapping_mul(histo0.wrapping_add(histo1).wrapping_add(histo2)) as floatX
114+
- histomax as floatX;
140115
}
141-
for i in 0..4 {
142-
for j in i + 1..4 {
143-
if histo[j] > histo[i] {
144-
histo.swap(j, i);
116+
4 => {
117+
let mut histo: [u32; 4] = [0; 4];
118+
119+
for i in 0..4 {
120+
histo[i] = histogram.slice()[s[i]];
121+
}
122+
for i in 0..4 {
123+
for j in i + 1..4 {
124+
if histo[j] > histo[i] {
125+
histo.swap(j, i);
126+
}
145127
}
146128
}
129+
let h23: u32 = histo[2].wrapping_add(histo[3]);
130+
let histomax: u32 = max(h23, histo[0]);
131+
return kFourSymbolHistogramCost
132+
+ (3u32).wrapping_mul(h23) as floatX
133+
+ (2u32).wrapping_mul(histo[0].wrapping_add(histo[1])) as floatX
134+
- histomax as floatX;
147135
}
148-
let h23: u32 = histo[2].wrapping_add(histo[3]);
149-
let histomax: u32 = max(h23, histo[0]);
150-
return kFourSymbolHistogramCost
151-
+ ((3u32).wrapping_mul(h23) as floatX)
152-
+ ((2u32).wrapping_mul(histo[0].wrapping_add(histo[1])) as floatX)
153-
- (histomax as floatX);
136+
_ => {}
154137
}
155-
if vectorize_population_cost {
138+
139+
if cfg!(feature = "vector_scratch_space") {
156140
// vectorization failed: it's faster to do things inline than split into two loops
157141
let mut nnz: usize = 0;
158142
let mut depth_histo = [0u32; 18];
159143
let total_count = histogram.total_count() as floatX;
160144
let log2total = FastLog2(histogram.total_count() as u64);
161-
i = 0usize;
145+
let mut i: usize = 0;
162146
while i < data_size {
163-
if histogram.slice()[i] > 0u32 {
147+
if histogram.slice()[i] > 0 {
164148
let nnz_val = &mut nnz_data.slice_mut()[nnz >> 3];
165149
nnz_val[nnz & 7] = histogram.slice()[i] as i32;
166150
i += 1;
@@ -178,14 +162,14 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
178162
break;
179163
}
180164
if reps < 3 {
181-
depth_histo[0] += reps
165+
depth_histo[0] += reps;
182166
} else {
183167
reps -= 2;
184168
let mut depth_histo_adds: u32 = 0;
185-
while reps > 0u32 {
169+
while reps > 0 {
186170
depth_histo_adds += 1;
187171
bits += 3.0;
188-
reps >>= 3i32;
172+
reps >>= 3;
189173
}
190174
depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH] += depth_histo_adds;
191175
}
@@ -195,7 +179,7 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
195179
} else {
196180
let mut max_depth: usize = 1;
197181
let mut depth_histo = [0u32; 18];
198-
let log2total = FastLog2(histogram.total_count() as u64); // 64 bit here
182+
let log2total: floatX = FastLog2(histogram.total_count() as u64); // 64 bit here
199183
let mut reps: u32 = 0;
200184
for histo in histogram.slice()[..data_size].iter() {
201185
if *histo != 0 {
@@ -204,7 +188,7 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
204188
depth_histo[0] += reps;
205189
} else {
206190
reps -= 2;
207-
while reps > 0u32 {
191+
while reps > 0 {
208192
depth_histo[17] += 1;
209193
bits += 3.0;
210194
reps >>= 3;
@@ -213,8 +197,8 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
213197
reps = 0;
214198
}
215199
let log2p = log2total - FastLog2u16(*histo as u16);
216-
let mut depth: usize = (log2p + 0.5) as usize;
217-
bits += (*histo as floatX) * log2p;
200+
let mut depth = (log2p + 0.5) as usize;
201+
bits += *histo as floatX * log2p;
218202
depth = min(depth, 15);
219203
max_depth = max(depth, max_depth);
220204
depth_histo[depth] += 1;
@@ -223,17 +207,7 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
223207
}
224208
}
225209
bits += (18usize).wrapping_add((2usize).wrapping_mul(max_depth)) as floatX;
226-
bits += BitsEntropy(&depth_histo[..], 18usize);
210+
bits += BitsEntropy(&depth_histo[..], 18);
227211
}
228212
bits
229213
}
230-
/*
231-
fn HistogramDataSizeCommand() -> usize {
232-
704usize
233-
}*/
234-
235-
/*
236-
fn HistogramDataSizeDistance() -> usize {
237-
520usize
238-
}
239-
*/

src/enc/encode.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,7 +1724,6 @@ fn ChooseContextMap(
17241724
let mut two_prefix_histo = [0u32; 6];
17251725

17261726
let mut i: usize;
1727-
let mut dummy: usize = 0;
17281727
let mut entropy = [0.0 as floatX; 4];
17291728
i = 0usize;
17301729
while i < 9usize {
@@ -1742,16 +1741,12 @@ fn ChooseContextMap(
17421741
}
17431742
i = i.wrapping_add(1);
17441743
}
1745-
entropy[1] = ShannonEntropy(&monogram_histo[..], 3usize, &mut dummy);
1746-
entropy[2] = ShannonEntropy(&two_prefix_histo[..], 3usize, &mut dummy)
1747-
+ ShannonEntropy(&two_prefix_histo[3..], 3usize, &mut dummy);
1744+
entropy[1] = ShannonEntropy(&monogram_histo[..], 3).0;
1745+
entropy[2] =
1746+
ShannonEntropy(&two_prefix_histo[..], 3).0 + ShannonEntropy(&two_prefix_histo[3..], 3).0;
17481747
entropy[3] = 0.0;
17491748
for i in 0usize..3usize {
1750-
entropy[3] += ShannonEntropy(
1751-
&bigram_histo[(3usize).wrapping_mul(i)..],
1752-
3usize,
1753-
&mut dummy,
1754-
);
1749+
entropy[3] += ShannonEntropy(&bigram_histo[(3usize).wrapping_mul(i)..], 3).0;
17551750
}
17561751
let total: usize = monogram_histo[0]
17571752
.wrapping_add(monogram_histo[1])
@@ -1818,7 +1813,6 @@ fn ShouldUseComplexStaticContextMap(
18181813
let mut context_histo: [[u32; 32]; 13] = [[0; 32]; 13];
18191814
let mut total = 0u32;
18201815
let mut entropy = [0.0 as floatX; 3];
1821-
let mut dummy = 0usize;
18221816
let utf8_lut = BROTLI_CONTEXT_LUT(ContextType::CONTEXT_UTF8);
18231817
while start_pos + 64 <= end_pos {
18241818
let stride_end_pos = start_pos + 64;
@@ -1840,11 +1834,11 @@ fn ShouldUseComplexStaticContextMap(
18401834
}
18411835
start_pos += 4096;
18421836
}
1843-
entropy[1] = ShannonEntropy(&combined_histo[..], 32, &mut dummy);
1837+
entropy[1] = ShannonEntropy(&combined_histo[..], 32).0;
18441838
entropy[2] = 0.0;
18451839
for i in 0..13 {
18461840
assert!(i < 13);
1847-
entropy[2] += ShannonEntropy(&context_histo[i][..], 32, &mut dummy);
1841+
entropy[2] += ShannonEntropy(&context_histo[i][..], 32).0;
18481842
}
18491843
entropy[0] = 1.0 / (total as floatX);
18501844
entropy[1] *= entropy[0];

0 commit comments

Comments
 (0)