@@ -8,6 +8,13 @@ use std::ptr;
88#[ cfg( test) ]
99mod tests;
1010
11+ /// When hashing something that ends up affecting properties like symbol names,
12+ /// we want these symbol names to be calculated independently of other factors
13+ /// like what architecture you're compiling *from*.
14+ ///
15+ /// To that end, we always convert integers to little-endian format or handle
16+ /// them in an endian-independent way, and extend the architecture-dependent
17+ /// `isize` and `usize` types to 64 bits if needed before hashing.
1118#[ derive( Debug , Clone ) ]
1219pub struct SipHasher128 {
1320 k0 : u64 ,
@@ -125,15 +132,17 @@ impl SipHasher128 {
125132
126133 // A specialized write function for values with size <= 8.
127134 //
128- // The hashing of multi-byte integers depends on endianness. E.g.:
129- // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
130- // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
135+ // The input must be zero-extended to 64 bits by the caller. The zero
136+ // padding isn't hashed, but the implementation requires it for correctness.
137+ //
138+ // This function, given the same integer type and value, has the same effect
139+ // on both little- and big-endian hardware. It operates on values without
140+ // depending on their sequence in memory, so is independent of endianness.
131141 //
132- // This function does the right thing for little-endian hardware. On
133- // big-endian hardware `x` must be byte-swapped first to give the right
134- // behaviour. After any byte-swapping, the input must be zero-extended to
135- // 64-bits. The caller is responsible for the byte-swapping and
136- // zero-extension.
142+ // The equivalent write() call *does* need the value's bytes converted to
143+ // little-endian (without zero-extension) for equivalent behavior on little-
144+ // and big-endian hardware, as write() *does* operate on byte sequences.
145+ // I.e. write_u32(0xDDCCBBAA) == write(&0xDDCCBBAA_u32.to_le_bytes()).
137146 #[ inline]
138147 fn short_write < T > ( & mut self , _x : T , x : u64 ) {
139148 let size = mem:: size_of :: < T > ( ) ;
@@ -167,12 +176,9 @@ impl SipHasher128 {
167176 // left-shift it five bytes, giving 0xHHGG_FF00_0000_0000. We then
168177 // bitwise-OR that value into `self.tail`, resulting in
169178 // 0xHHGG_FFEE_DDCC_BBAA. `self.tail` is now full, and we can use it
170- // to update `self.state`. (As mentioned above, this assumes a
171- // little-endian machine; on a big-endian machine we would have
172- // byte-swapped 0xIIHH_GGFF in the caller, giving 0xFFGG_HHII, and we
173- // would then end up bitwise-ORing 0xGGHH_II00_0000_0000 into
174- // `self.tail`).
175- //
179+ // to update `self.state`. The analysis is the same whether we are on
180+ // a little-endian or big-endian machine, as the bitwise operations
181+ // are endian-independent.
176182 self . tail |= x << ( 8 * self . ntail ) ;
177183 if size < needed {
178184 self . ntail += size;
@@ -186,8 +192,7 @@ impl SipHasher128 {
186192
187193 // Continuing scenario 2: we have one byte left over from the input. We
188194 // set `self.ntail` to 1 and `self.tail` to `0x0000_0000_IIHH_GGFF >>
189- // 8*3`, which is 0x0000_0000_0000_00II. (Or on a big-endian machine
190- // the prior byte-swapping would leave us with 0x0000_0000_0000_00FF.)
195+ // 8*3`, which is 0x0000_0000_0000_00II.
191196 //
192197 // The `if` is needed to avoid shifting a `u64` by 64 bits, which is a
193198 // shift overflow in Rust (a panic in debug builds).
@@ -222,22 +227,30 @@ impl Hasher for SipHasher128 {
222227
223228 #[ inline]
224229 fn write_u16 ( & mut self , i : u16 ) {
225- self . short_write ( i, i. to_le ( ) as u64 ) ;
230+ self . short_write ( i, i as u64 ) ;
226231 }
227232
228233 #[ inline]
229234 fn write_u32 ( & mut self , i : u32 ) {
230- self . short_write ( i, i. to_le ( ) as u64 ) ;
235+ self . short_write ( i, i as u64 ) ;
231236 }
232237
233238 #[ inline]
234239 fn write_u64 ( & mut self , i : u64 ) {
235- self . short_write ( i, i. to_le ( ) as u64 ) ;
240+ self . short_write ( i, i as u64 ) ;
241+ }
242+
243+ #[ inline]
244+ fn write_u128 ( & mut self , i : u128 ) {
245+ self . write ( & i. to_le_bytes ( ) ) ;
236246 }
237247
238248 #[ inline]
239249 fn write_usize ( & mut self , i : usize ) {
240- self . short_write ( i, i. to_le ( ) as u64 ) ;
250+ // Always treat usize as u64 so we get the same results on 32 and 64 bit
251+ // platforms. This is important for symbol hashes when cross compiling,
252+ // for example.
253+ self . write_u64 ( i as u64 ) ;
241254 }
242255
243256 #[ inline]
@@ -247,22 +260,31 @@ impl Hasher for SipHasher128 {
247260
248261 #[ inline]
249262 fn write_i16 ( & mut self , i : i16 ) {
250- self . short_write ( i, ( i as u16 ) . to_le ( ) as u64 ) ;
263+ self . short_write ( i, i as u16 as u64 ) ;
251264 }
252265
253266 #[ inline]
254267 fn write_i32 ( & mut self , i : i32 ) {
255- self . short_write ( i, ( i as u32 ) . to_le ( ) as u64 ) ;
268+ self . short_write ( i, i as u32 as u64 ) ;
256269 }
257270
258271 #[ inline]
259272 fn write_i64 ( & mut self , i : i64 ) {
260- self . short_write ( i, ( i as u64 ) . to_le ( ) as u64 ) ;
273+ self . short_write ( i, i as u64 ) ;
274+ }
275+
276+ #[ inline]
277+ fn write_i128 ( & mut self , i : i128 ) {
278+ self . write ( & i. to_le_bytes ( ) ) ;
261279 }
262280
263281 #[ inline]
264282 fn write_isize ( & mut self , i : isize ) {
265- self . short_write ( i, ( i as usize ) . to_le ( ) as u64 ) ;
283+ // Always treat isize as i64 so we get the same results on 32 and 64 bit
284+ // platforms. This is important for symbol hashes when cross compiling,
285+ // for example. Sign extending here is preferable as it means that the
286+ // same negative number hashes the same on both 32 and 64 bit platforms.
287+ self . write_i64 ( i as i64 ) ;
266288 }
267289
268290 #[ inline]
0 commit comments