]>
Commit | Line | Data |
---|---|---|
abe05a73 XL |
1 | //! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes. |
2 | ||
abe05a73 | 3 | use std::hash::Hasher; |
29967ef6 | 4 | use std::mem::{self, MaybeUninit}; |
dfeec247 | 5 | use std::ptr; |
abe05a73 | 6 | |
416331ca XL |
7 | #[cfg(test)] |
8 | mod tests; | |
9 | ||
29967ef6 XL |
// The SipHash algorithm operates on 8-byte chunks.
const ELEM_SIZE: usize = mem::size_of::<u64>();

// Size of the buffer in number of elements, not including the spill.
//
// The selection of this size was guided by rustc-perf benchmark comparisons of
// different buffer sizes. It should be periodically reevaluated as the compiler
// implementation and input characteristics change.
//
// Using the same-sized buffer for everything we hash is a performance versus
// complexity tradeoff. The ideal buffer size, and whether buffering should even
// be used, depends on what is being hashed. It may be worth it to size the
// buffer appropriately (perhaps by making SipHasher128 generic over the buffer
// size) or disable buffering depending on what is being hashed. But at this
// time, we use the same buffer size for everything.
const BUFFER_CAPACITY: usize = 8;

// Size of the buffer in bytes, not including the spill.
const BUFFER_SIZE: usize = BUFFER_CAPACITY * ELEM_SIZE;

// Size of the buffer in number of elements, including the spill.
const BUFFER_WITH_SPILL_CAPACITY: usize = BUFFER_CAPACITY + 1;

// Size of the buffer in bytes, including the spill.
const BUFFER_WITH_SPILL_SIZE: usize = BUFFER_WITH_SPILL_CAPACITY * ELEM_SIZE;

// Index of the spill element in the buffer (the last element).
const BUFFER_SPILL_INDEX: usize = BUFFER_WITH_SPILL_CAPACITY - 1;
38 | ||
/// A 128-bit SipHash-2-4 hasher, adapted from `core::hash::sip`.
///
/// Input bytes are buffered until a full buffer's worth is available, at which
/// point the whole buffer is fed through the SipHash rounds. See the
/// implementation notes below for the buffering and spill scheme.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct SipHasher128 {
    // The access pattern during hashing consists of accesses to `nbuf` and
    // `buf` until the buffer is full, followed by accesses to `state` and
    // `processed`, and then repetition of that pattern until hashing is done.
    // This is the basis for the ordering of fields below. However, in practice
    // the cache miss-rate for data access is extremely low regardless of order.
    nbuf: usize, // how many bytes in buf are valid
    buf: [MaybeUninit<u64>; BUFFER_WITH_SPILL_CAPACITY], // unprocessed bytes, parsed as little-endian u64s
    state: State, // hash State
    processed: usize, // how many bytes we've processed (i.e. already fed through the rounds)
}
52 | ||
/// The four 64-bit words of SipHash internal state.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
65 | ||
// One SipRound: the ARX (add-rotate-xor) mixing step of SipHash, applied to
// the four state words. The two-arm form lets callers pass either a whole
// `State` or the four words individually.
macro_rules! compress {
    ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
85 | ||
29967ef6 XL |
86 | // Copies up to 8 bytes from source to destination. This performs better than |
87 | // `ptr::copy_nonoverlapping` on microbenchmarks and may perform better on real | |
88 | // workloads since all of the copies have fixed sizes and avoid calling memcpy. | |
89 | // | |
90 | // This is specifically designed for copies of up to 8 bytes, because that's the | |
91 | // maximum of number bytes needed to fill an 8-byte-sized element on which | |
92 | // SipHash operates. Note that for variable-sized copies which are known to be | |
93 | // less than 8 bytes, this function will perform more work than necessary unless | |
94 | // the compiler is able to optimize the extra work away. | |
abe05a73 | 95 | #[inline] |
29967ef6 XL |
96 | unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) { |
97 | debug_assert!(count <= 8); | |
98 | ||
99 | if count == 8 { | |
100 | ptr::copy_nonoverlapping(src, dst, 8); | |
101 | return; | |
102 | } | |
103 | ||
104 | let mut i = 0; | |
105 | if i + 3 < count { | |
106 | ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4); | |
abe05a73 XL |
107 | i += 4; |
108 | } | |
29967ef6 XL |
109 | |
110 | if i + 1 < count { | |
111 | ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2); | |
abe05a73 XL |
112 | i += 2 |
113 | } | |
29967ef6 XL |
114 | |
115 | if i < count { | |
116 | *dst.add(i) = *src.add(i); | |
abe05a73 XL |
117 | i += 1; |
118 | } | |
29967ef6 XL |
119 | |
120 | debug_assert_eq!(i, count); | |
abe05a73 XL |
121 | } |
122 | ||
29967ef6 XL |
123 | // # Implementation |
124 | // | |
125 | // This implementation uses buffering to reduce the hashing cost for inputs | |
126 | // consisting of many small integers. Buffering simplifies the integration of | |
127 | // integer input--the integer write function typically just appends to the | |
128 | // buffer with a statically sized write, updates metadata, and returns. | |
129 | // | |
130 | // Buffering also prevents alternating between writes that do and do not trigger | |
131 | // the hashing process. Only when the entire buffer is full do we transition | |
132 | // into hashing. This allows us to keep the hash state in registers for longer, | |
133 | // instead of loading and storing it before and after processing each element. | |
134 | // | |
135 | // When a write fills the buffer, a buffer processing function is invoked to | |
136 | // hash all of the buffered input. The buffer processing functions are marked | |
137 | // `#[inline(never)]` so that they aren't inlined into the append functions, | |
138 | // which ensures the more frequently called append functions remain inlineable | |
139 | // and don't include register pushing/popping that would only be made necessary | |
140 | // by inclusion of the complex buffer processing path which uses those | |
141 | // registers. | |
142 | // | |
143 | // The buffer includes a "spill"--an extra element at the end--which simplifies | |
144 | // the integer write buffer processing path. The value that fills the buffer can | |
145 | // be written with a statically sized write that may spill over into the spill. | |
146 | // After the buffer is processed, the part of the value that spilled over can be | |
147 | // written from the spill to the beginning of the buffer with another statically | |
148 | // sized write. This write may copy more bytes than actually spilled over, but | |
149 | // we maintain the metadata such that any extra copied bytes will be ignored by | |
150 | // subsequent processing. Due to the static sizes, this scheme performs better | |
151 | // than copying the exact number of bytes needed into the end and beginning of | |
152 | // the buffer. | |
153 | // | |
154 | // The buffer is uninitialized, which improves performance, but may preclude | |
155 | // efficient implementation of alternative approaches. The improvement is not so | |
156 | // large that an alternative approach should be disregarded because it cannot be | |
157 | // efficiently implemented with an uninitialized buffer. On the other hand, an | |
158 | // uninitialized buffer may become more important should a larger one be used. | |
159 | // | |
160 | // # Platform Dependence | |
161 | // | |
162 | // The SipHash algorithm operates on byte sequences. It parses the input stream | |
163 | // as 8-byte little-endian integers. Therefore, given the same byte sequence, it | |
164 | // produces the same result on big- and little-endian hardware. | |
165 | // | |
166 | // However, the Hasher trait has methods which operate on multi-byte integers. | |
167 | // How they are converted into byte sequences can be endian-dependent (by using | |
168 | // native byte order) or independent (by consistently using either LE or BE byte | |
169 | // order). It can also be `isize` and `usize` size dependent (by using the | |
170 | // native size), or independent (by converting to a common size), supposing the | |
171 | // values can be represented in 32 bits. | |
172 | // | |
173 | // In order to make `SipHasher128` consistent with `SipHasher` in libstd, we | |
174 | // choose to do the integer to byte sequence conversion in the platform- | |
175 | // dependent way. Clients can achieve platform-independent hashing by widening | |
176 | // `isize` and `usize` integers to 64 bits on 32-bit systems and byte-swapping | |
177 | // integers on big-endian systems before passing them to the writing functions. | |
178 | // This causes the input byte sequence to look identical on big- and little- | |
179 | // endian systems (supposing `isize` and `usize` values can be represented in 32 | |
180 | // bits), which ensures platform-independent results. | |
abe05a73 XL |
impl SipHasher128 {
    /// Creates a hasher keyed with `key0`/`key1`, with the state initialized
    /// for the 128-bit output variant of SipHash.
    #[inline]
    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
        let mut hasher = SipHasher128 {
            nbuf: 0,
            buf: MaybeUninit::uninit_array(),
            state: State {
                // The four magic constants are the standard SipHash
                // initialization vector ("somepseudorandomlygeneratedbytes").
                v0: key0 ^ 0x736f6d6570736575,
                // The XOR with 0xee is only done on 128-bit algorithm version.
                v1: key1 ^ (0x646f72616e646f6d ^ 0xee),
                v2: key0 ^ 0x6c7967656e657261,
                v3: key1 ^ 0x7465646279746573,
            },
            processed: 0,
        };

        unsafe {
            // Initialize spill because we read from it in `short_write_process_buffer`.
            *hasher.buf.get_unchecked_mut(BUFFER_SPILL_INDEX) = MaybeUninit::zeroed();
        }

        hasher
    }

    // A specialized write function for values with size <= 8.
    //
    // Fast path: append `x`'s native-endian bytes to the buffer. Slow path
    // (buffer would fill): delegate to `short_write_process_buffer`.
    #[inline]
    fn short_write<T>(&mut self, x: T) {
        let size = mem::size_of::<T>();
        let nbuf = self.nbuf;
        debug_assert!(size <= 8);
        debug_assert!(nbuf < BUFFER_SIZE);
        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);

        if nbuf + size < BUFFER_SIZE {
            unsafe {
                // The memcpy call is optimized away because the size is known.
                let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
                ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
            }

            self.nbuf = nbuf + size;

            return;
        }

        // SAFETY: we just checked `nbuf + size >= BUFFER_SIZE`, so this write
        // fills the buffer, which is that function's documented requirement.
        unsafe { self.short_write_process_buffer(x) }
    }

    // A specialized write function for values with size <= 8 that should only
    // be called when the write would cause the buffer to fill.
    //
    // SAFETY: the write of `x` into `self.buf` starting at byte offset
    // `self.nbuf` must cause `self.buf` to become fully initialized (and not
    // overflow) if it wasn't already.
    #[inline(never)]
    unsafe fn short_write_process_buffer<T>(&mut self, x: T) {
        let size = mem::size_of::<T>();
        let nbuf = self.nbuf;
        debug_assert!(size <= 8);
        debug_assert!(nbuf < BUFFER_SIZE);
        debug_assert!(nbuf + size >= BUFFER_SIZE);
        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);

        // Copy first part of input into end of buffer, possibly into spill
        // element. The memcpy call is optimized away because the size is known.
        let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
        ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);

        // Process buffer.
        for i in 0..BUFFER_CAPACITY {
            let elem = self.buf.get_unchecked(i).assume_init().to_le();
            self.state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut self.state);
            self.state.v0 ^= elem;
        }

        // Copy remaining input into start of buffer by copying size - 1
        // elements from spill (at most size - 1 bytes could have overflowed
        // into the spill). The memcpy call is optimized away because the size
        // is known. And the whole copy is optimized away for size == 1.
        let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
        ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, size - 1);

        // This function should only be called when the write fills the buffer.
        // Therefore, when size == 1, the new `self.nbuf` must be zero. The size
        // is statically known, so the branch is optimized away.
        self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE };
        self.processed += BUFFER_SIZE;
    }

    // A write function for byte slices.
    //
    // Fast path: the slice fits in the remaining buffer space. Slow path:
    // delegate to `slice_write_process_buffer`.
    #[inline]
    fn slice_write(&mut self, msg: &[u8]) {
        let length = msg.len();
        let nbuf = self.nbuf;
        debug_assert!(nbuf < BUFFER_SIZE);

        if nbuf + length < BUFFER_SIZE {
            unsafe {
                let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);

                if length <= 8 {
                    copy_nonoverlapping_small(msg.as_ptr(), dst, length);
                } else {
                    // This memcpy is *not* optimized away.
                    ptr::copy_nonoverlapping(msg.as_ptr(), dst, length);
                }
            }

            self.nbuf = nbuf + length;

            return;
        }

        // SAFETY: `nbuf + length >= BUFFER_SIZE`, so `msg` has enough bytes to
        // fill out the partially-written element, as that function requires.
        unsafe { self.slice_write_process_buffer(msg) }
    }

    // A write function for byte slices that should only be called when the
    // write would cause the buffer to fill.
    //
    // SAFETY: `self.buf` must be initialized up to the byte offset `self.nbuf`,
    // and `msg` must contain enough bytes to initialize the rest of the element
    // containing the byte offset `self.nbuf`.
    #[inline(never)]
    unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) {
        let length = msg.len();
        let nbuf = self.nbuf;
        debug_assert!(nbuf < BUFFER_SIZE);
        debug_assert!(nbuf + length >= BUFFER_SIZE);

        // Always copy first part of input into current element of buffer.
        // This function should only be called when the write fills the buffer,
        // so we know that there is enough input to fill the current element.
        let valid_in_elem = nbuf % ELEM_SIZE;
        let needed_in_elem = ELEM_SIZE - valid_in_elem;

        let src = msg.as_ptr();
        let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
        copy_nonoverlapping_small(src, dst, needed_in_elem);

        // Process buffer.

        // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) /
        // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0.
        // We know that is true, because last step ensured we have a full
        // element in the buffer.
        let last = nbuf / ELEM_SIZE + 1;

        for i in 0..last {
            let elem = self.buf.get_unchecked(i).assume_init().to_le();
            self.state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut self.state);
            self.state.v0 ^= elem;
        }

        // Process the remaining element-sized chunks of input.
        let mut processed = needed_in_elem;
        let input_left = length - processed;
        let elems_left = input_left / ELEM_SIZE;
        let extra_bytes_left = input_left % ELEM_SIZE;

        for _ in 0..elems_left {
            let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le();
            self.state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut self.state);
            self.state.v0 ^= elem;
            processed += ELEM_SIZE;
        }

        // Copy remaining input into start of buffer.
        let src = msg.as_ptr().add(processed);
        let dst = self.buf.as_mut_ptr() as *mut u8;
        copy_nonoverlapping_small(src, dst, extra_bytes_left);

        self.nbuf = extra_bytes_left;
        self.processed += nbuf + processed;
    }

    /// Consumes the hasher, finalizes the state, and returns the two 64-bit
    /// halves of the 128-bit hash.
    #[inline]
    pub fn finish128(mut self) -> (u64, u64) {
        debug_assert!(self.nbuf < BUFFER_SIZE);

        // Process full elements in buffer.
        let last = self.nbuf / ELEM_SIZE;

        // Since we're consuming self, avoid updating members for a potential
        // performance gain.
        let mut state = self.state;

        for i in 0..last {
            let elem = unsafe { self.buf.get_unchecked(i).assume_init().to_le() };
            state.v3 ^= elem;
            Sip24Rounds::c_rounds(&mut state);
            state.v0 ^= elem;
        }

        // Get remaining partial element.
        let elem = if self.nbuf % ELEM_SIZE != 0 {
            unsafe {
                // Ensure element is initialized by writing zero bytes. At most
                // `ELEM_SIZE - 1` are required given the above check. It's safe
                // to write this many because we have the spill and we maintain
                // `self.nbuf` such that this write will start before the spill.
                let dst = (self.buf.as_mut_ptr() as *mut u8).add(self.nbuf);
                ptr::write_bytes(dst, 0, ELEM_SIZE - 1);
                self.buf.get_unchecked(last).assume_init().to_le()
            }
        } else {
            0
        };

        // Finalize the hash: the final element packs the low byte of the total
        // input length into the top byte, per the SipHash specification.
        let length = self.processed + self.nbuf;
        let b: u64 = ((length as u64 & 0xff) << 56) | elem;

        state.v3 ^= b;
        Sip24Rounds::c_rounds(&mut state);
        state.v0 ^= b;

        // First 64-bit output half (v2 ^= 0xee per the 128-bit variant).
        state.v2 ^= 0xee;
        Sip24Rounds::d_rounds(&mut state);
        let _0 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;

        // Second 64-bit output half (v1 ^= 0xdd per the 128-bit variant).
        state.v1 ^= 0xdd;
        Sip24Rounds::d_rounds(&mut state);
        let _1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;

        (_0, _1)
    }
}
411 | ||
// Integer writes use native byte order and native `usize`/`isize` width, so
// results are platform-dependent; see the "Platform Dependence" notes above
// for how callers achieve platform-independent hashing.
impl Hasher for SipHasher128 {
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.short_write(i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.short_write(i);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.short_write(i);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.short_write(i);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.short_write(i);
    }

    // Signed writes hash the same bytes as their unsigned counterparts; the
    // casts reinterpret the bit pattern without changing it.
    #[inline]
    fn write_i8(&mut self, i: i8) {
        self.short_write(i as u8);
    }

    #[inline]
    fn write_i16(&mut self, i: i16) {
        self.short_write(i as u16);
    }

    #[inline]
    fn write_i32(&mut self, i: i32) {
        self.short_write(i as u32);
    }

    #[inline]
    fn write_i64(&mut self, i: i64) {
        self.short_write(i as u64);
    }

    #[inline]
    fn write_isize(&mut self, i: isize) {
        self.short_write(i as usize);
    }

    #[inline]
    fn write(&mut self, msg: &[u8]) {
        self.slice_write(msg);
    }

    // This hasher only produces 128-bit output via `finish128`; the 64-bit
    // `finish` required by the trait is deliberately unsupported.
    fn finish(&self) -> u64 {
        panic!("SipHasher128 cannot provide valid 64 bit hashes")
    }
}
472 | ||
473 | #[derive(Debug, Clone, Default)] | |
474 | struct Sip24Rounds; | |
475 | ||
476 | impl Sip24Rounds { | |
477 | #[inline] | |
478 | fn c_rounds(state: &mut State) { | |
479 | compress!(state); | |
480 | compress!(state); | |
481 | } | |
482 | ||
483 | #[inline] | |
484 | fn d_rounds(state: &mut State) { | |
485 | compress!(state); | |
486 | compress!(state); | |
487 | compress!(state); | |
488 | compress!(state); | |
489 | } | |
490 | } |