]>
Commit | Line | Data |
---|---|---|
3157f602 | 1 | //! An implementation of SipHash. |
1a4d82fc | 2 | |
60c5eb7d XL |
3 | // ignore-tidy-undocumented-unsafe |
4 | ||
9fa01778 | 5 | #![allow(deprecated)] // the types in this module are deprecated |
e9174d1e | 6 | |
48663c56 | 7 | use crate::cmp; |
60c5eb7d | 8 | use crate::marker::PhantomData; |
48663c56 | 9 | use crate::mem; |
60c5eb7d | 10 | use crate::ptr; |
3157f602 XL |
11 | |
12 | /// An implementation of SipHash 1-3. | |
13 | /// | |
c30ab7b3 | 14 | /// This is currently the default hashing function used by standard library |
9fa01778 | 15 | /// (e.g., `collections::HashMap` uses it by default). |
c30ab7b3 | 16 | /// |
abe05a73 | 17 | /// See: <https://131002.net/siphash> |
0531ce1d | 18 | #[unstable(feature = "hashmap_internals", issue = "0")] |
60c5eb7d XL |
19 | #[rustc_deprecated( |
20 | since = "1.13.0", | |
21 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
22 | )] | |
3157f602 | 23 | #[derive(Debug, Clone, Default)] |
0531ce1d | 24 | #[doc(hidden)] |
3157f602 XL |
25 | pub struct SipHasher13 { |
26 | hasher: Hasher<Sip13Rounds>, | |
27 | } | |
28 | ||
29 | /// An implementation of SipHash 2-4. | |
30 | /// | |
abe05a73 | 31 | /// See: <https://131002.net/siphash/> |
0531ce1d | 32 | #[unstable(feature = "hashmap_internals", issue = "0")] |
60c5eb7d XL |
33 | #[rustc_deprecated( |
34 | since = "1.13.0", | |
35 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
36 | )] | |
3157f602 | 37 | #[derive(Debug, Clone, Default)] |
0531ce1d | 38 | struct SipHasher24 { |
3157f602 XL |
39 | hasher: Hasher<Sip24Rounds>, |
40 | } | |
1a4d82fc JJ |
41 | |
42 | /// An implementation of SipHash 2-4. | |
43 | /// | |
abe05a73 | 44 | /// See: <https://131002.net/siphash/> |
1a4d82fc | 45 | /// |
54a0048b SL |
46 | /// SipHash is a general-purpose hashing function: it runs at a good |
47 | /// speed (competitive with Spooky and City) and permits strong _keyed_ | |
48 | /// hashing. This lets you key your hashtables from a strong RNG, such as | |
49 | /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html). | |
50 | /// | |
51 | /// Although the SipHash algorithm is considered to be generally strong, | |
52 | /// it is not intended for cryptographic purposes. As such, all | |
53 | /// cryptographic uses of this implementation are _strongly discouraged_. | |
85aaf69f | 54 | #[stable(feature = "rust1", since = "1.0.0")] |
60c5eb7d XL |
55 | #[rustc_deprecated( |
56 | since = "1.13.0", | |
57 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
58 | )] | |
3157f602 XL |
59 | #[derive(Debug, Clone, Default)] |
60 | pub struct SipHasher(SipHasher24); | |
61 | ||
62 | #[derive(Debug)] | |
63 | struct Hasher<S: Sip> { | |
1a4d82fc JJ |
64 | k0: u64, |
65 | k1: u64, | |
c34b1796 | 66 | length: usize, // how many bytes we've processed |
60c5eb7d XL |
67 | state: State, // hash State |
68 | tail: u64, // unprocessed bytes le | |
69 | ntail: usize, // how many bytes in tail are valid | |
3157f602 XL |
70 | _marker: PhantomData<S>, |
71 | } | |
72 | ||
73 | #[derive(Debug, Clone, Copy)] | |
abe05a73 | 74 | #[repr(C)] |
3157f602 | 75 | struct State { |
c1a9b12d SL |
76 | // v0, v2 and v1, v3 show up in pairs in the algorithm, |
77 | // and simd implementations of SipHash will use vectors | |
78 | // of v02 and v13. By placing them in this order in the struct, | |
79 | // the compiler can pick up on just a few simd optimizations by itself. | |
3157f602 | 80 | v0: u64, |
1a4d82fc | 81 | v2: u64, |
c1a9b12d | 82 | v1: u64, |
1a4d82fc | 83 | v3: u64, |
1a4d82fc JJ |
84 | } |
85 | ||
c30ab7b3 | 86 | macro_rules! compress { |
60c5eb7d XL |
87 | ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }}; |
88 | ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{ | |
89 | $v0 = $v0.wrapping_add($v1); | |
90 | $v1 = $v1.rotate_left(13); | |
91 | $v1 ^= $v0; | |
c30ab7b3 | 92 | $v0 = $v0.rotate_left(32); |
60c5eb7d XL |
93 | $v2 = $v2.wrapping_add($v3); |
94 | $v3 = $v3.rotate_left(16); | |
95 | $v3 ^= $v2; | |
96 | $v0 = $v0.wrapping_add($v3); | |
97 | $v3 = $v3.rotate_left(21); | |
98 | $v3 ^= $v0; | |
99 | $v2 = $v2.wrapping_add($v1); | |
100 | $v1 = $v1.rotate_left(17); | |
101 | $v1 ^= $v2; | |
c30ab7b3 | 102 | $v2 = $v2.rotate_left(32); |
60c5eb7d | 103 | }}; |
1a4d82fc JJ |
104 | } |
105 | ||
9fa01778 | 106 | /// Loads an integer of the desired type from a byte stream, in LE order. Uses |
c1a9b12d | 107 | /// `copy_nonoverlapping` to let the compiler generate the most efficient way |
c30ab7b3 | 108 | /// to load it from a possibly unaligned address. |
c1a9b12d | 109 | /// |
c30ab7b3 SL |
110 | /// Unsafe because: unchecked indexing at i..i+size_of(int_ty) |
111 | macro_rules! load_int_le { | |
60c5eb7d XL |
112 | ($buf:expr, $i:expr, $int_ty:ident) => {{ |
113 | debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len()); | |
114 | let mut data = 0 as $int_ty; | |
115 | ptr::copy_nonoverlapping( | |
116 | $buf.get_unchecked($i), | |
117 | &mut data as *mut _ as *mut u8, | |
118 | mem::size_of::<$int_ty>(), | |
119 | ); | |
120 | data.to_le() | |
121 | }}; | |
1a4d82fc JJ |
122 | } |
123 | ||
9fa01778 | 124 | /// Loads an u64 using up to 7 bytes of a byte slice. |
c30ab7b3 SL |
125 | /// |
126 | /// Unsafe because: unchecked indexing at start..start+len | |
127 | #[inline] | |
128 | unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { | |
129 | debug_assert!(len < 8); | |
130 | let mut i = 0; // current byte index (from LSB) in the output u64 | |
131 | let mut out = 0; | |
132 | if i + 3 < len { | |
133 | out = load_int_le!(buf, start + i, u32) as u64; | |
134 | i += 4; | |
135 | } | |
136 | if i + 1 < len { | |
137 | out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8); | |
138 | i += 2 | |
139 | } | |
140 | if i < len { | |
141 | out |= (*buf.get_unchecked(start + i) as u64) << (i * 8); | |
142 | i += 1; | |
143 | } | |
144 | debug_assert_eq!(i, len); | |
145 | out | |
146 | } | |
147 | ||
1a4d82fc JJ |
148 | impl SipHasher { |
149 | /// Creates a new `SipHasher` with the two initial keys set to 0. | |
150 | #[inline] | |
85aaf69f | 151 | #[stable(feature = "rust1", since = "1.0.0")] |
60c5eb7d XL |
152 | #[rustc_deprecated( |
153 | since = "1.13.0", | |
154 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
155 | )] | |
1a4d82fc JJ |
156 | pub fn new() -> SipHasher { |
157 | SipHasher::new_with_keys(0, 0) | |
158 | } | |
159 | ||
160 | /// Creates a `SipHasher` that is keyed off the provided keys. | |
161 | #[inline] | |
85aaf69f | 162 | #[stable(feature = "rust1", since = "1.0.0")] |
60c5eb7d XL |
163 | #[rustc_deprecated( |
164 | since = "1.13.0", | |
165 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
166 | )] | |
1a4d82fc | 167 | pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher { |
60c5eb7d | 168 | SipHasher(SipHasher24 { hasher: Hasher::new_with_keys(key0, key1) }) |
3157f602 XL |
169 | } |
170 | } | |
171 | ||
3157f602 XL |
172 | impl SipHasher13 { |
173 | /// Creates a new `SipHasher13` with the two initial keys set to 0. | |
174 | #[inline] | |
0531ce1d | 175 | #[unstable(feature = "hashmap_internals", issue = "0")] |
60c5eb7d XL |
176 | #[rustc_deprecated( |
177 | since = "1.13.0", | |
178 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
179 | )] | |
3157f602 XL |
180 | pub fn new() -> SipHasher13 { |
181 | SipHasher13::new_with_keys(0, 0) | |
182 | } | |
183 | ||
184 | /// Creates a `SipHasher13` that is keyed off the provided keys. | |
185 | #[inline] | |
0531ce1d | 186 | #[unstable(feature = "hashmap_internals", issue = "0")] |
60c5eb7d XL |
187 | #[rustc_deprecated( |
188 | since = "1.13.0", | |
189 | reason = "use `std::collections::hash_map::DefaultHasher` instead" | |
190 | )] | |
3157f602 | 191 | pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 { |
60c5eb7d | 192 | SipHasher13 { hasher: Hasher::new_with_keys(key0, key1) } |
3157f602 XL |
193 | } |
194 | } | |
195 | ||
3157f602 XL |
196 | impl<S: Sip> Hasher<S> { |
197 | #[inline] | |
198 | fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> { | |
199 | let mut state = Hasher { | |
1a4d82fc JJ |
200 | k0: key0, |
201 | k1: key1, | |
202 | length: 0, | |
60c5eb7d | 203 | state: State { v0: 0, v1: 0, v2: 0, v3: 0 }, |
1a4d82fc JJ |
204 | tail: 0, |
205 | ntail: 0, | |
3157f602 | 206 | _marker: PhantomData, |
1a4d82fc JJ |
207 | }; |
208 | state.reset(); | |
209 | state | |
210 | } | |
211 | ||
d9579d0f | 212 | #[inline] |
85aaf69f SL |
213 | fn reset(&mut self) { |
214 | self.length = 0; | |
3157f602 XL |
215 | self.state.v0 = self.k0 ^ 0x736f6d6570736575; |
216 | self.state.v1 = self.k1 ^ 0x646f72616e646f6d; | |
217 | self.state.v2 = self.k0 ^ 0x6c7967656e657261; | |
218 | self.state.v3 = self.k1 ^ 0x7465646279746573; | |
85aaf69f SL |
219 | self.ntail = 0; |
220 | } | |
c30ab7b3 SL |
221 | |
222 | // Specialized write function that is only valid for buffers with len <= 8. | |
223 | // It's used to force inlining of write_u8 and write_usize, those would normally be inlined | |
224 | // except for composite types (that includes slices and str hashing because of delimiter). | |
225 | // Without this extra push the compiler is very reluctant to inline delimiter writes, | |
226 | // degrading performance substantially for the most common use cases. | |
3b2f2976 | 227 | #[inline] |
c30ab7b3 SL |
228 | fn short_write(&mut self, msg: &[u8]) { |
229 | debug_assert!(msg.len() <= 8); | |
230 | let length = msg.len(); | |
231 | self.length += length; | |
232 | ||
233 | let needed = 8 - self.ntail; | |
234 | let fill = cmp::min(length, needed); | |
235 | if fill == 8 { | |
236 | self.tail = unsafe { load_int_le!(msg, 0, u64) }; | |
237 | } else { | |
238 | self.tail |= unsafe { u8to64_le(msg, 0, fill) } << (8 * self.ntail); | |
239 | if length < needed { | |
240 | self.ntail += length; | |
241 | return; | |
242 | } | |
243 | } | |
244 | self.state.v3 ^= self.tail; | |
245 | S::c_rounds(&mut self.state); | |
246 | self.state.v0 ^= self.tail; | |
247 | ||
248 | // Buffered tail is now flushed, process new input. | |
249 | self.ntail = length - needed; | |
250 | self.tail = unsafe { u8to64_le(msg, needed, self.ntail) }; | |
251 | } | |
e9174d1e | 252 | } |
85aaf69f | 253 | |
e9174d1e | 254 | #[stable(feature = "rust1", since = "1.0.0")] |
3157f602 XL |
255 | impl super::Hasher for SipHasher { |
256 | #[inline] | |
257 | fn write(&mut self, msg: &[u8]) { | |
0531ce1d | 258 | self.0.hasher.write(msg) |
3157f602 XL |
259 | } |
260 | ||
261 | #[inline] | |
262 | fn finish(&self) -> u64 { | |
0531ce1d | 263 | self.0.hasher.finish() |
3157f602 XL |
264 | } |
265 | } | |
266 | ||
0531ce1d | 267 | #[unstable(feature = "hashmap_internals", issue = "0")] |
3157f602 XL |
268 | impl super::Hasher for SipHasher13 { |
269 | #[inline] | |
270 | fn write(&mut self, msg: &[u8]) { | |
271 | self.hasher.write(msg) | |
272 | } | |
273 | ||
274 | #[inline] | |
275 | fn finish(&self) -> u64 { | |
276 | self.hasher.finish() | |
277 | } | |
278 | } | |
279 | ||
3157f602 | 280 | impl<S: Sip> super::Hasher for Hasher<S> { |
c30ab7b3 SL |
281 | // see short_write comment for explanation |
282 | #[inline] | |
283 | fn write_usize(&mut self, i: usize) { | |
284 | let bytes = unsafe { | |
48663c56 | 285 | crate::slice::from_raw_parts(&i as *const usize as *const u8, mem::size_of::<usize>()) |
c30ab7b3 SL |
286 | }; |
287 | self.short_write(bytes); | |
288 | } | |
289 | ||
290 | // see short_write comment for explanation | |
291 | #[inline] | |
292 | fn write_u8(&mut self, i: u8) { | |
293 | self.short_write(&[i]); | |
294 | } | |
295 | ||
d9579d0f | 296 | #[inline] |
1a4d82fc JJ |
297 | fn write(&mut self, msg: &[u8]) { |
298 | let length = msg.len(); | |
299 | self.length += length; | |
300 | ||
85aaf69f | 301 | let mut needed = 0; |
1a4d82fc JJ |
302 | |
303 | if self.ntail != 0 { | |
304 | needed = 8 - self.ntail; | |
c30ab7b3 | 305 | self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << 8 * self.ntail; |
1a4d82fc | 306 | if length < needed { |
1a4d82fc | 307 | self.ntail += length; |
60c5eb7d | 308 | return; |
c30ab7b3 SL |
309 | } else { |
310 | self.state.v3 ^= self.tail; | |
311 | S::c_rounds(&mut self.state); | |
312 | self.state.v0 ^= self.tail; | |
313 | self.ntail = 0; | |
1a4d82fc | 314 | } |
1a4d82fc JJ |
315 | } |
316 | ||
317 | // Buffered tail is now flushed, process new input. | |
318 | let len = length - needed; | |
1a4d82fc JJ |
319 | let left = len & 0x7; |
320 | ||
321 | let mut i = needed; | |
c1a9b12d | 322 | while i < len - left { |
c30ab7b3 | 323 | let mi = unsafe { load_int_le!(msg, i, u64) }; |
1a4d82fc | 324 | |
3157f602 XL |
325 | self.state.v3 ^= mi; |
326 | S::c_rounds(&mut self.state); | |
327 | self.state.v0 ^= mi; | |
1a4d82fc JJ |
328 | |
329 | i += 8; | |
330 | } | |
331 | ||
c30ab7b3 | 332 | self.tail = unsafe { u8to64_le(msg, i, left) }; |
1a4d82fc JJ |
333 | self.ntail = left; |
334 | } | |
1a4d82fc | 335 | |
d9579d0f | 336 | #[inline] |
1a4d82fc | 337 | fn finish(&self) -> u64 { |
3157f602 | 338 | let mut state = self.state; |
1a4d82fc JJ |
339 | |
340 | let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail; | |
341 | ||
3157f602 XL |
342 | state.v3 ^= b; |
343 | S::c_rounds(&mut state); | |
344 | state.v0 ^= b; | |
1a4d82fc | 345 | |
3157f602 XL |
346 | state.v2 ^= 0xff; |
347 | S::d_rounds(&mut state); | |
1a4d82fc | 348 | |
3157f602 | 349 | state.v0 ^ state.v1 ^ state.v2 ^ state.v3 |
1a4d82fc JJ |
350 | } |
351 | } | |
352 | ||
3157f602 | 353 | impl<S: Sip> Clone for Hasher<S> { |
1a4d82fc | 354 | #[inline] |
3157f602 XL |
355 | fn clone(&self) -> Hasher<S> { |
356 | Hasher { | |
1a4d82fc JJ |
357 | k0: self.k0, |
358 | k1: self.k1, | |
359 | length: self.length, | |
3157f602 | 360 | state: self.state, |
1a4d82fc JJ |
361 | tail: self.tail, |
362 | ntail: self.ntail, | |
3157f602 | 363 | _marker: self._marker, |
1a4d82fc JJ |
364 | } |
365 | } | |
366 | } | |
367 | ||
3157f602 | 368 | impl<S: Sip> Default for Hasher<S> { |
9e0c209e | 369 | /// Creates a `Hasher<S>` with the two initial keys set to 0. |
3157f602 XL |
370 | #[inline] |
371 | fn default() -> Hasher<S> { | |
372 | Hasher::new_with_keys(0, 0) | |
373 | } | |
374 | } | |
375 | ||
376 | #[doc(hidden)] | |
377 | trait Sip { | |
7cac9316 XL |
378 | fn c_rounds(_: &mut State); |
379 | fn d_rounds(_: &mut State); | |
3157f602 XL |
380 | } |
381 | ||
382 | #[derive(Debug, Clone, Default)] | |
383 | struct Sip13Rounds; | |
384 | ||
385 | impl Sip for Sip13Rounds { | |
386 | #[inline] | |
387 | fn c_rounds(state: &mut State) { | |
388 | compress!(state); | |
389 | } | |
390 | ||
391 | #[inline] | |
392 | fn d_rounds(state: &mut State) { | |
393 | compress!(state); | |
394 | compress!(state); | |
395 | compress!(state); | |
396 | } | |
397 | } | |
398 | ||
399 | #[derive(Debug, Clone, Default)] | |
400 | struct Sip24Rounds; | |
401 | ||
402 | impl Sip for Sip24Rounds { | |
403 | #[inline] | |
404 | fn c_rounds(state: &mut State) { | |
405 | compress!(state); | |
406 | compress!(state); | |
407 | } | |
408 | ||
409 | #[inline] | |
410 | fn d_rounds(state: &mut State) { | |
411 | compress!(state); | |
412 | compress!(state); | |
413 | compress!(state); | |
414 | compress!(state); | |
1a4d82fc JJ |
415 | } |
416 | } |