]> git.proxmox.com Git - rustc.git/blame - library/core/src/hash/sip.rs
New upstream version 1.57.0+dfsg1
[rustc.git] / library / core / src / hash / sip.rs
CommitLineData
3157f602 1//! An implementation of SipHash.
1a4d82fc 2
9fa01778 3#![allow(deprecated)] // the types in this module are deprecated
e9174d1e 4
48663c56 5use crate::cmp;
60c5eb7d 6use crate::marker::PhantomData;
48663c56 7use crate::mem;
60c5eb7d 8use crate::ptr;
3157f602
XL
9
10/// An implementation of SipHash 1-3.
11///
c30ab7b3 12/// This is currently the default hashing function used by standard library
9fa01778 13/// (e.g., `collections::HashMap` uses it by default).
c30ab7b3 14///
abe05a73 15/// See: <https://131002.net/siphash>
dfeec247 16#[unstable(feature = "hashmap_internals", issue = "none")]
60c5eb7d
XL
17#[rustc_deprecated(
18 since = "1.13.0",
19 reason = "use `std::collections::hash_map::DefaultHasher` instead"
20)]
3157f602 21#[derive(Debug, Clone, Default)]
0531ce1d 22#[doc(hidden)]
3157f602
XL
23pub struct SipHasher13 {
24 hasher: Hasher<Sip13Rounds>,
25}
26
27/// An implementation of SipHash 2-4.
28///
abe05a73 29/// See: <https://131002.net/siphash/>
dfeec247 30#[unstable(feature = "hashmap_internals", issue = "none")]
60c5eb7d
XL
31#[rustc_deprecated(
32 since = "1.13.0",
33 reason = "use `std::collections::hash_map::DefaultHasher` instead"
34)]
3157f602 35#[derive(Debug, Clone, Default)]
0531ce1d 36struct SipHasher24 {
3157f602
XL
37 hasher: Hasher<Sip24Rounds>,
38}
1a4d82fc
JJ
39
40/// An implementation of SipHash 2-4.
41///
abe05a73 42/// See: <https://131002.net/siphash/>
1a4d82fc 43///
54a0048b
SL
44/// SipHash is a general-purpose hashing function: it runs at a good
45/// speed (competitive with Spooky and City) and permits strong _keyed_
3dfed10e 46/// hashing. This lets you key your hash tables from a strong RNG, such as
54a0048b
SL
47/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
48///
49/// Although the SipHash algorithm is considered to be generally strong,
50/// it is not intended for cryptographic purposes. As such, all
51/// cryptographic uses of this implementation are _strongly discouraged_.
85aaf69f 52#[stable(feature = "rust1", since = "1.0.0")]
60c5eb7d
XL
53#[rustc_deprecated(
54 since = "1.13.0",
55 reason = "use `std::collections::hash_map::DefaultHasher` instead"
56)]
3157f602
XL
57#[derive(Debug, Clone, Default)]
58pub struct SipHasher(SipHasher24);
59
60#[derive(Debug)]
61struct Hasher<S: Sip> {
1a4d82fc
JJ
62 k0: u64,
63 k1: u64,
c34b1796 64 length: usize, // how many bytes we've processed
60c5eb7d
XL
65 state: State, // hash State
66 tail: u64, // unprocessed bytes le
67 ntail: usize, // how many bytes in tail are valid
3157f602
XL
68 _marker: PhantomData<S>,
69}
70
71#[derive(Debug, Clone, Copy)]
abe05a73 72#[repr(C)]
3157f602 73struct State {
c1a9b12d
SL
74 // v0, v2 and v1, v3 show up in pairs in the algorithm,
75 // and simd implementations of SipHash will use vectors
76 // of v02 and v13. By placing them in this order in the struct,
77 // the compiler can pick up on just a few simd optimizations by itself.
3157f602 78 v0: u64,
1a4d82fc 79 v2: u64,
c1a9b12d 80 v1: u64,
1a4d82fc 81 v3: u64,
1a4d82fc
JJ
82}
83
c30ab7b3 84macro_rules! compress {
60c5eb7d
XL
85 ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
86 ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
87 $v0 = $v0.wrapping_add($v1);
88 $v1 = $v1.rotate_left(13);
89 $v1 ^= $v0;
c30ab7b3 90 $v0 = $v0.rotate_left(32);
60c5eb7d
XL
91 $v2 = $v2.wrapping_add($v3);
92 $v3 = $v3.rotate_left(16);
93 $v3 ^= $v2;
94 $v0 = $v0.wrapping_add($v3);
95 $v3 = $v3.rotate_left(21);
96 $v3 ^= $v0;
97 $v2 = $v2.wrapping_add($v1);
98 $v1 = $v1.rotate_left(17);
99 $v1 ^= $v2;
c30ab7b3 100 $v2 = $v2.rotate_left(32);
60c5eb7d 101 }};
1a4d82fc
JJ
102}
103
9fa01778 104/// Loads an integer of the desired type from a byte stream, in LE order. Uses
c1a9b12d 105/// `copy_nonoverlapping` to let the compiler generate the most efficient way
c30ab7b3 106/// to load it from a possibly unaligned address.
c1a9b12d 107///
c30ab7b3
SL
108/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
109macro_rules! load_int_le {
60c5eb7d
XL
110 ($buf:expr, $i:expr, $int_ty:ident) => {{
111 debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
112 let mut data = 0 as $int_ty;
113 ptr::copy_nonoverlapping(
29967ef6 114 $buf.as_ptr().add($i),
60c5eb7d
XL
115 &mut data as *mut _ as *mut u8,
116 mem::size_of::<$int_ty>(),
117 );
118 data.to_le()
119 }};
1a4d82fc
JJ
120}
121
74b04a01
XL
122/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
123/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
124/// sizes and avoid calling `memcpy`, which is good for speed.
c30ab7b3
SL
125///
126/// Unsafe because: unchecked indexing at start..start+len
127#[inline]
128unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
129 debug_assert!(len < 8);
130 let mut i = 0; // current byte index (from LSB) in the output u64
131 let mut out = 0;
132 if i + 3 < len {
f035d41b
XL
133 // SAFETY: `i` cannot be greater than `len`, and the caller must guarantee
134 // that the index start..start+len is in bounds.
135 out = unsafe { load_int_le!(buf, start + i, u32) } as u64;
c30ab7b3
SL
136 i += 4;
137 }
138 if i + 1 < len {
f035d41b
XL
139 // SAFETY: same as above.
140 out |= (unsafe { load_int_le!(buf, start + i, u16) } as u64) << (i * 8);
c30ab7b3
SL
141 i += 2
142 }
143 if i < len {
f035d41b
XL
144 // SAFETY: same as above.
145 out |= (unsafe { *buf.get_unchecked(start + i) } as u64) << (i * 8);
c30ab7b3
SL
146 i += 1;
147 }
148 debug_assert_eq!(i, len);
149 out
150}
151
1a4d82fc
JJ
152impl SipHasher {
153 /// Creates a new `SipHasher` with the two initial keys set to 0.
154 #[inline]
85aaf69f 155 #[stable(feature = "rust1", since = "1.0.0")]
60c5eb7d
XL
156 #[rustc_deprecated(
157 since = "1.13.0",
158 reason = "use `std::collections::hash_map::DefaultHasher` instead"
159 )]
c295e0f8 160 #[must_use]
1a4d82fc
JJ
161 pub fn new() -> SipHasher {
162 SipHasher::new_with_keys(0, 0)
163 }
164
165 /// Creates a `SipHasher` that is keyed off the provided keys.
166 #[inline]
85aaf69f 167 #[stable(feature = "rust1", since = "1.0.0")]
60c5eb7d
XL
168 #[rustc_deprecated(
169 since = "1.13.0",
170 reason = "use `std::collections::hash_map::DefaultHasher` instead"
171 )]
c295e0f8 172 #[must_use]
1a4d82fc 173 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
60c5eb7d 174 SipHasher(SipHasher24 { hasher: Hasher::new_with_keys(key0, key1) })
3157f602
XL
175 }
176}
177
3157f602
XL
178impl SipHasher13 {
179 /// Creates a new `SipHasher13` with the two initial keys set to 0.
180 #[inline]
dfeec247 181 #[unstable(feature = "hashmap_internals", issue = "none")]
60c5eb7d
XL
182 #[rustc_deprecated(
183 since = "1.13.0",
184 reason = "use `std::collections::hash_map::DefaultHasher` instead"
185 )]
3157f602
XL
186 pub fn new() -> SipHasher13 {
187 SipHasher13::new_with_keys(0, 0)
188 }
189
190 /// Creates a `SipHasher13` that is keyed off the provided keys.
191 #[inline]
dfeec247 192 #[unstable(feature = "hashmap_internals", issue = "none")]
60c5eb7d
XL
193 #[rustc_deprecated(
194 since = "1.13.0",
195 reason = "use `std::collections::hash_map::DefaultHasher` instead"
196 )]
3157f602 197 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
60c5eb7d 198 SipHasher13 { hasher: Hasher::new_with_keys(key0, key1) }
3157f602
XL
199 }
200}
201
3157f602
XL
202impl<S: Sip> Hasher<S> {
203 #[inline]
204 fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
205 let mut state = Hasher {
1a4d82fc
JJ
206 k0: key0,
207 k1: key1,
208 length: 0,
60c5eb7d 209 state: State { v0: 0, v1: 0, v2: 0, v3: 0 },
1a4d82fc
JJ
210 tail: 0,
211 ntail: 0,
3157f602 212 _marker: PhantomData,
1a4d82fc
JJ
213 };
214 state.reset();
215 state
216 }
217
d9579d0f 218 #[inline]
85aaf69f
SL
219 fn reset(&mut self) {
220 self.length = 0;
3157f602
XL
221 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
222 self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
223 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
224 self.state.v3 = self.k1 ^ 0x7465646279746573;
85aaf69f
SL
225 self.ntail = 0;
226 }
e9174d1e 227}
85aaf69f 228
e9174d1e 229#[stable(feature = "rust1", since = "1.0.0")]
3157f602
XL
230impl super::Hasher for SipHasher {
231 #[inline]
232 fn write(&mut self, msg: &[u8]) {
0531ce1d 233 self.0.hasher.write(msg)
3157f602
XL
234 }
235
236 #[inline]
237 fn finish(&self) -> u64 {
0531ce1d 238 self.0.hasher.finish()
3157f602
XL
239 }
240}
241
dfeec247 242#[unstable(feature = "hashmap_internals", issue = "none")]
3157f602
XL
243impl super::Hasher for SipHasher13 {
244 #[inline]
245 fn write(&mut self, msg: &[u8]) {
246 self.hasher.write(msg)
247 }
248
249 #[inline]
250 fn finish(&self) -> u64 {
251 self.hasher.finish()
252 }
253}
254
3157f602 255impl<S: Sip> super::Hasher for Hasher<S> {
ba9703b0
XL
256 // Note: no integer hashing methods (`write_u*`, `write_i*`) are defined
257 // for this type. We could add them, copy the `short_write` implementation
258 // in librustc_data_structures/sip128.rs, and add `write_u*`/`write_i*`
259 // methods to `SipHasher`, `SipHasher13`, and `DefaultHasher`. This would
260 // greatly speed up integer hashing by those hashers, at the cost of
261 // slightly slowing down compile speeds on some benchmarks. See #69152 for
262 // details.
d9579d0f 263 #[inline]
1a4d82fc
JJ
264 fn write(&mut self, msg: &[u8]) {
265 let length = msg.len();
266 self.length += length;
267
85aaf69f 268 let mut needed = 0;
1a4d82fc
JJ
269
270 if self.ntail != 0 {
271 needed = 8 - self.ntail;
f9f354fc 272 // SAFETY: `cmp::min(length, needed)` is guaranteed to not be over `length`
74b04a01 273 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
1a4d82fc 274 if length < needed {
1a4d82fc 275 self.ntail += length;
60c5eb7d 276 return;
c30ab7b3
SL
277 } else {
278 self.state.v3 ^= self.tail;
279 S::c_rounds(&mut self.state);
280 self.state.v0 ^= self.tail;
281 self.ntail = 0;
1a4d82fc 282 }
1a4d82fc
JJ
283 }
284
285 // Buffered tail is now flushed, process new input.
286 let len = length - needed;
f9f354fc 287 let left = len & 0x7; // len % 8
1a4d82fc
JJ
288
289 let mut i = needed;
c1a9b12d 290 while i < len - left {
f9f354fc
XL
291 // SAFETY: because `len - left` is the biggest multiple of 8 under
292 // `len`, and because `i` starts at `needed` where `len` is `length - needed`,
293 // `i + 8` is guaranteed to be less than or equal to `length`.
c30ab7b3 294 let mi = unsafe { load_int_le!(msg, i, u64) };
1a4d82fc 295
3157f602
XL
296 self.state.v3 ^= mi;
297 S::c_rounds(&mut self.state);
298 self.state.v0 ^= mi;
1a4d82fc
JJ
299
300 i += 8;
301 }
302
f9f354fc
XL
303 // SAFETY: `i` is now `needed + len.div_euclid(8) * 8`,
304 // so `i + left` = `needed + len` = `length`, which is by
305 // definition equal to `msg.len()`.
c30ab7b3 306 self.tail = unsafe { u8to64_le(msg, i, left) };
1a4d82fc
JJ
307 self.ntail = left;
308 }
1a4d82fc 309
d9579d0f 310 #[inline]
1a4d82fc 311 fn finish(&self) -> u64 {
3157f602 312 let mut state = self.state;
1a4d82fc
JJ
313
314 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
315
3157f602
XL
316 state.v3 ^= b;
317 S::c_rounds(&mut state);
318 state.v0 ^= b;
1a4d82fc 319
3157f602
XL
320 state.v2 ^= 0xff;
321 S::d_rounds(&mut state);
1a4d82fc 322
3157f602 323 state.v0 ^ state.v1 ^ state.v2 ^ state.v3
1a4d82fc
JJ
324 }
325}
326
3157f602 327impl<S: Sip> Clone for Hasher<S> {
1a4d82fc 328 #[inline]
3157f602
XL
329 fn clone(&self) -> Hasher<S> {
330 Hasher {
1a4d82fc
JJ
331 k0: self.k0,
332 k1: self.k1,
333 length: self.length,
3157f602 334 state: self.state,
1a4d82fc
JJ
335 tail: self.tail,
336 ntail: self.ntail,
3157f602 337 _marker: self._marker,
1a4d82fc
JJ
338 }
339 }
340}
341
3157f602 342impl<S: Sip> Default for Hasher<S> {
9e0c209e 343 /// Creates a `Hasher<S>` with the two initial keys set to 0.
3157f602
XL
344 #[inline]
345 fn default() -> Hasher<S> {
346 Hasher::new_with_keys(0, 0)
347 }
348}
349
350#[doc(hidden)]
351trait Sip {
7cac9316
XL
352 fn c_rounds(_: &mut State);
353 fn d_rounds(_: &mut State);
3157f602
XL
354}
355
356#[derive(Debug, Clone, Default)]
357struct Sip13Rounds;
358
359impl Sip for Sip13Rounds {
360 #[inline]
361 fn c_rounds(state: &mut State) {
362 compress!(state);
363 }
364
365 #[inline]
366 fn d_rounds(state: &mut State) {
367 compress!(state);
368 compress!(state);
369 compress!(state);
370 }
371}
372
373#[derive(Debug, Clone, Default)]
374struct Sip24Rounds;
375
376impl Sip for Sip24Rounds {
377 #[inline]
378 fn c_rounds(state: &mut State) {
379 compress!(state);
380 compress!(state);
381 }
382
383 #[inline]
384 fn d_rounds(state: &mut State) {
385 compress!(state);
386 compress!(state);
387 compress!(state);
388 compress!(state);
1a4d82fc
JJ
389 }
390}