]> git.proxmox.com Git - rustc.git/blame - src/librustc_data_structures/sip128.rs
New upstream version 1.47.0+dfsg1
[rustc.git] / src / librustc_data_structures / sip128.rs
CommitLineData
abe05a73
XL
//! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes.

use std::cmp;
use std::hash::Hasher;
use std::mem;
use std::ptr;

#[cfg(test)]
mod tests;
abe05a73
XL
11#[derive(Debug, Clone)]
12pub struct SipHasher128 {
13 k0: u64,
14 k1: u64,
15 length: usize, // how many bytes we've processed
dfeec247
XL
16 state: State, // hash State
17 tail: u64, // unprocessed bytes le
18 ntail: usize, // how many bytes in tail are valid
abe05a73
XL
19}
20
/// The four 64-bit words of SipHash internal state.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
33
/// One SipRound: the core ARX (add-rotate-xor) mixing step of SipHash.
/// The single-argument form operates on a `State`'s four words in place.
macro_rules! compress {
    ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
53
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at i..i+size_of(int_ty)
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        let mut data = 0 as $int_ty;
        ptr::copy_nonoverlapping(
            $buf.get_unchecked($i),
            &mut data as *mut _ as *mut u8,
            mem::size_of::<$int_ty>(),
        );
        // `to_le` is a no-op on little-endian targets and a byte-swap on
        // big-endian ones, so the result is the LE interpretation everywhere.
        data.to_le()
    }};
}
71
74b04a01
XL
72/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
73/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
74/// sizes and avoid calling `memcpy`, which is good for speed.
abe05a73
XL
75///
76/// Unsafe because: unchecked indexing at start..start+len
77#[inline]
78unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
79 debug_assert!(len < 8);
80 let mut i = 0; // current byte index (from LSB) in the output u64
81 let mut out = 0;
82 if i + 3 < len {
74b04a01 83 out = load_int_le!(buf, start + i, u32) as u64;
abe05a73
XL
84 i += 4;
85 }
86 if i + 1 < len {
74b04a01 87 out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
abe05a73
XL
88 i += 2
89 }
90 if i < len {
74b04a01 91 out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
abe05a73
XL
92 i += 1;
93 }
94 debug_assert_eq!(i, len);
95 out
96}
97
abe05a73
XL
98impl SipHasher128 {
99 #[inline]
100 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 {
101 let mut state = SipHasher128 {
102 k0: key0,
103 k1: key1,
104 length: 0,
dfeec247 105 state: State { v0: 0, v1: 0, v2: 0, v3: 0 },
abe05a73
XL
106 tail: 0,
107 ntail: 0,
108 };
109 state.reset();
110 state
111 }
112
113 #[inline]
114 fn reset(&mut self) {
115 self.length = 0;
116 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
117 self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
118 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
119 self.state.v3 = self.k1 ^ 0x7465646279746573;
120 self.ntail = 0;
121
122 // This is only done in the 128 bit version:
123 self.state.v1 ^= 0xee;
124 }
125
74b04a01
XL
126 // A specialized write function for values with size <= 8.
127 //
128 // The hashing of multi-byte integers depends on endianness. E.g.:
129 // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
130 // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
131 //
132 // This function does the right thing for little-endian hardware. On
133 // big-endian hardware `x` must be byte-swapped first to give the right
134 // behaviour. After any byte-swapping, the input must be zero-extended to
135 // 64-bits. The caller is responsible for the byte-swapping and
136 // zero-extension.
abe05a73 137 #[inline]
74b04a01
XL
138 fn short_write<T>(&mut self, _x: T, x: u64) {
139 let size = mem::size_of::<T>();
140 self.length += size;
141
142 // The original number must be zero-extended, not sign-extended.
143 debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
abe05a73 144
74b04a01 145 // The number of bytes needed to fill `self.tail`.
abe05a73 146 let needed = 8 - self.ntail;
74b04a01
XL
147
148 // SipHash parses the input stream as 8-byte little-endian integers.
149 // Inputs are put into `self.tail` until 8 bytes of data have been
150 // collected, and then that word is processed.
151 //
152 // For example, imagine that `self.tail` is 0x0000_00EE_DDCC_BBAA,
153 // `self.ntail` is 5 (because 5 bytes have been put into `self.tail`),
154 // and `needed` is therefore 3.
155 //
156 // - Scenario 1, `self.write_u8(0xFF)`: we have already zero-extended
157 // the input to 0x0000_0000_0000_00FF. We now left-shift it five
158 // bytes, giving 0x0000_FF00_0000_0000. We then bitwise-OR that value
159 // into `self.tail`, resulting in 0x0000_FFEE_DDCC_BBAA.
160 // (Zero-extension of the original input is critical in this scenario
161 // because we don't want the high two bytes of `self.tail` to be
162 // touched by the bitwise-OR.) `self.tail` is not yet full, so we
163 // return early, after updating `self.ntail` to 6.
164 //
165 // - Scenario 2, `self.write_u32(0xIIHH_GGFF)`: we have already
166 // zero-extended the input to 0x0000_0000_IIHH_GGFF. We now
167 // left-shift it five bytes, giving 0xHHGG_FF00_0000_0000. We then
168 // bitwise-OR that value into `self.tail`, resulting in
169 // 0xHHGG_FFEE_DDCC_BBAA. `self.tail` is now full, and we can use it
170 // to update `self.state`. (As mentioned above, this assumes a
171 // little-endian machine; on a big-endian machine we would have
172 // byte-swapped 0xIIHH_GGFF in the caller, giving 0xFFGG_HHII, and we
173 // would then end up bitwise-ORing 0xGGHH_II00_0000_0000 into
174 // `self.tail`).
175 //
176 self.tail |= x << (8 * self.ntail);
177 if size < needed {
178 self.ntail += size;
179 return;
abe05a73 180 }
74b04a01
XL
181
182 // `self.tail` is full, process it.
abe05a73
XL
183 self.state.v3 ^= self.tail;
184 Sip24Rounds::c_rounds(&mut self.state);
185 self.state.v0 ^= self.tail;
186
74b04a01
XL
187 // Continuing scenario 2: we have one byte left over from the input. We
188 // set `self.ntail` to 1 and `self.tail` to `0x0000_0000_IIHH_GGFF >>
189 // 8*3`, which is 0x0000_0000_0000_00II. (Or on a big-endian machine
190 // the prior byte-swapping would leave us with 0x0000_0000_0000_00FF.)
191 //
192 // The `if` is needed to avoid shifting by 64 bits, which Rust
193 // complains about.
194 self.ntail = size - needed;
195 self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
abe05a73
XL
196 }
197
198 #[inline]
199 pub fn finish128(mut self) -> (u64, u64) {
200 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
201
202 self.state.v3 ^= b;
203 Sip24Rounds::c_rounds(&mut self.state);
204 self.state.v0 ^= b;
205
206 self.state.v2 ^= 0xee;
207 Sip24Rounds::d_rounds(&mut self.state);
208 let _0 = self.state.v0 ^ self.state.v1 ^ self.state.v2 ^ self.state.v3;
209
210 self.state.v1 ^= 0xdd;
211 Sip24Rounds::d_rounds(&mut self.state);
212 let _1 = self.state.v0 ^ self.state.v1 ^ self.state.v2 ^ self.state.v3;
213 (_0, _1)
214 }
215}
216
217impl Hasher for SipHasher128 {
218 #[inline]
219 fn write_u8(&mut self, i: u8) {
74b04a01 220 self.short_write(i, i as u64);
abe05a73
XL
221 }
222
223 #[inline]
224 fn write_u16(&mut self, i: u16) {
74b04a01 225 self.short_write(i, i.to_le() as u64);
abe05a73
XL
226 }
227
228 #[inline]
229 fn write_u32(&mut self, i: u32) {
74b04a01 230 self.short_write(i, i.to_le() as u64);
abe05a73
XL
231 }
232
233 #[inline]
234 fn write_u64(&mut self, i: u64) {
74b04a01 235 self.short_write(i, i.to_le() as u64);
abe05a73
XL
236 }
237
238 #[inline]
239 fn write_usize(&mut self, i: usize) {
74b04a01 240 self.short_write(i, i.to_le() as u64);
abe05a73
XL
241 }
242
243 #[inline]
244 fn write_i8(&mut self, i: i8) {
74b04a01 245 self.short_write(i, i as u8 as u64);
abe05a73
XL
246 }
247
248 #[inline]
249 fn write_i16(&mut self, i: i16) {
74b04a01 250 self.short_write(i, (i as u16).to_le() as u64);
abe05a73
XL
251 }
252
253 #[inline]
254 fn write_i32(&mut self, i: i32) {
74b04a01 255 self.short_write(i, (i as u32).to_le() as u64);
abe05a73
XL
256 }
257
258 #[inline]
259 fn write_i64(&mut self, i: i64) {
74b04a01 260 self.short_write(i, (i as u64).to_le() as u64);
abe05a73
XL
261 }
262
263 #[inline]
264 fn write_isize(&mut self, i: isize) {
74b04a01 265 self.short_write(i, (i as usize).to_le() as u64);
abe05a73
XL
266 }
267
268 #[inline]
269 fn write(&mut self, msg: &[u8]) {
270 let length = msg.len();
271 self.length += length;
272
273 let mut needed = 0;
274
275 if self.ntail != 0 {
276 needed = 8 - self.ntail;
dc9dc135 277 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
abe05a73
XL
278 if length < needed {
279 self.ntail += length;
dfeec247 280 return;
abe05a73
XL
281 } else {
282 self.state.v3 ^= self.tail;
283 Sip24Rounds::c_rounds(&mut self.state);
284 self.state.v0 ^= self.tail;
285 self.ntail = 0;
286 }
287 }
288
289 // Buffered tail is now flushed, process new input.
290 let len = length - needed;
291 let left = len & 0x7;
292
293 let mut i = needed;
294 while i < len - left {
295 let mi = unsafe { load_int_le!(msg, i, u64) };
296
297 self.state.v3 ^= mi;
298 Sip24Rounds::c_rounds(&mut self.state);
299 self.state.v0 ^= mi;
300
301 i += 8;
302 }
303
304 self.tail = unsafe { u8to64_le(msg, i, left) };
305 self.ntail = left;
306 }
307
308 fn finish(&self) -> u64 {
309 panic!("SipHasher128 cannot provide valid 64 bit hashes")
310 }
311}
312
313#[derive(Debug, Clone, Default)]
314struct Sip24Rounds;
315
316impl Sip24Rounds {
317 #[inline]
318 fn c_rounds(state: &mut State) {
319 compress!(state);
320 compress!(state);
321 }
322
323 #[inline]
324 fn d_rounds(state: &mut State) {
325 compress!(state);
326 compress!(state);
327 compress!(state);
328 compress!(state);
329 }
330}