]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
0a96a21b | 2 | * Copyright (c) 2008, 2009, 2010, 2012, 2013, 2014, 2016 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | #ifndef HASH_H | |
17 | #define HASH_H 1 | |
18 | ||
8e542118 | 19 | #include <stdbool.h> |
064af421 BP |
20 | #include <stddef.h> |
21 | #include <stdint.h> | |
22 | #include <string.h> | |
cce1d8bd | 23 | #include "util.h" |
064af421 | 24 | |
43d1478b CB |
25 | #ifdef __cplusplus |
26 | extern "C" { | |
27 | #endif | |
28 | ||
/* Returns 'x' rotated left by 'k' bits.
 *
 * The rotation count is reduced modulo 32, so 'k' == 0 (or any multiple of
 * 32) returns 'x' unchanged instead of shifting a 32-bit value by 32 bits,
 * which is undefined behavior in C.  For 'k' in the range 1...31 the result
 * is the same as the classic (x << k) | (x >> (32 - k)) formulation. */
static inline uint32_t
hash_rot(uint32_t x, int k)
{
    unsigned int left = (unsigned int) k & 31;
    unsigned int right = (32 - left) & 31;

    return (x << left) | (x >> right);
}
064af421 | 34 | |
c49d1dd1 | 35 | uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis); |
2a638b8d | 36 | /* The hash input must be a word larger than 128 bits. */ |
468cdd91 JS |
37 | void hash_bytes128(const void *_, size_t n_bytes, uint32_t basis, |
38 | ovs_u128 *out); | |
c49d1dd1 BP |
39 | |
40 | static inline uint32_t hash_int(uint32_t x, uint32_t basis); | |
41 | static inline uint32_t hash_2words(uint32_t, uint32_t); | |
5df26bd0 GS |
42 | static inline uint32_t hash_uint64(const uint64_t); |
43 | static inline uint32_t hash_uint64_basis(const uint64_t x, | |
44 | const uint32_t basis); | |
c49d1dd1 BP |
45 | uint32_t hash_3words(uint32_t, uint32_t, uint32_t); |
46 | ||
47 | static inline uint32_t hash_boolean(bool x, uint32_t basis); | |
48 | uint32_t hash_double(double, uint32_t basis); | |
49 | ||
50 | static inline uint32_t hash_pointer(const void *, uint32_t basis); | |
51 | static inline uint32_t hash_string(const char *, uint32_t basis); | |
52 | ||
53 | /* Murmurhash by Austin Appleby, | |
91bb4a0b | 54 | * from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp |
c49d1dd1 BP |
55 | * |
56 | * The upstream license there says: | |
57 | * | |
58 | * // MurmurHash3 was written by Austin Appleby, and is placed in the public | |
59 | * // domain. The author hereby disclaims copyright to this source code. | |
60 | * | |
61 | * See hash_words() for sample usage. */ | |
62 | ||
/* Mixes the 32-bit word 'data' into the running value 'hash' and returns the
 * result.  This is only the "scramble and XOR" step; mhash_add() applies the
 * rotate-and-multiply step on top of it. */
static inline uint32_t mhash_add__(uint32_t hash, uint32_t data)
{
    uint32_t mixed = data;

    if (mixed != 0) {
        mixed *= 0xcc9e2d51;
        mixed = hash_rot(mixed, 15);
        mixed *= 0x1b873593;
        hash ^= mixed;
    }

    /* A zero-valued 'data' leaves 'hash' untouched. */
    return hash;
}
064af421 | 75 | |
/* Folds 'data' into 'hash' with mhash_add__(), rotates the result left by 13
 * bits, and returns that value multiplied by 5 plus 0xe6546b64. */
static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
{
    uint32_t rotated = hash_rot(mhash_add__(hash, data), 13);

    return rotated * 5 + 0xe6546b64;
}
064af421 | 82 | |
/* Final mixing step: alternates xor-shifts (16, 13, 16 bits) with two
 * multiplications and returns the thoroughly mixed 32-bit value. */
static inline uint32_t mhash_finish(uint32_t hash)
{
    uint32_t h = hash;

    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
}
064af421 | 92 | |
67702b79 BP |
93 | static inline uint32_t hash_add(uint32_t hash, uint32_t data); |
94 | static inline uint32_t hash_add64(uint32_t hash, uint64_t data); | |
67702b79 BP |
95 | |
96 | static inline uint32_t hash_add_words(uint32_t, const uint32_t *, size_t); | |
97 | static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, size_t); | |
98 | static inline uint32_t hash_add_bytes32(uint32_t, const uint32_t *, size_t); | |
99 | static inline uint32_t hash_add_bytes64(uint32_t, const uint64_t *, size_t); | |
100 | ||
ff8eeabd | 101 | #if !(defined(__SSE4_2__) && defined(__x86_64__)) |
468cdd91 | 102 | /* Mhash-based implementation. */ |
ff8eeabd | 103 | |
/* Mhash flavor of hash_add(): folds the 32-bit word 'data' into 'hash' by
 * delegating to mhash_add(), and returns the updated running hash. */
static inline uint32_t hash_add(uint32_t hash, uint32_t data)
{
    uint32_t updated = mhash_add(hash, data);

    return updated;
}
108 | ||
/* Mhash flavor of hash_add64(): folds the 64-bit 'data' into 'hash' as two
 * 32-bit words, the low half first and then the high half. */
static inline uint32_t hash_add64(uint32_t hash, uint64_t data)
{
    uint32_t low = (uint32_t) data;
    uint32_t high = (uint32_t) (data >> 32);

    hash = hash_add(hash, low);
    return hash_add(hash, high);
}
113 | ||
/* Mhash flavor of hash_finish(): XORs 'final' into the running 'hash' and
 * passes the result through mhash_finish(). */
static inline uint32_t hash_finish(uint32_t hash, uint32_t final)
{
    uint32_t combined = hash ^ final;

    return mhash_finish(combined);
}
118 | ||
/* Hashes the 'n_words' 32-bit words at 'p', seeded with 'basis', and returns
 * the result.  'p' must be properly aligned.
 *
 * The words are accumulated with hash_add_words() and the byte count
 * ('n_words' * 4) is fed to hash_finish().
 *
 * Kept inline so that the compiler can see 'n_words', which is a constant in
 * many callers. */
static inline uint32_t
hash_words_inline(const uint32_t *p, size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_add_words(basis, p, n_words);

    return hash_finish(hash, n_words * 4);
}
129 | ||
/* Hashes the 'n_words' 64-bit words at 'p', seeded with 'basis'.  The words
 * are accumulated with hash_add_words64() and the byte count ('n_words' * 8)
 * is fed to hash_finish(). */
static inline uint32_t
hash_words64_inline(const uint64_t *p, size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_add_words64(basis, p, n_words);

    return hash_finish(hash, n_words * 8);
}
135 | ||
/* Returns a hash of the pointer value 'p', seeded with 'basis'. */
static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
    /* Pointers are often hashed by simply casting them to an integer type,
     * but the low bits of a pointer are frequently all 0 for alignment
     * reasons.  It is hard to guess where the entropy really is, so we give
     * up and run the value through a high-quality hash function instead.
     *
     * The cast goes through uintptr_t first to suppress a warning on 64-bit
     * systems about casting a pointer to an integer of a different size.
     * Dropping the upper bits is OK here, since most of the entropy in the
     * pointer is almost certainly in the lower 32 bits. */
    uintptr_t address = (uintptr_t) p;
    uint32_t low_bits = (uint32_t) address;

    return hash_int(low_bits, basis);
}
148 | ||
/* Mhash flavor of hash_2words(): starts from 'x' as the running hash, adds a
 * zero word and then 'y', and finishes with a length of 8. */
static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
    uint32_t hash = hash_add(x, 0);

    hash = hash_add(hash, y);
    return hash_finish(hash, 8);
}
153 | ||
/* Mhash flavor of hash_uint64_basis(): hashes the 64-bit value 'x' seeded
 * with 'basis', finishing with a length of 8. */
static inline uint32_t hash_uint64_basis(const uint64_t x,
                                         const uint32_t basis)
{
    uint32_t hash = hash_add64(basis, x);

    return hash_finish(hash, 8);
}
159 | ||
/* Hashes the 64-bit value 'x' with a basis of 0. */
static inline uint32_t hash_uint64(const uint64_t x)
{
    const uint32_t basis = 0;

    return hash_uint64_basis(x, basis);
}
ff8eeabd JR |
164 | |
165 | #else /* __SSE4_2__ && __x86_64__ */ | |
166 | #include <smmintrin.h> | |
167 | ||
/* SSE4.2 flavor of hash_add(): folds the 32-bit word 'data' into 'hash' with
 * the _mm_crc32_u32() intrinsic. */
static inline uint32_t hash_add(uint32_t hash, uint32_t data)
{
    uint32_t crc = _mm_crc32_u32(hash, data);

    return crc;
}
172 | ||
/* SSE4.2 flavor of hash_add64(): folds all 64 bits of 'data' into 'hash' with
 * a single _mm_crc32_u64() call, i.e. the halves of 'data' are added in
 * memory order.  The intrinsic's result is narrowed to 32 bits on return. */
static inline uint32_t hash_add64(uint32_t hash, uint64_t data)
{
    uint64_t crc = _mm_crc32_u64(hash, data);

    return (uint32_t) crc;
}
178 | ||
/* SSE4.2 flavor of hash_finish(): folds 'final' into 'hash' with
 * _mm_crc32_u64(), multiplies by a finishing constant, and XORs in the upper
 * half of the low 32 bits before narrowing the result to 32 bits. */
static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
{
    /* The finishing multiplier 0x805204f3 has been experimentally
     * derived to pass the testsuite hash tests. */
    uint64_t scaled = _mm_crc32_u64(hash, final) * 0x805204f3;
    uint32_t low = (uint32_t) scaled;

    /* Increase entropy in LSBs. */
    return scaled ^ (low >> 16);
}
186 | ||
/* Returns the hash of the 'n_words' 32-bit words at 'p_', starting from
 * 'basis'.  We access 'p_' as a uint64_t pointer, which is fine for
 * __SSE4_2__.
 *
 * Three independent CRC accumulators are used: 'hash1' is seeded with
 * 'basis', 'hash2' with 0 and 'hash3' with 'n_words'.  The main loop consumes
 * six 32-bit words (three 64-bit loads) per iteration; the switch handles the
 * 1...5 words that may remain, and hash_finish() combines the accumulators.
 *
 * The loop is bounded by the number of words still unread rather than by a
 * precomputed "limit" pointer, so no pointer before the start of the input is
 * ever formed when 'n_words' is less than 6.
 *
 * This is inlined for the compiler to have access to the 'n_words', which
 * in many cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
{
    const uint64_t *p = (const void *)p_;
    const uint32_t *endp = p_ + n_words;
    uint64_t hash1 = basis;
    uint64_t hash2 = 0;
    uint64_t hash3 = n_words;

    while (endp - (const uint32_t *)p >= 6) {
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u64(hash3, p[2]);
        p += 3;
    }

    /* Fewer than six 32-bit words are left at this point. */
    switch (endp - (const uint32_t *)p) {
    case 1:
        hash1 = _mm_crc32_u32(hash1, *(const uint32_t *)&p[0]);
        break;
    case 2:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        break;
    case 3:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u32(hash2, *(const uint32_t *)&p[1]);
        break;
    case 4:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        break;
    case 5:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u32(hash3, *(const uint32_t *)&p[2]);
        break;
    default:
        /* Nothing left to add. */
        break;
    }
    return hash_finish(hash1, hash2 << 32 | hash3);
}
231 | ||
/* A simpler version for 64-bit data.
 * 'n_words' is the count of 64-bit words at 'p'; 'basis' is a 32-bit value
 * that seeds the first accumulator.
 *
 * Three independent CRC accumulators are used ('hash1' seeded with 'basis',
 * 'hash2' with 0, 'hash3' with 'n_words').  The main loop consumes three
 * words per iteration, the switch handles the one or two words that may
 * remain, and hash_finish() combines the accumulators.
 *
 * The loop is bounded by the number of words still unread rather than by a
 * precomputed "limit" pointer, so no pointer before the start of the input is
 * ever formed when 'n_words' is less than 3. */
static inline uint32_t
hash_words64_inline(const uint64_t p[], size_t n_words, uint32_t basis)
{
    const uint64_t *endp = p + n_words;
    uint64_t hash1 = basis;
    uint64_t hash2 = 0;
    uint64_t hash3 = n_words;

    while (endp - p >= 3) {
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u64(hash3, p[2]);
        p += 3;
    }

    /* Fewer than three 64-bit words are left at this point. */
    switch (endp - p) {
    case 1:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        break;
    case 2:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        break;
    default:
        /* Nothing left to add. */
        break;
    }
    return hash_finish(hash1, hash2 << 32 | hash3);
}
260 | ||
/* SSE4.2 flavor of hash_uint64_basis(): hashes the 64-bit value 'x' seeded
 * with 'basis'. */
static inline uint32_t hash_uint64_basis(const uint64_t x,
                                         const uint32_t basis)
{
    uint32_t hash = hash_add64(basis, x);

    /* '23' chosen to mix bits enough for the test-hash to pass. */
    return hash_finish(hash, 23);
}
267 | ||
/* Hashes the 64-bit value 'x' with a basis of 0. */
static inline uint32_t hash_uint64(const uint64_t x)
{
    const uint32_t basis = 0;

    return hash_uint64_basis(x, basis);
}
272 | ||
/* SSE4.2 flavor of hash_2words(): packs 'y' into the upper and 'x' into the
 * lower 32 bits of a 64-bit value and hashes that with hash_uint64(). */
static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
    uint64_t packed = ((uint64_t) y << 32) | x;

    return hash_uint64(packed);
}
277 | ||
/* SSE4.2 flavor of hash_pointer(): converts 'p' to a 64-bit integer and
 * hashes it with hash_uint64_basis(), seeded with 'basis'. */
static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
    uint64_t address = (uint64_t) (uintptr_t) p;

    return hash_uint64_basis(address, basis);
}
282 | #endif | |
283 | ||
284 | uint32_t hash_words__(const uint32_t p[], size_t n_words, uint32_t basis); | |
4ad07ad7 | 285 | uint32_t hash_words64__(const uint64_t p[], size_t n_words, uint32_t basis); |
ff8eeabd JR |
286 | |
287 | /* Inline the larger hash functions only when 'n_words' is known to be | |
288 | * compile-time constant. */ | |
289 | #if __GNUC__ >= 4 | |
/* Hashes the 'n_words' 32-bit words at 'p', seeded with 'basis'.  Uses the
 * inline implementation when the compiler can prove 'n_words' is a
 * compile-time constant, and the out-of-line hash_words__() otherwise. */
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
    return (__builtin_constant_p(n_words)
            ? hash_words_inline(p, n_words, basis)
            : hash_words__(p, n_words, basis));
}
299 | ||
/* Hashes the 'n_words' 64-bit words at 'p', seeded with 'basis'.  Uses the
 * inline implementation when the compiler can prove 'n_words' is a
 * compile-time constant, and the out-of-line hash_words64__() otherwise. */
static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint32_t basis)
{
    return (__builtin_constant_p(n_words)
            ? hash_words64_inline(p, n_words, basis)
            : hash_words64__(p, n_words, basis));
}
309 | ||
310 | #else | |
311 | ||
/* Fallback for compilers without __builtin_constant_p(): always hashes the
 * 'n_words' 32-bit words at 'p' through the out-of-line hash_words__(). */
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_words__(p, n_words, basis);

    return hash;
}
317 | ||
/* Fallback for compilers without __builtin_constant_p(): always hashes the
 * 'n_words' 64-bit words at 'p' through the out-of-line hash_words64__(). */
static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_words64__(p, n_words, basis);

    return hash;
}
323 | #endif | |
324 | ||
/* Hashes 'n_bytes' bytes at 'p' as 32-bit words, seeded with 'basis'.  The
 * word count is 'n_bytes' / 4, so any trailing bytes that do not fill a whole
 * word are not hashed. */
static inline uint32_t
hash_bytes32(const uint32_t p[], size_t n_bytes, uint32_t basis)
{
    return hash_words(p, n_bytes / sizeof(uint32_t), basis);
}
330 | ||
/* Hashes 'n_bytes' bytes at 'p' as 64-bit words, seeded with 'basis'.  The
 * word count is 'n_bytes' / 8, so any trailing bytes that do not fill a whole
 * word are not hashed. */
static inline uint32_t
hash_bytes64(const uint64_t p[], size_t n_bytes, uint32_t basis)
{
    return hash_words64(p, n_bytes / sizeof(uint64_t), basis);
}
336 | ||
/* Hashes the null-terminated string 's' (excluding the terminator), seeded
 * with 'basis', by passing its strlen() bytes to hash_bytes(). */
static inline uint32_t hash_string(const char *s, uint32_t basis)
{
    size_t length = strlen(s);

    return hash_bytes(s, length, basis);
}
341 | ||
/* Hashes the 32-bit integer 'x' seeded with 'basis'; implemented as
 * hash_2words() applied to the pair ('x', 'basis'). */
static inline uint32_t hash_int(uint32_t x, uint32_t basis)
{
    uint32_t hash = hash_2words(x, basis);

    return hash;
}
346 | ||
/* An attempt at a useful 1-bit hash function.  Has not been analyzed for
 * quality.
 *
 * Picks one of two fixed constants depending on 'x' and XORs it with 'basis'
 * rotated left by one bit. */
static inline uint32_t hash_boolean(bool x, uint32_t basis)
{
    /* NOTE(review): the original comments describe these constants as
     * hash_int(1, 0) and hash_int(2, 0) respectively; that has not been
     * re-verified against the hash implementations in this file. */
    const uint32_t P0 = 0xc2b73583;
    const uint32_t P1 = 0xe90f1258;
    uint32_t rotated = hash_rot(basis, 1);

    if (x) {
        return P0 ^ rotated;
    }
    return P1 ^ rotated;
}
67702b79 BP |
355 | \f |
356 | /* Helper functions for calling hash_add() for several 32- or 64-bit words in a | |
357 | * buffer. These are not hash functions by themselves, since they need | |
358 | * hash_finish() to be called, so if you are looking for a full hash function | |
359 | * see hash_words(), etc. */ | |
360 | ||
/* Folds the 'n_words' 32-bit words at 'p' into 'hash', in order, with
 * hash_add() and returns the updated running hash.  The caller still has to
 * call hash_finish() on the result. */
static inline uint32_t
hash_add_words(uint32_t hash, const uint32_t *p, size_t n_words)
{
    for (size_t left = n_words; left > 0; left--) {
        hash = hash_add(hash, *p++);
    }
    return hash;
}
369 | ||
/* Folds the 'n_words' 64-bit words at 'p' into 'hash', in order, with
 * hash_add64() and returns the updated running hash.  The caller still has to
 * call hash_finish() on the result. */
static inline uint32_t
hash_add_words64(uint32_t hash, const uint64_t *p, size_t n_words)
{
    for (size_t left = n_words; left > 0; left--) {
        hash = hash_add64(hash, *p++);
    }
    return hash;
}
378 | ||
/* Byte-count wrapper around hash_add_words(): folds 'n_bytes' / 4 32-bit
 * words at 'p' into 'hash'.  Trailing bytes that do not fill a whole word are
 * not added. */
static inline uint32_t
hash_add_bytes32(uint32_t hash, const uint32_t *p, size_t n_bytes)
{
    size_t n_words = n_bytes / 4;

    return hash_add_words(hash, p, n_words);
}
384 | ||
/* Byte-count wrapper around hash_add_words64(): folds 'n_bytes' / 8 64-bit
 * words at 'p' into 'hash'.  Trailing bytes that do not fill a whole word are
 * not added. */
static inline uint32_t
hash_add_bytes64(uint32_t hash, const uint64_t *p, size_t n_bytes)
{
    size_t n_words = n_bytes / 8;

    return hash_add_words64(hash, p, n_words);
}
ff8eeabd | 390 | |
43d1478b CB |
391 | #ifdef __cplusplus |
392 | } | |
393 | #endif | |
394 | ||
064af421 | 395 | #endif /* hash.h */ |