]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
0a96a21b | 2 | * Copyright (c) 2008, 2009, 2010, 2012, 2013, 2014, 2016 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | #ifndef HASH_H | |
17 | #define HASH_H 1 | |
18 | ||
8e542118 | 19 | #include <stdbool.h> |
064af421 BP |
20 | #include <stddef.h> |
21 | #include <stdint.h> | |
22 | #include <string.h> | |
cce1d8bd | 23 | #include "util.h" |
064af421 | 24 | |
43d1478b CB |
25 | #ifdef __cplusplus |
26 | extern "C" { | |
27 | #endif | |
28 | ||
d556327b BP |
/* Returns 'x' rotated left by 'k' bit positions.
 *
 * The rotate count is reduced modulo 32, so 'k' == 0 (or any multiple of 32)
 * returns 'x' unchanged instead of evaluating 'x >> 32', which is undefined
 * behavior for a 32-bit operand.  For 'k' in the range 1...31 the result is
 * the same as the plain "(x << k) | (x >> (32 - k))" formulation. */
static inline uint32_t
hash_rot(uint32_t x, int k)
{
    unsigned int left = (unsigned int) k & 31;
    unsigned int right = (32 - left) & 31;

    return (x << left) | (x >> right);
}
064af421 | 34 | |
c49d1dd1 | 35 | uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis); |
2a638b8d | 36 | /* The hash input must be a word larger than 128 bits. */ |
468cdd91 JS |
37 | void hash_bytes128(const void *_, size_t n_bytes, uint32_t basis, |
38 | ovs_u128 *out); | |
c49d1dd1 BP |
39 | |
40 | static inline uint32_t hash_int(uint32_t x, uint32_t basis); | |
41 | static inline uint32_t hash_2words(uint32_t, uint32_t); | |
5df26bd0 GS |
42 | static inline uint32_t hash_uint64(const uint64_t); |
43 | static inline uint32_t hash_uint64_basis(const uint64_t x, | |
44 | const uint32_t basis); | |
c49d1dd1 BP |
45 | uint32_t hash_3words(uint32_t, uint32_t, uint32_t); |
46 | ||
47 | static inline uint32_t hash_boolean(bool x, uint32_t basis); | |
48 | uint32_t hash_double(double, uint32_t basis); | |
49 | ||
50 | static inline uint32_t hash_pointer(const void *, uint32_t basis); | |
51 | static inline uint32_t hash_string(const char *, uint32_t basis); | |
52 | ||
53 | /* Murmurhash by Austin Appleby, | |
91bb4a0b | 54 | * from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp |
c49d1dd1 BP |
55 | * |
56 | * The upstream license there says: | |
57 | * | |
b5343307 BP |
58 | * MurmurHash3 was written by Austin Appleby, and is placed in the public |
59 | * domain. The author hereby disclaims copyright to this source code. | |
c49d1dd1 BP |
60 | * |
61 | * See hash_words() for sample usage. */ | |
62 | ||
/* Scrambles 'data' (multiply, rotate left by 15, multiply) and XORs the
 * result into 'hash', returning the new value.  Unlike mhash_add(), this
 * does not apply the trailing rotate/multiply-add step to 'hash'. */
static inline uint32_t mhash_add__(uint32_t hash, uint32_t data)
{
    /* A zero 'data' scrambles to zero, so XORing it in would leave 'hash'
     * unchanged; skip the arithmetic in that case. */
    if (data != 0) {
        uint32_t k = data * 0xcc9e2d51;

        k = hash_rot(k, 15) * 0x1b873593;
        hash ^= k;
    }
    return hash;
}
064af421 | 75 | |
/* Mixes the 32-bit word 'data' into 'hash' and returns the updated hash:
 * mhash_add__() folds the word in, then the result is rotated left by 13,
 * multiplied by 5 and offset by a constant. */
static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
{
    uint32_t mixed = hash_rot(mhash_add__(hash, data), 13);

    return mixed * 5 + 0xe6546b64;
}
064af421 | 82 | |
/* Final avalanche step: alternates xor-shifts (by 16, 13 and 16 bits) with
 * two multiplications and returns the fully mixed value. */
static inline uint32_t mhash_finish(uint32_t hash)
{
    uint32_t h = hash;

    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
}
064af421 | 92 | |
67702b79 BP |
93 | static inline uint32_t hash_add(uint32_t hash, uint32_t data); |
94 | static inline uint32_t hash_add64(uint32_t hash, uint64_t data); | |
67702b79 BP |
95 | |
96 | static inline uint32_t hash_add_words(uint32_t, const uint32_t *, size_t); | |
97 | static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, size_t); | |
98 | static inline uint32_t hash_add_bytes32(uint32_t, const uint32_t *, size_t); | |
99 | static inline uint32_t hash_add_bytes64(uint32_t, const uint64_t *, size_t); | |
100 | ||
2525148a YWATC |
101 | #if (defined(__ARM_FEATURE_CRC32) && defined(__aarch64__)) |
102 | #include "hash-aarch64.h" | |
103 | ||
104 | #elif !(defined(__SSE4_2__) && defined(__x86_64__)) | |
468cdd91 | 105 | /* Mhash-based implementation. */ |
ff8eeabd | 106 | |
33c6a1b9 JR |
/* Mhash-based hash_add(): mixes the 32-bit word 'data' into 'hash' by
 * delegating to mhash_add(). */
static inline uint32_t hash_add(uint32_t hash, uint32_t data)
{
    uint32_t updated = mhash_add(hash, data);

    return updated;
}
111 | ||
aae7c34f JR |
/* Mixes the 64-bit value 'data' into 'hash' as two 32-bit words: first the
 * low 32 bits, then the high 32 bits. */
static inline uint32_t hash_add64(uint32_t hash, uint64_t data)
{
    uint32_t low = (uint32_t) data;
    uint32_t high = (uint32_t) (data >> 32);

    hash = hash_add(hash, low);
    return hash_add(hash, high);
}
116 | ||
33c6a1b9 JR |
/* Completes a hash computation: XORs 'final' into the accumulated 'hash' and
 * runs the result through mhash_finish(). */
static inline uint32_t hash_finish(uint32_t hash, uint32_t final)
{
    uint32_t combined = hash ^ final;

    return mhash_finish(combined);
}
121 | ||
ff8eeabd JR |
/* Returns the hash of the 'n_words' 32-bit words at 'p', starting from
 * 'basis'.  'p' must be properly aligned.
 *
 * The words are accumulated with hash_add_words() and the byte length
 * ('n_words' * 4) is passed to hash_finish() as the final value.
 *
 * This is inlined so that the compiler can see 'n_words', which in many
 * cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t *p, size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_add_words(basis, p, n_words);

    return hash_finish(hash, n_words * 4);
}
132 | ||
/* Returns the hash of the 'n_words' 64-bit words at 'p', starting from
 * 'basis'.  The words are accumulated with hash_add_words64() and the byte
 * length ('n_words' * 8) is passed to hash_finish() as the final value. */
static inline uint32_t
hash_words64_inline(const uint64_t *p, size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_add_words64(basis, p, n_words);

    return hash_finish(hash, n_words * 8);
}
138 | ||
00644675 BP |
/* Returns a hash of the pointer value 'p', starting from 'basis'. */
static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
    /* Pointers are often hashed by simply casting them to an integer type,
     * but that has pitfalls because the low bits of a pointer are frequently
     * all zero for alignment reasons.  It is hard to guess where the entropy
     * really is, so we give up and run the value through a high-quality hash
     * function instead.
     *
     * Going through uintptr_t first suppresses the warning that 64-bit
     * systems emit when a pointer is cast to an integer of a different size.
     * Dropping the upper bits is acceptable here, since most of the entropy
     * in the pointer is almost certainly in the lower 32 bits. */
    uintptr_t address = (uintptr_t) p;
    uint32_t low_bits = (uint32_t) address;

    return hash_int(low_bits, basis);
}
151 | ||
/* Returns a hash of the two 32-bit words 'x' and 'y'.  'x' seeds the
 * accumulator, a zero word and then 'y' are added to it, and the result is
 * finished with a final value of 8. */
static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
    uint32_t hash = hash_add(x, 0);

    hash = hash_add(hash, y);
    return hash_finish(hash, 8);
}
156 | ||
aae7c34f JR |
/* Returns a hash of the 64-bit value 'x', starting from 'basis'.  The value
 * is mixed in with hash_add64() and finished with its byte length, 8. */
static inline uint32_t hash_uint64_basis(const uint64_t x,
                                         const uint32_t basis)
{
    uint32_t hash = hash_add64(basis, x);

    return hash_finish(hash, 8);
}
162 | ||
/* Returns a hash of the 64-bit value 'x' using a basis of 0. */
static inline uint32_t hash_uint64(const uint64_t x)
{
    const uint32_t basis = 0;

    return hash_uint64_basis(x, basis);
}
ff8eeabd JR |
167 | |
168 | #else /* __SSE4_2__ && __x86_64__ */ | |
169 | #include <smmintrin.h> | |
170 | ||
/* CRC32-based hash_add(): folds the 32-bit word 'data' into 'hash' with the
 * _mm_crc32_u32() intrinsic. */
static inline uint32_t hash_add(uint32_t hash, uint32_t data)
{
    uint32_t crc = _mm_crc32_u32(hash, data);

    return crc;
}
175 | ||
aae7c34f JR |
/* Folds the 64-bit value 'data' into 'hash' with a single _mm_crc32_u64()
 * call, which adds the halves of 'data' in memory order.  The result is
 * returned as a 32-bit value. */
static inline uint32_t hash_add64(uint32_t hash, uint64_t data)
{
    uint32_t crc = _mm_crc32_u64(hash, data);

    return crc;
}
181 | ||
ff8eeabd JR |
/* Completes a CRC32-based hash: folds 'final' into 'hash', multiplies by a
 * finishing constant, and XORs the upper half of the low 32 bits into the
 * lower half.  Only the low 32 bits of the result are returned. */
static inline uint32_t hash_finish(uint64_t hash, uint64_t final)
{
    /* The finishing multiplier 0x805204f3 has been experimentally
     * derived to pass the testsuite hash tests. */
    uint64_t product = _mm_crc32_u64(hash, final) * 0x805204f3;
    uint32_t low = (uint32_t) product;

    /* Increase entropy in LSBs. */
    return low ^ (low >> 16);
}
189 | ||
/* Returns the hash of the 'n_words' 32-bit words at 'p_', starting from
 * 'basis'.  We access 'p_' as a uint64_t pointer, which is fine for
 * __SSE4_2__.
 *
 * Three CRC accumulators are kept: 'hash1' is seeded with 'basis', 'hash2'
 * with 0 and 'hash3' with 'n_words'.  They are combined by hash_finish().
 *
 * The main loop is driven by a count of the words still to be consumed, so
 * no pointer outside the input buffer is ever formed, even when 'n_words' is
 * smaller than one full iteration (6 words).
 *
 * This is inlined for the compiler to have access to the 'n_words', which
 * in many cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
{
    const uint64_t *p = (const void *) p_;
    uint64_t hash1 = basis;
    uint64_t hash2 = 0;
    uint64_t hash3 = n_words;
    size_t n_left = n_words;    /* 32-bit words not yet hashed. */

    /* Consume six 32-bit words (three 64-bit loads) per iteration, one load
     * per accumulator. */
    while (n_left >= 6) {
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u64(hash3, p[2]);
        p += 3;
        n_left -= 6;
    }

    /* Fold in the 0...5 words that remain.  An odd trailing word is read
     * as a single 32-bit value. */
    switch (n_left) {
    case 1:
        hash1 = _mm_crc32_u32(hash1, *(const uint32_t *) &p[0]);
        break;
    case 2:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        break;
    case 3:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u32(hash2, *(const uint32_t *) &p[1]);
        break;
    case 4:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        break;
    case 5:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u32(hash3, *(const uint32_t *) &p[2]);
        break;
    default:
        /* Nothing left over. */
        break;
    }
    return hash_finish(hash1, hash2 << 32 | hash3);
}
234 | ||
/* A simpler version for 64-bit data.
 * 'n_words' is the count of 64-bit words at 'p'; 'basis' is a 32-bit value
 * that seeds the first accumulator.
 *
 * Three CRC accumulators are kept: 'hash1' is seeded with 'basis', 'hash2'
 * with 0 and 'hash3' with 'n_words'.  They are combined by hash_finish().
 *
 * The main loop is driven by a count of the words still to be consumed, so
 * no pointer outside the input buffer is ever formed, even when 'n_words' is
 * less than 3. */
static inline uint32_t
hash_words64_inline(const uint64_t p[], size_t n_words, uint32_t basis)
{
    uint64_t hash1 = basis;
    uint64_t hash2 = 0;
    uint64_t hash3 = n_words;
    size_t n_left = n_words;    /* 64-bit words not yet hashed. */

    /* Consume three 64-bit words per iteration, one per accumulator. */
    while (n_left >= 3) {
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u64(hash3, p[2]);
        p += 3;
        n_left -= 3;
    }

    /* Fold in the 0...2 words that remain. */
    switch (n_left) {
    case 1:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        break;
    case 2:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        break;
    default:
        /* Nothing left over. */
        break;
    }
    return hash_finish(hash1, hash2 << 32 | hash3);
}
263 | ||
/* Returns a hash of the 64-bit value 'x', starting from 'basis'. */
static inline uint32_t hash_uint64_basis(const uint64_t x,
                                         const uint32_t basis)
{
    uint32_t crc = hash_add64(basis, x);

    /* '23' chosen to mix bits enough for the test-hash to pass. */
    return hash_finish(crc, 23);
}
270 | ||
/* Returns a hash of the 64-bit value 'x' using a basis of 0. */
static inline uint32_t hash_uint64(const uint64_t x)
{
    const uint32_t basis = 0;

    return hash_uint64_basis(x, basis);
}
275 | ||
/* Returns a hash of the two 32-bit words 'x' and 'y'.  They are packed into
 * one 64-bit value, with 'y' in the upper half and 'x' in the lower half,
 * and hashed with hash_uint64(). */
static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
    uint64_t packed = ((uint64_t) y << 32) | x;

    return hash_uint64(packed);
}
280 | ||
/* Returns a hash of the pointer value 'p', starting from 'basis'.  The
 * pointer is converted to an integer via uintptr_t, widened to 64 bits and
 * hashed with hash_uint64_basis(). */
static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
    uint64_t address = (uint64_t) (uintptr_t) p;

    return hash_uint64_basis(address, basis);
}
285 | #endif | |
286 | ||
287 | uint32_t hash_words__(const uint32_t p[], size_t n_words, uint32_t basis); | |
4ad07ad7 | 288 | uint32_t hash_words64__(const uint64_t p[], size_t n_words, uint32_t basis); |
ff8eeabd JR |
289 | |
290 | /* Inline the larger hash functions only when 'n_words' is known to be | |
291 | * compile-time constant. */ | |
292 | #if __GNUC__ >= 4 | |
/* Returns the hash of the 'n_words' 32-bit words at 'p', starting from
 * 'basis'.  The inline implementation is used only when the compiler can
 * prove 'n_words' is a compile-time constant; otherwise the out-of-line
 * hash_words__() is called. */
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
    if (!__builtin_constant_p(n_words)) {
        return hash_words__(p, n_words, basis);
    }
    return hash_words_inline(p, n_words, basis);
}
302 | ||
/* Returns the hash of the 'n_words' 64-bit words at 'p', starting from
 * 'basis'.  The inline implementation is used only when the compiler can
 * prove 'n_words' is a compile-time constant; otherwise the out-of-line
 * hash_words64__() is called. */
static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint32_t basis)
{
    if (!__builtin_constant_p(n_words)) {
        return hash_words64__(p, n_words, basis);
    }
    return hash_words64_inline(p, n_words, basis);
}
312 | ||
313 | #else | |
314 | ||
/* Returns the hash of the 'n_words' 32-bit words at 'p', starting from
 * 'basis'.  This variant always calls the out-of-line hash_words__(). */
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_words__(p, n_words, basis);

    return hash;
}
320 | ||
/* Returns the hash of the 'n_words' 64-bit words at 'p', starting from
 * 'basis'.  This variant always calls the out-of-line hash_words64__(). */
static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint32_t basis)
{
    uint32_t hash = hash_words64__(p, n_words, basis);

    return hash;
}
326 | #endif | |
327 | ||
0a96a21b BP |
/* Returns the hash of the 'n_bytes' bytes at 'p', treated as an array of
 * 32-bit words, starting from 'basis'.  'n_bytes' is divided by 4 with
 * integer division, so any trailing partial word is not hashed. */
static inline uint32_t
hash_bytes32(const uint32_t p[], size_t n_bytes, uint32_t basis)
{
    size_t n_words = n_bytes / 4;

    return hash_words(p, n_words, basis);
}
333 | ||
/* Returns the hash of the 'n_bytes' bytes at 'p', treated as an array of
 * 64-bit words, starting from 'basis'.  'n_bytes' is divided by 8 with
 * integer division, so any trailing partial word is not hashed. */
static inline uint32_t
hash_bytes64(const uint64_t p[], size_t n_bytes, uint32_t basis)
{
    size_t n_words = n_bytes / 8;

    return hash_words64(p, n_words, basis);
}
339 | ||
ff8eeabd JR |
/* Returns the hash of the null-terminated string 's', starting from 'basis'.
 * The terminating null byte is not included in the hashed bytes. */
static inline uint32_t hash_string(const char *s, uint32_t basis)
{
    size_t length = strlen(s);

    return hash_bytes(s, length, basis);
}
344 | ||
/* Returns a hash of the 32-bit value 'x', starting from 'basis', computed by
 * hashing the pair ('x', 'basis') with hash_2words(). */
static inline uint32_t hash_int(uint32_t x, uint32_t basis)
{
    uint32_t hash = hash_2words(x, basis);

    return hash;
}
349 | ||
/* An attempt at a useful 1-bit hash function.  Has not been analyzed for
 * quality.
 *
 * Picks one of two fixed constants according to 'x' and XORs it with 'basis'
 * rotated left by one bit. */
static inline uint32_t hash_boolean(bool x, uint32_t basis)
{
    /* Noted by the original author as hash_int(1, 0) and hash_int(2, 0),
     * respectively. */
    const uint32_t if_true = 0xc2b73583;
    const uint32_t if_false = 0xe90f1258;
    uint32_t rotated = hash_rot(basis, 1);

    if (x) {
        return if_true ^ rotated;
    }
    return if_false ^ rotated;
}
67702b79 BP |
358 | \f |
359 | /* Helper functions for calling hash_add() for several 32- or 64-bit words in a | |
360 | * buffer. These are not hash functions by themselves, since they need | |
361 | * hash_finish() to be called, so if you are looking for a full hash function | |
362 | * see hash_words(), etc. */ | |
363 | ||
/* Feeds the 'n_words' 32-bit words at 'p' into 'hash', in order, with
 * hash_add() and returns the updated accumulator.  The caller still needs
 * to call hash_finish() to obtain a final hash value. */
static inline uint32_t
hash_add_words(uint32_t hash, const uint32_t *p, size_t n_words)
{
    for (; n_words > 0; n_words--, p++) {
        hash = hash_add(hash, *p);
    }
    return hash;
}
372 | ||
/* Feeds the 'n_words' 64-bit words at 'p' into 'hash', in order, with
 * hash_add64() and returns the updated accumulator.  The caller still needs
 * to call hash_finish() to obtain a final hash value. */
static inline uint32_t
hash_add_words64(uint32_t hash, const uint64_t *p, size_t n_words)
{
    for (; n_words > 0; n_words--, p++) {
        hash = hash_add64(hash, *p);
    }
    return hash;
}
381 | ||
/* Feeds the 'n_bytes' bytes at 'p' into 'hash' as 32-bit words and returns
 * the updated accumulator.  'n_bytes' is divided by 4 with integer division,
 * so any trailing partial word is not added. */
static inline uint32_t
hash_add_bytes32(uint32_t hash, const uint32_t *p, size_t n_bytes)
{
    size_t n_words = n_bytes / 4;

    return hash_add_words(hash, p, n_words);
}
387 | ||
/* Feeds the 'n_bytes' bytes at 'p' into 'hash' as 64-bit words and returns
 * the updated accumulator.  'n_bytes' is divided by 8 with integer division,
 * so any trailing partial word is not added. */
static inline uint32_t
hash_add_bytes64(uint32_t hash, const uint64_t *p, size_t n_bytes)
{
    size_t n_words = n_bytes / 8;

    return hash_add_words64(hash, p, n_words);
}
ff8eeabd | 393 | |
43d1478b CB |
394 | #ifdef __cplusplus |
395 | } | |
396 | #endif | |
397 | ||
064af421 | 398 | #endif /* hash.h */ |