]> git.proxmox.com Git - mirror_ovs.git/blob - lib/hash.h
cirrus: Use FreeBSD 12.2.
[mirror_ovs.git] / lib / hash.h
1 /*
2 * Copyright (c) 2008, 2009, 2010, 2012, 2013, 2014, 2016 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #ifndef HASH_H
17 #define HASH_H 1
18
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <string.h>
23 #include "util.h"
24
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28
/* Rotates the 32-bit value 'x' left by 'k' bit positions and returns the
 * result.
 *
 * The rotate count is reduced modulo 32, so 'k' == 0 (or any multiple of 32)
 * returns 'x' unchanged.  Both shift amounts are masked into the range 0..31
 * because shifting a 32-bit value by 32 or more bits is undefined behavior
 * in C. */
static inline uint32_t
hash_rot(uint32_t x, int k)
{
    const unsigned int left = (unsigned int) k & 31;
    const unsigned int right = (32 - left) & 31;

    return (x << left) | (x >> right);
}
34
/* Public hashing entry points.
 *
 * The prototypes below that are not marked 'static inline' (hash_bytes,
 * hash_bytes128, hash_3words, hash_double) have no definition in this
 * header.  The 'static inline' ones are forward declarations whose bodies
 * appear further down in this header. */
35 uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);
36 /* The hash input must be a word larger than 128 bits. */
37 void hash_bytes128(const void *_, size_t n_bytes, uint32_t basis,
38 ovs_u128 *out);
39
/* Fixed-size integer hashes. */
40 static inline uint32_t hash_int(uint32_t x, uint32_t basis);
41 static inline uint32_t hash_2words(uint32_t, uint32_t);
42 static inline uint32_t hash_uint64(const uint64_t);
43 static inline uint32_t hash_uint64_basis(const uint64_t x,
44 const uint32_t basis);
45 uint32_t hash_3words(uint32_t, uint32_t, uint32_t);
46
/* Hashes of other scalar types. */
47 static inline uint32_t hash_boolean(bool x, uint32_t basis);
48 uint32_t hash_double(double, uint32_t basis);
49
/* Pointer and NUL-terminated string hashes. */
50 static inline uint32_t hash_pointer(const void *, uint32_t basis);
51 static inline uint32_t hash_string(const char *, uint32_t basis);
52
53 /* Murmurhash by Austin Appleby,
54 * from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
55 *
56 * The upstream license there says:
57 *
58 * MurmurHash3 was written by Austin Appleby, and is placed in the public
59 * domain. The author hereby disclaims copyright to this source code.
60 *
61 * See hash_words() for sample usage. */
62
/* Mixes the 32-bit word 'data' into 'hash' and returns the new value.
 *
 * This is only the first half of a mixing step: the rotate-and-multiply
 * applied to the accumulator itself is done by mhash_add(). */
static inline uint32_t mhash_add__(uint32_t hash, uint32_t data)
{
    uint32_t mixed = data;

    /* A zero word would be scrambled to zero anyway, leaving 'hash' as it
     * is, so the arithmetic is skipped in that case. */
    if (mixed != 0) {
        mixed *= 0xcc9e2d51;
        mixed = hash_rot(mixed, 15);
        mixed *= 0x1b873593;
        hash ^= mixed;
    }
    return hash;
}
75
/* Performs one full mixing step: folds 'data' into 'hash' with
 * mhash_add__(), rotates the accumulator left by 13 bits, then applies the
 * "times 5 plus 0xe6546b64" step.  Returns the updated accumulator. */
static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
{
    const uint32_t rotated = hash_rot(mhash_add__(hash, data), 13);

    return rotated * 5 + 0xe6546b64;
}
82
/* Final avalanche step: scrambles the accumulator 'hash' with three
 * xor-shifts (by 16, 13 and 16 bits) interleaved with two multiplications,
 * and returns the result. */
static inline uint32_t mhash_finish(uint32_t hash)
{
    uint32_t h = hash;

    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
}
92
/* Forward declarations of the incremental hashing primitives.
 *
 * hash_add() and hash_add64() are defined by whichever architecture-specific
 * section is selected below (or, presumably, by "hash-aarch64.h" when that
 * header is included instead -- it is not visible here). */
93 static inline uint32_t hash_add(uint32_t hash, uint32_t data);
94 static inline uint32_t hash_add64(uint32_t hash, uint64_t data);
95
/* Buffer helpers built on hash_add()/hash_add64(); their bodies are at the
 * end of this header. */
96 static inline uint32_t hash_add_words(uint32_t, const uint32_t *, size_t);
97 static inline uint32_t hash_add_words64(uint32_t, const uint64_t *, size_t);
98 static inline uint32_t hash_add_bytes32(uint32_t, const uint32_t *, size_t);
99 static inline uint32_t hash_add_bytes64(uint32_t, const uint64_t *, size_t);
100
101 #if (defined(__ARM_FEATURE_CRC32) && defined(__aarch64__))
102 #include "hash-aarch64.h"
103
104 #elif !(defined(__SSE4_2__) && defined(__x86_64__))
105 /* Mhash-based implementation. */
106
/* Folds the 32-bit word 'data' into the running hash 'hash' using the
 * mhash mixing step and returns the updated running hash. */
static inline uint32_t
hash_add(uint32_t hash, uint32_t data)
{
    const uint32_t updated = mhash_add(hash, data);

    return updated;
}
111
/* Folds the 64-bit value 'data' into the running hash 'hash' as two 32-bit
 * words: the low 32 bits first, then the high 32 bits. */
static inline uint32_t
hash_add64(uint32_t hash, uint64_t data)
{
    const uint32_t low = (uint32_t) data;
    const uint32_t high = (uint32_t) (data >> 32);

    hash = hash_add(hash, low);
    return hash_add(hash, high);
}
116
/* Completes a hash computation: xors 'final' into the running hash 'hash'
 * and passes the result through mhash_finish(). */
static inline uint32_t
hash_finish(uint32_t hash, uint32_t final)
{
    const uint32_t combined = hash ^ final;

    return mhash_finish(combined);
}
121
/* Returns the hash of the 'n_words' 32-bit words at 'p', starting from
 * 'basis'.  'p' must be properly aligned.
 *
 * The words are accumulated with hash_add_words() and the byte count
 * ('n_words' * 4) is passed to hash_finish().
 *
 * This is inlined for the compiler to have access to the 'n_words', which
 * in many cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t *p, size_t n_words, uint32_t basis)
{
    const uint32_t accumulated = hash_add_words(basis, p, n_words);

    return hash_finish(accumulated, n_words * 4);
}
132
/* Returns the hash of the 'n_words' 64-bit words at 'p', starting from
 * 'basis'.  The words are accumulated with hash_add_words64() and the byte
 * count ('n_words' * 8) is passed to hash_finish(). */
static inline uint32_t
hash_words64_inline(const uint64_t *p, size_t n_words, uint32_t basis)
{
    const uint32_t accumulated = hash_add_words64(basis, p, n_words);

    return hash_finish(accumulated, n_words * 8);
}
138
/* Returns a hash of the pointer value 'p', starting from 'basis'. */
static inline uint32_t hash_pointer(const void *p, uint32_t basis)
{
    /* Often pointers are hashed simply by casting to integer type, but that
     * has pitfalls since the lower bits of a pointer are often all 0 for
     * alignment reasons.  It's hard to guess where the entropy really is, so
     * we give up here and just use a high-quality hash function.
     *
     * The double cast suppresses a warning on 64-bit systems about casting
     * to an integer of a different size.  That's OK in this case, since most
     * of the entropy in the pointer is almost certainly in the lower 32
     * bits. */
    const uint32_t low_bits = (uint32_t) (uintptr_t) p;

    return hash_int(low_bits, basis);
}
151
/* Returns a hash of the two 32-bit words 'x' and 'y'.
 *
 * 'x' seeds the running hash; a zero word and then 'y' are added to it, and
 * the result is finished with the byte count 8. */
static inline uint32_t hash_2words(uint32_t x, uint32_t y)
{
    uint32_t running = x;

    running = hash_add(running, 0);
    running = hash_add(running, y);
    return hash_finish(running, 8);
}
156
/* Returns a hash of the 64-bit value 'x', starting from 'basis'.  The value
 * is added with hash_add64() and finished with the byte count 8. */
static inline uint32_t
hash_uint64_basis(const uint64_t x, const uint32_t basis)
{
    const uint32_t running = hash_add64(basis, x);

    return hash_finish(running, 8);
}
162
/* Returns a hash of the 64-bit value 'x' using a basis of 0. */
static inline uint32_t
hash_uint64(const uint64_t x)
{
    const uint32_t zero_basis = 0;

    return hash_uint64_basis(x, zero_basis);
}
167
168 #else /* __SSE4_2__ && __x86_64__ */
169 #include <smmintrin.h>
170
/* Folds the 32-bit word 'data' into the running hash 'hash' with the
 * _mm_crc32_u32() intrinsic and returns the updated running hash. */
static inline uint32_t
hash_add(uint32_t hash, uint32_t data)
{
    const uint32_t crc = _mm_crc32_u32(hash, data);

    return crc;
}
175
/* Folds the 64-bit value 'data' into the running hash 'hash' with a single
 * _mm_crc32_u64() call, which adds the halves of 'data' in the memory
 * order.  The intrinsic's 64-bit result is narrowed to 32 bits on return. */
static inline uint32_t
hash_add64(uint32_t hash, uint64_t data)
{
    const uint64_t crc = _mm_crc32_u64(hash, data);

    return (uint32_t) crc;
}
181
/* Completes a hash computation: folds 'final' into the running hash 'hash'
 * with _mm_crc32_u64(), multiplies by a fixed constant, and then xors the
 * low 32 bits with their own upper half.  Returns the 32-bit result. */
static inline uint32_t
hash_finish(uint64_t hash, uint64_t final)
{
    /* The finishing multiplier 0x805204f3 has been experimentally
     * derived to pass the testsuite hash tests. */
    const uint64_t product = _mm_crc32_u64(hash, final) * 0x805204f3;
    const uint32_t low = (uint32_t) product;

    /* Increase entropy in LSBs. */
    return low ^ (low >> 16);
}
189
/* Returns the hash of the 'n_words' 32-bit words at 'p_', starting from
 * 'basis'.  We access 'p_' as a uint64_t pointer, which is fine for
 * __SSE4_2__.
 *
 * The input is consumed six 32-bit words (three 64-bit loads) at a time,
 * each load feeding its own CRC accumulator: 'hash1' starts at 'basis',
 * 'hash2' at 0 and 'hash3' at 'n_words'.  The 0..5 leftover words are then
 * folded in, and the three accumulators are combined by hash_finish().
 *
 * The main loop is driven by a count of remaining words rather than by a
 * "limit" pointer, so that no pointer before the start of the buffer is ever
 * formed when 'n_words' is less than 6.
 *
 * This is inlined for the compiler to have access to the 'n_words', which
 * in many cases is a constant. */
static inline uint32_t
hash_words_inline(const uint32_t p_[], size_t n_words, uint32_t basis)
{
    const uint64_t *p = (const void *)p_;
    uint64_t hash1 = basis;
    uint64_t hash2 = 0;
    uint64_t hash3 = n_words;
    size_t n_left = n_words;        /* 32-bit words not yet hashed. */

    while (n_left >= 6) {
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u64(hash3, p[2]);
        p += 3;
        n_left -= 6;
    }

    /* Tail: an odd leftover word is read as a single 32-bit value so that
     * nothing past the end of the input is accessed. */
    switch (n_left) {
    case 1:
        hash1 = _mm_crc32_u32(hash1, *(const uint32_t *)&p[0]);
        break;
    case 2:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        break;
    case 3:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u32(hash2, *(const uint32_t *)&p[1]);
        break;
    case 4:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        break;
    case 5:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u32(hash3, *(const uint32_t *)&p[2]);
        break;
    default:
        /* Nothing left over. */
        break;
    }
    return hash_finish(hash1, hash2 << 32 | hash3);
}
234
/* A simpler version for 64-bit data.
 * 'n_words' is the count of 64-bit words at 'p'; 'basis' is a 32-bit value
 * that seeds the first accumulator.
 *
 * The input is consumed three 64-bit words at a time, each word feeding its
 * own CRC accumulator: 'hash1' starts at 'basis', 'hash2' at 0 and 'hash3'
 * at 'n_words'.  The 0..2 leftover words are then folded in, and the three
 * accumulators are combined by hash_finish().
 *
 * The main loop is driven by a count of remaining words rather than by a
 * "limit" pointer, so that no pointer before the start of the buffer is ever
 * formed when 'n_words' is less than 3. */
static inline uint32_t
hash_words64_inline(const uint64_t p[], size_t n_words, uint32_t basis)
{
    uint64_t hash1 = basis;
    uint64_t hash2 = 0;
    uint64_t hash3 = n_words;
    size_t n_left = n_words;        /* 64-bit words not yet hashed. */

    while (n_left >= 3) {
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        hash3 = _mm_crc32_u64(hash3, p[2]);
        p += 3;
        n_left -= 3;
    }

    switch (n_left) {
    case 1:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        break;
    case 2:
        hash1 = _mm_crc32_u64(hash1, p[0]);
        hash2 = _mm_crc32_u64(hash2, p[1]);
        break;
    default:
        /* Nothing left over. */
        break;
    }
    return hash_finish(hash1, hash2 << 32 | hash3);
}
263
/* Returns a hash of the 64-bit value 'x', starting from 'basis'.  The value
 * is added with hash_add64() and the result is passed to hash_finish(). */
static inline uint32_t
hash_uint64_basis(const uint64_t x, const uint32_t basis)
{
    const uint32_t running = hash_add64(basis, x);

    /* '23' chosen to mix bits enough for the test-hash to pass. */
    return hash_finish(running, 23);
}
270
/* Returns a hash of the 64-bit value 'x' using a basis of 0. */
static inline uint32_t
hash_uint64(const uint64_t x)
{
    const uint32_t zero_basis = 0;

    return hash_uint64_basis(x, zero_basis);
}
275
/* Returns a hash of the two 32-bit words 'x' and 'y'.  They are packed into
 * one 64-bit value, 'y' in the upper half and 'x' in the lower half, which
 * is then hashed with hash_uint64(). */
static inline uint32_t
hash_2words(uint32_t x, uint32_t y)
{
    const uint64_t packed = (uint64_t)y << 32 | x;

    return hash_uint64(packed);
}
280
/* Returns a hash of the pointer value 'p', starting from 'basis'.  The
 * pointer is converted to a 64-bit integer and hashed with
 * hash_uint64_basis(). */
static inline uint32_t
hash_pointer(const void *p, uint32_t basis)
{
    const uint64_t address = (uint64_t) (uintptr_t) p;

    return hash_uint64_basis(address, basis);
}
285 #endif
286
/* Out-of-line counterparts that hash_words() and hash_words64() below call
 * instead of the *_inline() versions.  They are only declared here; their
 * definitions are not part of this header. */
287 uint32_t hash_words__(const uint32_t p[], size_t n_words, uint32_t basis);
288 uint32_t hash_words64__(const uint64_t p[], size_t n_words, uint32_t basis);
289
290 /* Inline the larger hash functions only when 'n_words' is known to be
291 * compile-time constant. */
292 #if __GNUC__ >= 4
/* Returns the hash of the 'n_words' 32-bit words at 'p', starting from
 * 'basis'.  Uses the inline implementation when the compiler reports
 * 'n_words' as a compile-time constant, and the out-of-line hash_words__()
 * otherwise. */
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
    if (!__builtin_constant_p(n_words)) {
        return hash_words__(p, n_words, basis);
    }
    return hash_words_inline(p, n_words, basis);
}
302
/* Returns the hash of the 'n_words' 64-bit words at 'p', starting from
 * 'basis'.  Uses the inline implementation when the compiler reports
 * 'n_words' as a compile-time constant, and the out-of-line
 * hash_words64__() otherwise. */
static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint32_t basis)
{
    if (!__builtin_constant_p(n_words)) {
        return hash_words64__(p, n_words, basis);
    }
    return hash_words64_inline(p, n_words, basis);
}
312
313 #else
314
/* Returns the hash of the 'n_words' 32-bit words at 'p', starting from
 * 'basis'.  This variant always defers to the out-of-line
 * hash_words__(). */
static inline uint32_t
hash_words(const uint32_t p[], size_t n_words, uint32_t basis)
{
    const uint32_t result = hash_words__(p, n_words, basis);

    return result;
}
320
/* Returns the hash of the 'n_words' 64-bit words at 'p', starting from
 * 'basis'.  This variant always defers to the out-of-line
 * hash_words64__(). */
static inline uint32_t
hash_words64(const uint64_t p[], size_t n_words, uint32_t basis)
{
    const uint32_t result = hash_words64__(p, n_words, basis);

    return result;
}
326 #endif
327
/* Returns the hash of the 'n_bytes' bytes at 'p', taken as 32-bit words and
 * starting from 'basis'.  Only 'n_bytes' / 4 whole words are hashed; any
 * remainder bytes are not included. */
static inline uint32_t
hash_bytes32(const uint32_t p[], size_t n_bytes, uint32_t basis)
{
    const size_t n_words = n_bytes / 4;

    return hash_words(p, n_words, basis);
}
333
/* Returns the hash of the 'n_bytes' bytes at 'p', taken as 64-bit words and
 * starting from 'basis'.  Only 'n_bytes' / 8 whole words are hashed; any
 * remainder bytes are not included. */
static inline uint32_t
hash_bytes64(const uint64_t p[], size_t n_bytes, uint32_t basis)
{
    const size_t n_words = n_bytes / 8;

    return hash_words64(p, n_words, basis);
}
339
/* Returns the hash of the NUL-terminated string 's', starting from 'basis'.
 * The terminating NUL is not part of the hashed data: exactly strlen(s)
 * bytes are passed to hash_bytes(). */
static inline uint32_t
hash_string(const char *s, uint32_t basis)
{
    const size_t length = strlen(s);

    return hash_bytes(s, length, basis);
}
344
/* Returns a hash of the 32-bit integer 'x', starting from 'basis', computed
 * as hash_2words('x', 'basis'). */
static inline uint32_t
hash_int(uint32_t x, uint32_t basis)
{
    const uint32_t result = hash_2words(x, basis);

    return result;
}
349
/* An attempt at a useful 1-bit hash function.  Has not been analyzed for
 * quality.
 *
 * Picks one of two fixed constants according to 'x' and xors it with
 * 'basis' rotated left by one bit. */
static inline uint32_t
hash_boolean(bool x, uint32_t basis)
{
    const uint32_t P0 = 0xc2b73583;   /* This is hash_int(1, 0). */
    const uint32_t P1 = 0xe90f1258;   /* This is hash_int(2, 0). */
    const uint32_t selected = x ? P0 : P1;

    return selected ^ hash_rot(basis, 1);
}
358 \f
359 /* Helper functions for calling hash_add() for several 32- or 64-bit words in a
360 * buffer. These are not hash functions by themselves, since they need
361 * hash_finish() to be called, so if you are looking for a full hash function
362 * see hash_words(), etc. */
363
/* Adds each of the 'n_words' 32-bit words at 'p' to the running hash
 * 'hash', in order, with hash_add().  Returns the updated running hash,
 * which still needs hash_finish(). */
static inline uint32_t
hash_add_words(uint32_t hash, const uint32_t *p, size_t n_words)
{
    while (n_words-- > 0) {
        hash = hash_add(hash, *p++);
    }
    return hash;
}
372
/* Adds each of the 'n_words' 64-bit words at 'p' to the running hash
 * 'hash', in order, with hash_add64().  Returns the updated running hash,
 * which still needs hash_finish(). */
static inline uint32_t
hash_add_words64(uint32_t hash, const uint64_t *p, size_t n_words)
{
    while (n_words-- > 0) {
        hash = hash_add64(hash, *p++);
    }
    return hash;
}
381
/* Adds the 'n_bytes' bytes at 'p' to the running hash 'hash' as 32-bit
 * words.  Only 'n_bytes' / 4 whole words are added; any remainder bytes are
 * not included. */
static inline uint32_t
hash_add_bytes32(uint32_t hash, const uint32_t *p, size_t n_bytes)
{
    const size_t n_words = n_bytes / 4;

    return hash_add_words(hash, p, n_words);
}
387
/* Adds the 'n_bytes' bytes at 'p' to the running hash 'hash' as 64-bit
 * words.  Only 'n_bytes' / 8 whole words are added; any remainder bytes are
 * not included. */
static inline uint32_t
hash_add_bytes64(uint32_t hash, const uint64_t *p, size_t n_bytes)
{
    const size_t n_words = n_bytes / 8;

    return hash_add_words64(hash, p, n_words);
}
393
394 #ifdef __cplusplus
395 }
396 #endif
397
398 #endif /* hash.h */