]> git.proxmox.com Git - mirror_ovs.git/blob - lib/hash.h
hash: Introduce an implementation of murmurhash.
[mirror_ovs.git] / lib / hash.h
1 /*
2 * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #ifndef HASH_H
17 #define HASH_H 1
18
19 #include <stdbool.h>
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <string.h>
23 #include "util.h"
24
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28
29 /* This is the public domain lookup3 hash by Bob Jenkins from
30 * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */
31
32 static inline uint32_t
33 hash_rot(uint32_t x, int k)
34 {
35 return (x << k) | (x >> (32 - k));
36 }
37
38 static inline void
39 hash_mix(uint32_t *a, uint32_t *b, uint32_t *c)
40 {
41 *a -= *c; *a ^= hash_rot(*c, 4); *c += *b;
42 *b -= *a; *b ^= hash_rot(*a, 6); *a += *c;
43 *c -= *b; *c ^= hash_rot(*b, 8); *b += *a;
44 *a -= *c; *a ^= hash_rot(*c, 16); *c += *b;
45 *b -= *a; *b ^= hash_rot(*a, 19); *a += *c;
46 *c -= *b; *c ^= hash_rot(*b, 4); *b += *a;
47 }
48
49 static inline void
50 hash_final(uint32_t *a, uint32_t *b, uint32_t *c)
51 {
52 *c ^= *b; *c -= hash_rot(*b, 14);
53 *a ^= *c; *a -= hash_rot(*c, 11);
54 *b ^= *a; *b -= hash_rot(*a, 25);
55 *c ^= *b; *c -= hash_rot(*b, 16);
56 *a ^= *c; *a -= hash_rot(*c, 4);
57 *b ^= *a; *b -= hash_rot(*a, 14);
58 *c ^= *b; *c -= hash_rot(*b, 24);
59 }
60
61 uint32_t hash_words(const uint32_t *, size_t n_word, uint32_t basis);
62 uint32_t hash_2words(uint32_t, uint32_t);
63 uint32_t hash_3words(uint32_t, uint32_t, uint32_t);
64 uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis);
65
66 static inline uint32_t hash_string(const char *s, uint32_t basis)
67 {
68 return hash_bytes(s, strlen(s), basis);
69 }
70
71 /* This is Bob Jenkins' integer hash from
72 * http://burtleburtle.net/bob/hash/integer.html, modified for style.
73 *
74 * This hash is faster than hash_2words(), but it isn't as good when 'basis' is
75 * important. So use this function for speed or hash_2words() for hash
76 * quality. */
77 static inline uint32_t hash_int(uint32_t x, uint32_t basis)
78 {
79 x -= x << 6;
80 x ^= x >> 17;
81 x -= x << 9;
82 x ^= x << 4;
83 x += basis;
84 x -= x << 3;
85 x ^= x << 10;
86 x ^= x >> 15;
87 return x;
88 }
89
90 /* An attempt at a useful 1-bit hash function. Has not been analyzed for
91 * quality. */
92 static inline uint32_t hash_boolean(bool x, uint32_t basis)
93 {
94 const uint32_t P0 = 0xc2b73583; /* This is hash_int(1, 0). */
95 const uint32_t P1 = 0xe90f1258; /* This is hash_int(2, 0). */
96 return (x ? P0 : P1) ^ hash_rot(basis, 1);
97 }
98
99 static inline uint32_t hash_double(double x, uint32_t basis)
100 {
101 uint32_t value[2];
102 BUILD_ASSERT_DECL(sizeof x == sizeof value);
103
104 memcpy(value, &x, sizeof value);
105 return hash_3words(value[0], value[1], basis);
106 }
107
108 static inline uint32_t hash_pointer(const void *p, uint32_t basis)
109 {
110 /* Often pointers are hashed simply by casting to integer type, but that
111 * has pitfalls since the lower bits of a pointer are often all 0 for
112 * alignment reasons. It's hard to guess where the entropy really is, so
113 * we give up here and just use a high-quality hash function.
114 *
115 * The double cast suppresses a warning on 64-bit systems about casting to
116 * an integer to different size. That's OK in this case, since most of the
117 * entropy in the pointer is almost certainly in the lower 32 bits. */
118 return hash_int((uint32_t) (uintptr_t) p, basis);
119 }
120
121 /* Murmurhash by Austin Appleby,
122 * from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp.
123 *
124 * The upstream license there says:
125 *
126 * // MurmurHash3 was written by Austin Appleby, and is placed in the public
127 * // domain. The author hereby disclaims copyright to this source code.
128 *
129 * Murmurhash is faster and higher-quality than the Jenkins lookup3 hash. When
130 * we have a little more familiarity with it, it's probably a good idea to
131 * switch all of OVS to it.
132 *
133 * For now, we have this implementation here for use by code that needs a hash
134 * that is convenient for use one word at a time, since the Jenkins lookup3
135 * hash works three words at a time.
136 *
137 * See mhash_words() for sample usage. */
138
139 uint32_t mhash_words(const uint32_t data[], size_t n_words, uint32_t basis);
140
141 static inline uint32_t mhash_add(uint32_t hash, uint32_t data)
142 {
143 data *= 0xcc9e2d51;
144 data = hash_rot(data, 15);
145 data *= 0x1b873593;
146
147 hash ^= data;
148 hash = hash_rot(hash, 13);
149 return hash * 5 + 0xe6546b64;
150 }
151
152 static inline uint32_t mhash_finish(uint32_t hash, size_t n)
153 {
154 hash ^= n * 4;
155 hash ^= hash_rot(hash, 16);
156 hash *= 0x85ebca6b;
157 hash ^= hash_rot(hash, 13);
158 hash *= 0xc2b2ae35;
159 hash ^= hash_rot(hash, 16);
160 return hash;
161 }
162
163 #ifdef __cplusplus
164 }
165 #endif
166
167 #endif /* hash.h */