]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | /* | |
7 | Murmurhash from http://sites.google.com/site/murmurhash/ | |
8 | ||
11fdf7f2 TL |
9 | All code is released to the public domain. For business purposes, Murmurhash |
10 | is under the MIT license. | |
7c673cae FG |
11 | */ |
12 | #include "murmurhash.h" | |
1e59de90 | 13 | |
20effc67 | 14 | #include "port/lang.h" |
7c673cae FG |
15 | |
16 | #if defined(__x86_64__) | |
17 | ||
18 | // ------------------------------------------------------------------- | |
19 | // | |
20 | // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment | |
21 | // and endian-ness issues if used across multiple platforms. | |
22 | // | |
23 | // 64-bit hash for 64-bit platforms | |
24 | ||
11fdf7f2 TL |
25 | #ifdef ROCKSDB_UBSAN_RUN |
26 | #if defined(__clang__) | |
27 | __attribute__((__no_sanitize__("alignment"))) | |
28 | #elif defined(__GNUC__) | |
29 | __attribute__((__no_sanitize_undefined__)) | |
30 | #endif | |
31 | #endif | |
1e59de90 | 32 | // clang-format off |
7c673cae FG |
33 | uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ) |
34 | { | |
35 | const uint64_t m = 0xc6a4a7935bd1e995; | |
36 | const int r = 47; | |
37 | ||
38 | uint64_t h = seed ^ (len * m); | |
39 | ||
40 | const uint64_t * data = (const uint64_t *)key; | |
41 | const uint64_t * end = data + (len/8); | |
42 | ||
43 | while(data != end) | |
44 | { | |
45 | uint64_t k = *data++; | |
46 | ||
47 | k *= m; | |
48 | k ^= k >> r; | |
49 | k *= m; | |
50 | ||
51 | h ^= k; | |
52 | h *= m; | |
53 | } | |
54 | ||
55 | const unsigned char * data2 = (const unsigned char*)data; | |
56 | ||
57 | switch(len & 7) | |
58 | { | |
11fdf7f2 TL |
59 | case 7: h ^= ((uint64_t)data2[6]) << 48; FALLTHROUGH_INTENDED; |
60 | case 6: h ^= ((uint64_t)data2[5]) << 40; FALLTHROUGH_INTENDED; | |
61 | case 5: h ^= ((uint64_t)data2[4]) << 32; FALLTHROUGH_INTENDED; | |
62 | case 4: h ^= ((uint64_t)data2[3]) << 24; FALLTHROUGH_INTENDED; | |
63 | case 3: h ^= ((uint64_t)data2[2]) << 16; FALLTHROUGH_INTENDED; | |
64 | case 2: h ^= ((uint64_t)data2[1]) << 8; FALLTHROUGH_INTENDED; | |
7c673cae FG |
65 | case 1: h ^= ((uint64_t)data2[0]); |
66 | h *= m; | |
67 | }; | |
68 | ||
69 | h ^= h >> r; | |
70 | h *= m; | |
71 | h ^= h >> r; | |
72 | ||
73 | return h; | |
74 | } | |
1e59de90 | 75 | // clang-format on |
7c673cae FG |
76 | |
77 | #elif defined(__i386__) | |
78 | ||
79 | // ------------------------------------------------------------------- | |
80 | // | |
81 | // Note - This code makes a few assumptions about how your machine behaves - | |
82 | // | |
83 | // 1. We can read a 4-byte value from any address without crashing | |
84 | // 2. sizeof(int) == 4 | |
85 | // | |
86 | // And it has a few limitations - | |
87 | // | |
88 | // 1. It will not work incrementally. | |
89 | // 2. It will not produce the same results on little-endian and big-endian | |
90 | // machines. | |
1e59de90 | 91 | // clang-format off |
7c673cae FG |
92 | unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed ) |
93 | { | |
94 | // 'm' and 'r' are mixing constants generated offline. | |
95 | // They're not really 'magic', they just happen to work well. | |
96 | ||
97 | const unsigned int m = 0x5bd1e995; | |
98 | const int r = 24; | |
99 | ||
100 | // Initialize the hash to a 'random' value | |
101 | ||
102 | unsigned int h = seed ^ len; | |
103 | ||
104 | // Mix 4 bytes at a time into the hash | |
105 | ||
106 | const unsigned char * data = (const unsigned char *)key; | |
107 | ||
108 | while(len >= 4) | |
109 | { | |
110 | unsigned int k = *(unsigned int *)data; | |
111 | ||
112 | k *= m; | |
113 | k ^= k >> r; | |
114 | k *= m; | |
115 | ||
116 | h *= m; | |
117 | h ^= k; | |
118 | ||
119 | data += 4; | |
120 | len -= 4; | |
121 | } | |
122 | ||
123 | // Handle the last few bytes of the input array | |
124 | ||
125 | switch(len) | |
126 | { | |
11fdf7f2 TL |
127 | case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; |
128 | case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; | |
7c673cae FG |
129 | case 1: h ^= data[0]; |
130 | h *= m; | |
131 | }; | |
132 | ||
133 | // Do a few final mixes of the hash to ensure the last few | |
134 | // bytes are well-incorporated. | |
135 | ||
136 | h ^= h >> 13; | |
137 | h *= m; | |
138 | h ^= h >> 15; | |
139 | ||
140 | return h; | |
141 | } | |
1e59de90 | 142 | // clang-format on |
7c673cae FG |
143 | |
144 | #else | |
145 | ||
146 | // ------------------------------------------------------------------- | |
147 | // | |
148 | // Same as MurmurHash2, but endian- and alignment-neutral. | |
149 | // Half the speed though, alas. | |
1e59de90 | 150 | // clang-format off |
7c673cae FG |
151 | unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed ) |
152 | { | |
153 | const unsigned int m = 0x5bd1e995; | |
154 | const int r = 24; | |
155 | ||
156 | unsigned int h = seed ^ len; | |
157 | ||
158 | const unsigned char * data = (const unsigned char *)key; | |
159 | ||
160 | while(len >= 4) | |
161 | { | |
162 | unsigned int k; | |
163 | ||
164 | k = data[0]; | |
165 | k |= data[1] << 8; | |
166 | k |= data[2] << 16; | |
167 | k |= data[3] << 24; | |
168 | ||
169 | k *= m; | |
170 | k ^= k >> r; | |
171 | k *= m; | |
172 | ||
173 | h *= m; | |
174 | h ^= k; | |
175 | ||
176 | data += 4; | |
177 | len -= 4; | |
178 | } | |
179 | ||
180 | switch(len) | |
181 | { | |
11fdf7f2 TL |
182 | case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED; |
183 | case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED; | |
7c673cae FG |
184 | case 1: h ^= data[0]; |
185 | h *= m; | |
186 | }; | |
187 | ||
188 | h ^= h >> 13; | |
189 | h *= m; | |
190 | h ^= h >> 15; | |
191 | ||
192 | return h; | |
193 | } | |
1e59de90 | 194 | // clang-format on |
7c673cae FG |
195 | |
196 | #endif |