]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae | 5 | /* |
f67539c2 | 6 | xxHash - Extremely Fast Hash algorithm |
7c673cae | 7 | Header File |
f67539c2 TL |
8 | Copyright (C) 2012-2016, Yann Collet. |
9 | ||
7c673cae FG |
10 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
11 | ||
12 | Redistribution and use in source and binary forms, with or without | |
13 | modification, are permitted provided that the following conditions are | |
14 | met: | |
15 | ||
16 | * Redistributions of source code must retain the above copyright | |
17 | notice, this list of conditions and the following disclaimer. | |
18 | * Redistributions in binary form must reproduce the above | |
19 | copyright notice, this list of conditions and the following disclaimer | |
20 | in the documentation and/or other materials provided with the | |
21 | distribution. | |
22 | ||
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
27 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
28 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
29 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
30 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
31 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
32 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
33 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
34 | ||
35 | You can contact the author at : | |
f67539c2 | 36 | - xxHash source repository : https://github.com/Cyan4973/xxHash |
7c673cae FG |
37 | */ |
38 | ||
39 | /* Notice extracted from xxHash homepage : | |
40 | ||
41 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits. | |
42 | It also successfully passes all tests from the SMHasher suite. | |
43 | ||
44 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) | |
45 | ||
46 | Name Speed Q.Score Author | |
47 | xxHash 5.4 GB/s 10 | |
48 | CrapWow 3.2 GB/s 2 Andrew | |
49 | MumurHash 3a 2.7 GB/s 10 Austin Appleby | |
50 | SpookyHash 2.0 GB/s 10 Bob Jenkins | |
51 | SBox 1.4 GB/s 9 Bret Mulvey | |
52 | Lookup3 1.2 GB/s 9 Bob Jenkins | |
53 | SuperFastHash 1.2 GB/s 1 Paul Hsieh | |
54 | CityHash64 1.05 GB/s 10 Pike & Alakuijala | |
55 | FNV 0.55 GB/s 5 Fowler, Noll, Vo | |
f67539c2 | 56 | CRC32 0.43 GB/s # 9 |
7c673cae FG |
57 | MD5-32 0.33 GB/s 10 Ronald L. Rivest |
58 | SHA1-32 0.28 GB/s 10 | |
59 | ||
f67539c2 TL |
60 | Note #: other CRC32 implementations can be over 40x faster than SMHasher's: |
61 | http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735 | |
62 | ||
7c673cae FG |
63 | Q.Score is a measure of quality of the hash function. |
64 | It depends on successfully passing SMHasher test set. | |
65 | 10 is a perfect score. | |
7c673cae | 66 | |
f67539c2 TL |
67 | A 64-bit version, named XXH64, is available since r35. |
68 | It offers much better speed, but for 64-bit applications only. | |
69 | Name Speed on 64 bits Speed on 32 bits | |
70 | XXH64 13.8 GB/s 1.9 GB/s | |
71 | XXH32 6.8 GB/s 6.0 GB/s | |
72 | */ | |
7c673cae | 73 | |
f67539c2 TL |
74 | #ifndef XXHASH_H_5627135585666179 |
75 | #define XXHASH_H_5627135585666179 1 | |
494da23a | 76 | |
f67539c2 TL |
77 | /* BEGIN RocksDB customizations */ |
78 | #ifndef XXH_STATIC_LINKING_ONLY | |
79 | #define XXH_STATIC_LINKING_ONLY 1 /* access experimental APIs like XXH3 */ | |
494da23a | 80 | #endif |
f67539c2 TL |
81 | #define XXH_NAMESPACE ROCKSDB_ |
82 | /* END RocksDB customizations */ | |
494da23a | 83 | |
7c673cae | 84 | #if defined (__cplusplus) |
f67539c2 | 85 | extern "C" { |
7c673cae FG |
86 | #endif |
87 | ||
88 | ||
f67539c2 TL |
89 | /* **************************** |
90 | * Definitions | |
91 | ******************************/ | |
92 | #include <stddef.h> /* size_t */ | |
7c673cae FG |
93 | typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; |
94 | ||
95 | ||
f67539c2 TL |
96 | /* **************************** |
97 | * API modifier | |
98 | ******************************/ | |
99 | /** XXH_INLINE_ALL (and XXH_PRIVATE_API) | |
100 | * This build macro includes xxhash functions in `static` mode | |
101 | * in order to inline them, and remove their symbol from the public list. | |
102 | * Inlining offers great performance improvement on small keys, | |
103 | * and dramatic ones when length is expressed as a compile-time constant. | |
104 | * See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html . | |
105 | * Methodology : | |
106 | * #define XXH_INLINE_ALL | |
107 | * #include "xxhash.h" | |
108 | * `xxhash.c` is automatically included. | |
109 | * It's not useful to compile and link it as a separate object. | |
110 | */ | |
111 | #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) | |
112 | # ifndef XXH_STATIC_LINKING_ONLY | |
113 | # define XXH_STATIC_LINKING_ONLY | |
114 | # endif | |
115 | # if defined(__GNUC__) | |
116 | # define XXH_PUBLIC_API static __inline __attribute__((unused)) | |
117 | # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) | |
118 | # define XXH_PUBLIC_API static inline | |
119 | # elif defined(_MSC_VER) | |
120 | # define XXH_PUBLIC_API static __inline | |
121 | # else | |
122 | /* this version may generate warnings for unused static functions */ | |
123 | # define XXH_PUBLIC_API static | |
124 | # endif | |
125 | #else | |
126 | # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) | |
127 | # ifdef XXH_EXPORT | |
128 | # define XXH_PUBLIC_API __declspec(dllexport) | |
129 | # elif XXH_IMPORT | |
130 | # define XXH_PUBLIC_API __declspec(dllimport) | |
131 | # endif | |
132 | # else | |
133 | # define XXH_PUBLIC_API /* do nothing */ | |
134 | # endif | |
135 | #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ | |
136 | ||
137 | /*! XXH_NAMESPACE, aka Namespace Emulation : | |
138 | * | |
139 | * If you want to include _and expose_ xxHash functions from within your own library, | |
140 | * but also want to avoid symbol collisions with other libraries which may also include xxHash, | |
141 | * | |
142 | * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library | |
143 | * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). | |
144 | * | |
145 | * Note that no change is required within the calling program as long as it includes `xxhash.h` : | |
146 | * regular symbol name will be automatically translated by this header. | |
147 | */ | |
148 | #ifdef XXH_NAMESPACE | |
149 | # define XXH_CAT(A,B) A##B | |
150 | # define XXH_NAME2(A,B) XXH_CAT(A,B) | |
151 | # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) | |
152 | # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) | |
153 | # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) | |
154 | # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) | |
155 | # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) | |
156 | # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) | |
157 | # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) | |
158 | # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) | |
159 | # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) | |
160 | # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) | |
161 | # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) | |
162 | # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) | |
163 | # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) | |
164 | # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) | |
165 | # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) | |
166 | # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) | |
167 | # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) | |
168 | # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) | |
169 | # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) | |
170 | #endif | |
7c673cae | 171 | |
7c673cae | 172 | |
f67539c2 TL |
173 | /* ************************************* |
174 | * Version | |
175 | ***************************************/ | |
176 | #define XXH_VERSION_MAJOR 0 | |
177 | #define XXH_VERSION_MINOR 7 | |
178 | #define XXH_VERSION_RELEASE 2 | |
179 | #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) | |
180 | XXH_PUBLIC_API unsigned XXH_versionNumber (void); | |
7c673cae FG |
181 | |
182 | ||
f67539c2 TL |
183 | /*-********************************************************************** |
184 | * 32-bit hash | |
185 | ************************************************************************/ | |
186 | #if !defined (__VMS) \ | |
187 | && (defined (__cplusplus) \ | |
188 | || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) | |
189 | # include <stdint.h> | |
190 | typedef uint32_t XXH32_hash_t; | |
191 | #else | |
192 | # include <limits.h> | |
193 | # if UINT_MAX == 0xFFFFFFFFUL | |
194 | typedef unsigned int XXH32_hash_t; | |
195 | # else | |
196 | # if ULONG_MAX == 0xFFFFFFFFUL | |
197 | typedef unsigned long XXH32_hash_t; | |
198 | # else | |
199 | # error "unsupported platform : need a 32-bit type" | |
200 | # endif | |
201 | # endif | |
202 | #endif | |
7c673cae | 203 | |
f67539c2 TL |
204 | /*! XXH32() : |
205 | Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". | |
206 | The memory between input & input+length must be valid (allocated and read-accessible). | |
207 | "seed" can be used to alter the result predictably. | |
208 | Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ | |
209 | XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed); | |
7c673cae | 210 | |
f67539c2 | 211 | /*====== Streaming ======*/ |
7c673cae FG |
212 | |
213 | /* | |
f67539c2 TL |
214 | * Streaming functions generate the xxHash value from an incrememtal input. |
215 | * This method is slower than single-call functions, due to state management. | |
216 | * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. | |
217 | * | |
218 | * XXH state must first be allocated, using XXH*_createState() . | |
219 | * | |
220 | * Start a new hash by initializing state with a seed, using XXH*_reset(). | |
221 | * | |
222 | * Then, feed the hash state by calling XXH*_update() as many times as necessary. | |
223 | * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. | |
224 | * | |
225 | * Finally, a hash value can be produced anytime, by using XXH*_digest(). | |
226 | * This function returns the nn-bits hash as an int or long long. | |
227 | * | |
228 | * It's still possible to continue inserting input into the hash state after a digest, | |
229 | * and generate some new hash values later on, by invoking again XXH*_digest(). | |
230 | * | |
231 | * When done, release the state, using XXH*_freeState(). | |
232 | */ | |
233 | ||
234 | typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ | |
235 | XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); | |
236 | XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); | |
237 | XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); | |
238 | ||
239 | XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed); | |
240 | XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); | |
241 | XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); | |
7c673cae | 242 | |
f67539c2 | 243 | /*====== Canonical representation ======*/ |
7c673cae | 244 | |
f67539c2 TL |
245 | /* Default return values from XXH functions are basic unsigned 32 and 64 bits. |
246 | * This the simplest and fastest format for further post-processing. | |
247 | * However, this leaves open the question of what is the order of bytes, | |
248 | * since little and big endian conventions will write the same number differently. | |
249 | * | |
250 | * The canonical representation settles this issue, | |
251 | * by mandating big-endian convention, | |
252 | * aka, the same convention as human-readable numbers (large digits first). | |
253 | * When writing hash values to storage, sending them over a network, or printing them, | |
254 | * it's highly recommended to use the canonical representation, | |
255 | * to ensure portability across a wider range of systems, present and future. | |
256 | * | |
257 | * The following functions allow transformation of hash values into and from canonical format. | |
258 | */ | |
259 | ||
260 | typedef struct { unsigned char digest[4]; } XXH32_canonical_t; | |
261 | XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); | |
262 | XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); | |
263 | ||
264 | ||
265 | #ifndef XXH_NO_LONG_LONG | |
266 | /*-********************************************************************** | |
267 | * 64-bit hash | |
268 | ************************************************************************/ | |
269 | #if !defined (__VMS) \ | |
270 | && (defined (__cplusplus) \ | |
271 | || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) | |
272 | # include <stdint.h> | |
273 | typedef uint64_t XXH64_hash_t; | |
274 | #else | |
275 | /* the following type must have a width of 64-bit */ | |
276 | typedef unsigned long long XXH64_hash_t; | |
277 | #endif | |
7c673cae | 278 | |
f67539c2 TL |
279 | /*! XXH64() : |
280 | Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". | |
281 | "seed" can be used to alter the result predictably. | |
282 | This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). | |
7c673cae | 283 | */ |
f67539c2 | 284 | XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, XXH64_hash_t seed); |
7c673cae | 285 | |
f67539c2 TL |
286 | /*====== Streaming ======*/ |
287 | typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ | |
288 | XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); | |
289 | XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); | |
290 | XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); | |
7c673cae | 291 | |
f67539c2 TL |
292 | XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed); |
293 | XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); | |
294 | XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); | |
7c673cae | 295 | |
f67539c2 TL |
296 | /*====== Canonical representation ======*/ |
297 | typedef struct { unsigned char digest[8]; } XXH64_canonical_t; | |
298 | XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); | |
299 | XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); | |
7c673cae | 300 | |
7c673cae | 301 | |
f67539c2 | 302 | #endif /* XXH_NO_LONG_LONG */ |
7c673cae | 303 | |
494da23a | 304 | |
494da23a | 305 | |
f67539c2 | 306 | #ifdef XXH_STATIC_LINKING_ONLY |
494da23a | 307 | |
f67539c2 TL |
308 | /* ================================================================================================ |
309 | This section contains declarations which are not guaranteed to remain stable. | |
310 | They may change in future versions, becoming incompatible with a different version of the library. | |
311 | These declarations should only be used with static linking. | |
312 | Never use them in association with dynamic linking ! | |
313 | =================================================================================================== */ | |
494da23a TL |
314 | |
315 | /* These definitions are only present to allow | |
316 | * static allocation of XXH state, on stack or in a struct for example. | |
317 | * Never **ever** use members directly. */ | |
318 | ||
f67539c2 TL |
319 | struct XXH32_state_s { |
320 | XXH32_hash_t total_len_32; | |
321 | XXH32_hash_t large_len; | |
322 | XXH32_hash_t v1; | |
323 | XXH32_hash_t v2; | |
324 | XXH32_hash_t v3; | |
325 | XXH32_hash_t v4; | |
326 | XXH32_hash_t mem32[4]; | |
327 | XXH32_hash_t memsize; | |
328 | XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */ | |
329 | }; /* typedef'd to XXH32_state_t */ | |
330 | ||
331 | #ifndef XXH_NO_LONG_LONG /* remove 64-bit support */ | |
494da23a | 332 | struct XXH64_state_s { |
f67539c2 TL |
333 | XXH64_hash_t total_len; |
334 | XXH64_hash_t v1; | |
335 | XXH64_hash_t v2; | |
336 | XXH64_hash_t v3; | |
337 | XXH64_hash_t v4; | |
338 | XXH64_hash_t mem64[4]; | |
339 | XXH32_hash_t memsize; | |
340 | XXH32_hash_t reserved32; /* required for padding anyway */ | |
341 | XXH64_hash_t reserved64; /* never read nor write, might be removed in a future version */ | |
342 | }; /* typedef'd to XXH64_state_t */ | |
343 | #endif /* XXH_NO_LONG_LONG */ | |
344 | ||
345 | ||
346 | /*-********************************************************************** | |
347 | * XXH3 | |
348 | * New experimental hash | |
349 | ************************************************************************/ | |
350 | #ifndef XXH_NO_LONG_LONG | |
351 | ||
352 | ||
353 | /* ============================================ | |
354 | * XXH3 is a new hash algorithm, | |
355 | * featuring improved speed performance for both small and large inputs. | |
356 | * See full speed analysis at : http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html | |
357 | * In general, expect XXH3 to run about ~2x faster on large inputs, | |
358 | * and >3x faster on small ones, though exact differences depend on platform. | |
359 | * | |
360 | * The algorithm is portable, will generate the same hash on all platforms. | |
361 | * It benefits greatly from vectorization units, but does not require it. | |
362 | * | |
363 | * XXH3 offers 2 variants, _64bits and _128bits. | |
364 | * When only 64 bits are needed, prefer calling the _64bits variant : | |
365 | * it reduces the amount of mixing, resulting in faster speed on small inputs. | |
366 | * It's also generally simpler to manipulate a scalar return type than a struct. | |
367 | * | |
368 | * The XXH3 algorithm is still considered experimental. | |
369 | * Produced results can still change between versions. | |
370 | * Results produced by v0.7.x are not comparable with results from v0.7.y . | |
371 | * It's nonetheless possible to use XXH3 for ephemeral data (local sessions), | |
372 | * but avoid storing values in long-term storage for later reads. | |
373 | * | |
374 | * The API supports one-shot hashing, streaming mode, and custom secrets. | |
375 | * | |
376 | * There are still a number of opened questions that community can influence during the experimental period. | |
377 | * I'm trying to list a few of them below, though don't consider this list as complete. | |
378 | * | |
379 | * - 128-bits output type : currently defined as a structure of two 64-bits fields. | |
380 | * That's because 128-bit values do not exist in C standard. | |
381 | * Note that it means that, at byte level, result is not identical depending on endianess. | |
382 | * However, at field level, they are identical on all platforms. | |
383 | * The canonical representation solves the issue of identical byte-level representation across platforms, | |
384 | * which is necessary for serialization. | |
385 | * Q1 : Would there be a better representation for a 128-bit hash result ? | |
386 | * Q2 : Are the names of the inner 64-bit fields important ? Should they be changed ? | |
387 | * | |
388 | * - Prototype XXH128() : XXH128() uses the same arguments as XXH64(), for consistency. | |
389 | * It means it maps to XXH3p_128bits_withSeed(). | |
390 | * This variant is slightly slower than XXH3p_128bits(), | |
391 | * because the seed is now part of the algorithm, and can't be simplified. | |
392 | * Is that a good idea ? | |
393 | * | |
394 | * - Seed type for XXH128() : currently, it's a single 64-bit value, like the 64-bit variant. | |
395 | * It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash. | |
396 | * But 128-bit seed is more difficult to use, since it requires to pass a structure instead of a scalar value. | |
397 | * Such a variant could either replace current one, or become an additional one. | |
398 | * Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`). | |
399 | * Follow up question : if both 64-bit and 128-bit seeds are allowed, which variant should be called XXH128 ? | |
400 | * | |
401 | * - Result for len==0 : Currently, the result of hashing a zero-length input is always `0`. | |
402 | * It seems okay as a return value when using "default" secret and seed. | |
403 | * But is it still fine to return `0` when secret or seed are non-default ? | |
404 | * Are there use cases which could depend on generating a different hash result for zero-length input when the secret is different ? | |
405 | * | |
406 | * - Consistency (1) : Streaming XXH128 uses an XXH3 state, which is the same state as XXH3p_64bits(). | |
407 | * It means a 128bit streaming loop must invoke the following symbols : | |
408 | * XXH3p_createState(), XXH3p_128bits_reset(), XXH3p_128bits_update() (loop), XXH3p_128bits_digest(), XXH3p_freeState(). | |
409 | * Is that consistent enough ? | |
410 | * | |
411 | * - Consistency (2) : The canonical representation of `XXH3p_64bits` is provided by existing functions | |
412 | * XXH64_canonicalFromHash(), and reverse operation XXH64_hashFromCanonical(). | |
413 | * As a mirror, canonical functions for XXH128_hash_t results generated by `XXH3p_128bits` | |
414 | * are XXH128_canonicalFromHash() and XXH128_hashFromCanonical(). | |
415 | * Which means, `XXH3` doesn't appear in the names, because canonical functions operate on a type, | |
416 | * independently of which algorithm was used to generate that type. | |
417 | * Is that consistent enough ? | |
418 | */ | |
419 | ||
420 | #ifdef XXH_NAMESPACE | |
421 | # define XXH3p_64bits XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits) | |
422 | # define XXH3p_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_withSecret) | |
423 | # define XXH3p_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_withSeed) | |
424 | ||
425 | # define XXH3p_createState XXH_NAME2(XXH_NAMESPACE, XXH3p_createState) | |
426 | # define XXH3p_freeState XXH_NAME2(XXH_NAMESPACE, XXH3p_freeState) | |
427 | # define XXH3p_copyState XXH_NAME2(XXH_NAMESPACE, XXH3p_copyState) | |
428 | ||
429 | # define XXH3p_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_reset) | |
430 | # define XXH3p_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_reset_withSeed) | |
431 | # define XXH3p_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_reset_withSecret) | |
432 | # define XXH3p_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_update) | |
433 | # define XXH3p_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3p_64bits_digest) | |
434 | #endif | |
494da23a | 435 | |
f67539c2 TL |
436 | /* XXH3p_64bits() : |
437 | * default 64-bit variant, using default secret and default seed of 0. | |
438 | * It's the fastest variant. */ | |
439 | XXH_PUBLIC_API XXH64_hash_t XXH3p_64bits(const void* data, size_t len); | |
440 | ||
441 | /* XXH3p_64bits_withSecret() : | |
442 | * It's possible to provide any blob of bytes as a "secret" to generate the hash. | |
443 | * This makes it more difficult for an external actor to prepare an intentional collision. | |
444 | * The secret *must* be large enough (>= XXH3p_SECRET_SIZE_MIN). | |
445 | * It should consist of random bytes. | |
446 | * Avoid repeating same character, or sequences of bytes, | |
447 | * and especially avoid swathes of \0. | |
448 | * Failure to respect these conditions will result in a poor quality hash. | |
449 | */ | |
450 | #define XXH3p_SECRET_SIZE_MIN 136 | |
451 | XXH_PUBLIC_API XXH64_hash_t XXH3p_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); | |
452 | ||
453 | /* XXH3p_64bits_withSeed() : | |
454 | * This variant generates on the fly a custom secret, | |
455 | * based on the default secret, altered using the `seed` value. | |
456 | * While this operation is decently fast, note that it's not completely free. | |
457 | * note : seed==0 produces same results as XXH3p_64bits() */ | |
458 | XXH_PUBLIC_API XXH64_hash_t XXH3p_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); | |
459 | ||
460 | ||
461 | /* streaming 64-bit */ | |
462 | ||
463 | #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */ | |
464 | # include <stdalign.h> | |
465 | # define XXH_ALIGN(n) alignas(n) | |
466 | #elif defined(__GNUC__) | |
467 | # define XXH_ALIGN(n) __attribute__ ((aligned(n))) | |
468 | #elif defined(_MSC_VER) | |
469 | # define XXH_ALIGN(n) __declspec(align(n)) | |
494da23a | 470 | #else |
f67539c2 TL |
471 | # define XXH_ALIGN(n) /* disabled */ |
472 | #endif | |
494da23a | 473 | |
f67539c2 TL |
474 | typedef struct XXH3p_state_s XXH3p_state_t; |
475 | ||
476 | #define XXH3p_SECRET_DEFAULT_SIZE 192 /* minimum XXH3p_SECRET_SIZE_MIN */ | |
477 | #define XXH3p_INTERNALBUFFER_SIZE 256 | |
478 | struct XXH3p_state_s { | |
479 | XXH_ALIGN(64) XXH64_hash_t acc[8]; | |
480 | XXH_ALIGN(64) unsigned char customSecret[XXH3p_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */ | |
481 | XXH_ALIGN(64) unsigned char buffer[XXH3p_INTERNALBUFFER_SIZE]; | |
482 | XXH32_hash_t bufferedSize; | |
483 | XXH32_hash_t nbStripesPerBlock; | |
484 | XXH32_hash_t nbStripesSoFar; | |
485 | XXH32_hash_t secretLimit; | |
486 | XXH32_hash_t reserved32; | |
487 | XXH32_hash_t reserved32_2; | |
488 | XXH64_hash_t totalLen; | |
489 | XXH64_hash_t seed; | |
490 | XXH64_hash_t reserved64; | |
491 | const unsigned char* secret; /* note : there is some padding after, due to alignment on 64 bytes */ | |
492 | }; /* typedef'd to XXH3p_state_t */ | |
493 | ||
494 | /* Streaming requires state maintenance. | |
495 | * This operation costs memory and cpu. | |
496 | * As a consequence, streaming is slower than one-shot hashing. | |
497 | * For better performance, prefer using one-shot functions whenever possible. */ | |
498 | ||
499 | XXH_PUBLIC_API XXH3p_state_t* XXH3p_createState(void); | |
500 | XXH_PUBLIC_API XXH_errorcode XXH3p_freeState(XXH3p_state_t* statePtr); | |
501 | XXH_PUBLIC_API void XXH3p_copyState(XXH3p_state_t* dst_state, const XXH3p_state_t* src_state); | |
502 | ||
503 | ||
504 | /* XXH3p_64bits_reset() : | |
505 | * initialize with default parameters. | |
506 | * result will be equivalent to `XXH3p_64bits()`. */ | |
507 | XXH_PUBLIC_API XXH_errorcode XXH3p_64bits_reset(XXH3p_state_t* statePtr); | |
508 | /* XXH3p_64bits_reset_withSeed() : | |
509 | * generate a custom secret from `seed`, and store it into state. | |
510 | * digest will be equivalent to `XXH3p_64bits_withSeed()`. */ | |
511 | XXH_PUBLIC_API XXH_errorcode XXH3p_64bits_reset_withSeed(XXH3p_state_t* statePtr, XXH64_hash_t seed); | |
512 | /* XXH3p_64bits_reset_withSecret() : | |
513 | * `secret` is referenced, and must outlive the hash streaming session. | |
514 | * secretSize must be >= XXH3p_SECRET_SIZE_MIN. | |
515 | */ | |
516 | XXH_PUBLIC_API XXH_errorcode XXH3p_64bits_reset_withSecret(XXH3p_state_t* statePtr, const void* secret, size_t secretSize); | |
517 | ||
518 | XXH_PUBLIC_API XXH_errorcode XXH3p_64bits_update (XXH3p_state_t* statePtr, const void* input, size_t length); | |
519 | XXH_PUBLIC_API XXH64_hash_t XXH3p_64bits_digest (const XXH3p_state_t* statePtr); | |
520 | ||
521 | ||
522 | /* 128-bit */ | |
523 | ||
524 | #ifdef XXH_NAMESPACE | |
525 | # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128) | |
526 | # define XXH3p_128bits XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits) | |
527 | # define XXH3p_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_withSeed) | |
528 | # define XXH3p_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_withSecret) | |
529 | ||
530 | # define XXH3p_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_reset) | |
531 | # define XXH3p_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_reset_withSeed) | |
532 | # define XXH3p_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_reset_withSecret) | |
533 | # define XXH3p_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_update) | |
534 | # define XXH3p_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3p_128bits_digest) | |
535 | ||
536 | # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual) | |
537 | # define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp) | |
538 | # define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash) | |
539 | # define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical) | |
494da23a TL |
540 | #endif |
541 | ||
f67539c2 TL |
542 | typedef struct { |
543 | XXH64_hash_t low64; | |
544 | XXH64_hash_t high64; | |
545 | } XXH128_hash_t; | |
546 | ||
547 | XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed); | |
548 | XXH_PUBLIC_API XXH128_hash_t XXH3p_128bits(const void* data, size_t len); | |
549 | XXH_PUBLIC_API XXH128_hash_t XXH3p_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); /* == XXH128() */ | |
550 | XXH_PUBLIC_API XXH128_hash_t XXH3p_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize); | |
551 | ||
552 | XXH_PUBLIC_API XXH_errorcode XXH3p_128bits_reset(XXH3p_state_t* statePtr); | |
553 | XXH_PUBLIC_API XXH_errorcode XXH3p_128bits_reset_withSeed(XXH3p_state_t* statePtr, XXH64_hash_t seed); | |
554 | XXH_PUBLIC_API XXH_errorcode XXH3p_128bits_reset_withSecret(XXH3p_state_t* statePtr, const void* secret, size_t secretSize); | |
555 | ||
556 | XXH_PUBLIC_API XXH_errorcode XXH3p_128bits_update (XXH3p_state_t* statePtr, const void* input, size_t length); | |
557 | XXH_PUBLIC_API XXH128_hash_t XXH3p_128bits_digest (const XXH3p_state_t* statePtr); | |
558 | ||
559 | ||
560 | /* Note : for better performance, following functions can be inlined, | |
561 | * using XXH_INLINE_ALL */ | |
562 | ||
563 | /* return : 1 is equal, 0 if different */ | |
564 | XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2); | |
565 | ||
566 | /* This comparator is compatible with stdlib's qsort(). | |
567 | * return : >0 if *h128_1 > *h128_2 | |
568 | * <0 if *h128_1 < *h128_2 | |
569 | * =0 if *h128_1 == *h128_2 */ | |
570 | XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2); | |
571 | ||
572 | ||
573 | /*====== Canonical representation ======*/ | |
574 | typedef struct { unsigned char digest[16]; } XXH128_canonical_t; | |
575 | XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash); | |
576 | XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src); | |
577 | ||
578 | ||
579 | #endif /* XXH_NO_LONG_LONG */ | |
580 | ||
581 | ||
582 | /*-********************************************************************** | |
583 | * XXH_INLINE_ALL | |
584 | ************************************************************************/ | |
585 | #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) | |
586 | # include "xxhash.cc" /* include xxhash function bodies as `static`, for inlining */ | |
494da23a | 587 | #endif |
7c673cae | 588 | |
f67539c2 TL |
589 | |
590 | ||
591 | #endif /* XXH_STATIC_LINKING_ONLY */ | |
592 | ||
593 | ||
7c673cae | 594 | #if defined (__cplusplus) |
f67539c2 | 595 | } |
7c673cae | 596 | #endif |
f67539c2 TL |
597 | |
598 | #endif /* XXHASH_H_5627135585666179 */ |