]>
git.proxmox.com Git - libgit2.git/blob - src/khash.h
3 Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 KHASH_MAP_INIT_INT(32, char)
34 khash_t(32) *h = kh_init(32);
35 k = kh_put(32, h, 5, &ret);
37 k = kh_get(32, h, 10);
38 is_missing = (k == kh_end(h));
41 for (k = kh_begin(h); k != kh_end(h); ++k)
42 if (kh_exist(h, k)) kh_value(h, k) = 1;
51 * Minor code clean up; no actual effect.
55 * The capacity is a power of 2. This seems to dramatically improve the
56 speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
58 - http://code.google.com/p/ulib/
59 - http://nothings.org/computer/judy/
61 * Allow to optionally use linear probing which usually has better
62 performance for random input. Double hashing is still the default as it
63 is more robust to certain non-random input.
65 * Added Wang's integer hash function (not used by default). This hash
66 function is more robust to certain non-random input.
70 * Allow to declare global functions.
78 * Corrected the example
83 * Improved speed a little in kh_put()
88 * Fixed a compiling error
92 * Changed to token concatenation which increases flexibility.
96 * Fixed a bug in kh_get(), which has not been tested previously.
110 Generic hash table library.
113 #define AC_VERSION_KHASH_H "0.2.6"
119 /* compipler specific configuration */
121 #if UINT_MAX == 0xffffffffu
122 typedef unsigned int khint32_t
;
123 #elif ULONG_MAX == 0xffffffffu
124 typedef unsigned long khint32_t
;
127 #if ULONG_MAX == ULLONG_MAX
128 typedef unsigned long khint64_t
;
130 typedef unsigned long long khint64_t
;
134 #define inline __inline
137 typedef khint32_t khint_t
;
138 typedef khint_t khiter_t
;
140 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
141 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
142 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
143 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
144 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
145 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
146 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
149 #define __ac_inc(k, m) 1
151 #define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m)
154 #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
157 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
161 #define kcalloc(N,Z) calloc(N,Z)
164 #define kmalloc(Z) malloc(Z)
167 #define krealloc(P,Z) realloc(P,Z)
170 #define kfree(P) free(P)
173 static const double __ac_HASH_UPPER
= 0.77;
175 #define __KHASH_TYPE(name, khkey_t, khval_t) \
177 khint_t n_buckets, size, n_occupied, upper_bound; \
183 #define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
184 extern kh_##name##_t *kh_init_##name(void); \
185 extern void kh_destroy_##name(kh_##name##_t *h); \
186 extern void kh_clear_##name(kh_##name##_t *h); \
187 extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
188 extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
189 extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
190 extern void kh_del_##name(kh_##name##_t *h, khint_t x);
192 #define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
193 SCOPE kh_##name##_t *kh_init_##name(void) { \
194 return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
196 SCOPE void kh_destroy_##name(kh_##name##_t *h) \
199 kfree((void *)h->keys); kfree(h->flags); \
200 kfree((void *)h->vals); \
204 SCOPE void kh_clear_##name(kh_##name##_t *h) \
206 if (h && h->flags) { \
207 memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
208 h->size = h->n_occupied = 0; \
211 SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
213 if (h->n_buckets) { \
214 khint_t inc, k, i, last, mask; \
215 mask = h->n_buckets - 1; \
216 k = __hash_func(key); i = k & mask; \
217 inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \
218 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
219 i = (i + inc) & mask; \
220 if (i == last) return h->n_buckets; \
222 return __ac_iseither(h->flags, i)? h->n_buckets : i; \
225 SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
226 { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
227 khint32_t *new_flags = 0; \
230 kroundup32(new_n_buckets); \
231 if (new_n_buckets < 4) new_n_buckets = 4; \
232 if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
233 else { /* hash table size to be changed (shrink or expand); rehash */ \
234 new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
235 if (!new_flags) return -1; \
236 memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
237 if (h->n_buckets < new_n_buckets) { /* expand */ \
238 khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
239 if (!new_keys) return -1; \
240 h->keys = new_keys; \
242 khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
243 if (!new_vals) return -1; \
244 h->vals = new_vals; \
246 } /* otherwise shrink */ \
249 if (j) { /* rehashing is needed */ \
250 for (j = 0; j != h->n_buckets; ++j) { \
251 if (__ac_iseither(h->flags, j) == 0) { \
252 khkey_t key = h->keys[j]; \
255 new_mask = new_n_buckets - 1; \
256 if (kh_is_map) val = h->vals[j]; \
257 __ac_set_isdel_true(h->flags, j); \
258 while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
260 k = __hash_func(key); \
262 inc = __ac_inc(k, new_mask); \
263 while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
264 __ac_set_isempty_false(new_flags, i); \
265 if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
266 { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
267 if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
268 __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
269 } else { /* write the element and jump out of the loop */ \
271 if (kh_is_map) h->vals[i] = val; \
277 if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
278 h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
279 if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
281 kfree(h->flags); /* free the working space */ \
282 h->flags = new_flags; \
283 h->n_buckets = new_n_buckets; \
284 h->n_occupied = h->size; \
285 h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
289 SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
292 if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
293 if (h->n_buckets > (h->size<<1)) { \
294 if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
295 *ret = -1; return h->n_buckets; \
297 } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
298 *ret = -1; return h->n_buckets; \
300 } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
302 khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
303 x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
304 if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
306 inc = __ac_inc(k, mask); last = i; \
307 while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
308 if (__ac_isdel(h->flags, i)) site = i; \
309 i = (i + inc) & mask; \
310 if (i == last) { x = site; break; } \
312 if (x == h->n_buckets) { \
313 if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
318 if (__ac_isempty(h->flags, x)) { /* not present at all */ \
320 __ac_set_isboth_false(h->flags, x); \
321 ++h->size; ++h->n_occupied; \
323 } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
325 __ac_set_isboth_false(h->flags, x); \
328 } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
331 SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
333 if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
334 __ac_set_isdel_true(h->flags, x); \
339 #define KHASH_DECLARE(name, khkey_t, khval_t) \
340 __KHASH_TYPE(name, khkey_t, khval_t) \
341 __KHASH_PROTOTYPES(name, khkey_t, khval_t)
343 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
344 __KHASH_TYPE(name, khkey_t, khval_t) \
345 __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
347 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
348 KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
350 /* --- BEGIN OF HASH FUNCTIONS --- */
353 @abstract Integer hash function
354 @param key The integer [khint32_t]
355 @return The hash value [khint_t]
357 #define kh_int_hash_func(key) (khint32_t)(key)
359 @abstract Integer comparison function
361 #define kh_int_hash_equal(a, b) ((a) == (b))
363 @abstract 64-bit integer hash function
364 @param key The integer [khint64_t]
365 @return The hash value [khint_t]
367 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
369 @abstract 64-bit integer comparison function
371 #define kh_int64_hash_equal(a, b) ((a) == (b))
373 @abstract const char* hash function
374 @param s Pointer to a null terminated string
375 @return The hash value
377 static inline khint_t
__ac_X31_hash_string(const char *s
)
379 khint_t h
= (khint_t
)*s
;
380 if (h
) for (++s
; *s
; ++s
) h
= (h
<< 5) - h
+ (khint_t
)*s
;
384 @abstract Another interface to const char* hash function
385 @param key Pointer to a null terminated string [const char*]
386 @return The hash value [khint_t]
388 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
390 @abstract Const char* comparison function
392 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
394 static inline khint_t
__ac_Wang_hash(khint_t key
)
404 #define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
406 /* --- END OF HASH FUNCTIONS --- */
408 /* Other convenient macros... */
411 @abstract Type of the hash table.
412 @param name Name of the hash table [symbol]
414 #define khash_t(name) kh_##name##_t
417 @abstract Initiate a hash table.
418 @param name Name of the hash table [symbol]
419 @return Pointer to the hash table [khash_t(name)*]
421 #define kh_init(name) kh_init_##name()
424 @abstract Destroy a hash table.
425 @param name Name of the hash table [symbol]
426 @param h Pointer to the hash table [khash_t(name)*]
428 #define kh_destroy(name, h) kh_destroy_##name(h)
431 @abstract Reset a hash table without deallocating memory.
432 @param name Name of the hash table [symbol]
433 @param h Pointer to the hash table [khash_t(name)*]
435 #define kh_clear(name, h) kh_clear_##name(h)
438 @abstract Resize a hash table.
439 @param name Name of the hash table [symbol]
440 @param h Pointer to the hash table [khash_t(name)*]
441 @param s New size [khint_t]
443 #define kh_resize(name, h, s) kh_resize_##name(h, s)
446 @abstract Insert a key to the hash table.
447 @param name Name of the hash table [symbol]
448 @param h Pointer to the hash table [khash_t(name)*]
449 @param k Key [type of keys]
450 @param r Extra return code: 0 if the key is present in the hash table;
451 1 if the bucket is empty (never used); 2 if the element in
452 the bucket has been deleted [int*]
453 @return Iterator to the inserted element [khint_t]
455 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
458 @abstract Retrieve a key from the hash table.
459 @param name Name of the hash table [symbol]
460 @param h Pointer to the hash table [khash_t(name)*]
461 @param k Key [type of keys]
462 @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
464 #define kh_get(name, h, k) kh_get_##name(h, k)
467 @abstract Remove a key from the hash table.
468 @param name Name of the hash table [symbol]
469 @param h Pointer to the hash table [khash_t(name)*]
470 @param k Iterator to the element to be deleted [khint_t]
472 #define kh_del(name, h, k) kh_del_##name(h, k)
475 @abstract Test whether a bucket contains data.
476 @param h Pointer to the hash table [khash_t(name)*]
477 @param x Iterator to the bucket [khint_t]
478 @return 1 if containing data; 0 otherwise [int]
480 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
483 @abstract Get key given an iterator
484 @param h Pointer to the hash table [khash_t(name)*]
485 @param x Iterator to the bucket [khint_t]
486 @return Key [type of keys]
488 #define kh_key(h, x) ((h)->keys[x])
491 @abstract Get value given an iterator
492 @param h Pointer to the hash table [khash_t(name)*]
493 @param x Iterator to the bucket [khint_t]
494 @return Value [type of values]
495 @discussion For hash sets, calling this results in segfault.
497 #define kh_val(h, x) ((h)->vals[x])
500 @abstract Alias of kh_val()
502 #define kh_value(h, x) ((h)->vals[x])
505 @abstract Get the start iterator
506 @param h Pointer to the hash table [khash_t(name)*]
507 @return The start iterator [khint_t]
509 #define kh_begin(h) (khint_t)(0)
512 @abstract Get the end iterator
513 @param h Pointer to the hash table [khash_t(name)*]
514 @return The end iterator [khint_t]
516 #define kh_end(h) ((h)->n_buckets)
519 @abstract Get the number of elements in the hash table
520 @param h Pointer to the hash table [khash_t(name)*]
521 @return Number of elements in the hash table [khint_t]
523 #define kh_size(h) ((h)->size)
526 @abstract Get the number of buckets in the hash table
527 @param h Pointer to the hash table [khash_t(name)*]
528 @return Number of buckets in the hash table [khint_t]
530 #define kh_n_buckets(h) ((h)->n_buckets)
533 @abstract Iterate over the entries in the hash table
534 @param h Pointer to the hash table [khash_t(name)*]
535 @param kvar Variable to which key will be assigned
536 @param vvar Variable to which value will be assigned
537 @param code Block of code to execute
539 #define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
540 for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
541 if (!kh_exist(h,__i)) continue; \
542 (kvar) = kh_key(h,__i); \
543 (vvar) = kh_val(h,__i); \
548 @abstract Iterate over the values in the hash table
549 @param h Pointer to the hash table [khash_t(name)*]
550 @param vvar Variable to which value will be assigned
551 @param code Block of code to execute
553 #define kh_foreach_value(h, vvar, code) { khint_t __i; \
554 for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
555 if (!kh_exist(h,__i)) continue; \
556 (vvar) = kh_val(h,__i); \
560 /* More conenient interfaces */
563 @abstract Instantiate a hash set containing integer keys
564 @param name Name of the hash table [symbol]
566 #define KHASH_SET_INIT_INT(name) \
567 KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
570 @abstract Instantiate a hash map containing integer keys
571 @param name Name of the hash table [symbol]
572 @param khval_t Type of values [type]
574 #define KHASH_MAP_INIT_INT(name, khval_t) \
575 KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
578 @abstract Instantiate a hash map containing 64-bit integer keys
579 @param name Name of the hash table [symbol]
581 #define KHASH_SET_INIT_INT64(name) \
582 KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
585 @abstract Instantiate a hash map containing 64-bit integer keys
586 @param name Name of the hash table [symbol]
587 @param khval_t Type of values [type]
589 #define KHASH_MAP_INIT_INT64(name, khval_t) \
590 KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
592 typedef const char *kh_cstr_t
;
594 @abstract Instantiate a hash map containing const char* keys
595 @param name Name of the hash table [symbol]
597 #define KHASH_SET_INIT_STR(name) \
598 KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
601 @abstract Instantiate a hash map containing const char* keys
602 @param name Name of the hash table [symbol]
603 @param khval_t Type of values [type]
605 #define KHASH_MAP_INIT_STR(name, khval_t) \
606 KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
608 #endif /* __AC_KHASH_H */