/* Hash routine.
 * Copyright (C) 1998 Kunihiro Ishiguro
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2, or (at your
 * option) any later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <zebra.h>
#include <math.h>

#include "hash.h"
#include "memory.h"
#include "linklist.h"
#include "termtable.h"
#include "vty.h"
#include "command.h"
#include "libfrr.h"
#include "frr_pthread.h"

DEFINE_MTYPE_STATIC(LIB, HASH, "Hash")
DEFINE_MTYPE_STATIC(LIB, HASH_BACKET, "Hash Bucket")
DEFINE_MTYPE_STATIC(LIB, HASH_INDEX, "Hash Index")

static pthread_mutex_t _hashes_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct list *_hashes;

struct hash *hash_create_size(unsigned int size,
			      unsigned int (*hash_key)(const void *),
			      bool (*hash_cmp)(const void *, const void *),
			      const char *name)
{
	struct hash *hash;

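	/* The table size must be a power of two so that the bucket index
	 * can be computed as (key & (size - 1)) instead of a modulo.
	 */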
	assert((size & (size - 1)) == 0);
	hash = XCALLOC(MTYPE_HASH, sizeof(struct hash));
	hash->index =
		XCALLOC(MTYPE_HASH_INDEX, sizeof(struct hash_bucket *) * size);
	hash->size = size;
	hash->hash_key = hash_key;
	hash->hash_cmp = hash_cmp;
	hash->count = 0;
	hash->name = name ? XSTRDUP(MTYPE_HASH, name) : NULL;
	hash->stats.empty = hash->size;

	frr_with_mutex(&_hashes_mtx) {
		if (!_hashes)
			_hashes = list_new();

		listnode_add(_hashes, hash);
	}

	return hash;
}

struct hash *hash_create(unsigned int (*hash_key)(const void *),
			 bool (*hash_cmp)(const void *, const void *),
			 const char *name)
{
	return hash_create_size(HASH_INITIAL_SIZE, hash_key, hash_cmp, name);
}

void *hash_alloc_intern(void *arg)
{
	return arg;
}

/*
 * ssq = ssq + (new^2 - old^2)
 *     = ssq + ((new + old) * (new - old))
 */
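/*
 * Worked example: when a chain grows from 2 to 3 entries, ssq increases
 * by (3 + 2) * (3 - 2) = 5, which equals 3^2 - 2^2.
 */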
#define hash_update_ssq(hz, old, new) \
	do { \
		int _adjust = (new + old) * (new - old); \
		if (_adjust < 0) \
			atomic_fetch_sub_explicit(&hz->stats.ssq, -_adjust, \
						  memory_order_relaxed); \
		else \
			atomic_fetch_add_explicit(&hz->stats.ssq, _adjust, \
						  memory_order_relaxed); \
	} while (0)

/* Expand hash if the chain length exceeds the threshold. */
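/* The table doubles in size; every entry is rehashed into the new index
 * with (key & (new_size - 1)), and the per-chain lengths, empty-bucket
 * count and ssq statistic are updated as the entries are moved.
 */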
static void hash_expand(struct hash *hash)
{
	unsigned int i, new_size;
	struct hash_bucket *hb, *hbnext, **new_index;

	new_size = hash->size * 2;

	if (hash->max_size && new_size > hash->max_size)
		return;

	new_index = XCALLOC(MTYPE_HASH_INDEX,
			    sizeof(struct hash_bucket *) * new_size);

	hash->stats.empty = new_size;

	for (i = 0; i < hash->size; i++)
		for (hb = hash->index[i]; hb; hb = hbnext) {
			unsigned int h = hb->key & (new_size - 1);

			hbnext = hb->next;
			hb->next = new_index[h];

			int oldlen = hb->next ? hb->next->len : 0;
			int newlen = oldlen + 1;

			if (newlen == 1)
				hash->stats.empty--;
			else
				hb->next->len = 0;

			hb->len = newlen;

			hash_update_ssq(hash, oldlen, newlen);

			new_index[h] = hb;
		}

	/* Switch to new table */
	XFREE(MTYPE_HASH_INDEX, hash->index);
	hash->size = new_size;
	hash->index = new_index;
}

void *hash_get(struct hash *hash, void *data, void *(*alloc_func)(void *))
{
	unsigned int key;
	unsigned int index;
	void *newdata;
	struct hash_bucket *bucket;

	if (!alloc_func && !hash->count)
		return NULL;

	key = (*hash->hash_key)(data);
	index = key & (hash->size - 1);

	for (bucket = hash->index[index]; bucket != NULL;
	     bucket = bucket->next) {
		if (bucket->key == key && (*hash->hash_cmp)(bucket->data, data))
			return bucket->data;
	}

	if (alloc_func) {
		newdata = (*alloc_func)(data);
		if (newdata == NULL)
			return NULL;

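		/* Grow the table first if adding this entry would exceed the
		 * expansion threshold (HASH_THRESHOLD); hash->size may change,
		 * so the bucket index is recomputed afterwards.
		 */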
		if (HASH_THRESHOLD(hash->count + 1, hash->size)) {
			hash_expand(hash);
			index = key & (hash->size - 1);
		}

		bucket = XCALLOC(MTYPE_HASH_BACKET, sizeof(struct hash_bucket));
		bucket->data = newdata;
		bucket->key = key;
		bucket->next = hash->index[index];
		hash->index[index] = bucket;
		hash->count++;

		int oldlen = bucket->next ? bucket->next->len : 0;
		int newlen = oldlen + 1;

		if (newlen == 1)
			hash->stats.empty--;
		else
			bucket->next->len = 0;

		bucket->len = newlen;

		hash_update_ssq(hash, oldlen, newlen);

		return bucket->data;
	}
	return NULL;
}

void *hash_lookup(struct hash *hash, void *data)
{
	return hash_get(hash, data, NULL);
}

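/* Bernstein-style string hash (multiply by 33, XOR in each byte). */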
unsigned int string_hash_make(const char *str)
{
	unsigned int hash = 0;

	while (*str)
		hash = (hash * 33) ^ (unsigned int)*str++;

	return hash;
}

void *hash_release(struct hash *hash, void *data)
{
	void *ret;
	unsigned int key;
	unsigned int index;
	struct hash_bucket *bucket;
	struct hash_bucket *pp;

	key = (*hash->hash_key)(data);
	index = key & (hash->size - 1);

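	/* Only the chain head caches the chain length in ->len; entries
	 * further down the chain keep len == 0 (see hash_get/hash_expand),
	 * so the old length is always read from hash->index[index].
	 */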
	for (bucket = pp = hash->index[index]; bucket; bucket = bucket->next) {
		if (bucket->key == key
		    && (*hash->hash_cmp)(bucket->data, data)) {
			int oldlen = hash->index[index]->len;
			int newlen = oldlen - 1;

			if (bucket == pp)
				hash->index[index] = bucket->next;
			else
				pp->next = bucket->next;

			if (hash->index[index])
				hash->index[index]->len = newlen;
			else
				hash->stats.empty++;

			hash_update_ssq(hash, oldlen, newlen);

			ret = bucket->data;
			XFREE(MTYPE_HASH_BACKET, bucket);
			hash->count--;
			return ret;
		}
		pp = bucket;
	}
	return NULL;
}

void hash_iterate(struct hash *hash, void (*func)(struct hash_bucket *, void *),
		  void *arg)
{
	unsigned int i;
	struct hash_bucket *hb;
	struct hash_bucket *hbnext;

	for (i = 0; i < hash->size; i++)
		for (hb = hash->index[i]; hb; hb = hbnext) {
			/* get pointer to next hash bucket here, in case (*func)
			 * decides to delete hb by calling hash_release
			 */
			hbnext = hb->next;
			(*func)(hb, arg);
		}
}

void hash_walk(struct hash *hash, int (*func)(struct hash_bucket *, void *),
	       void *arg)
{
	unsigned int i;
	struct hash_bucket *hb;
	struct hash_bucket *hbnext;
	int ret = HASHWALK_CONTINUE;

	for (i = 0; i < hash->size; i++) {
		for (hb = hash->index[i]; hb; hb = hbnext) {
			/* get pointer to next hash bucket here, in case (*func)
			 * decides to delete hb by calling hash_release
			 */
			hbnext = hb->next;
			ret = (*func)(hb, arg);
			if (ret == HASHWALK_ABORT)
				return;
		}
	}
}

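/* Remove every element from the table. The index array itself is kept
 * (unlike hash_free()), so the hash can be reused afterwards.
 */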
void hash_clean(struct hash *hash, void (*free_func)(void *))
{
	unsigned int i;
	struct hash_bucket *hb;
	struct hash_bucket *next;

	for (i = 0; i < hash->size; i++) {
		for (hb = hash->index[i]; hb; hb = next) {
			next = hb->next;

			if (free_func)
				(*free_func)(hb->data);

			XFREE(MTYPE_HASH_BACKET, hb);
			hash->count--;
		}
		hash->index[i] = NULL;
	}

	hash->stats.ssq = 0;
	hash->stats.empty = hash->size;
}

static void hash_to_list_iter(struct hash_bucket *hb, void *arg)
{
	struct list *list = arg;

	listnode_add(list, hb->data);
}

struct list *hash_to_list(struct hash *hash)
{
	struct list *list = list_new();

	hash_iterate(hash, hash_to_list_iter, list);
	return list;
}

void hash_free(struct hash *hash)
{
	frr_with_mutex(&_hashes_mtx) {
		if (_hashes) {
			listnode_delete(_hashes, hash);
			if (_hashes->count == 0) {
				list_delete(&_hashes);
			}
		}
	}

	XFREE(MTYPE_HASH, hash->name);

	XFREE(MTYPE_HASH_INDEX, hash->index);
	XFREE(MTYPE_HASH, hash);
}


/* CLI commands ------------------------------------------------------------ */

DEFUN_NOSH(show_hash_stats,
	   show_hash_stats_cmd,
	   "show debugging hashtable [statistics]",
	   SHOW_STR
	   DEBUG_STR
	   "Statistics about hash tables\n"
	   "Statistics about hash tables\n")
{
	struct hash *h;
	struct listnode *ln;
	struct ttable *tt = ttable_new(&ttable_styles[TTSTYLE_BLANK]);

	ttable_add_row(tt, "Hash table|Buckets|Entries|Empty|LF|SD|FLF|SD");
	tt->style.cell.lpad = 2;
	tt->style.cell.rpad = 1;
	tt->style.corner = '+';
	ttable_restyle(tt);
	ttable_rowseps(tt, 0, BOTTOM, true, '-');

	/* Summary statistics calculated are:
	 *
	 * - Load factor: This is the number of elements in the table divided
	 *   by the number of buckets. Since this hash table implementation
	 *   uses chaining, this value can be greater than 1.
	 *   This number provides information on how 'full' the table is, but
	 *   does not provide information on how evenly distributed the
	 *   elements are.
	 *   Notably, a load factor >= 1 does not imply that every bucket has
	 *   an element; with a pathological hash function, all elements could
	 *   be in a single bucket.
	 *
	 * - Full load factor: this is the number of elements in the table
	 *   divided by the number of buckets that have some elements in them.
	 *
	 * - Std. Dev.: This is the standard deviation calculated from the
	 *   relevant load factor. If the load factor is the mean of number of
	 *   elements per bucket, the standard deviation measures how much any
	 *   particular bucket is likely to deviate from the mean.
	 *   As a rule of thumb this number should be less than 2, and ideally
	 *   <= 1 for optimal performance. A number larger than 3 generally
	 *   indicates a poor hash function.
	 */
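	/* The sum of squared chain lengths (stats.ssq) is maintained
	 * incrementally by hash_update_ssq() on insert, release and expand,
	 * so the standard deviations can be derived here without walking
	 * each table.
	 */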

	double lf;    // load factor
	double flf;   // full load factor
	double var;   // overall variance
	double fvar;  // full variance
	double stdv;  // overall stddev
	double fstdv; // full stddev

	long double x2;   // h->count ^ 2
	long double ldc;  // (long double) h->count
	long double full; // h->size - h->stats.empty
	long double ssq;  // ssq cast to long double

	pthread_mutex_lock(&_hashes_mtx);
	if (!_hashes) {
		pthread_mutex_unlock(&_hashes_mtx);
		ttable_del(tt);
		vty_out(vty, "No hash tables in use.\n");
		return CMD_SUCCESS;
	}

	for (ALL_LIST_ELEMENTS_RO(_hashes, ln, h)) {
		if (!h->name)
			continue;

		ssq = (long double)h->stats.ssq;
		x2 = h->count * h->count;
		ldc = (long double)h->count;
		full = h->size - h->stats.empty;
		lf = h->count / (double)h->size;
		flf = full ? h->count / (double)(full) : 0;
		var = ldc ? (1.0 / ldc) * (ssq - x2 / ldc) : 0;
		fvar = full ? (1.0 / full) * (ssq - x2 / full) : 0;
		var = (var < .0001) ? 0 : var;
		fvar = (fvar < .0001) ? 0 : fvar;
		stdv = sqrt(var);
		fstdv = sqrt(fvar);

		ttable_add_row(tt, "%s|%d|%ld|%.0f%%|%.2lf|%.2lf|%.2lf|%.2lf",
			       h->name, h->size, h->count,
			       (h->stats.empty / (double)h->size) * 100, lf,
			       stdv, flf, fstdv);
	}
	pthread_mutex_unlock(&_hashes_mtx);

	/* display header */
	char header[] = "Showing hash table statistics for ";
	char underln[sizeof(header) + strlen(frr_protonameinst)];
	memset(underln, '-', sizeof(underln));
	underln[sizeof(underln) - 1] = '\0';
	vty_out(vty, "%s%s\n", header, frr_protonameinst);
	vty_out(vty, "%s\n", underln);

	vty_out(vty, "# allocated: %d\n", _hashes->count);
	vty_out(vty, "# named: %d\n\n", tt->nrows - 1);

	if (tt->nrows > 1) {
		ttable_colseps(tt, 0, RIGHT, true, '|');
		char *table = ttable_dump(tt, "\n");
		vty_out(vty, "%s\n", table);
		XFREE(MTYPE_TMP, table);
	} else
		vty_out(vty, "No named hash tables to display.\n");

	ttable_del(tt);

	return CMD_SUCCESS;
}

void hash_cmd_init(void)
{
	install_element(ENABLE_NODE, &show_hash_stats_cmd);
}