ceph/src/civetweb/src/third_party/duktape-1.5.2/src-separate/duk_heap_hashstring.c

   1 /*
   2  *  String hash computation (interning).
   3  *
   4  *  String hashing is performance critical because a string hash is computed
   5  *  for all new strings which are candidates to be added to the string table.
   6  *  However, strings actually added to the string table go through a codepoint
   7  *  length calculation which dominates performance because it goes through
   8  *  every byte of the input string (but only for strings added).
   9  *
  10  *  The string hash algorithm should be fast, but on the other hand provide
  11  *  good enough hashes to ensure both string table and object property table
  12  *  hash tables work reasonably well (i.e., there aren't too many collisions
  13  *  with real world inputs).  Unless the hash is cryptographic, it's always
  14  *  possible to craft inputs with maximal hash collisions.
  15  *
  16  *  NOTE: The hash algorithms must match src/dukutil.py:duk_heap_hashstring()
  17  *  for ROM string support!
  18  */
  19
  20 #include "duk_internal.h"
  21
  22 #if defined(DUK_USE_STRHASH_DENSE)
  23 /* Constants for duk_hashstring(). */
  24 #define DUK__STRHASH_SHORTSTRING   4096L
  25 #define DUK__STRHASH_MEDIUMSTRING  (256L * 1024L)
  26 #define DUK__STRHASH_BLOCKSIZE     256L
  27
  28 DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
  29         duk_uint32_t hash;
  30
  31         /* Use Murmurhash2 directly for short strings, and use "block skipping"
  32          * for long strings: hash an initial part and then sample the rest of
  33          * the string with reasonably sized chunks.  An initial offset for the
  34          * sampling is computed based on a hash of the initial part of the string;
  35          * this is done to (usually) avoid the case where all long strings have
  36          * certain offset ranges which are never sampled.
  37          *
  38          * Skip should depend on length and bound the total time to roughly
  39          * logarithmic.  With current values:
  40          *
  41          *   1M string => 256 * 241 = 61696 bytes (0.06M) of hashing
  42          *   1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing
  43          *
  44          * XXX: It would be better to compute the skip offset more "smoothly"
  45          * instead of having a few boundary values.
  46          */
  47
  48         /* note: mixing len into seed improves hashing when skipping */
  49         duk_uint32_t str_seed = heap->hash_seed ^ ((duk_uint32_t) len);
  50
  51         if (len <= DUK__STRHASH_SHORTSTRING) {
  52                 hash = duk_util_hashbytes(str, len, str_seed);
  53         } else {
  54                 duk_size_t off;
  55                 duk_size_t skip;
  56
  57                 if (len <= DUK__STRHASH_MEDIUMSTRING) {
  58                         skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
  59                 } else {
  60                         skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
  61                 }
  62
  63                 hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed);
  64                 off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256;
  65
  66                 /* XXX: inefficient loop */
  67                 while (off < len) {
  68                         duk_size_t left = len - off;
  69                         duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left);
  70                         hash ^= duk_util_hashbytes(str + off, now, str_seed);
  71                         off += skip;
  72                 }
  73         }
  74
  75 #if defined(DUK_USE_STRHASH16)
  76         /* Truncate to 16 bits here, so that a computed hash can be compared
  77          * against a hash stored in a 16-bit field.
  78          */
  79         hash &= 0x0000ffffUL;
  80 #endif
  81         return hash;
  82 }
  83
  84 #undef DUK__STRHASH_SHORTSTRING
  85 #undef DUK__STRHASH_MEDIUMSTRING
  86 #undef DUK__STRHASH_BLOCKSIZE
  87 #else  /* DUK_USE_STRHASH_DENSE */
  88 DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
  89         duk_uint32_t hash;
  90         duk_size_t step;
  91         duk_size_t off;
  92
  93         /* Slightly modified "Bernstein hash" from:
  94          *
  95          *     http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
  96          *
  97          * Modifications: string skipping and reverse direction similar to
  98          * Lua 5.1.5, and different hash initializer.
  99          *
 100          * The reverse direction ensures last byte it always included in the
 101          * hash which is a good default as changing parts of the string are
 102          * more often in the suffix than in the prefix.
 103          */
 104
 105         hash = heap->hash_seed ^ ((duk_uint32_t) len);  /* Bernstein hash init value is normally 5381 */
 106         step = (len >> DUK_USE_STRHASH_SKIP_SHIFT) + 1;
 107         for (off = len; off >= step; off -= step) {
 108                 DUK_ASSERT(off >= 1);  /* off >= step, and step >= 1 */
 109                 hash = (hash * 33) + str[off - 1];
 110         }
 111
 112 #if defined(DUK_USE_STRHASH16)
 113         /* Truncate to 16 bits here, so that a computed hash can be compared
 114          * against a hash stored in a 16-bit field.
 115          */
 116         hash &= 0x0000ffffUL;
 117 #endif
 118         return hash;
 119 }
 120 #endif  /* DUK_USE_STRHASH_DENSE */