ceph/src/civetweb/src/third_party/duktape-1.3.0/src-separate/duk_heap_hashstring.c

   1 /*
   2  *  String hash computation (interning).
   3  */
   4
   5 #include "duk_internal.h"
   6
   7 /* constants for duk_hashstring() */
   8 #define DUK__STRHASH_SHORTSTRING   4096L
   9 #define DUK__STRHASH_MEDIUMSTRING  (256L * 1024L)
  10 #define DUK__STRHASH_BLOCKSIZE     256L
  11
  12 DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
  13         duk_uint32_t hash;
  14
  15         /*
  16          *  Sampling long strings by byte skipping (like Lua does) is potentially
  17          *  a cache problem.  Here we do 'block skipping' instead for long strings:
  18          *  hash an initial part, and then sample the rest of the string with
  19          *  reasonably sized chunks.
  20          *
  21          *  Skip should depend on length and bound the total time to roughly
  22          *  logarithmic.
  23          *
  24          *  With current values:
  25          *
  26          *    1M string => 256 * 241 = 61696 bytes (0.06M) of hashing
  27          *    1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing
  28          *
  29          *  After an initial part has been hashed, an offset is applied before
  30          *  starting the sampling.  The initial offset is computed from the
  31          *  hash of the initial part of the string.  The idea is to avoid the
  32          *  case that all long strings have certain offset ranges that are never
  33          *  sampled.
  34          */
  35
  36         /* note: mixing len into seed improves hashing when skipping */
  37         duk_uint32_t str_seed = heap->hash_seed ^ ((duk_uint32_t) len);
  38
  39         if (len <= DUK__STRHASH_SHORTSTRING) {
  40                 hash = duk_util_hashbytes(str, len, str_seed);
  41         } else {
  42                 duk_size_t off;
  43                 duk_size_t skip;
  44
  45                 if (len <= DUK__STRHASH_MEDIUMSTRING) {
  46                         skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
  47                 } else {
  48                         skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
  49                 }
  50
  51                 hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed);
  52                 off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256;
  53
  54                 /* XXX: inefficient loop */
  55                 while (off < len) {
  56                         duk_size_t left = len - off;
  57                         duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left);
  58                         hash ^= duk_util_hashbytes(str + off, now, str_seed);
  59                         off += skip;
  60                 }
  61         }
  62
  63 #if defined(DUK_USE_STRHASH16)
  64         /* Truncate to 16 bits here, so that a computed hash can be compared
  65          * against a hash stored in a 16-bit field.
  66          */
  67         hash &= 0x0000ffffUL;
  68 #endif
  69         return hash;
  70 }