2 * String hash computation (interning).
5 #include "duk_internal.h"
7 /* constants for duk_hashstring() */
8 #define DUK__STRHASH_SHORTSTRING 4096L
9 #define DUK__STRHASH_MEDIUMSTRING (256L * 1024L)
10 #define DUK__STRHASH_BLOCKSIZE 256L
12 DUK_INTERNAL duk_uint32_t
duk_heap_hashstring(duk_heap
*heap
, const duk_uint8_t
*str
, duk_size_t len
) {
16 * Sampling long strings by byte skipping (like Lua does) is potentially
17 * a cache problem. Here we do 'block skipping' instead for long strings:
18 * hash an initial part, and then sample the rest of the string with
19 * reasonably sized chunks.
21 * Skip should depend on length and bound the total time to roughly
24 * With current values:
26 * 1M string => 256 * 241 = 61696 bytes (0.06M) of hashing
27 * 1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing
29 * After an initial part has been hashed, an offset is applied before
30 * starting the sampling. The initial offset is computed from the
31 * hash of the initial part of the string. The idea is to avoid the
32 * case that all long strings have certain offset ranges that are never
36 /* note: mixing len into seed improves hashing when skipping */
37 duk_uint32_t str_seed
= heap
->hash_seed
^ ((duk_uint32_t
) len
);
39 if (len
<= DUK__STRHASH_SHORTSTRING
) {
40 hash
= duk_util_hashbytes(str
, len
, str_seed
);
45 if (len
<= DUK__STRHASH_MEDIUMSTRING
) {
46 skip
= (duk_size_t
) (16 * DUK__STRHASH_BLOCKSIZE
+ DUK__STRHASH_BLOCKSIZE
);
48 skip
= (duk_size_t
) (256 * DUK__STRHASH_BLOCKSIZE
+ DUK__STRHASH_BLOCKSIZE
);
51 hash
= duk_util_hashbytes(str
, (duk_size_t
) DUK__STRHASH_SHORTSTRING
, str_seed
);
52 off
= DUK__STRHASH_SHORTSTRING
+ (skip
* (hash
% 256)) / 256;
54 /* XXX: inefficient loop */
56 duk_size_t left
= len
- off
;
57 duk_size_t now
= (duk_size_t
) (left
> DUK__STRHASH_BLOCKSIZE
? DUK__STRHASH_BLOCKSIZE
: left
);
58 hash
^= duk_util_hashbytes(str
+ off
, now
, str_seed
);
63 #if defined(DUK_USE_STRHASH16)
64 /* Truncate to 16 bits here, so that a computed hash can be compared
65 * against a hash stored in a 16-bit field.