[ceph.git] / ceph / src / jaegertracing / opentelemetry-cpp / third_party / prometheus-cpp / 3rdparty / civetweb / src / third_party / duktape-1.8.0 / src-separate / duk_heap_hashstring.c

/*
 *  String hash computation (interning).
 *
 *  String hashing is performance critical because a string hash is computed
 *  for all new strings which are candidates to be added to the string table.
 *  However, strings actually added to the string table go through a codepoint
 *  length calculation which dominates performance because it goes through
 *  every byte of the input string (but only for strings added).
 *
 *  The string hash algorithm should be fast, but on the other hand provide
 *  good enough hashes to ensure both string table and object property table
 *  hash tables work reasonably well (i.e., there aren't too many collisions
 *  with real world inputs).  Unless the hash is cryptographic, it's always
 *  possible to craft inputs with maximal hash collisions.
 *
 *  NOTE: The hash algorithms must match src/dukutil.py:duk_heap_hashstring()
 *  for ROM string support!
 */

#include "duk_internal.h"

#if defined(DUK_USE_STRHASH_DENSE)
/* Constants for duk_heap_hashstring(). */
#define DUK__STRHASH_SHORTSTRING   4096L
#define DUK__STRHASH_MEDIUMSTRING  (256L * 1024L)
#define DUK__STRHASH_BLOCKSIZE     256L

DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
	/*
	 *  Dense string hash built on duk_util_hashbytes() (Murmurhash2
	 *  according to the original notes for this function).
	 *
	 *  - Strings of at most DUK__STRHASH_SHORTSTRING bytes are hashed in
	 *    full with a single call.
	 *
	 *  - Longer strings are only sampled: the first
	 *    DUK__STRHASH_SHORTSTRING bytes are hashed, and after that one
	 *    block of at most DUK__STRHASH_BLOCKSIZE bytes is hashed every
	 *    'stride' bytes, with the partial hashes XORed together.  The
	 *    stride is 17 blocks for strings up to DUK__STRHASH_MEDIUMSTRING
	 *    bytes and 257 blocks for anything longer, so the amount of work
	 *    grows much more slowly than the string length.
	 *
	 *  - The first sample position is derived from the hash of the
	 *    prefix and falls within one stride after the prefix.  This is
	 *    meant to (usually) avoid having offset ranges which are never
	 *    sampled for any long string.
	 *
	 *  The seed for every duk_util_hashbytes() call is the heap hash seed
	 *  XORed with the string length (truncated to 32 bits); mixing in the
	 *  length improves the hash when parts of the input are skipped.
	 *
	 *  The result must remain bit-exact: the file header notes that the
	 *  algorithm has to match src/dukutil.py for ROM string support.
	 */
	duk_uint32_t seed = heap->hash_seed ^ ((duk_uint32_t) len);
	duk_uint32_t result;

	if (len > DUK__STRHASH_SHORTSTRING) {
		duk_size_t stride;
		duk_size_t pos;

		/* Hash of the fixed-size prefix; also selects the first sample position. */
		result = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, seed);

		/* Distance between the starts of consecutive sampled blocks. */
		stride = (len > DUK__STRHASH_MEDIUMSTRING) ?
		         (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE) :
		         (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);

		/* (result % 256) / 256 picks a fraction of one stride as the
		 * initial displacement past the prefix.
		 */
		for (pos = DUK__STRHASH_SHORTSTRING + (stride * (result % 256)) / 256;
		     pos < len;
		     pos += stride) {
			duk_size_t remaining = len - pos;
			duk_size_t chunk = remaining;

			/* Clamp the sample to one block; the last sample may be shorter. */
			if (remaining > DUK__STRHASH_BLOCKSIZE) {
				chunk = (duk_size_t) DUK__STRHASH_BLOCKSIZE;
			}
			result ^= duk_util_hashbytes(str + pos, chunk, seed);
		}
	} else {
		/* Short string: hash every byte. */
		result = duk_util_hashbytes(str, len, seed);
	}

#if defined(DUK_USE_STRHASH16)
	/* Keep only the low 16 bits so that the computed value can be
	 * compared directly against a hash stored in a 16-bit field.
	 */
	result &= 0x0000ffffUL;
#endif
	return result;
}

#undef DUK__STRHASH_SHORTSTRING
#undef DUK__STRHASH_MEDIUMSTRING
#undef DUK__STRHASH_BLOCKSIZE
#else  /* DUK_USE_STRHASH_DENSE */
DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap *heap, const duk_uint8_t *str, duk_size_t len) {
	/*
	 *  Sparse string hash: a lightly modified "Bernstein hash", see
	 *
	 *      http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
	 *
	 *  Differences to the textbook version:
	 *
	 *  - The initial value is the heap hash seed XORed with the string
	 *    length (truncated to 32 bits) instead of the usual 5381.
	 *
	 *  - Bytes are visited from the end of the string towards the start,
	 *    similar to Lua 5.1.5.  The final byte is therefore the first one
	 *    considered, which is a good default because the changing part
	 *    of a string is more often in its suffix than in its prefix.
	 *
	 *  - Only every 'stride'th byte is mixed in, where the stride grows
	 *    with the string length (len >> DUK_USE_STRHASH_SKIP_SHIFT, plus
	 *    one), so the number of bytes visited stays bounded for long
	 *    strings.
	 *
	 *  The result must remain bit-exact: the file header notes that the
	 *  algorithm has to match src/dukutil.py for ROM string support.
	 */
	const duk_size_t stride = (len >> DUK_USE_STRHASH_SKIP_SHIFT) + 1;
	duk_uint32_t acc = heap->hash_seed ^ ((duk_uint32_t) len);
	duk_size_t remaining = len;

	/* 'remaining' is a one-past index of the next byte to mix in.  The
	 * guard keeps the unsigned subtraction from wrapping: stride >= 1 and
	 * the loop only runs while remaining >= stride.
	 */
	while (remaining >= stride) {
		DUK_ASSERT(remaining >= 1);  /* remaining >= stride, and stride >= 1 */
		acc = (acc * 33) + str[remaining - 1];
		remaining -= stride;
	}

#if defined(DUK_USE_STRHASH16)
	/* Keep only the low 16 bits so that the computed value can be
	 * compared directly against a hash stored in a 16-bit field.
	 */
	acc &= 0x0000ffffUL;
#endif
	return acc;
}
#endif  /* DUK_USE_STRHASH_DENSE */
Commit	Line	Data
1e59de90 TL	1	/*
	2	* String hash computation (interning).
	3	*
	4	* String hashing is performance critical because a string hash is computed
	5	* for all new strings which are candidates to be added to the string table.
	6	* However, strings actually added to the string table go through a codepoint
	7	* length calculation which dominates performance because it goes through
	8	* every byte of the input string (but only for strings added).
	9	*
	10	* The string hash algorithm should be fast, but on the other hand provide
	11	* good enough hashes to ensure both string table and object property table
	12	* hash tables work reasonably well (i.e., there aren't too many collisions
	13	* with real world inputs). Unless the hash is cryptographic, it's always
	14	* possible to craft inputs with maximal hash collisions.
	15	*
	16	* NOTE: The hash algorithms must match src/dukutil.py:duk_heap_hashstring()
	17	* for ROM string support!
	18	*/
	19
	20	#include "duk_internal.h"
	21
	22	#if defined(DUK_USE_STRHASH_DENSE)
	23	/* Constants for duk_hashstring(). */
	24	#define DUK__STRHASH_SHORTSTRING 4096L
	25	#define DUK__STRHASH_MEDIUMSTRING (256L * 1024L)
	26	#define DUK__STRHASH_BLOCKSIZE 256L
	27
	28	DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap heap, const duk_uint8_t str, duk_size_t len) {
	29	duk_uint32_t hash;
	30
	31	/* Use Murmurhash2 directly for short strings, and use "block skipping"
	32	* for long strings: hash an initial part and then sample the rest of
	33	* the string with reasonably sized chunks. An initial offset for the
	34	* sampling is computed based on a hash of the initial part of the string;
	35	* this is done to (usually) avoid the case where all long strings have
	36	* certain offset ranges which are never sampled.
	37	*
	38	* Skip should depend on length and bound the total time to roughly
	39	* logarithmic. With current values:
	40	*
	41	* 1M string => 256 * 241 = 61696 bytes (0.06M) of hashing
	42	* 1G string => 256 * 16321 = 4178176 bytes (3.98M) of hashing
	43	*
	44	* XXX: It would be better to compute the skip offset more "smoothly"
	45	* instead of having a few boundary values.
	46	*/
	47
	48	/* note: mixing len into seed improves hashing when skipping */
	49	duk_uint32_t str_seed = heap->hash_seed ^ ((duk_uint32_t) len);
	50
	51	if (len <= DUK__STRHASH_SHORTSTRING) {
	52	hash = duk_util_hashbytes(str, len, str_seed);
	53	} else {
	54	duk_size_t off;
	55	duk_size_t skip;
	56
	57	if (len <= DUK__STRHASH_MEDIUMSTRING) {
	58	skip = (duk_size_t) (16 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
	59	} else {
	60	skip = (duk_size_t) (256 * DUK__STRHASH_BLOCKSIZE + DUK__STRHASH_BLOCKSIZE);
	61	}
	62
	63	hash = duk_util_hashbytes(str, (duk_size_t) DUK__STRHASH_SHORTSTRING, str_seed);
	64	off = DUK__STRHASH_SHORTSTRING + (skip * (hash % 256)) / 256;
65
66	/* XXX: inefficient loop */
67	while (off < len) {
68	duk_size_t left = len - off;
69	duk_size_t now = (duk_size_t) (left > DUK__STRHASH_BLOCKSIZE ? DUK__STRHASH_BLOCKSIZE : left);
70	hash ^= duk_util_hashbytes(str + off, now, str_seed);
71	off += skip;
72	}
73	}
74
75	#if defined(DUK_USE_STRHASH16)
76	/* Truncate to 16 bits here, so that a computed hash can be compared
77	* against a hash stored in a 16-bit field.
78	*/
79	hash &= 0x0000ffffUL;
80	#endif
81	return hash;
82	}
83
84	#undef DUK__STRHASH_SHORTSTRING
85	#undef DUK__STRHASH_MEDIUMSTRING
86	#undef DUK__STRHASH_BLOCKSIZE
87	#else /* DUK_USE_STRHASH_DENSE */
88	DUK_INTERNAL duk_uint32_t duk_heap_hashstring(duk_heap heap, const duk_uint8_t str, duk_size_t len) {
89	duk_uint32_t hash;
90	duk_size_t step;
91	duk_size_t off;
92
93	/* Slightly modified "Bernstein hash" from:
94	*
95	* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
96	*
97	* Modifications: string skipping and reverse direction similar to
98	* Lua 5.1.5, and different hash initializer.
99	*
100	* The reverse direction ensures last byte it always included in the
101	* hash which is a good default as changing parts of the string are
102	* more often in the suffix than in the prefix.
103	*/
104
105	hash = heap->hash_seed ^ ((duk_uint32_t) len); /* Bernstein hash init value is normally 5381 */
106	step = (len >> DUK_USE_STRHASH_SKIP_SHIFT) + 1;
107	for (off = len; off >= step; off -= step) {
108	DUK_ASSERT(off >= 1); /* off >= step, and step >= 1 */
109	hash = (hash * 33) + str[off - 1];
110	}
111
112	#if defined(DUK_USE_STRHASH16)
113	/* Truncate to 16 bits here, so that a computed hash can be compared
114	* against a hash stored in a 16-bit field.
115	*/
116	hash &= 0x0000ffffUL;
117	#endif
118	return hash;
119	}
120	#endif /* DUK_USE_STRHASH_DENSE */