4 * Provides a cache to optimize indexed string lookups. The cache keeps
5 * track of (byte offset, char offset) states for a fixed number of strings.
6 * Otherwise we'd need to scan from either end of the string, as we store
7 * strings in (extended) UTF-8.
10 #include "duk_internal.h"
13 * Delete references to given hstring from the heap string cache.
15 * String cache references are 'weak': they are not counted towards
16 * reference counts, nor serve as roots for mark-and-sweep. When an
17 * object is about to be freed, such references need to be removed.
20 DUK_INTERNAL
void duk_heap_strcache_string_remove(duk_heap
*heap
, duk_hstring
*h
) {
22 for (i
= 0; i
< DUK_HEAP_STRCACHE_SIZE
; i
++) {
23 duk_strcache
*c
= heap
->strcache
+ i
;
25 DUK_DD(DUK_DDPRINT("deleting weak strcache reference to hstring %p from heap %p",
26 (void *) h
, (void *) heap
));
29 /* XXX: the string shouldn't appear twice, but we now loop to the
30 * end anyway; if fixed, add a looping assertion to ensure there
38 * String scanning helpers
41 DUK_LOCAL duk_uint8_t
*duk__scan_forwards(duk_uint8_t
*p
, duk_uint8_t
*q
, duk_uint_fast32_t n
) {
48 if ((*p
& 0xc0) != 0x80) {
57 DUK_LOCAL duk_uint8_t
*duk__scan_backwards(duk_uint8_t
*p
, duk_uint8_t
*q
, duk_uint_fast32_t n
) {
64 if ((*p
& 0xc0) != 0x80) {
74 * Convert char offset to byte offset
76 * Avoid using the string cache if possible: for ASCII strings byte and
77 * char offsets are equal and for short strings direct scanning may be
78 * better than using the string cache (which may evict a more important
81 * Typing now assumes 32-bit string byte/char offsets (duk_uint_fast32_t).
82 * Better typing might be to use duk_size_t.
85 DUK_INTERNAL duk_uint_fast32_t
duk_heap_strcache_offset_char2byte(duk_hthread
*thr
, duk_hstring
*h
, duk_uint_fast32_t char_offset
) {
88 duk_uint_fast32_t byte_offset
;
91 duk_uint_fast32_t dist_start
, dist_end
, dist_sce
;
96 if (char_offset
> DUK_HSTRING_GET_CHARLEN(h
)) {
101 * For ASCII strings, the answer is simple.
104 if (DUK_HSTRING_IS_ASCII(h
)) {
105 /* clen == blen -> pure ascii */
110 * For non-ASCII strings, we need to scan forwards or backwards
111 * from some starting point. The starting point may be the start
112 * or end of the string, or some cached midpoint in the string
115 * For "short" strings we simply scan without checking or updating
116 * the cache. For longer strings we check and update the cache as
117 * necessary, inserting a new cache entry if none exists.
120 DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld",
121 (void *) h
, (long) char_offset
,
122 (long) DUK_HSTRING_GET_CHARLEN(h
),
123 (long) DUK_HSTRING_GET_BYTELEN(h
)));
127 use_cache
= (DUK_HSTRING_GET_CHARLEN(h
) > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT
);
130 #ifdef DUK_USE_DDDPRINT
131 DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):"));
132 for (i
= 0; i
< DUK_HEAP_STRCACHE_SIZE
; i
++) {
133 duk_strcache
*c
= heap
->strcache
+ i
;
134 DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
135 (long) i
, (void *) c
->h
, (long) c
->cidx
, (long) c
->bidx
));
139 for (i
= 0; i
< DUK_HEAP_STRCACHE_SIZE
; i
++) {
140 duk_strcache
*c
= heap
->strcache
+ i
;
150 * Scan from shortest distance:
153 * - cache entry (if exists)
156 DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h
) >= char_offset
);
157 dist_start
= char_offset
;
158 dist_end
= DUK_HSTRING_GET_CHARLEN(h
) - char_offset
;
159 dist_sce
= 0; DUK_UNREF(dist_sce
); /* initialize for debug prints, needed if sce==NULL */
161 p_start
= (duk_uint8_t
*) DUK_HSTRING_GET_DATA(h
);
162 p_end
= (duk_uint8_t
*) (p_start
+ DUK_HSTRING_GET_BYTELEN(h
));
166 if (char_offset
>= sce
->cidx
) {
167 dist_sce
= char_offset
- sce
->cidx
;
168 if ((dist_sce
<= dist_start
) && (dist_sce
<= dist_end
)) {
169 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
170 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
171 "scan forwards from sce",
172 (long) use_cache
, (void *) (sce
? sce
->h
: NULL
),
173 (sce
? (long) sce
->cidx
: (long) -1),
174 (sce
? (long) sce
->bidx
: (long) -1),
175 (long) dist_start
, (long) dist_end
, (long) dist_sce
));
177 p_found
= duk__scan_forwards(p_start
+ sce
->bidx
,
183 dist_sce
= sce
->cidx
- char_offset
;
184 if ((dist_sce
<= dist_start
) && (dist_sce
<= dist_end
)) {
185 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
186 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
187 "scan backwards from sce",
188 (long) use_cache
, (void *) (sce
? sce
->h
: NULL
),
189 (sce
? (long) sce
->cidx
: (long) -1),
190 (sce
? (long) sce
->bidx
: (long) -1),
191 (long) dist_start
, (long) dist_end
, (long) dist_sce
));
193 p_found
= duk__scan_backwards(p_start
+ sce
->bidx
,
201 /* no sce, or sce scan not best */
203 if (dist_start
<= dist_end
) {
204 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
205 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
206 "scan forwards from string start",
207 (long) use_cache
, (void *) (sce
? sce
->h
: NULL
),
208 (sce
? (long) sce
->cidx
: (long) -1),
209 (sce
? (long) sce
->bidx
: (long) -1),
210 (long) dist_start
, (long) dist_end
, (long) dist_sce
));
212 p_found
= duk__scan_forwards(p_start
,
216 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
217 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
218 "scan backwards from string end",
219 (long) use_cache
, (void *) (sce
? sce
->h
: NULL
),
220 (sce
? (long) sce
->cidx
: (long) -1),
221 (sce
? (long) sce
->bidx
: (long) -1),
222 (long) dist_start
, (long) dist_end
, (long) dist_sce
));
224 p_found
= duk__scan_backwards(p_end
,
232 /* Scan error: this shouldn't normally happen; it could happen if
233 * string is not valid UTF-8 data, and clen/blen are not consistent
234 * with the scanning algorithm.
239 DUK_ASSERT(p_found
>= p_start
);
240 DUK_ASSERT(p_found
<= p_end
); /* may be equal */
241 byte_offset
= (duk_uint32_t
) (p_found
- p_start
);
243 DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld",
244 (void *) h
, (long) char_offset
, (long) byte_offset
));
247 * Update cache entry (allocating if necessary), and move the
248 * cache entry to the first place (in an "LRU" policy).
252 /* update entry, allocating if necessary */
254 sce
= heap
->strcache
+ DUK_HEAP_STRCACHE_SIZE
- 1; /* take last entry */
257 DUK_ASSERT(sce
!= NULL
);
258 sce
->bidx
= (duk_uint32_t
) (p_found
- p_start
);
259 sce
->cidx
= (duk_uint32_t
) char_offset
;
261 /* LRU: move our entry to first */
262 if (sce
> &heap
->strcache
[0]) {
272 DUK_MEMMOVE((void *) (&heap
->strcache
[1]),
273 (void *) (&heap
->strcache
[0]),
274 (size_t) (((char *) sce
) - ((char *) &heap
->strcache
[0])));
275 heap
->strcache
[0] = tmp
;
277 /* 'sce' points to the wrong entry here, but is no longer used */
279 #ifdef DUK_USE_DDDPRINT
280 DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):"));
281 for (i
= 0; i
< DUK_HEAP_STRCACHE_SIZE
; i
++) {
282 duk_strcache
*c
= heap
->strcache
+ i
;
283 DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
284 (long) i
, (void *) c
->h
, (long) c
->cidx
, (long) c
->bidx
));
292 DUK_ERROR(thr
, DUK_ERR_INTERNAL_ERROR
, "string scan error");