]> git.proxmox.com Git - ceph.git/blame - ceph/src/civetweb/src/third_party/duktape-1.5.2/src-separate/duk_heap_stringcache.c
buildsys: switch source download to quincy
[ceph.git] / ceph / src / civetweb / src / third_party / duktape-1.5.2 / src-separate / duk_heap_stringcache.c
CommitLineData
7c673cae
FG
1/*
2 * String cache.
3 *
4 * Provides a cache to optimize indexed string lookups. The cache keeps
5 * track of (byte offset, char offset) states for a fixed number of strings.
6 * Otherwise we'd need to scan from either end of the string, as we store
7 * strings in (extended) UTF-8.
8 */
9
10#include "duk_internal.h"
11
12/*
13 * Delete references to given hstring from the heap string cache.
14 *
15 * String cache references are 'weak': they are not counted towards
16 * reference counts, nor serve as roots for mark-and-sweep. When an
17 * object is about to be freed, such references need to be removed.
18 */
19
20DUK_INTERNAL void duk_heap_strcache_string_remove(duk_heap *heap, duk_hstring *h) {
21 duk_small_int_t i;
22 for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
23 duk_strcache *c = heap->strcache + i;
24 if (c->h == h) {
25 DUK_DD(DUK_DDPRINT("deleting weak strcache reference to hstring %p from heap %p",
26 (void *) h, (void *) heap));
27 c->h = NULL;
28
29 /* XXX: the string shouldn't appear twice, but we now loop to the
30 * end anyway; if fixed, add a looping assertion to ensure there
31 * is no duplicate.
32 */
33 }
34 }
35}
36
37/*
38 * String scanning helpers
11fdf7f2
TL
39 *
40 * All bytes other than UTF-8 continuation bytes ([0x80,0xbf]) are
41 * considered to contribute a character. This must match how string
42 * character length is computed.
7c673cae
FG
43 */
44
11fdf7f2 45DUK_LOCAL const duk_uint8_t *duk__scan_forwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) {
7c673cae
FG
46 while (n > 0) {
47 for (;;) {
48 p++;
49 if (p >= q) {
50 return NULL;
51 }
52 if ((*p & 0xc0) != 0x80) {
53 break;
54 }
55 }
56 n--;
57 }
58 return p;
59}
60
11fdf7f2 61DUK_LOCAL const duk_uint8_t *duk__scan_backwards(const duk_uint8_t *p, const duk_uint8_t *q, duk_uint_fast32_t n) {
7c673cae
FG
62 while (n > 0) {
63 for (;;) {
64 p--;
65 if (p < q) {
66 return NULL;
67 }
68 if ((*p & 0xc0) != 0x80) {
69 break;
70 }
71 }
72 n--;
73 }
74 return p;
75}
76
77/*
78 * Convert char offset to byte offset
79 *
80 * Avoid using the string cache if possible: for ASCII strings byte and
81 * char offsets are equal and for short strings direct scanning may be
82 * better than using the string cache (which may evict a more important
83 * entry).
84 *
85 * Typing now assumes 32-bit string byte/char offsets (duk_uint_fast32_t).
86 * Better typing might be to use duk_size_t.
87 */
88
89DUK_INTERNAL duk_uint_fast32_t duk_heap_strcache_offset_char2byte(duk_hthread *thr, duk_hstring *h, duk_uint_fast32_t char_offset) {
90 duk_heap *heap;
91 duk_strcache *sce;
92 duk_uint_fast32_t byte_offset;
93 duk_small_int_t i;
94 duk_bool_t use_cache;
95 duk_uint_fast32_t dist_start, dist_end, dist_sce;
11fdf7f2
TL
96 const duk_uint8_t *p_start;
97 const duk_uint8_t *p_end;
98 const duk_uint8_t *p_found;
7c673cae
FG
99
100 if (char_offset > DUK_HSTRING_GET_CHARLEN(h)) {
101 goto error;
102 }
103
104 /*
105 * For ASCII strings, the answer is simple.
106 */
107
108 if (DUK_HSTRING_IS_ASCII(h)) {
109 /* clen == blen -> pure ascii */
110 return char_offset;
111 }
112
113 /*
114 * For non-ASCII strings, we need to scan forwards or backwards
115 * from some starting point. The starting point may be the start
116 * or end of the string, or some cached midpoint in the string
117 * cache.
118 *
119 * For "short" strings we simply scan without checking or updating
120 * the cache. For longer strings we check and update the cache as
121 * necessary, inserting a new cache entry if none exists.
122 */
123
124 DUK_DDD(DUK_DDDPRINT("non-ascii string %p, char_offset=%ld, clen=%ld, blen=%ld",
125 (void *) h, (long) char_offset,
126 (long) DUK_HSTRING_GET_CHARLEN(h),
127 (long) DUK_HSTRING_GET_BYTELEN(h)));
128
129 heap = thr->heap;
130 sce = NULL;
131 use_cache = (DUK_HSTRING_GET_CHARLEN(h) > DUK_HEAP_STRINGCACHE_NOCACHE_LIMIT);
132
133 if (use_cache) {
134#ifdef DUK_USE_DDDPRINT
135 DUK_DDD(DUK_DDDPRINT("stringcache before char2byte (using cache):"));
136 for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
137 duk_strcache *c = heap->strcache + i;
138 DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
139 (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx));
140 }
141#endif
142
143 for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
144 duk_strcache *c = heap->strcache + i;
145
146 if (c->h == h) {
147 sce = c;
148 break;
149 }
150 }
151 }
152
153 /*
154 * Scan from shortest distance:
155 * - start of string
156 * - end of string
157 * - cache entry (if exists)
158 */
159
160 DUK_ASSERT(DUK_HSTRING_GET_CHARLEN(h) >= char_offset);
161 dist_start = char_offset;
162 dist_end = DUK_HSTRING_GET_CHARLEN(h) - char_offset;
163 dist_sce = 0; DUK_UNREF(dist_sce); /* initialize for debug prints, needed if sce==NULL */
164
11fdf7f2
TL
165 p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h);
166 p_end = (const duk_uint8_t *) (p_start + DUK_HSTRING_GET_BYTELEN(h));
7c673cae
FG
167 p_found = NULL;
168
169 if (sce) {
170 if (char_offset >= sce->cidx) {
171 dist_sce = char_offset - sce->cidx;
172 if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) {
173 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
174 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
175 "scan forwards from sce",
176 (long) use_cache, (void *) (sce ? sce->h : NULL),
177 (sce ? (long) sce->cidx : (long) -1),
178 (sce ? (long) sce->bidx : (long) -1),
179 (long) dist_start, (long) dist_end, (long) dist_sce));
180
181 p_found = duk__scan_forwards(p_start + sce->bidx,
182 p_end,
183 dist_sce);
184 goto scan_done;
185 }
186 } else {
187 dist_sce = sce->cidx - char_offset;
188 if ((dist_sce <= dist_start) && (dist_sce <= dist_end)) {
189 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
190 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
191 "scan backwards from sce",
192 (long) use_cache, (void *) (sce ? sce->h : NULL),
193 (sce ? (long) sce->cidx : (long) -1),
194 (sce ? (long) sce->bidx : (long) -1),
195 (long) dist_start, (long) dist_end, (long) dist_sce));
196
197 p_found = duk__scan_backwards(p_start + sce->bidx,
198 p_start,
199 dist_sce);
200 goto scan_done;
201 }
202 }
203 }
204
205 /* no sce, or sce scan not best */
206
207 if (dist_start <= dist_end) {
208 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
209 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
210 "scan forwards from string start",
211 (long) use_cache, (void *) (sce ? sce->h : NULL),
212 (sce ? (long) sce->cidx : (long) -1),
213 (sce ? (long) sce->bidx : (long) -1),
214 (long) dist_start, (long) dist_end, (long) dist_sce));
215
216 p_found = duk__scan_forwards(p_start,
217 p_end,
218 dist_start);
219 } else {
220 DUK_DDD(DUK_DDDPRINT("non-ascii string, use_cache=%ld, sce=%p:%ld:%ld, "
221 "dist_start=%ld, dist_end=%ld, dist_sce=%ld => "
222 "scan backwards from string end",
223 (long) use_cache, (void *) (sce ? sce->h : NULL),
224 (sce ? (long) sce->cidx : (long) -1),
225 (sce ? (long) sce->bidx : (long) -1),
226 (long) dist_start, (long) dist_end, (long) dist_sce));
227
228 p_found = duk__scan_backwards(p_end,
229 p_start,
230 dist_end);
231 }
232
233 scan_done:
234
235 if (!p_found) {
236 /* Scan error: this shouldn't normally happen; it could happen if
237 * string is not valid UTF-8 data, and clen/blen are not consistent
238 * with the scanning algorithm.
239 */
240 goto error;
241 }
242
243 DUK_ASSERT(p_found >= p_start);
244 DUK_ASSERT(p_found <= p_end); /* may be equal */
245 byte_offset = (duk_uint32_t) (p_found - p_start);
246
247 DUK_DDD(DUK_DDDPRINT("-> string %p, cidx %ld -> bidx %ld",
248 (void *) h, (long) char_offset, (long) byte_offset));
249
250 /*
251 * Update cache entry (allocating if necessary), and move the
252 * cache entry to the first place (in an "LRU" policy).
253 */
254
255 if (use_cache) {
256 /* update entry, allocating if necessary */
257 if (!sce) {
258 sce = heap->strcache + DUK_HEAP_STRCACHE_SIZE - 1; /* take last entry */
259 sce->h = h;
260 }
261 DUK_ASSERT(sce != NULL);
262 sce->bidx = (duk_uint32_t) (p_found - p_start);
263 sce->cidx = (duk_uint32_t) char_offset;
264
265 /* LRU: move our entry to first */
266 if (sce > &heap->strcache[0]) {
267 /*
268 * A C
269 * B A
270 * C <- sce ==> B
271 * D D
272 */
273 duk_strcache tmp;
274
275 tmp = *sce;
276 DUK_MEMMOVE((void *) (&heap->strcache[1]),
11fdf7f2 277 (const void *) (&heap->strcache[0]),
7c673cae
FG
278 (size_t) (((char *) sce) - ((char *) &heap->strcache[0])));
279 heap->strcache[0] = tmp;
280
281 /* 'sce' points to the wrong entry here, but is no longer used */
282 }
283#ifdef DUK_USE_DDDPRINT
284 DUK_DDD(DUK_DDDPRINT("stringcache after char2byte (using cache):"));
285 for (i = 0; i < DUK_HEAP_STRCACHE_SIZE; i++) {
286 duk_strcache *c = heap->strcache + i;
287 DUK_DDD(DUK_DDDPRINT(" [%ld] -> h=%p, cidx=%ld, bidx=%ld",
288 (long) i, (void *) c->h, (long) c->cidx, (long) c->bidx));
289 }
290#endif
291 }
292
293 return byte_offset;
294
295 error:
11fdf7f2 296 DUK_ERROR_INTERNAL_DEFMSG(thr);
7c673cae
FG
297 return 0;
298}