]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | /* |
2 | * String built-ins | |
3 | */ | |
4 | ||
5 | /* XXX: There are several limitations in the current implementation for | |
6 | * strings with >= 0x80000000UL characters. In some cases one would need | |
7 | * to be able to represent the range [-0xffffffff,0xffffffff] and so on. | |
8 | * Generally character and byte length are assumed to fit into signed 32 | |
9 | * bits (< 0x80000000UL). Places with issues are not marked explicitly | |
10 | * below in all cases, look for signed type usage (duk_int_t etc) for | |
11 | * offsets/lengths. | |
12 | */ | |
13 | ||
14 | #include "duk_internal.h" | |
15 | ||
16 | /* | |
17 | * Constructor | |
18 | */ | |
19 | ||
20 | DUK_INTERNAL duk_ret_t duk_bi_string_constructor(duk_context *ctx) { | |
21 | /* String constructor needs to distinguish between an argument not given at all | |
22 | * vs. given as 'undefined'. We're a vararg function to handle this properly. | |
23 | */ | |
24 | ||
25 | if (duk_get_top(ctx) == 0) { | |
26 | duk_push_hstring_stridx(ctx, DUK_STRIDX_EMPTY_STRING); | |
27 | } else { | |
28 | duk_to_string(ctx, 0); | |
29 | } | |
30 | DUK_ASSERT(duk_is_string(ctx, 0)); | |
31 | duk_set_top(ctx, 1); | |
32 | ||
33 | if (duk_is_constructor_call(ctx)) { | |
34 | duk_push_object_helper(ctx, | |
35 | DUK_HOBJECT_FLAG_EXTENSIBLE | | |
36 | DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ | | |
37 | DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING), | |
38 | DUK_BIDX_STRING_PROTOTYPE); | |
39 | ||
40 | /* String object internal value is immutable */ | |
41 | duk_dup(ctx, 0); | |
42 | duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_INT_VALUE, DUK_PROPDESC_FLAGS_NONE); | |
43 | } | |
44 | /* Note: unbalanced stack on purpose */ | |
45 | ||
46 | return 1; | |
47 | } | |
48 | ||
49 | DUK_INTERNAL duk_ret_t duk_bi_string_constructor_from_char_code(duk_context *ctx) { | |
50 | duk_hthread *thr = (duk_hthread *) ctx; | |
51 | duk_bufwriter_ctx bw_alloc; | |
52 | duk_bufwriter_ctx *bw; | |
53 | duk_idx_t i, n; | |
54 | duk_ucodepoint_t cp; | |
55 | ||
56 | /* XXX: It would be nice to build the string directly but ToUint16() | |
57 | * coercion is needed so a generic helper would not be very | |
58 | * helpful (perhaps coerce the value stack first here and then | |
59 | * build a string from a duk_tval number sequence in one go?). | |
60 | */ | |
61 | ||
62 | n = duk_get_top(ctx); | |
63 | ||
64 | bw = &bw_alloc; | |
65 | DUK_BW_INIT_PUSHBUF(thr, bw, n); /* initial estimate for ASCII only codepoints */ | |
66 | ||
67 | for (i = 0; i < n; i++) { | |
68 | /* XXX: could improve bufwriter handling to write multiple codepoints | |
69 | * with one ensure call but the relative benefit would be quite small. | |
70 | */ | |
71 | ||
72 | #if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT) | |
73 | /* ToUint16() coercion is mandatory in the E5.1 specification, but | |
74 | * this non-compliant behavior makes more sense because we support | |
75 | * non-BMP codepoints. Don't use CESU-8 because that'd create | |
76 | * surrogate pairs. | |
77 | */ | |
78 | ||
79 | cp = (duk_ucodepoint_t) duk_to_uint32(ctx, i); | |
80 | DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp); | |
81 | #else | |
82 | cp = (duk_ucodepoint_t) duk_to_uint16(ctx, i); | |
83 | DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp); | |
84 | #endif | |
85 | } | |
86 | ||
87 | DUK_BW_COMPACT(thr, bw); | |
88 | duk_to_string(ctx, -1); | |
89 | return 1; | |
90 | } | |
91 | ||
92 | /* | |
93 | * toString(), valueOf() | |
94 | */ | |
95 | ||
96 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_to_string(duk_context *ctx) { | |
97 | duk_tval *tv; | |
98 | ||
99 | duk_push_this(ctx); | |
100 | tv = duk_require_tval(ctx, -1); | |
101 | DUK_ASSERT(tv != NULL); | |
102 | ||
103 | if (DUK_TVAL_IS_STRING(tv)) { | |
104 | /* return as is */ | |
105 | return 1; | |
106 | } else if (DUK_TVAL_IS_OBJECT(tv)) { | |
107 | duk_hobject *h = DUK_TVAL_GET_OBJECT(tv); | |
108 | DUK_ASSERT(h != NULL); | |
109 | ||
110 | /* Must be a "string object", i.e. class "String" */ | |
111 | if (DUK_HOBJECT_GET_CLASS_NUMBER(h) != DUK_HOBJECT_CLASS_STRING) { | |
112 | goto type_error; | |
113 | } | |
114 | ||
115 | duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INT_VALUE); | |
116 | DUK_ASSERT(duk_is_string(ctx, -1)); | |
117 | ||
118 | return 1; | |
119 | } else { | |
120 | goto type_error; | |
121 | } | |
122 | ||
123 | /* never here, but fall through */ | |
124 | ||
125 | type_error: | |
126 | return DUK_RET_TYPE_ERROR; | |
127 | } | |
128 | ||
129 | /* | |
130 | * Character and charcode access | |
131 | */ | |
132 | ||
133 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_at(duk_context *ctx) { | |
134 | duk_int_t pos; | |
135 | ||
136 | /* XXX: faster implementation */ | |
137 | ||
138 | (void) duk_push_this_coercible_to_string(ctx); | |
139 | pos = duk_to_int(ctx, 0); | |
140 | duk_substring(ctx, -1, pos, pos + 1); | |
141 | return 1; | |
142 | } | |
143 | ||
144 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_context *ctx) { | |
145 | duk_hthread *thr = (duk_hthread *) ctx; | |
146 | duk_int_t pos; | |
147 | duk_hstring *h; | |
148 | duk_bool_t clamped; | |
149 | ||
150 | /* XXX: faster implementation */ | |
151 | ||
152 | DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *) duk_get_tval(ctx, 0))); | |
153 | ||
154 | h = duk_push_this_coercible_to_string(ctx); | |
155 | DUK_ASSERT(h != NULL); | |
156 | ||
157 | pos = duk_to_int_clamped_raw(ctx, | |
158 | 0 /*index*/, | |
159 | 0 /*min(incl)*/, | |
160 | DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/, | |
161 | &clamped /*out_clamped*/); | |
162 | if (clamped) { | |
163 | duk_push_number(ctx, DUK_DOUBLE_NAN); | |
164 | return 1; | |
165 | } | |
166 | ||
167 | duk_push_u32(ctx, (duk_uint32_t) duk_hstring_char_code_at_raw(thr, h, pos)); | |
168 | return 1; | |
169 | } | |
170 | ||
171 | /* | |
172 | * substring(), substr(), slice() | |
173 | */ | |
174 | ||
175 | /* XXX: any chance of merging these three similar but still slightly | |
176 | * different algorithms so that footprint would be reduced? | |
177 | */ | |
178 | ||
179 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substring(duk_context *ctx) { | |
180 | duk_hstring *h; | |
181 | duk_int_t start_pos, end_pos; | |
182 | duk_int_t len; | |
183 | ||
184 | h = duk_push_this_coercible_to_string(ctx); | |
185 | DUK_ASSERT(h != NULL); | |
186 | len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h); | |
187 | ||
188 | /* [ start end str ] */ | |
189 | ||
190 | start_pos = duk_to_int_clamped(ctx, 0, 0, len); | |
191 | if (duk_is_undefined(ctx, 1)) { | |
192 | end_pos = len; | |
193 | } else { | |
194 | end_pos = duk_to_int_clamped(ctx, 1, 0, len); | |
195 | } | |
196 | DUK_ASSERT(start_pos >= 0 && start_pos <= len); | |
197 | DUK_ASSERT(end_pos >= 0 && end_pos <= len); | |
198 | ||
199 | if (start_pos > end_pos) { | |
200 | duk_int_t tmp = start_pos; | |
201 | start_pos = end_pos; | |
202 | end_pos = tmp; | |
203 | } | |
204 | ||
205 | DUK_ASSERT(end_pos >= start_pos); | |
206 | ||
207 | duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos); | |
208 | return 1; | |
209 | } | |
210 | ||
211 | #ifdef DUK_USE_SECTION_B | |
212 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) { | |
213 | duk_hstring *h; | |
214 | duk_int_t start_pos, end_pos; | |
215 | duk_int_t len; | |
216 | ||
217 | /* Unlike non-obsolete String calls, substr() algorithm in E5.1 | |
218 | * specification will happily coerce undefined and null to strings | |
219 | * ("undefined" and "null"). | |
220 | */ | |
221 | duk_push_this(ctx); | |
222 | h = duk_to_hstring(ctx, -1); | |
223 | DUK_ASSERT(h != NULL); | |
224 | len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h); | |
225 | ||
226 | /* [ start length str ] */ | |
227 | ||
228 | /* The implementation for computing of start_pos and end_pos differs | |
229 | * from the standard algorithm, but is intended to result in the exactly | |
230 | * same behavior. This is not always obvious. | |
231 | */ | |
232 | ||
233 | /* combines steps 2 and 5; -len ensures max() not needed for step 5 */ | |
234 | start_pos = duk_to_int_clamped(ctx, 0, -len, len); | |
235 | if (start_pos < 0) { | |
236 | start_pos = len + start_pos; | |
237 | } | |
238 | DUK_ASSERT(start_pos >= 0 && start_pos <= len); | |
239 | ||
240 | /* combines steps 3, 6; step 7 is not needed */ | |
241 | if (duk_is_undefined(ctx, 1)) { | |
242 | end_pos = len; | |
243 | } else { | |
244 | DUK_ASSERT(start_pos <= len); | |
245 | end_pos = start_pos + duk_to_int_clamped(ctx, 1, 0, len - start_pos); | |
246 | } | |
247 | DUK_ASSERT(start_pos >= 0 && start_pos <= len); | |
248 | DUK_ASSERT(end_pos >= 0 && end_pos <= len); | |
249 | DUK_ASSERT(end_pos >= start_pos); | |
250 | ||
251 | duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos); | |
252 | return 1; | |
253 | } | |
254 | #else /* DUK_USE_SECTION_B */ | |
255 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) { | |
256 | DUK_UNREF(ctx); | |
257 | return DUK_RET_UNSUPPORTED_ERROR; | |
258 | } | |
259 | #endif /* DUK_USE_SECTION_B */ | |
260 | ||
261 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_context *ctx) { | |
262 | duk_hstring *h; | |
263 | duk_int_t start_pos, end_pos; | |
264 | duk_int_t len; | |
265 | ||
266 | h = duk_push_this_coercible_to_string(ctx); | |
267 | DUK_ASSERT(h != NULL); | |
268 | len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h); | |
269 | ||
270 | /* [ start end str ] */ | |
271 | ||
272 | start_pos = duk_to_int_clamped(ctx, 0, -len, len); | |
273 | if (start_pos < 0) { | |
274 | start_pos = len + start_pos; | |
275 | } | |
276 | if (duk_is_undefined(ctx, 1)) { | |
277 | end_pos = len; | |
278 | } else { | |
279 | end_pos = duk_to_int_clamped(ctx, 1, -len, len); | |
280 | if (end_pos < 0) { | |
281 | end_pos = len + end_pos; | |
282 | } | |
283 | } | |
284 | DUK_ASSERT(start_pos >= 0 && start_pos <= len); | |
285 | DUK_ASSERT(end_pos >= 0 && end_pos <= len); | |
286 | ||
287 | if (end_pos < start_pos) { | |
288 | end_pos = start_pos; | |
289 | } | |
290 | ||
291 | DUK_ASSERT(end_pos >= start_pos); | |
292 | ||
293 | duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos); | |
294 | return 1; | |
295 | } | |
296 | ||
297 | /* | |
298 | * Case conversion | |
299 | */ | |
300 | ||
301 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_caseconv_shared(duk_context *ctx) { | |
302 | duk_hthread *thr = (duk_hthread *) ctx; | |
303 | duk_small_int_t uppercase = duk_get_current_magic(ctx); | |
304 | ||
305 | (void) duk_push_this_coercible_to_string(ctx); | |
306 | duk_unicode_case_convert_string(thr, (duk_bool_t) uppercase); | |
307 | return 1; | |
308 | } | |
309 | ||
310 | /* | |
311 | * indexOf() and lastIndexOf() | |
312 | */ | |
313 | ||
314 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_context *ctx) { | |
315 | duk_hthread *thr = (duk_hthread *) ctx; | |
316 | duk_hstring *h_this; | |
317 | duk_hstring *h_search; | |
318 | duk_int_t clen_this; | |
319 | duk_int_t cpos; | |
320 | duk_int_t bpos; | |
321 | const duk_uint8_t *p_start, *p_end, *p; | |
322 | const duk_uint8_t *q_start; | |
323 | duk_int_t q_blen; | |
324 | duk_uint8_t firstbyte; | |
325 | duk_uint8_t t; | |
326 | duk_small_int_t is_lastindexof = duk_get_current_magic(ctx); /* 0=indexOf, 1=lastIndexOf */ | |
327 | ||
328 | h_this = duk_push_this_coercible_to_string(ctx); | |
329 | DUK_ASSERT(h_this != NULL); | |
330 | clen_this = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h_this); | |
331 | ||
332 | h_search = duk_to_hstring(ctx, 0); | |
333 | DUK_ASSERT(h_search != NULL); | |
334 | q_start = DUK_HSTRING_GET_DATA(h_search); | |
335 | q_blen = (duk_int_t) DUK_HSTRING_GET_BYTELEN(h_search); | |
336 | ||
337 | duk_to_number(ctx, 1); | |
338 | if (duk_is_nan(ctx, 1) && is_lastindexof) { | |
339 | /* indexOf: NaN should cause pos to be zero. | |
340 | * lastIndexOf: NaN should cause pos to be +Infinity | |
341 | * (and later be clamped to len). | |
342 | */ | |
343 | cpos = clen_this; | |
344 | } else { | |
345 | cpos = duk_to_int_clamped(ctx, 1, 0, clen_this); | |
346 | } | |
347 | ||
348 | /* Empty searchstring always matches; cpos must be clamped here. | |
349 | * (If q_blen were < 0 due to clamped coercion, it would also be | |
350 | * caught here.) | |
351 | */ | |
352 | if (q_blen <= 0) { | |
353 | duk_push_int(ctx, cpos); | |
354 | return 1; | |
355 | } | |
356 | DUK_ASSERT(q_blen > 0); | |
357 | ||
358 | bpos = (duk_int_t) duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t) cpos); | |
359 | ||
360 | p_start = DUK_HSTRING_GET_DATA(h_this); | |
361 | p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this); | |
362 | p = p_start + bpos; | |
363 | ||
364 | /* This loop is optimized for size. For speed, there should be | |
365 | * two separate loops, and we should ensure that memcmp() can be | |
366 | * used without an extra "will searchstring fit" check. Doing | |
367 | * the preconditioning for 'p' and 'p_end' is easy but cpos | |
368 | * must be updated if 'p' is wound back (backward scanning). | |
369 | */ | |
370 | ||
371 | firstbyte = q_start[0]; /* leading byte of match string */ | |
372 | while (p <= p_end && p >= p_start) { | |
373 | t = *p; | |
374 | ||
375 | /* For Ecmascript strings, this check can only match for | |
376 | * initial UTF-8 bytes (not continuation bytes). For other | |
377 | * strings all bets are off. | |
378 | */ | |
379 | ||
380 | if ((t == firstbyte) && ((duk_size_t) (p_end - p) >= (duk_size_t) q_blen)) { | |
381 | DUK_ASSERT(q_blen > 0); /* no issues with memcmp() zero size, even if broken */ | |
382 | if (DUK_MEMCMP((const void *) p, (const void *) q_start, (size_t) q_blen) == 0) { | |
383 | duk_push_int(ctx, cpos); | |
384 | return 1; | |
385 | } | |
386 | } | |
387 | ||
388 | /* track cpos while scanning */ | |
389 | if (is_lastindexof) { | |
390 | /* when going backwards, we decrement cpos 'early'; | |
391 | * 'p' may point to a continuation byte of the char | |
392 | * at offset 'cpos', but that's OK because we'll | |
393 | * backtrack all the way to the initial byte. | |
394 | */ | |
395 | if ((t & 0xc0) != 0x80) { | |
396 | cpos--; | |
397 | } | |
398 | p--; | |
399 | } else { | |
400 | if ((t & 0xc0) != 0x80) { | |
401 | cpos++; | |
402 | } | |
403 | p++; | |
404 | } | |
405 | } | |
406 | ||
407 | /* Not found. Empty string case is handled specially above. */ | |
408 | duk_push_int(ctx, -1); | |
409 | return 1; | |
410 | } | |
411 | ||
412 | /* | |
413 | * replace() | |
414 | */ | |
415 | ||
416 | /* XXX: the current implementation works but is quite clunky; it compiles | |
417 | * to almost 1.4 kB of x86 code so it needs to be simplified (better approach, | |
418 | * shared helpers, etc). Some ideas for refactoring: | |
419 | * | |
420 | * - a primitive to convert a string into a regexp matcher (reduces matching | |
421 | * code at the cost of making matching much slower) | |
422 | * - use replace() as a basic helper for match() and split(), which are both | |
423 | * much simpler | |
424 | * - API call to get_prop and to_boolean | |
425 | */ | |
426 | ||
427 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_context *ctx) { | |
428 | duk_hthread *thr = (duk_hthread *) ctx; | |
429 | duk_hstring *h_input; | |
430 | duk_hstring *h_match; | |
431 | duk_hstring *h_search; | |
432 | duk_hobject *h_re; | |
433 | duk_bufwriter_ctx bw_alloc; | |
434 | duk_bufwriter_ctx *bw; | |
435 | #ifdef DUK_USE_REGEXP_SUPPORT | |
436 | duk_bool_t is_regexp; | |
437 | duk_bool_t is_global; | |
438 | #endif | |
439 | duk_bool_t is_repl_func; | |
440 | duk_uint32_t match_start_coff, match_start_boff; | |
441 | #ifdef DUK_USE_REGEXP_SUPPORT | |
442 | duk_int_t match_caps; | |
443 | #endif | |
444 | duk_uint32_t prev_match_end_boff; | |
445 | const duk_uint8_t *r_start, *r_end, *r; /* repl string scan */ | |
446 | duk_size_t tmp_sz; | |
447 | ||
448 | DUK_ASSERT_TOP(ctx, 2); | |
449 | h_input = duk_push_this_coercible_to_string(ctx); | |
450 | DUK_ASSERT(h_input != NULL); | |
451 | ||
452 | bw = &bw_alloc; | |
453 | DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input)); /* input size is good output starting point */ | |
454 | ||
455 | DUK_ASSERT_TOP(ctx, 4); | |
456 | ||
457 | /* stack[0] = search value | |
458 | * stack[1] = replace value | |
459 | * stack[2] = input string | |
460 | * stack[3] = result buffer | |
461 | */ | |
462 | ||
463 | h_re = duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP); | |
464 | if (h_re) { | |
465 | #ifdef DUK_USE_REGEXP_SUPPORT | |
466 | is_regexp = 1; | |
467 | is_global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL); | |
468 | ||
469 | if (is_global) { | |
470 | /* start match from beginning */ | |
471 | duk_push_int(ctx, 0); | |
472 | duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
473 | } | |
474 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
475 | return DUK_RET_UNSUPPORTED_ERROR; | |
476 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
477 | } else { | |
478 | duk_to_string(ctx, 0); | |
479 | #ifdef DUK_USE_REGEXP_SUPPORT | |
480 | is_regexp = 0; | |
481 | is_global = 0; | |
482 | #endif | |
483 | } | |
484 | ||
485 | if (duk_is_function(ctx, 1)) { | |
486 | is_repl_func = 1; | |
487 | r_start = NULL; | |
488 | r_end = NULL; | |
489 | } else { | |
490 | duk_hstring *h_repl; | |
491 | ||
492 | is_repl_func = 0; | |
493 | h_repl = duk_to_hstring(ctx, 1); | |
494 | DUK_ASSERT(h_repl != NULL); | |
495 | r_start = DUK_HSTRING_GET_DATA(h_repl); | |
496 | r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl); | |
497 | } | |
498 | ||
499 | prev_match_end_boff = 0; | |
500 | ||
501 | for (;;) { | |
502 | /* | |
503 | * If matching with a regexp: | |
504 | * - non-global RegExp: lastIndex not touched on a match, zeroed | |
505 | * on a non-match | |
506 | * - global RegExp: on match, lastIndex will be updated by regexp | |
507 | * executor to point to next char after the matching part (so that | |
508 | * characters in the matching part are not matched again) | |
509 | * | |
510 | * If matching with a string: | |
511 | * - always non-global match, find first occurrence | |
512 | * | |
513 | * We need: | |
514 | * - The character offset of start-of-match for the replacer function | |
515 | * - The byte offsets for start-of-match and end-of-match to implement | |
516 | * the replacement values $&, $`, and $', and to copy non-matching | |
517 | * input string portions (including header and trailer) verbatim. | |
518 | * | |
519 | * NOTE: the E5.1 specification is a bit vague how the RegExp should | |
520 | * behave in the replacement process; e.g. is matching done first for | |
521 | * all matches (in the global RegExp case) before any replacer calls | |
522 | * are made? See: test-bi-string-proto-replace.js for discussion. | |
523 | */ | |
524 | ||
525 | DUK_ASSERT_TOP(ctx, 4); | |
526 | ||
527 | #ifdef DUK_USE_REGEXP_SUPPORT | |
528 | if (is_regexp) { | |
529 | duk_dup(ctx, 0); | |
530 | duk_dup(ctx, 2); | |
531 | duk_regexp_match(thr); /* [ ... regexp input ] -> [ res_obj ] */ | |
532 | if (!duk_is_object(ctx, -1)) { | |
533 | duk_pop(ctx); | |
534 | break; | |
535 | } | |
536 | ||
537 | duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); | |
538 | DUK_ASSERT(duk_is_number(ctx, -1)); | |
539 | match_start_coff = duk_get_int(ctx, -1); | |
540 | duk_pop(ctx); | |
541 | ||
542 | duk_get_prop_index(ctx, -1, 0); | |
543 | DUK_ASSERT(duk_is_string(ctx, -1)); | |
544 | h_match = duk_get_hstring(ctx, -1); | |
545 | DUK_ASSERT(h_match != NULL); | |
546 | duk_pop(ctx); /* h_match is borrowed, remains reachable through match_obj */ | |
547 | ||
548 | if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) { | |
549 | /* This should be equivalent to match() algorithm step 8.f.iii.2: | |
550 | * detect an empty match and allow it, but don't allow it twice. | |
551 | */ | |
552 | duk_uint32_t last_index; | |
553 | ||
554 | duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
555 | last_index = (duk_uint32_t) duk_get_uint(ctx, -1); | |
556 | DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld", | |
557 | (long) last_index, (long) (last_index + 1))); | |
558 | duk_pop(ctx); | |
559 | duk_push_int(ctx, last_index + 1); | |
560 | duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
561 | } | |
562 | ||
563 | DUK_ASSERT(duk_get_length(ctx, -1) <= DUK_INT_MAX); /* string limits */ | |
564 | match_caps = (duk_int_t) duk_get_length(ctx, -1); | |
565 | } else { | |
566 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
567 | { /* unconditionally */ | |
568 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
569 | const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ | |
570 | const duk_uint8_t *q_start; /* match string */ | |
571 | duk_size_t q_blen; | |
572 | ||
573 | #ifdef DUK_USE_REGEXP_SUPPORT | |
574 | DUK_ASSERT(!is_global); /* single match always */ | |
575 | #endif | |
576 | ||
577 | p_start = DUK_HSTRING_GET_DATA(h_input); | |
578 | p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); | |
579 | p = p_start; | |
580 | ||
581 | h_search = duk_get_hstring(ctx, 0); | |
582 | DUK_ASSERT(h_search != NULL); | |
583 | q_start = DUK_HSTRING_GET_DATA(h_search); | |
584 | q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_search); | |
585 | ||
586 | p_end -= q_blen; /* ensure full memcmp() fits in while */ | |
587 | ||
588 | match_start_coff = 0; | |
589 | ||
590 | while (p <= p_end) { | |
591 | DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); | |
592 | if (DUK_MEMCMP((const void *) p, (const void *) q_start, (size_t) q_blen) == 0) { | |
593 | duk_dup(ctx, 0); | |
594 | h_match = duk_get_hstring(ctx, -1); | |
595 | DUK_ASSERT(h_match != NULL); | |
596 | #ifdef DUK_USE_REGEXP_SUPPORT | |
597 | match_caps = 0; | |
598 | #endif | |
599 | goto found; | |
600 | } | |
601 | ||
602 | /* track utf-8 non-continuation bytes */ | |
603 | if ((p[0] & 0xc0) != 0x80) { | |
604 | match_start_coff++; | |
605 | } | |
606 | p++; | |
607 | } | |
608 | ||
609 | /* not found */ | |
610 | break; | |
611 | } | |
612 | found: | |
613 | ||
614 | /* stack[0] = search value | |
615 | * stack[1] = replace value | |
616 | * stack[2] = input string | |
617 | * stack[3] = result buffer | |
618 | * stack[4] = regexp match OR match string | |
619 | */ | |
620 | ||
621 | match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff); | |
622 | ||
623 | tmp_sz = (duk_size_t) (match_start_boff - prev_match_end_boff); | |
624 | DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); | |
625 | ||
626 | prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match); | |
627 | ||
628 | if (is_repl_func) { | |
629 | duk_idx_t idx_args; | |
630 | duk_hstring *h_repl; | |
631 | ||
632 | /* regexp res_obj is at index 4 */ | |
633 | ||
634 | duk_dup(ctx, 1); | |
635 | idx_args = duk_get_top(ctx); | |
636 | ||
637 | #ifdef DUK_USE_REGEXP_SUPPORT | |
638 | if (is_regexp) { | |
639 | duk_int_t idx; | |
640 | duk_require_stack(ctx, match_caps + 2); | |
641 | for (idx = 0; idx < match_caps; idx++) { | |
642 | /* match followed by capture(s) */ | |
643 | duk_get_prop_index(ctx, 4, idx); | |
644 | } | |
645 | } else { | |
646 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
647 | { /* unconditionally */ | |
648 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
649 | /* match == search string, by definition */ | |
650 | duk_dup(ctx, 0); | |
651 | } | |
652 | duk_push_int(ctx, match_start_coff); | |
653 | duk_dup(ctx, 2); | |
654 | ||
655 | /* [ ... replacer match [captures] match_char_offset input ] */ | |
656 | ||
657 | duk_call(ctx, duk_get_top(ctx) - idx_args); | |
658 | h_repl = duk_to_hstring(ctx, -1); /* -> [ ... repl_value ] */ | |
659 | DUK_ASSERT(h_repl != NULL); | |
660 | ||
661 | DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl); | |
662 | ||
663 | duk_pop(ctx); /* repl_value */ | |
664 | } else { | |
665 | r = r_start; | |
666 | ||
667 | while (r < r_end) { | |
668 | duk_int_t ch1; | |
669 | duk_int_t ch2; | |
670 | #ifdef DUK_USE_REGEXP_SUPPORT | |
671 | duk_int_t ch3; | |
672 | #endif | |
673 | duk_size_t left; | |
674 | ||
675 | ch1 = *r++; | |
676 | if (ch1 != DUK_ASC_DOLLAR) { | |
677 | goto repl_write; | |
678 | } | |
679 | left = r_end - r; | |
680 | ||
681 | if (left <= 0) { | |
682 | goto repl_write; | |
683 | } | |
684 | ||
685 | ch2 = r[0]; | |
686 | switch ((int) ch2) { | |
687 | case DUK_ASC_DOLLAR: { | |
688 | ch1 = (1 << 8) + DUK_ASC_DOLLAR; | |
689 | goto repl_write; | |
690 | } | |
691 | case DUK_ASC_AMP: { | |
692 | DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match); | |
693 | r++; | |
694 | continue; | |
695 | } | |
696 | case DUK_ASC_GRAVE: { | |
697 | tmp_sz = (duk_size_t) match_start_boff; | |
698 | DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz); | |
699 | r++; | |
700 | continue; | |
701 | } | |
702 | case DUK_ASC_SINGLEQUOTE: { | |
703 | duk_uint32_t match_end_boff; | |
704 | ||
705 | /* Use match charlen instead of bytelen, just in case the input and | |
706 | * match codepoint encodings would have different lengths. | |
707 | */ | |
708 | match_end_boff = duk_heap_strcache_offset_char2byte(thr, | |
709 | h_input, | |
710 | match_start_coff + DUK_HSTRING_GET_CHARLEN(h_match)); | |
711 | ||
712 | tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff); | |
713 | DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz); | |
714 | r++; | |
715 | continue; | |
716 | } | |
717 | default: { | |
718 | #ifdef DUK_USE_REGEXP_SUPPORT | |
719 | duk_int_t capnum, captmp, capadv; | |
720 | /* XXX: optional check, match_caps is zero if no regexp, | |
721 | * so dollar will be interpreted literally anyway. | |
722 | */ | |
723 | ||
724 | if (!is_regexp) { | |
725 | goto repl_write; | |
726 | } | |
727 | ||
728 | if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) { | |
729 | goto repl_write; | |
730 | } | |
731 | capnum = ch2 - DUK_ASC_0; | |
732 | capadv = 1; | |
733 | ||
734 | if (left >= 2) { | |
735 | ch3 = r[1]; | |
736 | if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) { | |
737 | captmp = capnum * 10 + (ch3 - DUK_ASC_0); | |
738 | if (captmp < match_caps) { | |
739 | capnum = captmp; | |
740 | capadv = 2; | |
741 | } | |
742 | } | |
743 | } | |
744 | ||
745 | if (capnum > 0 && capnum < match_caps) { | |
746 | DUK_ASSERT(is_regexp != 0); /* match_caps == 0 without regexps */ | |
747 | ||
748 | /* regexp res_obj is at offset 4 */ | |
749 | duk_get_prop_index(ctx, 4, (duk_uarridx_t) capnum); | |
750 | if (duk_is_string(ctx, -1)) { | |
751 | duk_hstring *h_tmp_str; | |
752 | ||
753 | h_tmp_str = duk_get_hstring(ctx, -1); | |
754 | DUK_ASSERT(h_tmp_str != NULL); | |
755 | ||
756 | DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str); | |
757 | } else { | |
758 | /* undefined -> skip (replaced with empty) */ | |
759 | } | |
760 | duk_pop(ctx); | |
761 | r += capadv; | |
762 | continue; | |
763 | } else { | |
764 | goto repl_write; | |
765 | } | |
766 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
767 | goto repl_write; /* unconditionally */ | |
768 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
769 | } /* default case */ | |
770 | } /* switch (ch2) */ | |
771 | ||
772 | repl_write: | |
773 | /* ch1 = (r_increment << 8) + byte */ | |
774 | ||
775 | DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t) (ch1 & 0xff)); | |
776 | r += ch1 >> 8; | |
777 | } /* while repl */ | |
778 | } /* if (is_repl_func) */ | |
779 | ||
780 | duk_pop(ctx); /* pop regexp res_obj or match string */ | |
781 | ||
782 | #ifdef DUK_USE_REGEXP_SUPPORT | |
783 | if (!is_global) { | |
784 | #else | |
785 | { /* unconditionally; is_global==0 */ | |
786 | #endif | |
787 | break; | |
788 | } | |
789 | } | |
790 | ||
791 | /* trailer */ | |
792 | tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff); | |
793 | DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz); | |
794 | ||
795 | DUK_ASSERT_TOP(ctx, 4); | |
796 | DUK_BW_COMPACT(thr, bw); | |
797 | duk_to_string(ctx, -1); | |
798 | return 1; | |
799 | } | |
800 | ||
801 | /* | |
802 | * split() | |
803 | */ | |
804 | ||
805 | /* XXX: very messy now, but works; clean up, remove unused variables (nominally | |
806 | * used so compiler doesn't complain). | |
807 | */ | |
808 | ||
809 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_context *ctx) { | |
810 | duk_hthread *thr = (duk_hthread *) ctx; | |
811 | duk_hstring *h_input; | |
812 | duk_hstring *h_sep; | |
813 | duk_uint32_t limit; | |
814 | duk_uint32_t arr_idx; | |
815 | #ifdef DUK_USE_REGEXP_SUPPORT | |
816 | duk_bool_t is_regexp; | |
817 | #endif | |
818 | duk_bool_t matched; /* set to 1 if any match exists (needed for empty input special case) */ | |
819 | duk_uint32_t prev_match_end_coff, prev_match_end_boff; | |
820 | duk_uint32_t match_start_boff, match_start_coff; | |
821 | duk_uint32_t match_end_boff, match_end_coff; | |
822 | ||
823 | DUK_UNREF(thr); | |
824 | ||
825 | h_input = duk_push_this_coercible_to_string(ctx); | |
826 | DUK_ASSERT(h_input != NULL); | |
827 | ||
828 | duk_push_array(ctx); | |
829 | ||
830 | if (duk_is_undefined(ctx, 1)) { | |
831 | limit = 0xffffffffUL; | |
832 | } else { | |
833 | limit = duk_to_uint32(ctx, 1); | |
834 | } | |
835 | ||
836 | if (limit == 0) { | |
837 | return 1; | |
838 | } | |
839 | ||
840 | /* If the separator is a RegExp, make a "clone" of it. The specification | |
841 | * algorithm calls [[Match]] directly for specific indices; we emulate this | |
842 | * by tweaking lastIndex and using a "force global" variant of duk_regexp_match() | |
843 | * which will use global-style matching even when the RegExp itself is non-global. | |
844 | */ | |
845 | ||
846 | if (duk_is_undefined(ctx, 0)) { | |
847 | /* The spec algorithm first does "R = ToString(separator)" before checking | |
848 | * whether separator is undefined. Since this is side effect free, we can | |
849 | * skip the ToString() here. | |
850 | */ | |
851 | duk_dup(ctx, 2); | |
852 | duk_put_prop_index(ctx, 3, 0); | |
853 | return 1; | |
854 | } else if (duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) { | |
855 | #ifdef DUK_USE_REGEXP_SUPPORT | |
856 | duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR); | |
857 | duk_dup(ctx, 0); | |
858 | duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */ | |
859 | duk_replace(ctx, 0); | |
860 | /* lastIndex is initialized to zero by new RegExp() */ | |
861 | is_regexp = 1; | |
862 | #else | |
863 | return DUK_RET_UNSUPPORTED_ERROR; | |
864 | #endif | |
865 | } else { | |
866 | duk_to_string(ctx, 0); | |
867 | #ifdef DUK_USE_REGEXP_SUPPORT | |
868 | is_regexp = 0; | |
869 | #endif | |
870 | } | |
871 | ||
872 | /* stack[0] = separator (string or regexp) | |
873 | * stack[1] = limit | |
874 | * stack[2] = input string | |
875 | * stack[3] = result array | |
876 | */ | |
877 | ||
878 | prev_match_end_boff = 0; | |
879 | prev_match_end_coff = 0; | |
880 | arr_idx = 0; | |
881 | matched = 0; | |
882 | ||
883 | for (;;) { | |
884 | /* | |
885 | * The specification uses RegExp [[Match]] to attempt match at specific | |
886 | * offsets. We don't have such a primitive, so we use an actual RegExp | |
887 | * and tweak lastIndex. Since the RegExp may be non-global, we use a | |
888 | * special variant which forces global-like behavior for matching. | |
889 | */ | |
890 | ||
891 | DUK_ASSERT_TOP(ctx, 4); | |
892 | ||
893 | #ifdef DUK_USE_REGEXP_SUPPORT | |
894 | if (is_regexp) { | |
895 | duk_dup(ctx, 0); | |
896 | duk_dup(ctx, 2); | |
897 | duk_regexp_match_force_global(thr); /* [ ... regexp input ] -> [ res_obj ] */ | |
898 | if (!duk_is_object(ctx, -1)) { | |
899 | duk_pop(ctx); | |
900 | break; | |
901 | } | |
902 | matched = 1; | |
903 | ||
904 | duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); | |
905 | DUK_ASSERT(duk_is_number(ctx, -1)); | |
906 | match_start_coff = duk_get_int(ctx, -1); | |
907 | match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff); | |
908 | duk_pop(ctx); | |
909 | ||
910 | if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) { | |
911 | /* don't allow an empty match at the end of the string */ | |
912 | duk_pop(ctx); | |
913 | break; | |
914 | } | |
915 | ||
916 | duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
917 | DUK_ASSERT(duk_is_number(ctx, -1)); | |
918 | match_end_coff = duk_get_int(ctx, -1); | |
919 | match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_end_coff); | |
920 | duk_pop(ctx); | |
921 | ||
922 | /* empty match -> bump and continue */ | |
923 | if (prev_match_end_boff == match_end_boff) { | |
924 | duk_push_int(ctx, match_end_coff + 1); | |
925 | duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
926 | duk_pop(ctx); | |
927 | continue; | |
928 | } | |
929 | } else { | |
930 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
931 | { /* unconditionally */ | |
932 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
933 | const duk_uint8_t *p_start, *p_end, *p; /* input string scan */ | |
934 | const duk_uint8_t *q_start; /* match string */ | |
935 | duk_size_t q_blen, q_clen; | |
936 | ||
937 | p_start = DUK_HSTRING_GET_DATA(h_input); | |
938 | p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input); | |
939 | p = p_start + prev_match_end_boff; | |
940 | ||
941 | h_sep = duk_get_hstring(ctx, 0); | |
942 | DUK_ASSERT(h_sep != NULL); | |
943 | q_start = DUK_HSTRING_GET_DATA(h_sep); | |
944 | q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sep); | |
945 | q_clen = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_sep); | |
946 | ||
947 | p_end -= q_blen; /* ensure full memcmp() fits in while */ | |
948 | ||
949 | match_start_coff = prev_match_end_coff; | |
950 | ||
951 | if (q_blen == 0) { | |
952 | /* Handle empty separator case: it will always match, and always | |
953 | * triggers the check in step 13.c.iii initially. Note that we | |
954 | * must skip to either end of string or start of first codepoint, | |
955 | * skipping over any continuation bytes! | |
956 | * | |
957 | * Don't allow an empty string to match at the end of the input. | |
958 | */ | |
959 | ||
960 | matched = 1; /* empty separator can always match */ | |
961 | ||
962 | match_start_coff++; | |
963 | p++; | |
964 | while (p < p_end) { | |
965 | if ((p[0] & 0xc0) != 0x80) { | |
966 | goto found; | |
967 | } | |
968 | p++; | |
969 | } | |
970 | goto not_found; | |
971 | } | |
972 | ||
973 | DUK_ASSERT(q_blen > 0 && q_clen > 0); | |
974 | while (p <= p_end) { | |
975 | DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input)); | |
976 | DUK_ASSERT(q_blen > 0); /* no issues with empty memcmp() */ | |
977 | if (DUK_MEMCMP((const void *) p, (const void *) q_start, (size_t) q_blen) == 0) { | |
978 | /* never an empty match, so step 13.c.iii can't be triggered */ | |
979 | goto found; | |
980 | } | |
981 | ||
982 | /* track utf-8 non-continuation bytes */ | |
983 | if ((p[0] & 0xc0) != 0x80) { | |
984 | match_start_coff++; | |
985 | } | |
986 | p++; | |
987 | } | |
988 | ||
989 | not_found: | |
990 | /* not found */ | |
991 | break; | |
992 | ||
993 | found: | |
994 | matched = 1; | |
995 | match_start_boff = (duk_uint32_t) (p - p_start); | |
996 | match_end_coff = (duk_uint32_t) (match_start_coff + q_clen); /* constrained by string length */ | |
997 | match_end_boff = (duk_uint32_t) (match_start_boff + q_blen); /* ditto */ | |
998 | ||
999 | /* empty match (may happen with empty separator) -> bump and continue */ | |
1000 | if (prev_match_end_boff == match_end_boff) { | |
1001 | prev_match_end_boff++; | |
1002 | prev_match_end_coff++; | |
1003 | continue; | |
1004 | } | |
1005 | } /* if (is_regexp) */ | |
1006 | ||
1007 | /* stack[0] = separator (string or regexp) | |
1008 | * stack[1] = limit | |
1009 | * stack[2] = input string | |
1010 | * stack[3] = result array | |
1011 | * stack[4] = regexp res_obj (if is_regexp) | |
1012 | */ | |
1013 | ||
1014 | DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld", | |
1015 | (long) match_start_boff, (long) match_start_coff, | |
1016 | (long) match_end_boff, (long) match_end_coff, | |
1017 | (long) prev_match_end_boff, (long) prev_match_end_coff)); | |
1018 | ||
1019 | duk_push_lstring(ctx, | |
1020 | (const char *) (DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff), | |
1021 | (duk_size_t) (match_start_boff - prev_match_end_boff)); | |
1022 | duk_put_prop_index(ctx, 3, arr_idx); | |
1023 | arr_idx++; | |
1024 | if (arr_idx >= limit) { | |
1025 | goto hit_limit; | |
1026 | } | |
1027 | ||
1028 | #ifdef DUK_USE_REGEXP_SUPPORT | |
1029 | if (is_regexp) { | |
1030 | duk_size_t i, len; | |
1031 | ||
1032 | len = duk_get_length(ctx, 4); | |
1033 | for (i = 1; i < len; i++) { | |
1034 | DUK_ASSERT(i <= DUK_UARRIDX_MAX); /* cannot have >4G captures */ | |
1035 | duk_get_prop_index(ctx, 4, (duk_uarridx_t) i); | |
1036 | duk_put_prop_index(ctx, 3, arr_idx); | |
1037 | arr_idx++; | |
1038 | if (arr_idx >= limit) { | |
1039 | goto hit_limit; | |
1040 | } | |
1041 | } | |
1042 | ||
1043 | duk_pop(ctx); | |
1044 | /* lastIndex already set up for next match */ | |
1045 | } else { | |
1046 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
1047 | { /* unconditionally */ | |
1048 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
1049 | /* no action */ | |
1050 | } | |
1051 | ||
1052 | prev_match_end_boff = match_end_boff; | |
1053 | prev_match_end_coff = match_end_coff; | |
1054 | continue; | |
1055 | } /* for */ | |
1056 | ||
1057 | /* Combined step 11 (empty string special case) and 14-15. */ | |
1058 | ||
1059 | DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld", | |
1060 | (long) prev_match_end_boff, (long) prev_match_end_coff)); | |
1061 | ||
1062 | if (DUK_HSTRING_GET_CHARLEN(h_input) > 0 || !matched) { | |
1063 | /* Add trailer if: | |
1064 | * a) non-empty input | |
1065 | * b) empty input and no (zero size) match found (step 11) | |
1066 | */ | |
1067 | ||
1068 | duk_push_lstring(ctx, | |
1069 | (const char *) DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, | |
1070 | (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff)); | |
1071 | duk_put_prop_index(ctx, 3, arr_idx); | |
1072 | /* No arr_idx update or limit check */ | |
1073 | } | |
1074 | ||
1075 | return 1; | |
1076 | ||
1077 | hit_limit: | |
1078 | #ifdef DUK_USE_REGEXP_SUPPORT | |
1079 | if (is_regexp) { | |
1080 | duk_pop(ctx); | |
1081 | } | |
1082 | #endif | |
1083 | ||
1084 | return 1; | |
1085 | } | |
1086 | ||
1087 | /* | |
1088 | * Various | |
1089 | */ | |
1090 | ||
1091 | #ifdef DUK_USE_REGEXP_SUPPORT | |
1092 | DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) { | |
1093 | duk_hobject *h; | |
1094 | ||
1095 | /* Shared helper for match() steps 3-4, search() steps 3-4. Ensures that the value stack slot 'index' holds a RegExp object: when 'force_new' is set, or when the slot does not already hold an object of class RegExp, the slot is replaced in place with the result of constructing a RegExp from the current value. Returns nothing; the only visible effect is on the value at 'index'. */ | |
1096 | ||
1097 | DUK_ASSERT(index >= 0); /* NOTE(review): presumably required because 'index' is reused below after pushes have changed the stack top - confirm */ | |
1098 | ||
1099 | if (force_new) { /* caller wants a new RegExp even if the value already is one */ | |
1100 | goto do_new; | |
1101 | } | |
1102 | ||
1103 | h = duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP); | |
1104 | if (!h) { /* lookup gave NULL (not an object of class RegExp): construct one from the value */ | |
1105 | goto do_new; | |
1106 | } | |
1107 | return; /* already a RegExp object: leave the slot untouched */ | |
1108 | ||
1109 | do_new: | |
1110 | duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR); | |
1111 | duk_dup(ctx, index); /* the original value is the single constructor argument */ | |
1112 | duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */ | |
1113 | duk_replace(ctx, index); /* store the new RegExp back into the original slot */ | |
1114 | } | |
1115 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
1116 | ||
1117 | #ifdef DUK_USE_REGEXP_SUPPORT | |
1118 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) { /* String.prototype.search(): leaves on the stack top either the 'index' property of the match result or -1 when there is no match, and returns 1 */ | |
1119 | duk_hthread *thr = (duk_hthread *) ctx; | |
1120 | ||
1121 | /* Easiest way to implement the search required by the specification | |
1122 | * is to do a RegExp test() with lastIndex forced to zero. To avoid | |
1123 | * side effects on the argument, "clone" the RegExp if a RegExp was | |
1124 | * given as input. | |
1125 | * | |
1126 | * The global flag of the RegExp should be ignored; setting lastIndex | |
1127 | * to zero (which happens when "cloning" the RegExp) should have an | |
1128 | * equivalent effect. | |
1129 | */ | |
1130 | ||
1131 | DUK_ASSERT_TOP(ctx, 1); /* exactly one argument slot is expected on entry */ | |
1132 | (void) duk_push_this_coercible_to_string(ctx); /* at index 1 */ | |
1133 | duk__to_regexp_helper(ctx, 0 /*index*/, 1 /*force_new*/); /* force_new: always replace the argument with a new RegExp (the "clone" described above) */ | |
1134 | ||
1135 | /* stack[0] = regexp | |
1136 | * stack[1] = string | |
1137 | */ | |
1138 | ||
1139 | /* Avoid using RegExp.prototype methods, as they're writable and | |
1140 | * configurable and may have been changed. | |
1141 | */ | |
1142 | ||
1143 | duk_dup(ctx, 0); | |
1144 | duk_dup(ctx, 1); /* [ ... re_obj input ] */ | |
1145 | duk_regexp_match(thr); /* -> [ ... res_obj ] */ | |
1146 | ||
1147 | if (!duk_is_object(ctx, -1)) { /* a non-object result is treated as "no match" */ | |
1148 | duk_push_int(ctx, -1); /* result value for "not found" */ | |
1149 | return 1; | |
1150 | } | |
1151 | ||
1152 | duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX); /* result value: 'index' property of the match object */ | |
1153 | DUK_ASSERT(duk_is_number(ctx, -1)); | |
1154 | return 1; | |
1155 | } | |
1156 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
1157 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) { /* variant compiled when DUK_USE_REGEXP_SUPPORT is not defined: search() is unavailable */ | |
1158 | DUK_UNREF(ctx); /* 'ctx' is intentionally unused in this variant */ | |
1159 | return DUK_RET_UNSUPPORTED_ERROR; | |
1160 | } | |
1161 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
1162 | ||
1163 | #ifdef DUK_USE_REGEXP_SUPPORT | |
1164 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) { /* String.prototype.match(): for a non-global RegExp the result is the single match result; for a global RegExp it is an array of all match strings, or null when nothing matched. Returns 1 (one result value on the stack top). */ | |
1165 | duk_hthread *thr = (duk_hthread *) ctx; | |
1166 | duk_bool_t global; /* value of the RegExp's 'global' property */ | |
1167 | duk_int_t prev_last_index; /* lastIndex as left by the previous loop round (after any bump) */ | |
1168 | duk_int_t this_index; /* lastIndex read after the current match */ | |
1169 | duk_int_t arr_idx; /* next free index in the result array; also the number of matches stored */ | |
1170 | ||
1171 | DUK_ASSERT_TOP(ctx, 1); | |
1172 | (void) duk_push_this_coercible_to_string(ctx); | |
1173 | duk__to_regexp_helper(ctx, 0 /*index*/, 0 /*force_new*/); /* force_new is 0: an argument that already is a RegExp is used as is, so the lastIndex writes below are made on that same object */ | |
1174 | global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL); | |
1175 | DUK_ASSERT_TOP(ctx, 2); | |
1176 | ||
1177 | /* stack[0] = regexp | |
1178 | * stack[1] = string | |
1179 | */ | |
1180 | ||
1181 | if (!global) { | |
1182 | duk_regexp_match(thr); /* -> [ res_obj ] */ | |
1183 | return 1; /* return 'res_obj' */ | |
1184 | } | |
1185 | ||
1186 | /* Global case is more complex: lastIndex is reset to zero, the RegExp is matched repeatedly, and element 0 of each match result is appended to a result array. */ | |
1187 | ||
1188 | /* [ regexp string ] */ | |
1189 | ||
1190 | duk_push_int(ctx, 0); | |
1191 | duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); /* regexp.lastIndex = 0 */ | |
1192 | duk_push_array(ctx); | |
1193 | ||
1194 | /* [ regexp string res_arr ] */ | |
1195 | ||
1196 | prev_last_index = 0; | |
1197 | arr_idx = 0; | |
1198 | ||
1199 | for (;;) { | |
1200 | DUK_ASSERT_TOP(ctx, 3); | |
1201 | ||
1202 | duk_dup(ctx, 0); | |
1203 | duk_dup(ctx, 1); | |
1204 | duk_regexp_match(thr); /* [ ... regexp string ] -> [ ... res_obj ] */ | |
1205 | ||
1206 | if (!duk_is_object(ctx, -1)) { /* non-object result: no further matches, stop collecting */ | |
1207 | duk_pop(ctx); | |
1208 | break; | |
1209 | } | |
1210 | ||
1211 | duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
1212 | DUK_ASSERT(duk_is_number(ctx, -1)); | |
1213 | this_index = duk_get_int(ctx, -1); | |
1214 | duk_pop(ctx); | |
1215 | ||
1216 | if (this_index == prev_last_index) { /* lastIndex did not move since the previous round (presumably an empty match): bump it by one and write it back */ | |
1217 | this_index++; | |
1218 | duk_push_int(ctx, this_index); | |
1219 | duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX); | |
1220 | } | |
1221 | prev_last_index = this_index; | |
1222 | ||
1223 | duk_get_prop_index(ctx, -1, 0); /* match string */ | |
1224 | duk_put_prop_index(ctx, 2, arr_idx); /* res_arr[arr_idx] = match string */ | |
1225 | arr_idx++; | |
1226 | duk_pop(ctx); /* res_obj */ | |
1227 | } | |
1228 | ||
1229 | if (arr_idx == 0) { /* nothing was collected: the result is null rather than an empty array */ | |
1230 | duk_push_null(ctx); | |
1231 | } | |
1232 | ||
1233 | return 1; /* return 'res_arr' or 'null' */ | |
1234 | } | |
1235 | #else /* DUK_USE_REGEXP_SUPPORT */ | |
1236 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) { /* variant compiled when DUK_USE_REGEXP_SUPPORT is not defined: match() is unavailable */ | |
1237 | DUK_UNREF(ctx); /* 'ctx' is intentionally unused in this variant */ | |
1238 | return DUK_RET_UNSUPPORTED_ERROR; | |
1239 | } | |
1240 | #endif /* DUK_USE_REGEXP_SUPPORT */ | |
1241 | ||
1242 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_concat(duk_context *ctx) { /* String.prototype.concat(): concatenates the string-coerced 'this' value with all call arguments and returns 1 (the result is on the stack top) */ | |
1243 | /* duk_concat() coerces arguments with ToString() in correct order */ | |
1244 | (void) duk_push_this_coercible_to_string(ctx); | |
1245 | duk_insert(ctx, 0); /* move the 'this' string to index 0, in front of the arguments; this is relatively expensive */ | |
1246 | duk_concat(ctx, duk_get_top(ctx)); /* the count is the whole stack: 'this' string plus every argument */ | |
1247 | return 1; | |
1248 | } | |
1249 | ||
1250 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_trim(duk_context *ctx) { /* String.prototype.trim(): applies duk_trim() to the string-coerced 'this' value and returns 1 (the result is on the stack top) */ | |
1251 | DUK_ASSERT_TOP(ctx, 0); /* the value stack is expected to be empty on entry */ | |
1252 | (void) duk_push_this_coercible_to_string(ctx); /* lands at index 0 */ | |
1253 | duk_trim(ctx, 0); /* operates on the value at index 0 */ | |
1254 | DUK_ASSERT_TOP(ctx, 1); | |
1255 | return 1; | |
1256 | } | |
1257 | ||
1258 | DUK_INTERNAL duk_ret_t duk_bi_string_prototype_locale_compare(duk_context *ctx) { /* String.prototype.localeCompare(): pushes -1, 0 or 1 according to a byte-wise comparison of the string-coerced 'this' value against the string-coerced argument at index 0, and returns 1 */ | |
1259 | duk_hstring *h1; /* 'this' coerced to string */ | |
1260 | duk_hstring *h2; /* argument at index 0 coerced to string */ | |
1261 | duk_size_t h1_len, h2_len, prefix_len; /* byte lengths; prefix_len is the smaller of the two */ | |
1262 | duk_small_int_t ret = 0; /* final result: -1, 0 or 1 */ | |
1263 | duk_small_int_t rc; /* raw DUK_MEMCMP result; only its sign is used */ | |
1264 | ||
1265 | /* The current implementation of localeCompare() is simply a codepoint | |
1266 | * by codepoint comparison, implemented with a simple string compare | |
1267 | * because UTF-8 should preserve codepoint ordering (assuming valid | |
1268 | * shortest UTF-8 encoding). | |
1269 | * | |
1270 | * The specification requires that the return value must be related | |
1271 | * to the sort order: e.g. negative means that 'this' comes before | |
1272 | * 'that' in sort order. We assume an ascending sort order. | |
1273 | */ | |
1274 | ||
1275 | /* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */ | |
1276 | ||
1277 | h1 = duk_push_this_coercible_to_string(ctx); | |
1278 | DUK_ASSERT(h1 != NULL); | |
1279 | ||
1280 | h2 = duk_to_hstring(ctx, 0); | |
1281 | DUK_ASSERT(h2 != NULL); | |
1282 | ||
1283 | h1_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h1); | |
1284 | h2_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h2); | |
1285 | prefix_len = (h1_len <= h2_len ? h1_len : h2_len); | |
1286 | ||
1287 | /* Zero size compare not an issue with DUK_MEMCMP. */ | |
1288 | rc = (duk_small_int_t) DUK_MEMCMP((const void *) DUK_HSTRING_GET_DATA(h1), | |
1289 | (const void *) DUK_HSTRING_GET_DATA(h2), | |
1290 | (size_t) prefix_len); | |
1291 | ||
1292 | if (rc < 0) { /* 'this' sorts first within the common prefix */ | |
1293 | ret = -1; | |
1294 | goto done; | |
1295 | } else if (rc > 0) { /* the argument sorts first within the common prefix */ | |
1296 | ret = 1; | |
1297 | goto done; | |
1298 | } | |
1299 | ||
1300 | /* prefix matches, lengths matter now: the longer string sorts after the shorter one, equal lengths mean equal strings */ | |
1301 | if (h1_len > h2_len) { | |
1302 | ret = 1; | |
1303 | goto done; | |
1304 | } else if (h1_len == h2_len) { | |
1305 | DUK_ASSERT(ret == 0); /* 'ret' still holds its initial value 0 */ | |
1306 | goto done; | |
1307 | } | |
1308 | ret = -1; /* remaining case: h1_len < h2_len */ | |
1309 | goto done; | |
1310 | ||
1311 | done: | |
1312 | duk_push_int(ctx, (duk_int_t) ret); | |
1313 | return 1; | |
1314 | } |