/* XXX: There are several limitations in the current implementation for
 * strings with >= 0x80000000UL characters.  In some cases one would need
 * to be able to represent the range [-0xffffffff,0xffffffff] and so on.
 * Generally character and byte length are assumed to fit into signed 32
 * bits (< 0x80000000UL).  Places with issues are not marked explicitly
 * below in all cases; look for signed type usage (duk_int_t etc).
 */
14 #include "duk_internal.h"
20 DUK_INTERNAL duk_ret_t
duk_bi_string_constructor(duk_context
*ctx
) {
21 /* String constructor needs to distinguish between an argument not given at all
22 * vs. given as 'undefined'. We're a vararg function to handle this properly.
25 if (duk_get_top(ctx
) == 0) {
26 duk_push_hstring_stridx(ctx
, DUK_STRIDX_EMPTY_STRING
);
28 duk_to_string(ctx
, 0);
30 DUK_ASSERT(duk_is_string(ctx
, 0));
33 if (duk_is_constructor_call(ctx
)) {
34 duk_push_object_helper(ctx
,
35 DUK_HOBJECT_FLAG_EXTENSIBLE
|
36 DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ
|
37 DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING
),
38 DUK_BIDX_STRING_PROTOTYPE
);
40 /* String object internal value is immutable */
42 duk_xdef_prop_stridx(ctx
, -2, DUK_STRIDX_INT_VALUE
, DUK_PROPDESC_FLAGS_NONE
);
44 /* Note: unbalanced stack on purpose */
49 DUK_INTERNAL duk_ret_t
duk_bi_string_constructor_from_char_code(duk_context
*ctx
) {
50 duk_hthread
*thr
= (duk_hthread
*) ctx
;
51 duk_bufwriter_ctx bw_alloc
;
52 duk_bufwriter_ctx
*bw
;
56 /* XXX: It would be nice to build the string directly but ToUint16()
57 * coercion is needed so a generic helper would not be very
58 * helpful (perhaps coerce the value stack first here and then
59 * build a string from a duk_tval number sequence in one go?).
65 DUK_BW_INIT_PUSHBUF(thr
, bw
, n
); /* initial estimate for ASCII only codepoints */
67 for (i
= 0; i
< n
; i
++) {
68 /* XXX: could improve bufwriter handling to write multiple codepoints
69 * with one ensure call but the relative benefit would be quite small.
72 #if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT)
73 /* ToUint16() coercion is mandatory in the E5.1 specification, but
74 * this non-compliant behavior makes more sense because we support
75 * non-BMP codepoints. Don't use CESU-8 because that'd create
79 cp
= (duk_ucodepoint_t
) duk_to_uint32(ctx
, i
);
80 DUK_BW_WRITE_ENSURE_XUTF8(thr
, bw
, cp
);
82 cp
= (duk_ucodepoint_t
) duk_to_uint32(ctx
, i
);
83 DUK_BW_WRITE_ENSURE_CESU8(thr
, bw
, cp
);
87 DUK_BW_COMPACT(thr
, bw
);
88 duk_to_string(ctx
, -1);
/*
 *  toString(), valueOf()
 */
96 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_to_string(duk_context
*ctx
) {
100 tv
= duk_require_tval(ctx
, -1);
101 DUK_ASSERT(tv
!= NULL
);
103 if (DUK_TVAL_IS_STRING(tv
)) {
106 } else if (DUK_TVAL_IS_OBJECT(tv
)) {
107 duk_hobject
*h
= DUK_TVAL_GET_OBJECT(tv
);
108 DUK_ASSERT(h
!= NULL
);
110 /* Must be a "string object", i.e. class "String" */
111 if (DUK_HOBJECT_GET_CLASS_NUMBER(h
) != DUK_HOBJECT_CLASS_STRING
) {
115 duk_get_prop_stridx(ctx
, -1, DUK_STRIDX_INT_VALUE
);
116 DUK_ASSERT(duk_is_string(ctx
, -1));
123 /* never here, but fall through */
126 return DUK_RET_TYPE_ERROR
;
/*
 *  Character and charcode access
 */
133 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_char_at(duk_context
*ctx
) {
136 /* XXX: faster implementation */
138 (void) duk_push_this_coercible_to_string(ctx
);
139 pos
= duk_to_int(ctx
, 0);
140 duk_substring(ctx
, -1, pos
, pos
+ 1);
144 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_char_code_at(duk_context
*ctx
) {
145 duk_hthread
*thr
= (duk_hthread
*) ctx
;
150 /* XXX: faster implementation */
152 DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval
*) duk_get_tval(ctx
, 0)));
154 h
= duk_push_this_coercible_to_string(ctx
);
155 DUK_ASSERT(h
!= NULL
);
157 pos
= duk_to_int_clamped_raw(ctx
,
160 DUK_HSTRING_GET_CHARLEN(h
) - 1 /*max(incl)*/,
161 &clamped
/*out_clamped*/);
163 duk_push_number(ctx
, DUK_DOUBLE_NAN
);
167 duk_push_u32(ctx
, (duk_uint32_t
) duk_hstring_char_code_at_raw(thr
, h
, pos
));
/*
 *  substring(), substr(), slice()
 */

/* XXX: any chance of merging these three similar but still slightly
 * different algorithms so that footprint would be reduced?
 */
179 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_substring(duk_context
*ctx
) {
181 duk_int_t start_pos
, end_pos
;
184 h
= duk_push_this_coercible_to_string(ctx
);
185 DUK_ASSERT(h
!= NULL
);
186 len
= (duk_int_t
) DUK_HSTRING_GET_CHARLEN(h
);
188 /* [ start end str ] */
190 start_pos
= duk_to_int_clamped(ctx
, 0, 0, len
);
191 if (duk_is_undefined(ctx
, 1)) {
194 end_pos
= duk_to_int_clamped(ctx
, 1, 0, len
);
196 DUK_ASSERT(start_pos
>= 0 && start_pos
<= len
);
197 DUK_ASSERT(end_pos
>= 0 && end_pos
<= len
);
199 if (start_pos
> end_pos
) {
200 duk_int_t tmp
= start_pos
;
205 DUK_ASSERT(end_pos
>= start_pos
);
207 duk_substring(ctx
, -1, (duk_size_t
) start_pos
, (duk_size_t
) end_pos
);
211 #ifdef DUK_USE_SECTION_B
212 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_substr(duk_context
*ctx
) {
214 duk_int_t start_pos
, end_pos
;
217 /* Unlike non-obsolete String calls, substr() algorithm in E5.1
218 * specification will happily coerce undefined and null to strings
219 * ("undefined" and "null").
222 h
= duk_to_hstring(ctx
, -1);
223 DUK_ASSERT(h
!= NULL
);
224 len
= (duk_int_t
) DUK_HSTRING_GET_CHARLEN(h
);
226 /* [ start length str ] */
228 /* The implementation for computing of start_pos and end_pos differs
229 * from the standard algorithm, but is intended to result in the exactly
230 * same behavior. This is not always obvious.
233 /* combines steps 2 and 5; -len ensures max() not needed for step 5 */
234 start_pos
= duk_to_int_clamped(ctx
, 0, -len
, len
);
236 start_pos
= len
+ start_pos
;
238 DUK_ASSERT(start_pos
>= 0 && start_pos
<= len
);
240 /* combines steps 3, 6; step 7 is not needed */
241 if (duk_is_undefined(ctx
, 1)) {
244 DUK_ASSERT(start_pos
<= len
);
245 end_pos
= start_pos
+ duk_to_int_clamped(ctx
, 1, 0, len
- start_pos
);
247 DUK_ASSERT(start_pos
>= 0 && start_pos
<= len
);
248 DUK_ASSERT(end_pos
>= 0 && end_pos
<= len
);
249 DUK_ASSERT(end_pos
>= start_pos
);
251 duk_substring(ctx
, -1, (duk_size_t
) start_pos
, (duk_size_t
) end_pos
);
254 #else /* DUK_USE_SECTION_B */
255 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_substr(duk_context
*ctx
) {
257 return DUK_RET_UNSUPPORTED_ERROR
;
259 #endif /* DUK_USE_SECTION_B */
261 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_slice(duk_context
*ctx
) {
263 duk_int_t start_pos
, end_pos
;
266 h
= duk_push_this_coercible_to_string(ctx
);
267 DUK_ASSERT(h
!= NULL
);
268 len
= (duk_int_t
) DUK_HSTRING_GET_CHARLEN(h
);
270 /* [ start end str ] */
272 start_pos
= duk_to_int_clamped(ctx
, 0, -len
, len
);
274 start_pos
= len
+ start_pos
;
276 if (duk_is_undefined(ctx
, 1)) {
279 end_pos
= duk_to_int_clamped(ctx
, 1, -len
, len
);
281 end_pos
= len
+ end_pos
;
284 DUK_ASSERT(start_pos
>= 0 && start_pos
<= len
);
285 DUK_ASSERT(end_pos
>= 0 && end_pos
<= len
);
287 if (end_pos
< start_pos
) {
291 DUK_ASSERT(end_pos
>= start_pos
);
293 duk_substring(ctx
, -1, (duk_size_t
) start_pos
, (duk_size_t
) end_pos
);
301 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_caseconv_shared(duk_context
*ctx
) {
302 duk_hthread
*thr
= (duk_hthread
*) ctx
;
303 duk_small_int_t uppercase
= duk_get_current_magic(ctx
);
305 (void) duk_push_this_coercible_to_string(ctx
);
306 duk_unicode_case_convert_string(thr
, (duk_bool_t
) uppercase
);
/*
 *  indexOf() and lastIndexOf()
 */
314 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_indexof_shared(duk_context
*ctx
) {
315 duk_hthread
*thr
= (duk_hthread
*) ctx
;
317 duk_hstring
*h_search
;
321 const duk_uint8_t
*p_start
, *p_end
, *p
;
322 const duk_uint8_t
*q_start
;
324 duk_uint8_t firstbyte
;
326 duk_small_int_t is_lastindexof
= duk_get_current_magic(ctx
); /* 0=indexOf, 1=lastIndexOf */
328 h_this
= duk_push_this_coercible_to_string(ctx
);
329 DUK_ASSERT(h_this
!= NULL
);
330 clen_this
= (duk_int_t
) DUK_HSTRING_GET_CHARLEN(h_this
);
332 h_search
= duk_to_hstring(ctx
, 0);
333 DUK_ASSERT(h_search
!= NULL
);
334 q_start
= DUK_HSTRING_GET_DATA(h_search
);
335 q_blen
= (duk_int_t
) DUK_HSTRING_GET_BYTELEN(h_search
);
337 duk_to_number(ctx
, 1);
338 if (duk_is_nan(ctx
, 1) && is_lastindexof
) {
339 /* indexOf: NaN should cause pos to be zero.
340 * lastIndexOf: NaN should cause pos to be +Infinity
341 * (and later be clamped to len).
345 cpos
= duk_to_int_clamped(ctx
, 1, 0, clen_this
);
348 /* Empty searchstring always matches; cpos must be clamped here.
349 * (If q_blen were < 0 due to clamped coercion, it would also be
353 duk_push_int(ctx
, cpos
);
356 DUK_ASSERT(q_blen
> 0);
358 bpos
= (duk_int_t
) duk_heap_strcache_offset_char2byte(thr
, h_this
, (duk_uint32_t
) cpos
);
360 p_start
= DUK_HSTRING_GET_DATA(h_this
);
361 p_end
= p_start
+ DUK_HSTRING_GET_BYTELEN(h_this
);
364 /* This loop is optimized for size. For speed, there should be
365 * two separate loops, and we should ensure that memcmp() can be
366 * used without an extra "will searchstring fit" check. Doing
367 * the preconditioning for 'p' and 'p_end' is easy but cpos
368 * must be updated if 'p' is wound back (backward scanning).
371 firstbyte
= q_start
[0]; /* leading byte of match string */
372 while (p
<= p_end
&& p
>= p_start
) {
375 /* For Ecmascript strings, this check can only match for
376 * initial UTF-8 bytes (not continuation bytes). For other
377 * strings all bets are off.
380 if ((t
== firstbyte
) && ((duk_size_t
) (p_end
- p
) >= (duk_size_t
) q_blen
)) {
381 DUK_ASSERT(q_blen
> 0); /* no issues with memcmp() zero size, even if broken */
382 if (DUK_MEMCMP(p
, q_start
, (duk_size_t
) q_blen
) == 0) {
383 duk_push_int(ctx
, cpos
);
388 /* track cpos while scanning */
389 if (is_lastindexof
) {
390 /* when going backwards, we decrement cpos 'early';
391 * 'p' may point to a continuation byte of the char
392 * at offset 'cpos', but that's OK because we'll
393 * backtrack all the way to the initial byte.
395 if ((t
& 0xc0) != 0x80) {
400 if ((t
& 0xc0) != 0x80) {
407 /* Not found. Empty string case is handled specially above. */
408 duk_push_int(ctx
, -1);
416 /* XXX: the current implementation works but is quite clunky; it compiles
417 * to almost 1,4kB of x86 code so it needs to be simplified (better approach,
418 * shared helpers, etc). Some ideas for refactoring:
420 * - a primitive to convert a string into a regexp matcher (reduces matching
421 * code at the cost of making matching much slower)
422 * - use replace() as a basic helper for match() and split(), which are both
424 * - API call to get_prop and to_boolean
427 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_replace(duk_context
*ctx
) {
428 duk_hthread
*thr
= (duk_hthread
*) ctx
;
429 duk_hstring
*h_input
;
430 duk_hstring
*h_match
;
431 duk_hstring
*h_search
;
433 duk_bufwriter_ctx bw_alloc
;
434 duk_bufwriter_ctx
*bw
;
435 #ifdef DUK_USE_REGEXP_SUPPORT
436 duk_bool_t is_regexp
;
437 duk_bool_t is_global
;
439 duk_bool_t is_repl_func
;
440 duk_uint32_t match_start_coff
, match_start_boff
;
441 #ifdef DUK_USE_REGEXP_SUPPORT
442 duk_int_t match_caps
;
444 duk_uint32_t prev_match_end_boff
;
445 const duk_uint8_t
*r_start
, *r_end
, *r
; /* repl string scan */
448 DUK_ASSERT_TOP(ctx
, 2);
449 h_input
= duk_push_this_coercible_to_string(ctx
);
450 DUK_ASSERT(h_input
!= NULL
);
453 DUK_BW_INIT_PUSHBUF(thr
, bw
, DUK_HSTRING_GET_BYTELEN(h_input
)); /* input size is good output starting point */
455 DUK_ASSERT_TOP(ctx
, 4);
457 /* stack[0] = search value
458 * stack[1] = replace value
459 * stack[2] = input string
460 * stack[3] = result buffer
463 h_re
= duk_get_hobject_with_class(ctx
, 0, DUK_HOBJECT_CLASS_REGEXP
);
465 #ifdef DUK_USE_REGEXP_SUPPORT
467 is_global
= duk_get_prop_stridx_boolean(ctx
, 0, DUK_STRIDX_GLOBAL
, NULL
);
470 /* start match from beginning */
471 duk_push_int(ctx
, 0);
472 duk_put_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
474 #else /* DUK_USE_REGEXP_SUPPORT */
475 return DUK_RET_UNSUPPORTED_ERROR
;
476 #endif /* DUK_USE_REGEXP_SUPPORT */
478 duk_to_string(ctx
, 0);
479 #ifdef DUK_USE_REGEXP_SUPPORT
485 if (duk_is_function(ctx
, 1)) {
493 h_repl
= duk_to_hstring(ctx
, 1);
494 DUK_ASSERT(h_repl
!= NULL
);
495 r_start
= DUK_HSTRING_GET_DATA(h_repl
);
496 r_end
= r_start
+ DUK_HSTRING_GET_BYTELEN(h_repl
);
499 prev_match_end_boff
= 0;
503 * If matching with a regexp:
504 * - non-global RegExp: lastIndex not touched on a match, zeroed
506 * - global RegExp: on match, lastIndex will be updated by regexp
507 * executor to point to next char after the matching part (so that
508 * characters in the matching part are not matched again)
510 * If matching with a string:
511 * - always non-global match, find first occurrence
514 * - The character offset of start-of-match for the replacer function
515 * - The byte offsets for start-of-match and end-of-match to implement
516 * the replacement values $&, $`, and $', and to copy non-matching
517 * input string portions (including header and trailer) verbatim.
519 * NOTE: the E5.1 specification is a bit vague how the RegExp should
520 * behave in the replacement process; e.g. is matching done first for
521 * all matches (in the global RegExp case) before any replacer calls
522 * are made? See: test-bi-string-proto-replace.js for discussion.
525 DUK_ASSERT_TOP(ctx
, 4);
527 #ifdef DUK_USE_REGEXP_SUPPORT
531 duk_regexp_match(thr
); /* [ ... regexp input ] -> [ res_obj ] */
532 if (!duk_is_object(ctx
, -1)) {
537 duk_get_prop_stridx(ctx
, -1, DUK_STRIDX_INDEX
);
538 DUK_ASSERT(duk_is_number(ctx
, -1));
539 match_start_coff
= duk_get_int(ctx
, -1);
542 duk_get_prop_index(ctx
, -1, 0);
543 DUK_ASSERT(duk_is_string(ctx
, -1));
544 h_match
= duk_get_hstring(ctx
, -1);
545 DUK_ASSERT(h_match
!= NULL
);
546 duk_pop(ctx
); /* h_match is borrowed, remains reachable through match_obj */
548 if (DUK_HSTRING_GET_BYTELEN(h_match
) == 0) {
549 /* This should be equivalent to match() algorithm step 8.f.iii.2:
550 * detect an empty match and allow it, but don't allow it twice.
552 duk_uint32_t last_index
;
554 duk_get_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
555 last_index
= (duk_uint32_t
) duk_get_uint(ctx
, -1);
556 DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld",
557 (long) last_index
, (long) (last_index
+ 1)));
559 duk_push_int(ctx
, last_index
+ 1);
560 duk_put_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
563 DUK_ASSERT(duk_get_length(ctx
, -1) <= DUK_INT_MAX
); /* string limits */
564 match_caps
= (duk_int_t
) duk_get_length(ctx
, -1);
566 #else /* DUK_USE_REGEXP_SUPPORT */
567 { /* unconditionally */
568 #endif /* DUK_USE_REGEXP_SUPPORT */
569 const duk_uint8_t
*p_start
, *p_end
, *p
; /* input string scan */
570 const duk_uint8_t
*q_start
; /* match string */
573 #ifdef DUK_USE_REGEXP_SUPPORT
574 DUK_ASSERT(!is_global
); /* single match always */
577 p_start
= DUK_HSTRING_GET_DATA(h_input
);
578 p_end
= p_start
+ DUK_HSTRING_GET_BYTELEN(h_input
);
581 h_search
= duk_get_hstring(ctx
, 0);
582 DUK_ASSERT(h_search
!= NULL
);
583 q_start
= DUK_HSTRING_GET_DATA(h_search
);
584 q_blen
= (duk_size_t
) DUK_HSTRING_GET_BYTELEN(h_search
);
586 p_end
-= q_blen
; /* ensure full memcmp() fits in while */
588 match_start_coff
= 0;
591 DUK_ASSERT(p
+ q_blen
<= DUK_HSTRING_GET_DATA(h_input
) + DUK_HSTRING_GET_BYTELEN(h_input
));
592 if (DUK_MEMCMP((void *) p
, (void *) q_start
, (size_t) q_blen
) == 0) {
594 h_match
= duk_get_hstring(ctx
, -1);
595 DUK_ASSERT(h_match
!= NULL
);
596 #ifdef DUK_USE_REGEXP_SUPPORT
602 /* track utf-8 non-continuation bytes */
603 if ((p
[0] & 0xc0) != 0x80) {
614 /* stack[0] = search value
615 * stack[1] = replace value
616 * stack[2] = input string
617 * stack[3] = result buffer
618 * stack[4] = regexp match OR match string
621 match_start_boff
= duk_heap_strcache_offset_char2byte(thr
, h_input
, match_start_coff
);
623 tmp_sz
= (duk_size_t
) (match_start_boff
- prev_match_end_boff
);
624 DUK_BW_WRITE_ENSURE_BYTES(thr
, bw
, DUK_HSTRING_GET_DATA(h_input
) + prev_match_end_boff
, tmp_sz
);
626 prev_match_end_boff
= match_start_boff
+ DUK_HSTRING_GET_BYTELEN(h_match
);
632 /* regexp res_obj is at index 4 */
635 idx_args
= duk_get_top(ctx
);
637 #ifdef DUK_USE_REGEXP_SUPPORT
640 duk_require_stack(ctx
, match_caps
+ 2);
641 for (idx
= 0; idx
< match_caps
; idx
++) {
642 /* match followed by capture(s) */
643 duk_get_prop_index(ctx
, 4, idx
);
646 #else /* DUK_USE_REGEXP_SUPPORT */
647 { /* unconditionally */
648 #endif /* DUK_USE_REGEXP_SUPPORT */
649 /* match == search string, by definition */
652 duk_push_int(ctx
, match_start_coff
);
655 /* [ ... replacer match [captures] match_char_offset input ] */
657 duk_call(ctx
, duk_get_top(ctx
) - idx_args
);
658 h_repl
= duk_to_hstring(ctx
, -1); /* -> [ ... repl_value ] */
659 DUK_ASSERT(h_repl
!= NULL
);
661 DUK_BW_WRITE_ENSURE_HSTRING(thr
, bw
, h_repl
);
663 duk_pop(ctx
); /* repl_value */
670 #ifdef DUK_USE_REGEXP_SUPPORT
676 if (ch1
!= DUK_ASC_DOLLAR
) {
687 case DUK_ASC_DOLLAR
: {
688 ch1
= (1 << 8) + DUK_ASC_DOLLAR
;
692 DUK_BW_WRITE_ENSURE_HSTRING(thr
, bw
, h_match
);
696 case DUK_ASC_GRAVE
: {
697 tmp_sz
= (duk_size_t
) match_start_boff
;
698 DUK_BW_WRITE_ENSURE_BYTES(thr
, bw
, DUK_HSTRING_GET_DATA(h_input
), tmp_sz
);
702 case DUK_ASC_SINGLEQUOTE
: {
703 duk_uint32_t match_end_boff
;
705 /* Use match charlen instead of bytelen, just in case the input and
706 * match codepoint encodings would have different lengths.
708 match_end_boff
= duk_heap_strcache_offset_char2byte(thr
,
710 match_start_coff
+ DUK_HSTRING_GET_CHARLEN(h_match
));
712 tmp_sz
= (duk_size_t
) (DUK_HSTRING_GET_BYTELEN(h_input
) - match_end_boff
);
713 DUK_BW_WRITE_ENSURE_BYTES(thr
, bw
, DUK_HSTRING_GET_DATA(h_input
) + match_end_boff
, tmp_sz
);
718 #ifdef DUK_USE_REGEXP_SUPPORT
719 duk_int_t capnum
, captmp
, capadv
;
720 /* XXX: optional check, match_caps is zero if no regexp,
721 * so dollar will be interpreted literally anyway.
728 if (!(ch2
>= DUK_ASC_0
&& ch2
<= DUK_ASC_9
)) {
731 capnum
= ch2
- DUK_ASC_0
;
736 if (ch3
>= DUK_ASC_0
&& ch3
<= DUK_ASC_9
) {
737 captmp
= capnum
* 10 + (ch3
- DUK_ASC_0
);
738 if (captmp
< match_caps
) {
745 if (capnum
> 0 && capnum
< match_caps
) {
746 DUK_ASSERT(is_regexp
!= 0); /* match_caps == 0 without regexps */
748 /* regexp res_obj is at offset 4 */
749 duk_get_prop_index(ctx
, 4, (duk_uarridx_t
) capnum
);
750 if (duk_is_string(ctx
, -1)) {
751 duk_hstring
*h_tmp_str
;
753 h_tmp_str
= duk_get_hstring(ctx
, -1);
754 DUK_ASSERT(h_tmp_str
!= NULL
);
756 DUK_BW_WRITE_ENSURE_HSTRING(thr
, bw
, h_tmp_str
);
758 /* undefined -> skip (replaced with empty) */
766 #else /* DUK_USE_REGEXP_SUPPORT */
767 goto repl_write
; /* unconditionally */
768 #endif /* DUK_USE_REGEXP_SUPPORT */
773 /* ch1 = (r_increment << 8) + byte */
775 DUK_BW_WRITE_ENSURE_U8(thr
, bw
, (duk_uint8_t
) (ch1
& 0xff));
778 } /* if (is_repl_func) */
780 duk_pop(ctx
); /* pop regexp res_obj or match string */
782 #ifdef DUK_USE_REGEXP_SUPPORT
785 { /* unconditionally; is_global==0 */
792 tmp_sz
= (duk_size_t
) (DUK_HSTRING_GET_BYTELEN(h_input
) - prev_match_end_boff
);
793 DUK_BW_WRITE_ENSURE_BYTES(thr
, bw
, DUK_HSTRING_GET_DATA(h_input
) + prev_match_end_boff
, tmp_sz
);
795 DUK_ASSERT_TOP(ctx
, 4);
796 DUK_BW_COMPACT(thr
, bw
);
797 duk_to_string(ctx
, -1);
805 /* XXX: very messy now, but works; clean up, remove unused variables (nomimally
806 * used so compiler doesn't complain).
809 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_split(duk_context
*ctx
) {
810 duk_hthread
*thr
= (duk_hthread
*) ctx
;
811 duk_hstring
*h_input
;
814 duk_uint32_t arr_idx
;
815 #ifdef DUK_USE_REGEXP_SUPPORT
816 duk_bool_t is_regexp
;
818 duk_bool_t matched
; /* set to 1 if any match exists (needed for empty input special case) */
819 duk_uint32_t prev_match_end_coff
, prev_match_end_boff
;
820 duk_uint32_t match_start_boff
, match_start_coff
;
821 duk_uint32_t match_end_boff
, match_end_coff
;
825 h_input
= duk_push_this_coercible_to_string(ctx
);
826 DUK_ASSERT(h_input
!= NULL
);
830 if (duk_is_undefined(ctx
, 1)) {
831 limit
= 0xffffffffUL
;
833 limit
= duk_to_uint32(ctx
, 1);
840 /* If the separator is a RegExp, make a "clone" of it. The specification
841 * algorithm calls [[Match]] directly for specific indices; we emulate this
842 * by tweaking lastIndex and using a "force global" variant of duk_regexp_match()
843 * which will use global-style matching even when the RegExp itself is non-global.
846 if (duk_is_undefined(ctx
, 0)) {
847 /* The spec algorithm first does "R = ToString(separator)" before checking
848 * whether separator is undefined. Since this is side effect free, we can
849 * skip the ToString() here.
852 duk_put_prop_index(ctx
, 3, 0);
854 } else if (duk_get_hobject_with_class(ctx
, 0, DUK_HOBJECT_CLASS_REGEXP
) != NULL
) {
855 #ifdef DUK_USE_REGEXP_SUPPORT
856 duk_push_hobject_bidx(ctx
, DUK_BIDX_REGEXP_CONSTRUCTOR
);
858 duk_new(ctx
, 1); /* [ ... RegExp val ] -> [ ... res ] */
860 /* lastIndex is initialized to zero by new RegExp() */
863 return DUK_RET_UNSUPPORTED_ERROR
;
866 duk_to_string(ctx
, 0);
867 #ifdef DUK_USE_REGEXP_SUPPORT
872 /* stack[0] = separator (string or regexp)
874 * stack[2] = input string
875 * stack[3] = result array
878 prev_match_end_boff
= 0;
879 prev_match_end_coff
= 0;
885 * The specification uses RegExp [[Match]] to attempt match at specific
886 * offsets. We don't have such a primitive, so we use an actual RegExp
887 * and tweak lastIndex. Since the RegExp may be non-global, we use a
888 * special variant which forces global-like behavior for matching.
891 DUK_ASSERT_TOP(ctx
, 4);
893 #ifdef DUK_USE_REGEXP_SUPPORT
897 duk_regexp_match_force_global(thr
); /* [ ... regexp input ] -> [ res_obj ] */
898 if (!duk_is_object(ctx
, -1)) {
904 duk_get_prop_stridx(ctx
, -1, DUK_STRIDX_INDEX
);
905 DUK_ASSERT(duk_is_number(ctx
, -1));
906 match_start_coff
= duk_get_int(ctx
, -1);
907 match_start_boff
= duk_heap_strcache_offset_char2byte(thr
, h_input
, match_start_coff
);
910 if (match_start_coff
== DUK_HSTRING_GET_CHARLEN(h_input
)) {
911 /* don't allow an empty match at the end of the string */
916 duk_get_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
917 DUK_ASSERT(duk_is_number(ctx
, -1));
918 match_end_coff
= duk_get_int(ctx
, -1);
919 match_end_boff
= duk_heap_strcache_offset_char2byte(thr
, h_input
, match_end_coff
);
922 /* empty match -> bump and continue */
923 if (prev_match_end_boff
== match_end_boff
) {
924 duk_push_int(ctx
, match_end_coff
+ 1);
925 duk_put_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
930 #else /* DUK_USE_REGEXP_SUPPORT */
931 { /* unconditionally */
932 #endif /* DUK_USE_REGEXP_SUPPORT */
933 const duk_uint8_t
*p_start
, *p_end
, *p
; /* input string scan */
934 const duk_uint8_t
*q_start
; /* match string */
935 duk_size_t q_blen
, q_clen
;
937 p_start
= DUK_HSTRING_GET_DATA(h_input
);
938 p_end
= p_start
+ DUK_HSTRING_GET_BYTELEN(h_input
);
939 p
= p_start
+ prev_match_end_boff
;
941 h_sep
= duk_get_hstring(ctx
, 0);
942 DUK_ASSERT(h_sep
!= NULL
);
943 q_start
= DUK_HSTRING_GET_DATA(h_sep
);
944 q_blen
= (duk_size_t
) DUK_HSTRING_GET_BYTELEN(h_sep
);
945 q_clen
= (duk_size_t
) DUK_HSTRING_GET_CHARLEN(h_sep
);
947 p_end
-= q_blen
; /* ensure full memcmp() fits in while */
949 match_start_coff
= prev_match_end_coff
;
952 /* Handle empty separator case: it will always match, and always
953 * triggers the check in step 13.c.iii initially. Note that we
954 * must skip to either end of string or start of first codepoint,
955 * skipping over any continuation bytes!
957 * Don't allow an empty string to match at the end of the input.
960 matched
= 1; /* empty separator can always match */
965 if ((p
[0] & 0xc0) != 0x80) {
973 DUK_ASSERT(q_blen
> 0 && q_clen
> 0);
975 DUK_ASSERT(p
+ q_blen
<= DUK_HSTRING_GET_DATA(h_input
) + DUK_HSTRING_GET_BYTELEN(h_input
));
976 DUK_ASSERT(q_blen
> 0); /* no issues with empty memcmp() */
977 if (DUK_MEMCMP((void *) p
, (void *) q_start
, (duk_size_t
) q_blen
) == 0) {
978 /* never an empty match, so step 13.c.iii can't be triggered */
982 /* track utf-8 non-continuation bytes */
983 if ((p
[0] & 0xc0) != 0x80) {
995 match_start_boff
= (duk_uint32_t
) (p
- p_start
);
996 match_end_coff
= (duk_uint32_t
) (match_start_coff
+ q_clen
); /* constrained by string length */
997 match_end_boff
= (duk_uint32_t
) (match_start_boff
+ q_blen
); /* ditto */
999 /* empty match (may happen with empty separator) -> bump and continue */
1000 if (prev_match_end_boff
== match_end_boff
) {
1001 prev_match_end_boff
++;
1002 prev_match_end_coff
++;
1005 } /* if (is_regexp) */
1007 /* stack[0] = separator (string or regexp)
1009 * stack[2] = input string
1010 * stack[3] = result array
1011 * stack[4] = regexp res_obj (if is_regexp)
1014 DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld",
1015 (long) match_start_boff
, (long) match_start_coff
,
1016 (long) match_end_boff
, (long) match_end_coff
,
1017 (long) prev_match_end_boff
, (long) prev_match_end_coff
));
1019 duk_push_lstring(ctx
,
1020 (const char *) (DUK_HSTRING_GET_DATA(h_input
) + prev_match_end_boff
),
1021 (duk_size_t
) (match_start_boff
- prev_match_end_boff
));
1022 duk_put_prop_index(ctx
, 3, arr_idx
);
1024 if (arr_idx
>= limit
) {
1028 #ifdef DUK_USE_REGEXP_SUPPORT
1032 len
= duk_get_length(ctx
, 4);
1033 for (i
= 1; i
< len
; i
++) {
1034 DUK_ASSERT(i
<= DUK_UARRIDX_MAX
); /* cannot have >4G captures */
1035 duk_get_prop_index(ctx
, 4, (duk_uarridx_t
) i
);
1036 duk_put_prop_index(ctx
, 3, arr_idx
);
1038 if (arr_idx
>= limit
) {
1044 /* lastIndex already set up for next match */
1046 #else /* DUK_USE_REGEXP_SUPPORT */
1047 { /* unconditionally */
1048 #endif /* DUK_USE_REGEXP_SUPPORT */
1052 prev_match_end_boff
= match_end_boff
;
1053 prev_match_end_coff
= match_end_coff
;
1057 /* Combined step 11 (empty string special case) and 14-15. */
1059 DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld",
1060 (long) prev_match_end_boff
, (long) prev_match_end_coff
));
1062 if (DUK_HSTRING_GET_CHARLEN(h_input
) > 0 || !matched
) {
1064 * a) non-empty input
1065 * b) empty input and no (zero size) match found (step 11)
1068 duk_push_lstring(ctx
,
1069 (const char *) DUK_HSTRING_GET_DATA(h_input
) + prev_match_end_boff
,
1070 (duk_size_t
) (DUK_HSTRING_GET_BYTELEN(h_input
) - prev_match_end_boff
));
1071 duk_put_prop_index(ctx
, 3, arr_idx
);
1072 /* No arr_idx update or limit check */
1078 #ifdef DUK_USE_REGEXP_SUPPORT
#ifdef DUK_USE_REGEXP_SUPPORT
/*
 *  Ensure the value at 'index' is a RegExp instance.
 *
 *  If 'force_new' is set, or the value is not an object of class RegExp,
 *  the value is replaced in place with the result of 'new RegExp(value)'.
 *  Otherwise the value is left untouched.
 *
 *  'index' must be a non-negative (absolute) value stack index.
 *
 *  NOTE(review): recovered from a listing with dropped lines.  The 'h'
 *  declaration, the 'force_new' and '!h' tests, the early return and the
 *  'do_new' label were restored; confirm against upstream.
 */
DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) {
	duk_hobject *h;

	/* Shared helper for match() steps 3-4, search() steps 3-4. */

	DUK_ASSERT(index >= 0);

	if (force_new) {
		goto do_new;
	}

	h = duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP);
	if (!h) {
		goto do_new;
	}
	return;

 do_new:
	duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
	duk_dup(ctx, index);
	duk_new(ctx, 1);  /* [ ... RegExp val ] -> [ ... res ] */
	duk_replace(ctx, index);
}
#endif  /* DUK_USE_REGEXP_SUPPORT */
1117 #ifdef DUK_USE_REGEXP_SUPPORT
1118 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_search(duk_context
*ctx
) {
1119 duk_hthread
*thr
= (duk_hthread
*) ctx
;
1121 /* Easiest way to implement the search required by the specification
1122 * is to do a RegExp test() with lastIndex forced to zero. To avoid
1123 * side effects on the argument, "clone" the RegExp if a RegExp was
1126 * The global flag of the RegExp should be ignored; setting lastIndex
1127 * to zero (which happens when "cloning" the RegExp) should have an
1128 * equivalent effect.
1131 DUK_ASSERT_TOP(ctx
, 1);
1132 (void) duk_push_this_coercible_to_string(ctx
); /* at index 1 */
1133 duk__to_regexp_helper(ctx
, 0 /*index*/, 1 /*force_new*/);
1135 /* stack[0] = regexp
1139 /* Avoid using RegExp.prototype methods, as they're writable and
1140 * configurable and may have been changed.
1144 duk_dup(ctx
, 1); /* [ ... re_obj input ] */
1145 duk_regexp_match(thr
); /* -> [ ... res_obj ] */
1147 if (!duk_is_object(ctx
, -1)) {
1148 duk_push_int(ctx
, -1);
1152 duk_get_prop_stridx(ctx
, -1, DUK_STRIDX_INDEX
);
1153 DUK_ASSERT(duk_is_number(ctx
, -1));
1156 #else /* DUK_USE_REGEXP_SUPPORT */
1157 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_search(duk_context
*ctx
) {
1159 return DUK_RET_UNSUPPORTED_ERROR
;
1161 #endif /* DUK_USE_REGEXP_SUPPORT */
1163 #ifdef DUK_USE_REGEXP_SUPPORT
1164 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_match(duk_context
*ctx
) {
1165 duk_hthread
*thr
= (duk_hthread
*) ctx
;
1167 duk_int_t prev_last_index
;
1168 duk_int_t this_index
;
1171 DUK_ASSERT_TOP(ctx
, 1);
1172 (void) duk_push_this_coercible_to_string(ctx
);
1173 duk__to_regexp_helper(ctx
, 0 /*index*/, 0 /*force_new*/);
1174 global
= duk_get_prop_stridx_boolean(ctx
, 0, DUK_STRIDX_GLOBAL
, NULL
);
1175 DUK_ASSERT_TOP(ctx
, 2);
1177 /* stack[0] = regexp
1182 duk_regexp_match(thr
); /* -> [ res_obj ] */
1183 return 1; /* return 'res_obj' */
1186 /* Global case is more complex. */
1188 /* [ regexp string ] */
1190 duk_push_int(ctx
, 0);
1191 duk_put_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
1192 duk_push_array(ctx
);
1194 /* [ regexp string res_arr ] */
1196 prev_last_index
= 0;
1200 DUK_ASSERT_TOP(ctx
, 3);
1204 duk_regexp_match(thr
); /* -> [ ... regexp string ] -> [ ... res_obj ] */
1206 if (!duk_is_object(ctx
, -1)) {
1211 duk_get_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
1212 DUK_ASSERT(duk_is_number(ctx
, -1));
1213 this_index
= duk_get_int(ctx
, -1);
1216 if (this_index
== prev_last_index
) {
1218 duk_push_int(ctx
, this_index
);
1219 duk_put_prop_stridx(ctx
, 0, DUK_STRIDX_LAST_INDEX
);
1221 prev_last_index
= this_index
;
1223 duk_get_prop_index(ctx
, -1, 0); /* match string */
1224 duk_put_prop_index(ctx
, 2, arr_idx
);
1226 duk_pop(ctx
); /* res_obj */
1233 return 1; /* return 'res_arr' or 'null' */
1235 #else /* DUK_USE_REGEXP_SUPPORT */
1236 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_match(duk_context
*ctx
) {
1238 return DUK_RET_UNSUPPORTED_ERROR
;
1240 #endif /* DUK_USE_REGEXP_SUPPORT */
1242 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_concat(duk_context
*ctx
) {
1243 /* duk_concat() coerces arguments with ToString() in correct order */
1244 (void) duk_push_this_coercible_to_string(ctx
);
1245 duk_insert(ctx
, 0); /* this is relatively expensive */
1246 duk_concat(ctx
, duk_get_top(ctx
));
1250 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_trim(duk_context
*ctx
) {
1251 DUK_ASSERT_TOP(ctx
, 0);
1252 (void) duk_push_this_coercible_to_string(ctx
);
1254 DUK_ASSERT_TOP(ctx
, 1);
1258 DUK_INTERNAL duk_ret_t
duk_bi_string_prototype_locale_compare(duk_context
*ctx
) {
1261 duk_size_t h1_len
, h2_len
, prefix_len
;
1262 duk_small_int_t ret
= 0;
1265 /* The current implementation of localeCompare() is simply a codepoint
1266 * by codepoint comparison, implemented with a simple string compare
1267 * because UTF-8 should preserve codepoint ordering (assuming valid
1268 * shortest UTF-8 encoding).
1270 * The specification requires that the return value must be related
1271 * to the sort order: e.g. negative means that 'this' comes before
1272 * 'that' in sort order. We assume an ascending sort order.
1275 /* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */
1277 h1
= duk_push_this_coercible_to_string(ctx
);
1278 DUK_ASSERT(h1
!= NULL
);
1280 h2
= duk_to_hstring(ctx
, 0);
1281 DUK_ASSERT(h2
!= NULL
);
1283 h1_len
= (duk_size_t
) DUK_HSTRING_GET_BYTELEN(h1
);
1284 h2_len
= (duk_size_t
) DUK_HSTRING_GET_BYTELEN(h2
);
1285 prefix_len
= (h1_len
<= h2_len
? h1_len
: h2_len
);
1287 /* Zero size compare not an issue with DUK_MEMCMP. */
1288 rc
= (duk_small_int_t
) DUK_MEMCMP((const char *) DUK_HSTRING_GET_DATA(h1
),
1289 (const char *) DUK_HSTRING_GET_DATA(h2
),
1295 } else if (rc
> 0) {
1300 /* prefix matches, lengths matter now */
1301 if (h1_len
> h2_len
) {
1304 } else if (h1_len
== h2_len
) {
1305 DUK_ASSERT(ret
== 0);
1312 duk_push_int(ctx
, (duk_int_t
) ret
);