ceph/src/civetweb/src/third_party/duktape-1.3.0/src-separate/duk_bi_string.c

   1 /*
   2  *  String built-ins
   3  */
   4
   5 /* XXX: There are several limitations in the current implementation for
   6  * strings with >= 0x80000000UL characters.  In some cases one would need
   7  * to be able to represent the range [-0xffffffff,0xffffffff] and so on.
   8  * Generally character and byte length are assumed to fit into signed 32
   9  * bits (< 0x80000000UL).  Places with issues are not marked explicitly
   10  * below in all cases; look for signed type usage (duk_int_t etc) for
  11  * offsets/lengths.
  12  */
  13
  14 #include "duk_internal.h"
  15
  16 /*
  17  *  Constructor
  18  */
  19
  20 DUK_INTERNAL duk_ret_t duk_bi_string_constructor(duk_context *ctx) {
  21         /* String constructor needs to distinguish between an argument not given at all
  22          * vs. given as 'undefined'.  We're a vararg function to handle this properly.
  23          */
  24
  25         if (duk_get_top(ctx) == 0) {
  26                 duk_push_hstring_stridx(ctx, DUK_STRIDX_EMPTY_STRING);
  27         } else {
  28                 duk_to_string(ctx, 0);
  29         }
  30         DUK_ASSERT(duk_is_string(ctx, 0));
  31         duk_set_top(ctx, 1);
  32
  33         if (duk_is_constructor_call(ctx)) {
  34                 duk_push_object_helper(ctx,
  35                                        DUK_HOBJECT_FLAG_EXTENSIBLE |
  36                                        DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ |
  37                                        DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING),
  38                                        DUK_BIDX_STRING_PROTOTYPE);
  39
  40                 /* String object internal value is immutable */
  41                 duk_dup(ctx, 0);
  42                 duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_INT_VALUE, DUK_PROPDESC_FLAGS_NONE);
  43         }
  44         /* Note: unbalanced stack on purpose */
  45
  46         return 1;
  47 }
  48
  49 DUK_INTERNAL duk_ret_t duk_bi_string_constructor_from_char_code(duk_context *ctx) {
  50         duk_hthread *thr = (duk_hthread *) ctx;
  51         duk_bufwriter_ctx bw_alloc;
  52         duk_bufwriter_ctx *bw;
  53         duk_idx_t i, n;
  54         duk_ucodepoint_t cp;
  55
  56         /* XXX: It would be nice to build the string directly but ToUint16()
  57          * coercion is needed so a generic helper would not be very
  58          * helpful (perhaps coerce the value stack first here and then
  59          * build a string from a duk_tval number sequence in one go?).
  60          */
  61
  62         n = duk_get_top(ctx);
  63
  64         bw = &bw_alloc;
  65         DUK_BW_INIT_PUSHBUF(thr, bw, n);  /* initial estimate for ASCII only codepoints */
  66
  67         for (i = 0; i < n; i++) {
  68                 /* XXX: could improve bufwriter handling to write multiple codepoints
  69                  * with one ensure call but the relative benefit would be quite small.
  70                  */
  71
  72 #if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT)
  73                 /* ToUint16() coercion is mandatory in the E5.1 specification, but
  74                  * this non-compliant behavior makes more sense because we support
  75                  * non-BMP codepoints.  Don't use CESU-8 because that'd create
  76                  * surrogate pairs.
  77                  */
  78
  79                 cp = (duk_ucodepoint_t) duk_to_uint32(ctx, i);
  80                 DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
  81 #else
  82                 cp = (duk_ucodepoint_t) duk_to_uint32(ctx, i);
  83                 DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
  84 #endif
  85         }
  86
  87         DUK_BW_COMPACT(thr, bw);
  88         duk_to_string(ctx, -1);
  89         return 1;
  90 }
  91
  92 /*
  93  *  toString(), valueOf()
  94  */
  95
  96 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_to_string(duk_context *ctx) {
  97         duk_tval *tv;
  98
  99         duk_push_this(ctx);
 100         tv = duk_require_tval(ctx, -1);
 101         DUK_ASSERT(tv != NULL);
 102
 103         if (DUK_TVAL_IS_STRING(tv)) {
 104                 /* return as is */
 105                 return 1;
 106         } else if (DUK_TVAL_IS_OBJECT(tv)) {
 107                 duk_hobject *h = DUK_TVAL_GET_OBJECT(tv);
 108                 DUK_ASSERT(h != NULL);
 109
 110                 /* Must be a "string object", i.e. class "String" */
 111                 if (DUK_HOBJECT_GET_CLASS_NUMBER(h) != DUK_HOBJECT_CLASS_STRING) {
 112                         goto type_error;
 113                 }
 114
 115                 duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INT_VALUE);
 116                 DUK_ASSERT(duk_is_string(ctx, -1));
 117
 118                 return 1;
 119         } else {
 120                 goto type_error;
 121         }
 122
 123         /* never here, but fall through */
 124
 125  type_error:
 126         return DUK_RET_TYPE_ERROR;
 127 }
 128
 129 /*
 130  *  Character and charcode access
 131  */
 132
 133 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_at(duk_context *ctx) {
 134         duk_int_t pos;
 135
 136         /* XXX: faster implementation */
 137
 138         (void) duk_push_this_coercible_to_string(ctx);
 139         pos = duk_to_int(ctx, 0);
 140         duk_substring(ctx, -1, pos, pos + 1);
 141         return 1;
 142 }
 143
 144 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_context *ctx) {
 145         duk_hthread *thr = (duk_hthread *) ctx;
 146         duk_int_t pos;
 147         duk_hstring *h;
 148         duk_bool_t clamped;
 149
 150         /* XXX: faster implementation */
 151
 152         DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *) duk_get_tval(ctx, 0)));
 153
 154         h = duk_push_this_coercible_to_string(ctx);
 155         DUK_ASSERT(h != NULL);
 156
 157         pos = duk_to_int_clamped_raw(ctx,
 158                                      0 /*index*/,
 159                                      0 /*min(incl)*/,
 160                                      DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/,
 161                                      &clamped /*out_clamped*/);
 162         if (clamped) {
 163                 duk_push_number(ctx, DUK_DOUBLE_NAN);
 164                 return 1;
 165         }
 166
 167         duk_push_u32(ctx, (duk_uint32_t) duk_hstring_char_code_at_raw(thr, h, pos));
 168         return 1;
 169 }
 170
 171 /*
 172  *  substring(), substr(), slice()
 173  */
 174
 175 /* XXX: any chance of merging these three similar but still slightly
 176  * different algorithms so that footprint would be reduced?
 177  */
 178
 179 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substring(duk_context *ctx) {
 180         duk_hstring *h;
 181         duk_int_t start_pos, end_pos;
 182         duk_int_t len;
 183
 184         h = duk_push_this_coercible_to_string(ctx);
 185         DUK_ASSERT(h != NULL);
 186         len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
 187
 188         /* [ start end str ] */
 189
 190         start_pos = duk_to_int_clamped(ctx, 0, 0, len);
 191         if (duk_is_undefined(ctx, 1)) {
 192                 end_pos = len;
 193         } else {
 194                 end_pos = duk_to_int_clamped(ctx, 1, 0, len);
 195         }
 196         DUK_ASSERT(start_pos >= 0 && start_pos <= len);
 197         DUK_ASSERT(end_pos >= 0 && end_pos <= len);
 198
 199         if (start_pos > end_pos) {
 200                 duk_int_t tmp = start_pos;
 201                 start_pos = end_pos;
 202                 end_pos = tmp;
 203         }
 204
 205         DUK_ASSERT(end_pos >= start_pos);
 206
 207         duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
 208         return 1;
 209 }
 210
 211 #ifdef DUK_USE_SECTION_B
 212 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) {
 213         duk_hstring *h;
 214         duk_int_t start_pos, end_pos;
 215         duk_int_t len;
 216
 217         /* Unlike non-obsolete String calls, substr() algorithm in E5.1
 218          * specification will happily coerce undefined and null to strings
 219          * ("undefined" and "null").
 220          */
 221         duk_push_this(ctx);
 222         h = duk_to_hstring(ctx, -1);
 223         DUK_ASSERT(h != NULL);
 224         len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
 225
 226         /* [ start length str ] */
 227
 228         /* The implementation for computing of start_pos and end_pos differs
 229          * from the standard algorithm, but is intended to result in the exactly
 230          * same behavior.  This is not always obvious.
 231          */
 232
 233         /* combines steps 2 and 5; -len ensures max() not needed for step 5 */
 234         start_pos = duk_to_int_clamped(ctx, 0, -len, len);
 235         if (start_pos < 0) {
 236                 start_pos = len + start_pos;
 237         }
 238         DUK_ASSERT(start_pos >= 0 && start_pos <= len);
 239
 240         /* combines steps 3, 6; step 7 is not needed */
 241         if (duk_is_undefined(ctx, 1)) {
 242                 end_pos = len;
 243         } else {
 244                 DUK_ASSERT(start_pos <= len);
 245                 end_pos = start_pos + duk_to_int_clamped(ctx, 1, 0, len - start_pos);
 246         }
 247         DUK_ASSERT(start_pos >= 0 && start_pos <= len);
 248         DUK_ASSERT(end_pos >= 0 && end_pos <= len);
 249         DUK_ASSERT(end_pos >= start_pos);
 250
 251         duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
 252         return 1;
 253 }
 254 #else  /* DUK_USE_SECTION_B */
 255 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) {
 256         DUK_UNREF(ctx);
 257         return DUK_RET_UNSUPPORTED_ERROR;
 258 }
 259 #endif  /* DUK_USE_SECTION_B */
 260
 261 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_context *ctx) {
 262         duk_hstring *h;
 263         duk_int_t start_pos, end_pos;
 264         duk_int_t len;
 265
 266         h = duk_push_this_coercible_to_string(ctx);
 267         DUK_ASSERT(h != NULL);
 268         len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
 269
 270         /* [ start end str ] */
 271
 272         start_pos = duk_to_int_clamped(ctx, 0, -len, len);
 273         if (start_pos < 0) {
 274                 start_pos = len + start_pos;
 275         }
 276         if (duk_is_undefined(ctx, 1)) {
 277                 end_pos = len;
 278         } else {
 279                 end_pos = duk_to_int_clamped(ctx, 1, -len, len);
 280                 if (end_pos < 0) {
 281                         end_pos = len + end_pos;
 282                 }
 283         }
 284         DUK_ASSERT(start_pos >= 0 && start_pos <= len);
 285         DUK_ASSERT(end_pos >= 0 && end_pos <= len);
 286
 287         if (end_pos < start_pos) {
 288                 end_pos = start_pos;
 289         }
 290
 291         DUK_ASSERT(end_pos >= start_pos);
 292
 293         duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
 294         return 1;
 295 }
 296
 297 /*
 298  *  Case conversion
 299  */
 300
 301 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_caseconv_shared(duk_context *ctx) {
 302         duk_hthread *thr = (duk_hthread *) ctx;
 303         duk_small_int_t uppercase = duk_get_current_magic(ctx);
 304
 305         (void) duk_push_this_coercible_to_string(ctx);
 306         duk_unicode_case_convert_string(thr, (duk_bool_t) uppercase);
 307         return 1;
 308 }
 309
 310 /*
 311  *  indexOf() and lastIndexOf()
 312  */
 313
 314 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_context *ctx) {
 315         duk_hthread *thr = (duk_hthread *) ctx;
 316         duk_hstring *h_this;
 317         duk_hstring *h_search;
 318         duk_int_t clen_this;
 319         duk_int_t cpos;
 320         duk_int_t bpos;
 321         const duk_uint8_t *p_start, *p_end, *p;
 322         const duk_uint8_t *q_start;
 323         duk_int_t q_blen;
 324         duk_uint8_t firstbyte;
 325         duk_uint8_t t;
 326         duk_small_int_t is_lastindexof = duk_get_current_magic(ctx);  /* 0=indexOf, 1=lastIndexOf */
 327
 328         h_this = duk_push_this_coercible_to_string(ctx);
 329         DUK_ASSERT(h_this != NULL);
 330         clen_this = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h_this);
 331
 332         h_search = duk_to_hstring(ctx, 0);
 333         DUK_ASSERT(h_search != NULL);
 334         q_start = DUK_HSTRING_GET_DATA(h_search);
 335         q_blen = (duk_int_t) DUK_HSTRING_GET_BYTELEN(h_search);
 336
 337         duk_to_number(ctx, 1);
 338         if (duk_is_nan(ctx, 1) && is_lastindexof) {
 339                 /* indexOf: NaN should cause pos to be zero.
 340                  * lastIndexOf: NaN should cause pos to be +Infinity
 341                  * (and later be clamped to len).
 342                  */
 343                 cpos = clen_this;
 344         } else {
 345                 cpos = duk_to_int_clamped(ctx, 1, 0, clen_this);
 346         }
 347
 348         /* Empty searchstring always matches; cpos must be clamped here.
 349          * (If q_blen were < 0 due to clamped coercion, it would also be
 350          * caught here.)
 351          */
 352         if (q_blen <= 0) {
 353                 duk_push_int(ctx, cpos);
 354                 return 1;
 355         }
 356         DUK_ASSERT(q_blen > 0);
 357
 358         bpos = (duk_int_t) duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t) cpos);
 359
 360         p_start = DUK_HSTRING_GET_DATA(h_this);
 361         p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this);
 362         p = p_start + bpos;
 363
 364         /* This loop is optimized for size.  For speed, there should be
 365          * two separate loops, and we should ensure that memcmp() can be
 366          * used without an extra "will searchstring fit" check.  Doing
 367          * the preconditioning for 'p' and 'p_end' is easy but cpos
 368          * must be updated if 'p' is wound back (backward scanning).
 369          */
 370
 371         firstbyte = q_start[0];  /* leading byte of match string */
 372         while (p <= p_end && p >= p_start) {
 373                 t = *p;
 374
 375                 /* For Ecmascript strings, this check can only match for
 376                  * initial UTF-8 bytes (not continuation bytes).  For other
 377                  * strings all bets are off.
 378                  */
 379
 380                 if ((t == firstbyte) && ((duk_size_t) (p_end - p) >= (duk_size_t) q_blen)) {
 381                         DUK_ASSERT(q_blen > 0);  /* no issues with memcmp() zero size, even if broken */
 382                         if (DUK_MEMCMP(p, q_start, (duk_size_t) q_blen) == 0) {
 383                                 duk_push_int(ctx, cpos);
 384                                 return 1;
 385                         }
 386                 }
 387
 388                 /* track cpos while scanning */
 389                 if (is_lastindexof) {
 390                         /* when going backwards, we decrement cpos 'early';
 391                          * 'p' may point to a continuation byte of the char
 392                          * at offset 'cpos', but that's OK because we'll
 393                          * backtrack all the way to the initial byte.
 394                          */
 395                         if ((t & 0xc0) != 0x80) {
 396                                 cpos--;
 397                         }
 398                         p--;
 399                 } else {
 400                         if ((t & 0xc0) != 0x80) {
 401                                 cpos++;
 402                         }
 403                         p++;
 404                 }
 405         }
 406
 407         /* Not found.  Empty string case is handled specially above. */
 408         duk_push_int(ctx, -1);
 409         return 1;
 410 }
 411
 412 /*
 413  *  replace()
 414  */
 415
 416 /* XXX: the current implementation works but is quite clunky; it compiles
  417  * to almost 1.4kB of x86 code so it needs to be simplified (better approach,
 418  * shared helpers, etc).  Some ideas for refactoring:
 419  *
 420  * - a primitive to convert a string into a regexp matcher (reduces matching
 421  *   code at the cost of making matching much slower)
 422  * - use replace() as a basic helper for match() and split(), which are both
 423  *   much simpler
 424  * - API call to get_prop and to_boolean
 425  */
 426
 427 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_context *ctx) {
 428         duk_hthread *thr = (duk_hthread *) ctx;
 429         duk_hstring *h_input;
 430         duk_hstring *h_match;
 431         duk_hstring *h_search;
 432         duk_hobject *h_re;
 433         duk_bufwriter_ctx bw_alloc;
 434         duk_bufwriter_ctx *bw;
 435 #ifdef DUK_USE_REGEXP_SUPPORT
 436         duk_bool_t is_regexp;
 437         duk_bool_t is_global;
 438 #endif
 439         duk_bool_t is_repl_func;
 440         duk_uint32_t match_start_coff, match_start_boff;
 441 #ifdef DUK_USE_REGEXP_SUPPORT
 442         duk_int_t match_caps;
 443 #endif
 444         duk_uint32_t prev_match_end_boff;
 445         const duk_uint8_t *r_start, *r_end, *r;   /* repl string scan */
 446         duk_size_t tmp_sz;
 447
 448         DUK_ASSERT_TOP(ctx, 2);
 449         h_input = duk_push_this_coercible_to_string(ctx);
 450         DUK_ASSERT(h_input != NULL);
 451
 452         bw = &bw_alloc;
 453         DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input));  /* input size is good output starting point */
 454
 455         DUK_ASSERT_TOP(ctx, 4);
 456
 457         /* stack[0] = search value
 458          * stack[1] = replace value
 459          * stack[2] = input string
 460          * stack[3] = result buffer
 461          */
 462
 463         h_re = duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP);
 464         if (h_re) {
 465 #ifdef DUK_USE_REGEXP_SUPPORT
 466                 is_regexp = 1;
 467                 is_global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL);
 468
 469                 if (is_global) {
 470                         /* start match from beginning */
 471                         duk_push_int(ctx, 0);
 472                         duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
 473                 }
 474 #else  /* DUK_USE_REGEXP_SUPPORT */
 475                 return DUK_RET_UNSUPPORTED_ERROR;
 476 #endif  /* DUK_USE_REGEXP_SUPPORT */
 477         } else {
 478                 duk_to_string(ctx, 0);
 479 #ifdef DUK_USE_REGEXP_SUPPORT
 480                 is_regexp = 0;
 481                 is_global = 0;
 482 #endif
 483         }
 484
 485         if (duk_is_function(ctx, 1)) {
 486                 is_repl_func = 1;
 487                 r_start = NULL;
 488                 r_end = NULL;
 489         } else {
 490                 duk_hstring *h_repl;
 491
 492                 is_repl_func = 0;
 493                 h_repl = duk_to_hstring(ctx, 1);
 494                 DUK_ASSERT(h_repl != NULL);
 495                 r_start = DUK_HSTRING_GET_DATA(h_repl);
 496                 r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl);
 497         }
 498
 499         prev_match_end_boff = 0;
 500
 501         for (;;) {
 502                 /*
 503                  *  If matching with a regexp:
 504                  *    - non-global RegExp: lastIndex not touched on a match, zeroed
 505                  *      on a non-match
 506                  *    - global RegExp: on match, lastIndex will be updated by regexp
 507                  *      executor to point to next char after the matching part (so that
 508                  *      characters in the matching part are not matched again)
 509                  *
 510                  *  If matching with a string:
 511                  *    - always non-global match, find first occurrence
 512                  *
 513                  *  We need:
 514                  *    - The character offset of start-of-match for the replacer function
 515                  *    - The byte offsets for start-of-match and end-of-match to implement
 516                  *      the replacement values $&, $`, and $', and to copy non-matching
 517                  *      input string portions (including header and trailer) verbatim.
 518                  *
 519                  *  NOTE: the E5.1 specification is a bit vague how the RegExp should
 520                  *  behave in the replacement process; e.g. is matching done first for
 521                  *  all matches (in the global RegExp case) before any replacer calls
 522                  *  are made?  See: test-bi-string-proto-replace.js for discussion.
 523                  */
 524
 525                 DUK_ASSERT_TOP(ctx, 4);
 526
 527 #ifdef DUK_USE_REGEXP_SUPPORT
 528                 if (is_regexp) {
 529                         duk_dup(ctx, 0);
 530                         duk_dup(ctx, 2);
 531                         duk_regexp_match(thr);  /* [ ... regexp input ] -> [ res_obj ] */
 532                         if (!duk_is_object(ctx, -1)) {
 533                                 duk_pop(ctx);
 534                                 break;
 535                         }
 536
 537                         duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
 538                         DUK_ASSERT(duk_is_number(ctx, -1));
 539                         match_start_coff = duk_get_int(ctx, -1);
 540                         duk_pop(ctx);
 541
 542                         duk_get_prop_index(ctx, -1, 0);
 543                         DUK_ASSERT(duk_is_string(ctx, -1));
 544                         h_match = duk_get_hstring(ctx, -1);
 545                         DUK_ASSERT(h_match != NULL);
 546                         duk_pop(ctx);  /* h_match is borrowed, remains reachable through match_obj */
 547
 548                         if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) {
 549                                 /* This should be equivalent to match() algorithm step 8.f.iii.2:
 550                                  * detect an empty match and allow it, but don't allow it twice.
 551                                  */
 552                                 duk_uint32_t last_index;
 553
 554                                 duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
 555                                 last_index = (duk_uint32_t) duk_get_uint(ctx, -1);
 556                                 DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld",
 557                                                      (long) last_index, (long) (last_index + 1)));
 558                                 duk_pop(ctx);
 559                                 duk_push_int(ctx, last_index + 1);
 560                                 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
 561                         }
 562
 563                         DUK_ASSERT(duk_get_length(ctx, -1) <= DUK_INT_MAX);  /* string limits */
 564                         match_caps = (duk_int_t) duk_get_length(ctx, -1);
 565                 } else {
 566 #else  /* DUK_USE_REGEXP_SUPPORT */
 567                 {  /* unconditionally */
 568 #endif  /* DUK_USE_REGEXP_SUPPORT */
 569                         const duk_uint8_t *p_start, *p_end, *p;   /* input string scan */
 570                         const duk_uint8_t *q_start;               /* match string */
 571                         duk_size_t q_blen;
 572
 573 #ifdef DUK_USE_REGEXP_SUPPORT
 574                         DUK_ASSERT(!is_global);  /* single match always */
 575 #endif
 576
 577                         p_start = DUK_HSTRING_GET_DATA(h_input);
 578                         p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
 579                         p = p_start;
 580
 581                         h_search = duk_get_hstring(ctx, 0);
 582                         DUK_ASSERT(h_search != NULL);
 583                         q_start = DUK_HSTRING_GET_DATA(h_search);
 584                         q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_search);
 585
 586                         p_end -= q_blen;  /* ensure full memcmp() fits in while */
 587
 588                         match_start_coff = 0;
 589
 590                         while (p <= p_end) {
 591                                 DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input));
 592                                 if (DUK_MEMCMP((void *) p, (void *) q_start, (size_t) q_blen) == 0) {
 593                                         duk_dup(ctx, 0);
 594                                         h_match = duk_get_hstring(ctx, -1);
 595                                         DUK_ASSERT(h_match != NULL);
 596 #ifdef DUK_USE_REGEXP_SUPPORT
 597                                         match_caps = 0;
 598 #endif
 599                                         goto found;
 600                                 }
 601
 602                                 /* track utf-8 non-continuation bytes */
 603                                 if ((p[0] & 0xc0) != 0x80) {
 604                                         match_start_coff++;
 605                                 }
 606                                 p++;
 607                         }
 608
 609                         /* not found */
 610                         break;
 611                 }
 612          found:
 613
 614                 /* stack[0] = search value
 615                  * stack[1] = replace value
 616                  * stack[2] = input string
 617                  * stack[3] = result buffer
 618                  * stack[4] = regexp match OR match string
 619                  */
 620
 621                 match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff);
 622
 623                 tmp_sz = (duk_size_t) (match_start_boff - prev_match_end_boff);
 624                 DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
 625
 626                 prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match);
 627
 628                 if (is_repl_func) {
 629                         duk_idx_t idx_args;
 630                         duk_hstring *h_repl;
 631
 632                         /* regexp res_obj is at index 4 */
 633
 634                         duk_dup(ctx, 1);
 635                         idx_args = duk_get_top(ctx);
 636
 637 #ifdef DUK_USE_REGEXP_SUPPORT
 638                         if (is_regexp) {
 639                                 duk_int_t idx;
 640                                 duk_require_stack(ctx, match_caps + 2);
 641                                 for (idx = 0; idx < match_caps; idx++) {
 642                                         /* match followed by capture(s) */
 643                                         duk_get_prop_index(ctx, 4, idx);
 644                                 }
 645                         } else {
 646 #else  /* DUK_USE_REGEXP_SUPPORT */
 647                         {  /* unconditionally */
 648 #endif  /* DUK_USE_REGEXP_SUPPORT */
 649                                 /* match == search string, by definition */
 650                                 duk_dup(ctx, 0);
 651                         }
 652                         duk_push_int(ctx, match_start_coff);
 653                         duk_dup(ctx, 2);
 654
 655                         /* [ ... replacer match [captures] match_char_offset input ] */
 656
 657                         duk_call(ctx, duk_get_top(ctx) - idx_args);
 658                         h_repl = duk_to_hstring(ctx, -1);  /* -> [ ... repl_value ] */
 659                         DUK_ASSERT(h_repl != NULL);
 660
 661                         DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl);
 662
 663                         duk_pop(ctx);  /* repl_value */
 664                 } else {
 665                         r = r_start;
 666
 667                         while (r < r_end) {
 668                                 duk_int_t ch1;
 669                                 duk_int_t ch2;
 670 #ifdef DUK_USE_REGEXP_SUPPORT
 671                                 duk_int_t ch3;
 672 #endif
 673                                 duk_size_t left;
 674
 675                                 ch1 = *r++;
 676                                 if (ch1 != DUK_ASC_DOLLAR) {
 677                                         goto repl_write;
 678                                 }
 679                                 left = r_end - r;
 680
 681                                 if (left <= 0) {
 682                                         goto repl_write;
 683                                 }
 684
 685                                 ch2 = r[0];
 686                                 switch ((int) ch2) {
 687                                 case DUK_ASC_DOLLAR: {
 688                                         ch1 = (1 << 8) + DUK_ASC_DOLLAR;
 689                                         goto repl_write;
 690                                 }
 691                                 case DUK_ASC_AMP: {
 692                                         DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match);
 693                                         r++;
 694                                         continue;
 695                                 }
 696                                 case DUK_ASC_GRAVE: {
 697                                         tmp_sz = (duk_size_t) match_start_boff;
 698                                         DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz);
 699                                         r++;
 700                                         continue;
 701                                 }
 702                                 case DUK_ASC_SINGLEQUOTE: {
 703                                         duk_uint32_t match_end_boff;
 704
 705                                         /* Use match charlen instead of bytelen, just in case the input and
 706                                          * match codepoint encodings would have different lengths.
 707                                          */
 708                                         match_end_boff = duk_heap_strcache_offset_char2byte(thr,
 709                                                                                             h_input,
 710                                                                                             match_start_coff + DUK_HSTRING_GET_CHARLEN(h_match));
 711
 712                                         tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff);
 713                                         DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz);
 714                                         r++;
 715                                         continue;
 716                                 }
 717                                 default: {
 718 #ifdef DUK_USE_REGEXP_SUPPORT
 719                                         duk_int_t capnum, captmp, capadv;
 720                                         /* XXX: optional check, match_caps is zero if no regexp,
 721                                          * so dollar will be interpreted literally anyway.
 722                                          */
 723
 724                                         if (!is_regexp) {
 725                                                 goto repl_write;
 726                                         }
 727
 728                                         if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) {
 729                                                 goto repl_write;
 730                                         }
 731                                         capnum = ch2 - DUK_ASC_0;
 732                                         capadv = 1;
 733
 734                                         if (left >= 2) {
 735                                                 ch3 = r[1];
 736                                                 if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) {
 737                                                         captmp = capnum * 10 + (ch3 - DUK_ASC_0);
 738                                                         if (captmp < match_caps) {
 739                                                                 capnum = captmp;
 740                                                                 capadv = 2;
 741                                                         }
 742                                                 }
 743                                         }
 744
 745                                         if (capnum > 0 && capnum < match_caps) {
 746                                                 DUK_ASSERT(is_regexp != 0);  /* match_caps == 0 without regexps */
 747
 748                                                 /* regexp res_obj is at offset 4 */
 749                                                 duk_get_prop_index(ctx, 4, (duk_uarridx_t) capnum);
 750                                                 if (duk_is_string(ctx, -1)) {
 751                                                         duk_hstring *h_tmp_str;
 752
 753                                                         h_tmp_str = duk_get_hstring(ctx, -1);
 754                                                         DUK_ASSERT(h_tmp_str != NULL);
 755
 756                                                         DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str);
 757                                                 } else {
 758                                                         /* undefined -> skip (replaced with empty) */
 759                                                 }
 760                                                 duk_pop(ctx);
 761                                                 r += capadv;
 762                                                 continue;
 763                                         } else {
 764                                                 goto repl_write;
 765                                         }
 766 #else  /* DUK_USE_REGEXP_SUPPORT */
 767                                         goto repl_write;  /* unconditionally */
 768 #endif  /* DUK_USE_REGEXP_SUPPORT */
 769                                 }  /* default case */
 770                                 }  /* switch (ch2) */
 771
 772                          repl_write:
 773                                 /* ch1 = (r_increment << 8) + byte */
 774
 775                                 DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t) (ch1 & 0xff));
 776                                 r += ch1 >> 8;
 777                         }  /* while repl */
 778                 }  /* if (is_repl_func) */
 779
 780                 duk_pop(ctx);  /* pop regexp res_obj or match string */
 781
 782 #ifdef DUK_USE_REGEXP_SUPPORT
 783                 if (!is_global) {
 784 #else
 785                 {  /* unconditionally; is_global==0 */
 786 #endif
 787                         break;
 788                 }
 789         }
 790
 791         /* trailer */
 792         tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff);
 793         DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
 794
 795         DUK_ASSERT_TOP(ctx, 4);
 796         DUK_BW_COMPACT(thr, bw);
 797         duk_to_string(ctx, -1);
 798         return 1;
 799 }
 800
 801 /*
 802  *  split()
 803  */
 804
/* XXX: very messy now, but works; clean up, remove unused variables (nominally
 * used so compiler doesn't complain).
 */
 808
 809 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_context *ctx) {
 810         duk_hthread *thr = (duk_hthread *) ctx;
 811         duk_hstring *h_input;
 812         duk_hstring *h_sep;
 813         duk_uint32_t limit;
 814         duk_uint32_t arr_idx;
 815 #ifdef DUK_USE_REGEXP_SUPPORT
 816         duk_bool_t is_regexp;
 817 #endif
 818         duk_bool_t matched;  /* set to 1 if any match exists (needed for empty input special case) */
 819         duk_uint32_t prev_match_end_coff, prev_match_end_boff;
 820         duk_uint32_t match_start_boff, match_start_coff;
 821         duk_uint32_t match_end_boff, match_end_coff;
 822
 823         DUK_UNREF(thr);
 824
 825         h_input = duk_push_this_coercible_to_string(ctx);
 826         DUK_ASSERT(h_input != NULL);
 827
 828         duk_push_array(ctx);
 829
 830         if (duk_is_undefined(ctx, 1)) {
 831                 limit = 0xffffffffUL;
 832         } else {
 833                 limit = duk_to_uint32(ctx, 1);
 834         }
 835
 836         if (limit == 0) {
 837                 return 1;
 838         }
 839
 840         /* If the separator is a RegExp, make a "clone" of it.  The specification
 841          * algorithm calls [[Match]] directly for specific indices; we emulate this
 842          * by tweaking lastIndex and using a "force global" variant of duk_regexp_match()
 843          * which will use global-style matching even when the RegExp itself is non-global.
 844          */
 845
 846         if (duk_is_undefined(ctx, 0)) {
 847                 /* The spec algorithm first does "R = ToString(separator)" before checking
 848                  * whether separator is undefined.  Since this is side effect free, we can
 849                  * skip the ToString() here.
 850                  */
 851                 duk_dup(ctx, 2);
 852                 duk_put_prop_index(ctx, 3, 0);
 853                 return 1;
 854         } else if (duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) {
 855 #ifdef DUK_USE_REGEXP_SUPPORT
 856                 duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
 857                 duk_dup(ctx, 0);
 858                 duk_new(ctx, 1);  /* [ ... RegExp val ] -> [ ... res ] */
 859                 duk_replace(ctx, 0);
 860                 /* lastIndex is initialized to zero by new RegExp() */
 861                 is_regexp = 1;
 862 #else
 863                 return DUK_RET_UNSUPPORTED_ERROR;
 864 #endif
 865         } else {
 866                 duk_to_string(ctx, 0);
 867 #ifdef DUK_USE_REGEXP_SUPPORT
 868                 is_regexp = 0;
 869 #endif
 870         }
 871
 872         /* stack[0] = separator (string or regexp)
 873          * stack[1] = limit
 874          * stack[2] = input string
 875          * stack[3] = result array
 876          */
 877
 878         prev_match_end_boff = 0;
 879         prev_match_end_coff = 0;
 880         arr_idx = 0;
 881         matched = 0;
 882
 883         for (;;) {
 884                 /*
 885                  *  The specification uses RegExp [[Match]] to attempt match at specific
 886                  *  offsets.  We don't have such a primitive, so we use an actual RegExp
 887                  *  and tweak lastIndex.  Since the RegExp may be non-global, we use a
 888                  *  special variant which forces global-like behavior for matching.
 889                  */
 890
 891                 DUK_ASSERT_TOP(ctx, 4);
 892
 893 #ifdef DUK_USE_REGEXP_SUPPORT
 894                 if (is_regexp) {
 895                         duk_dup(ctx, 0);
 896                         duk_dup(ctx, 2);
 897                         duk_regexp_match_force_global(thr);  /* [ ... regexp input ] -> [ res_obj ] */
 898                         if (!duk_is_object(ctx, -1)) {
 899                                 duk_pop(ctx);
 900                                 break;
 901                         }
 902                         matched = 1;
 903
 904                         duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
 905                         DUK_ASSERT(duk_is_number(ctx, -1));
 906                         match_start_coff = duk_get_int(ctx, -1);
 907                         match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff);
 908                         duk_pop(ctx);
 909
 910                         if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) {
 911                                 /* don't allow an empty match at the end of the string */
 912                                 duk_pop(ctx);
 913                                 break;
 914                         }
 915
 916                         duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
 917                         DUK_ASSERT(duk_is_number(ctx, -1));
 918                         match_end_coff = duk_get_int(ctx, -1);
 919                         match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_end_coff);
 920                         duk_pop(ctx);
 921
 922                         /* empty match -> bump and continue */
 923                         if (prev_match_end_boff == match_end_boff) {
 924                                 duk_push_int(ctx, match_end_coff + 1);
 925                                 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
 926                                 duk_pop(ctx);
 927                                 continue;
 928                         }
 929                 } else {
 930 #else  /* DUK_USE_REGEXP_SUPPORT */
 931                 {  /* unconditionally */
 932 #endif  /* DUK_USE_REGEXP_SUPPORT */
 933                         const duk_uint8_t *p_start, *p_end, *p;   /* input string scan */
 934                         const duk_uint8_t *q_start;               /* match string */
 935                         duk_size_t q_blen, q_clen;
 936
 937                         p_start = DUK_HSTRING_GET_DATA(h_input);
 938                         p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
 939                         p = p_start + prev_match_end_boff;
 940
 941                         h_sep = duk_get_hstring(ctx, 0);
 942                         DUK_ASSERT(h_sep != NULL);
 943                         q_start = DUK_HSTRING_GET_DATA(h_sep);
 944                         q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sep);
 945                         q_clen = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_sep);
 946
 947                         p_end -= q_blen;  /* ensure full memcmp() fits in while */
 948
 949                         match_start_coff = prev_match_end_coff;
 950
 951                         if (q_blen == 0) {
 952                                 /* Handle empty separator case: it will always match, and always
 953                                  * triggers the check in step 13.c.iii initially.  Note that we
 954                                  * must skip to either end of string or start of first codepoint,
 955                                  * skipping over any continuation bytes!
 956                                  *
 957                                  * Don't allow an empty string to match at the end of the input.
 958                                  */
 959
 960                                 matched = 1;  /* empty separator can always match */
 961
 962                                 match_start_coff++;
 963                                 p++;
 964                                 while (p < p_end) {
 965                                         if ((p[0] & 0xc0) != 0x80) {
 966                                                 goto found;
 967                                         }
 968                                         p++;
 969                                 }
 970                                 goto not_found;
 971                         }
 972
 973                         DUK_ASSERT(q_blen > 0 && q_clen > 0);
 974                         while (p <= p_end) {
 975                                 DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input));
 976                                 DUK_ASSERT(q_blen > 0);  /* no issues with empty memcmp() */
 977                                 if (DUK_MEMCMP((void *) p, (void *) q_start, (duk_size_t) q_blen) == 0) {
 978                                         /* never an empty match, so step 13.c.iii can't be triggered */
 979                                         goto found;
 980                                 }
 981
 982                                 /* track utf-8 non-continuation bytes */
 983                                 if ((p[0] & 0xc0) != 0x80) {
 984                                         match_start_coff++;
 985                                 }
 986                                 p++;
 987                         }
 988
 989                  not_found:
 990                         /* not found */
 991                         break;
 992
 993                  found:
 994                         matched = 1;
 995                         match_start_boff = (duk_uint32_t) (p - p_start);
 996                         match_end_coff = (duk_uint32_t) (match_start_coff + q_clen);  /* constrained by string length */
 997                         match_end_boff = (duk_uint32_t) (match_start_boff + q_blen);  /* ditto */
 998
 999                         /* empty match (may happen with empty separator) -> bump and continue */
1000                         if (prev_match_end_boff == match_end_boff) {
1001                                 prev_match_end_boff++;
1002                                 prev_match_end_coff++;
1003                                 continue;
1004                         }
1005                 }  /* if (is_regexp) */
1006
1007                 /* stack[0] = separator (string or regexp)
1008                  * stack[1] = limit
1009                  * stack[2] = input string
1010                  * stack[3] = result array
1011                  * stack[4] = regexp res_obj (if is_regexp)
1012                  */
1013
1014                 DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld",
1015                                      (long) match_start_boff, (long) match_start_coff,
1016                                      (long) match_end_boff, (long) match_end_coff,
1017                                      (long) prev_match_end_boff, (long) prev_match_end_coff));
1018
1019                 duk_push_lstring(ctx,
1020                                  (const char *) (DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff),
1021                                  (duk_size_t) (match_start_boff - prev_match_end_boff));
1022                 duk_put_prop_index(ctx, 3, arr_idx);
1023                 arr_idx++;
1024                 if (arr_idx >= limit) {
1025                         goto hit_limit;
1026                 }
1027
1028 #ifdef DUK_USE_REGEXP_SUPPORT
1029                 if (is_regexp) {
1030                         duk_size_t i, len;
1031
1032                         len = duk_get_length(ctx, 4);
1033                         for (i = 1; i < len; i++) {
1034                                 DUK_ASSERT(i <= DUK_UARRIDX_MAX);  /* cannot have >4G captures */
1035                                 duk_get_prop_index(ctx, 4, (duk_uarridx_t) i);
1036                                 duk_put_prop_index(ctx, 3, arr_idx);
1037                                 arr_idx++;
1038                                 if (arr_idx >= limit) {
1039                                         goto hit_limit;
1040                                 }
1041                         }
1042
1043                         duk_pop(ctx);
1044                         /* lastIndex already set up for next match */
1045                 } else {
1046 #else  /* DUK_USE_REGEXP_SUPPORT */
1047                 {  /* unconditionally */
1048 #endif  /* DUK_USE_REGEXP_SUPPORT */
1049                         /* no action */
1050                 }
1051
1052                 prev_match_end_boff = match_end_boff;
1053                 prev_match_end_coff = match_end_coff;
1054                 continue;
1055         }  /* for */
1056
1057         /* Combined step 11 (empty string special case) and 14-15. */
1058
1059         DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld",
1060                              (long) prev_match_end_boff, (long) prev_match_end_coff));
1061
1062         if (DUK_HSTRING_GET_CHARLEN(h_input) > 0 || !matched) {
1063                 /* Add trailer if:
1064                  *   a) non-empty input
1065                  *   b) empty input and no (zero size) match found (step 11)
1066                  */
1067
1068                 duk_push_lstring(ctx,
1069                                  (const char *) DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff,
1070                                  (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff));
1071                 duk_put_prop_index(ctx, 3, arr_idx);
1072                 /* No arr_idx update or limit check */
1073         }
1074
1075         return 1;
1076
1077  hit_limit:
1078 #ifdef DUK_USE_REGEXP_SUPPORT
1079         if (is_regexp) {
1080                 duk_pop(ctx);
1081         }
1082 #endif
1083
1084         return 1;
1085 }
1086
1087 /*
1088  *  Various
1089  */
1090
#ifdef DUK_USE_REGEXP_SUPPORT
/* Shared helper for match() steps 3-4 and search() steps 3-4: make sure the
 * value at 'index' is a RegExp instance.
 *
 * If 'force_new' is zero and the value already has the RegExp class, it is
 * left untouched.  Otherwise the value is replaced in place with the result
 * of 'new RegExp(value)'.
 */
DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) {
        DUK_ASSERT(index >= 0);

        /* Existing RegExp can be used as is unless caller demands a fresh one. */
        if (!force_new &&
            duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP) != NULL) {
                return;
        }

        duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
        duk_dup(ctx, index);
        duk_new(ctx, 1);  /* [ ... RegExp val ] -> [ ... res ] */
        duk_replace(ctx, index);
}
#endif  /* DUK_USE_REGEXP_SUPPORT */
1116
1117 #ifdef DUK_USE_REGEXP_SUPPORT
1118 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) {
1119         duk_hthread *thr = (duk_hthread *) ctx;
1120
1121         /* Easiest way to implement the search required by the specification
1122          * is to do a RegExp test() with lastIndex forced to zero.  To avoid
1123          * side effects on the argument, "clone" the RegExp if a RegExp was
1124          * given as input.
1125          *
1126          * The global flag of the RegExp should be ignored; setting lastIndex
1127          * to zero (which happens when "cloning" the RegExp) should have an
1128          * equivalent effect.
1129          */
1130
1131         DUK_ASSERT_TOP(ctx, 1);
1132         (void) duk_push_this_coercible_to_string(ctx);  /* at index 1 */
1133         duk__to_regexp_helper(ctx, 0 /*index*/, 1 /*force_new*/);
1134
1135         /* stack[0] = regexp
1136          * stack[1] = string
1137          */
1138
1139         /* Avoid using RegExp.prototype methods, as they're writable and
1140          * configurable and may have been changed.
1141          */
1142
1143         duk_dup(ctx, 0);
1144         duk_dup(ctx, 1);  /* [ ... re_obj input ] */
1145         duk_regexp_match(thr);  /* -> [ ... res_obj ] */
1146
1147         if (!duk_is_object(ctx, -1)) {
1148                 duk_push_int(ctx, -1);
1149                 return 1;
1150         }
1151
1152         duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
1153         DUK_ASSERT(duk_is_number(ctx, -1));
1154         return 1;
1155 }
1156 #else  /* DUK_USE_REGEXP_SUPPORT */
1157 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) {
1158         DUK_UNREF(ctx);
1159         return DUK_RET_UNSUPPORTED_ERROR;
1160 }
1161 #endif  /* DUK_USE_REGEXP_SUPPORT */
1162
1163 #ifdef DUK_USE_REGEXP_SUPPORT
1164 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) {
1165         duk_hthread *thr = (duk_hthread *) ctx;
1166         duk_bool_t global;
1167         duk_int_t prev_last_index;
1168         duk_int_t this_index;
1169         duk_int_t arr_idx;
1170
1171         DUK_ASSERT_TOP(ctx, 1);
1172         (void) duk_push_this_coercible_to_string(ctx);
1173         duk__to_regexp_helper(ctx, 0 /*index*/, 0 /*force_new*/);
1174         global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL);
1175         DUK_ASSERT_TOP(ctx, 2);
1176
1177         /* stack[0] = regexp
1178          * stack[1] = string
1179          */
1180
1181         if (!global) {
1182                 duk_regexp_match(thr);  /* -> [ res_obj ] */
1183                 return 1;  /* return 'res_obj' */
1184         }
1185
1186         /* Global case is more complex. */
1187
1188         /* [ regexp string ] */
1189
1190         duk_push_int(ctx, 0);
1191         duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1192         duk_push_array(ctx);
1193
1194         /* [ regexp string res_arr ] */
1195
1196         prev_last_index = 0;
1197         arr_idx = 0;
1198
1199         for (;;) {
1200                 DUK_ASSERT_TOP(ctx, 3);
1201
1202                 duk_dup(ctx, 0);
1203                 duk_dup(ctx, 1);
1204                 duk_regexp_match(thr);  /* -> [ ... regexp string ] -> [ ... res_obj ] */
1205
1206                 if (!duk_is_object(ctx, -1)) {
1207                         duk_pop(ctx);
1208                         break;
1209                 }
1210
1211                 duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1212                 DUK_ASSERT(duk_is_number(ctx, -1));
1213                 this_index = duk_get_int(ctx, -1);
1214                 duk_pop(ctx);
1215
1216                 if (this_index == prev_last_index) {
1217                         this_index++;
1218                         duk_push_int(ctx, this_index);
1219                         duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1220                 }
1221                 prev_last_index = this_index;
1222
1223                 duk_get_prop_index(ctx, -1, 0);  /* match string */
1224                 duk_put_prop_index(ctx, 2, arr_idx);
1225                 arr_idx++;
1226                 duk_pop(ctx);  /* res_obj */
1227         }
1228
1229         if (arr_idx == 0) {
1230                 duk_push_null(ctx);
1231         }
1232
1233         return 1;  /* return 'res_arr' or 'null' */
1234 }
1235 #else  /* DUK_USE_REGEXP_SUPPORT */
1236 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) {
1237         DUK_UNREF(ctx);
1238         return DUK_RET_UNSUPPORTED_ERROR;
1239 }
1240 #endif  /* DUK_USE_REGEXP_SUPPORT */
1241
1242 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_concat(duk_context *ctx) {
1243         /* duk_concat() coerces arguments with ToString() in correct order */
1244         (void) duk_push_this_coercible_to_string(ctx);
1245         duk_insert(ctx, 0);  /* this is relatively expensive */
1246         duk_concat(ctx, duk_get_top(ctx));
1247         return 1;
1248 }
1249
1250 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_trim(duk_context *ctx) {
1251         DUK_ASSERT_TOP(ctx, 0);
1252         (void) duk_push_this_coercible_to_string(ctx);
1253         duk_trim(ctx, 0);
1254         DUK_ASSERT_TOP(ctx, 1);
1255         return 1;
1256 }
1257
1258 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_locale_compare(duk_context *ctx) {
1259         duk_hstring *h1;
1260         duk_hstring *h2;
1261         duk_size_t h1_len, h2_len, prefix_len;
1262         duk_small_int_t ret = 0;
1263         duk_small_int_t rc;
1264
1265         /* The current implementation of localeCompare() is simply a codepoint
1266          * by codepoint comparison, implemented with a simple string compare
1267          * because UTF-8 should preserve codepoint ordering (assuming valid
1268          * shortest UTF-8 encoding).
1269          *
1270          * The specification requires that the return value must be related
1271          * to the sort order: e.g. negative means that 'this' comes before
1272          * 'that' in sort order.  We assume an ascending sort order.
1273          */
1274
1275         /* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */
1276
1277         h1 = duk_push_this_coercible_to_string(ctx);
1278         DUK_ASSERT(h1 != NULL);
1279
1280         h2 = duk_to_hstring(ctx, 0);
1281         DUK_ASSERT(h2 != NULL);
1282
1283         h1_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h1);
1284         h2_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h2);
1285         prefix_len = (h1_len <= h2_len ? h1_len : h2_len);
1286
1287         /* Zero size compare not an issue with DUK_MEMCMP. */
1288         rc = (duk_small_int_t) DUK_MEMCMP((const char *) DUK_HSTRING_GET_DATA(h1),
1289                                           (const char *) DUK_HSTRING_GET_DATA(h2),
1290                                           prefix_len);
1291
1292         if (rc < 0) {
1293                 ret = -1;
1294                 goto done;
1295         } else if (rc > 0) {
1296                 ret = 1;
1297                 goto done;
1298         }
1299
1300         /* prefix matches, lengths matter now */
1301         if (h1_len > h2_len) {
1302                 ret = 1;
1303                 goto done;
1304         } else if (h1_len == h2_len) {
1305                 DUK_ASSERT(ret == 0);
1306                 goto done;
1307         }
1308         ret = -1;
1309         goto done;
1310
1311  done:
1312         duk_push_int(ctx, (duk_int_t) ret);
1313         return 1;
1314 }