]> git.proxmox.com Git - ceph.git/blob - ceph/src/civetweb/src/third_party/duktape-1.3.0/src-separate/duk_bi_string.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / civetweb / src / third_party / duktape-1.3.0 / src-separate / duk_bi_string.c
1 /*
2 * String built-ins
3 */
4
5 /* XXX: There are several limitations in the current implementation for
6 * strings with >= 0x80000000UL characters. In some cases one would need
7 * to be able to represent the range [-0xffffffff,0xffffffff] and so on.
8 * Generally character and byte length are assumed to fit into signed 32
9 * bits (< 0x80000000UL). Places with issues are not marked explicitly
10 * below in all cases, look for signed type usage (duk_int_t etc) for
11 * offsets/lengths.
12 */
13
14 #include "duk_internal.h"
15
16 /*
17 * Constructor
18 */
19
20 DUK_INTERNAL duk_ret_t duk_bi_string_constructor(duk_context *ctx) {
21 /* String constructor needs to distinguish between an argument not given at all
22 * vs. given as 'undefined'. We're a vararg function to handle this properly.
23 */
24
25 if (duk_get_top(ctx) == 0) {
26 duk_push_hstring_stridx(ctx, DUK_STRIDX_EMPTY_STRING);
27 } else {
28 duk_to_string(ctx, 0);
29 }
30 DUK_ASSERT(duk_is_string(ctx, 0));
31 duk_set_top(ctx, 1);
32
33 if (duk_is_constructor_call(ctx)) {
34 duk_push_object_helper(ctx,
35 DUK_HOBJECT_FLAG_EXTENSIBLE |
36 DUK_HOBJECT_FLAG_EXOTIC_STRINGOBJ |
37 DUK_HOBJECT_CLASS_AS_FLAGS(DUK_HOBJECT_CLASS_STRING),
38 DUK_BIDX_STRING_PROTOTYPE);
39
40 /* String object internal value is immutable */
41 duk_dup(ctx, 0);
42 duk_xdef_prop_stridx(ctx, -2, DUK_STRIDX_INT_VALUE, DUK_PROPDESC_FLAGS_NONE);
43 }
44 /* Note: unbalanced stack on purpose */
45
46 return 1;
47 }
48
49 DUK_INTERNAL duk_ret_t duk_bi_string_constructor_from_char_code(duk_context *ctx) {
50 duk_hthread *thr = (duk_hthread *) ctx;
51 duk_bufwriter_ctx bw_alloc;
52 duk_bufwriter_ctx *bw;
53 duk_idx_t i, n;
54 duk_ucodepoint_t cp;
55
56 /* XXX: It would be nice to build the string directly but ToUint16()
57 * coercion is needed so a generic helper would not be very
58 * helpful (perhaps coerce the value stack first here and then
59 * build a string from a duk_tval number sequence in one go?).
60 */
61
62 n = duk_get_top(ctx);
63
64 bw = &bw_alloc;
65 DUK_BW_INIT_PUSHBUF(thr, bw, n); /* initial estimate for ASCII only codepoints */
66
67 for (i = 0; i < n; i++) {
68 /* XXX: could improve bufwriter handling to write multiple codepoints
69 * with one ensure call but the relative benefit would be quite small.
70 */
71
72 #if defined(DUK_USE_NONSTD_STRING_FROMCHARCODE_32BIT)
73 /* ToUint16() coercion is mandatory in the E5.1 specification, but
74 * this non-compliant behavior makes more sense because we support
75 * non-BMP codepoints. Don't use CESU-8 because that'd create
76 * surrogate pairs.
77 */
78
79 cp = (duk_ucodepoint_t) duk_to_uint32(ctx, i);
80 DUK_BW_WRITE_ENSURE_XUTF8(thr, bw, cp);
81 #else
82 cp = (duk_ucodepoint_t) duk_to_uint32(ctx, i);
83 DUK_BW_WRITE_ENSURE_CESU8(thr, bw, cp);
84 #endif
85 }
86
87 DUK_BW_COMPACT(thr, bw);
88 duk_to_string(ctx, -1);
89 return 1;
90 }
91
92 /*
93 * toString(), valueOf()
94 */
95
96 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_to_string(duk_context *ctx) {
97 duk_tval *tv;
98
99 duk_push_this(ctx);
100 tv = duk_require_tval(ctx, -1);
101 DUK_ASSERT(tv != NULL);
102
103 if (DUK_TVAL_IS_STRING(tv)) {
104 /* return as is */
105 return 1;
106 } else if (DUK_TVAL_IS_OBJECT(tv)) {
107 duk_hobject *h = DUK_TVAL_GET_OBJECT(tv);
108 DUK_ASSERT(h != NULL);
109
110 /* Must be a "string object", i.e. class "String" */
111 if (DUK_HOBJECT_GET_CLASS_NUMBER(h) != DUK_HOBJECT_CLASS_STRING) {
112 goto type_error;
113 }
114
115 duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INT_VALUE);
116 DUK_ASSERT(duk_is_string(ctx, -1));
117
118 return 1;
119 } else {
120 goto type_error;
121 }
122
123 /* never here, but fall through */
124
125 type_error:
126 return DUK_RET_TYPE_ERROR;
127 }
128
129 /*
130 * Character and charcode access
131 */
132
133 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_at(duk_context *ctx) {
134 duk_int_t pos;
135
136 /* XXX: faster implementation */
137
138 (void) duk_push_this_coercible_to_string(ctx);
139 pos = duk_to_int(ctx, 0);
140 duk_substring(ctx, -1, pos, pos + 1);
141 return 1;
142 }
143
144 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_char_code_at(duk_context *ctx) {
145 duk_hthread *thr = (duk_hthread *) ctx;
146 duk_int_t pos;
147 duk_hstring *h;
148 duk_bool_t clamped;
149
150 /* XXX: faster implementation */
151
152 DUK_DDD(DUK_DDDPRINT("arg=%!T", (duk_tval *) duk_get_tval(ctx, 0)));
153
154 h = duk_push_this_coercible_to_string(ctx);
155 DUK_ASSERT(h != NULL);
156
157 pos = duk_to_int_clamped_raw(ctx,
158 0 /*index*/,
159 0 /*min(incl)*/,
160 DUK_HSTRING_GET_CHARLEN(h) - 1 /*max(incl)*/,
161 &clamped /*out_clamped*/);
162 if (clamped) {
163 duk_push_number(ctx, DUK_DOUBLE_NAN);
164 return 1;
165 }
166
167 duk_push_u32(ctx, (duk_uint32_t) duk_hstring_char_code_at_raw(thr, h, pos));
168 return 1;
169 }
170
171 /*
172 * substring(), substr(), slice()
173 */
174
175 /* XXX: any chance of merging these three similar but still slightly
176 * different algorithms so that footprint would be reduced?
177 */
178
179 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substring(duk_context *ctx) {
180 duk_hstring *h;
181 duk_int_t start_pos, end_pos;
182 duk_int_t len;
183
184 h = duk_push_this_coercible_to_string(ctx);
185 DUK_ASSERT(h != NULL);
186 len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
187
188 /* [ start end str ] */
189
190 start_pos = duk_to_int_clamped(ctx, 0, 0, len);
191 if (duk_is_undefined(ctx, 1)) {
192 end_pos = len;
193 } else {
194 end_pos = duk_to_int_clamped(ctx, 1, 0, len);
195 }
196 DUK_ASSERT(start_pos >= 0 && start_pos <= len);
197 DUK_ASSERT(end_pos >= 0 && end_pos <= len);
198
199 if (start_pos > end_pos) {
200 duk_int_t tmp = start_pos;
201 start_pos = end_pos;
202 end_pos = tmp;
203 }
204
205 DUK_ASSERT(end_pos >= start_pos);
206
207 duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
208 return 1;
209 }
210
211 #ifdef DUK_USE_SECTION_B
212 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) {
213 duk_hstring *h;
214 duk_int_t start_pos, end_pos;
215 duk_int_t len;
216
217 /* Unlike non-obsolete String calls, substr() algorithm in E5.1
218 * specification will happily coerce undefined and null to strings
219 * ("undefined" and "null").
220 */
221 duk_push_this(ctx);
222 h = duk_to_hstring(ctx, -1);
223 DUK_ASSERT(h != NULL);
224 len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
225
226 /* [ start length str ] */
227
228 /* The implementation for computing of start_pos and end_pos differs
229 * from the standard algorithm, but is intended to result in the exactly
230 * same behavior. This is not always obvious.
231 */
232
233 /* combines steps 2 and 5; -len ensures max() not needed for step 5 */
234 start_pos = duk_to_int_clamped(ctx, 0, -len, len);
235 if (start_pos < 0) {
236 start_pos = len + start_pos;
237 }
238 DUK_ASSERT(start_pos >= 0 && start_pos <= len);
239
240 /* combines steps 3, 6; step 7 is not needed */
241 if (duk_is_undefined(ctx, 1)) {
242 end_pos = len;
243 } else {
244 DUK_ASSERT(start_pos <= len);
245 end_pos = start_pos + duk_to_int_clamped(ctx, 1, 0, len - start_pos);
246 }
247 DUK_ASSERT(start_pos >= 0 && start_pos <= len);
248 DUK_ASSERT(end_pos >= 0 && end_pos <= len);
249 DUK_ASSERT(end_pos >= start_pos);
250
251 duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
252 return 1;
253 }
254 #else /* DUK_USE_SECTION_B */
255 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_substr(duk_context *ctx) {
256 DUK_UNREF(ctx);
257 return DUK_RET_UNSUPPORTED_ERROR;
258 }
259 #endif /* DUK_USE_SECTION_B */
260
261 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_slice(duk_context *ctx) {
262 duk_hstring *h;
263 duk_int_t start_pos, end_pos;
264 duk_int_t len;
265
266 h = duk_push_this_coercible_to_string(ctx);
267 DUK_ASSERT(h != NULL);
268 len = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h);
269
270 /* [ start end str ] */
271
272 start_pos = duk_to_int_clamped(ctx, 0, -len, len);
273 if (start_pos < 0) {
274 start_pos = len + start_pos;
275 }
276 if (duk_is_undefined(ctx, 1)) {
277 end_pos = len;
278 } else {
279 end_pos = duk_to_int_clamped(ctx, 1, -len, len);
280 if (end_pos < 0) {
281 end_pos = len + end_pos;
282 }
283 }
284 DUK_ASSERT(start_pos >= 0 && start_pos <= len);
285 DUK_ASSERT(end_pos >= 0 && end_pos <= len);
286
287 if (end_pos < start_pos) {
288 end_pos = start_pos;
289 }
290
291 DUK_ASSERT(end_pos >= start_pos);
292
293 duk_substring(ctx, -1, (duk_size_t) start_pos, (duk_size_t) end_pos);
294 return 1;
295 }
296
297 /*
298 * Case conversion
299 */
300
301 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_caseconv_shared(duk_context *ctx) {
302 duk_hthread *thr = (duk_hthread *) ctx;
303 duk_small_int_t uppercase = duk_get_current_magic(ctx);
304
305 (void) duk_push_this_coercible_to_string(ctx);
306 duk_unicode_case_convert_string(thr, (duk_bool_t) uppercase);
307 return 1;
308 }
309
310 /*
311 * indexOf() and lastIndexOf()
312 */
313
314 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_indexof_shared(duk_context *ctx) {
315 duk_hthread *thr = (duk_hthread *) ctx;
316 duk_hstring *h_this;
317 duk_hstring *h_search;
318 duk_int_t clen_this;
319 duk_int_t cpos;
320 duk_int_t bpos;
321 const duk_uint8_t *p_start, *p_end, *p;
322 const duk_uint8_t *q_start;
323 duk_int_t q_blen;
324 duk_uint8_t firstbyte;
325 duk_uint8_t t;
326 duk_small_int_t is_lastindexof = duk_get_current_magic(ctx); /* 0=indexOf, 1=lastIndexOf */
327
328 h_this = duk_push_this_coercible_to_string(ctx);
329 DUK_ASSERT(h_this != NULL);
330 clen_this = (duk_int_t) DUK_HSTRING_GET_CHARLEN(h_this);
331
332 h_search = duk_to_hstring(ctx, 0);
333 DUK_ASSERT(h_search != NULL);
334 q_start = DUK_HSTRING_GET_DATA(h_search);
335 q_blen = (duk_int_t) DUK_HSTRING_GET_BYTELEN(h_search);
336
337 duk_to_number(ctx, 1);
338 if (duk_is_nan(ctx, 1) && is_lastindexof) {
339 /* indexOf: NaN should cause pos to be zero.
340 * lastIndexOf: NaN should cause pos to be +Infinity
341 * (and later be clamped to len).
342 */
343 cpos = clen_this;
344 } else {
345 cpos = duk_to_int_clamped(ctx, 1, 0, clen_this);
346 }
347
348 /* Empty searchstring always matches; cpos must be clamped here.
349 * (If q_blen were < 0 due to clamped coercion, it would also be
350 * caught here.)
351 */
352 if (q_blen <= 0) {
353 duk_push_int(ctx, cpos);
354 return 1;
355 }
356 DUK_ASSERT(q_blen > 0);
357
358 bpos = (duk_int_t) duk_heap_strcache_offset_char2byte(thr, h_this, (duk_uint32_t) cpos);
359
360 p_start = DUK_HSTRING_GET_DATA(h_this);
361 p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_this);
362 p = p_start + bpos;
363
364 /* This loop is optimized for size. For speed, there should be
365 * two separate loops, and we should ensure that memcmp() can be
366 * used without an extra "will searchstring fit" check. Doing
367 * the preconditioning for 'p' and 'p_end' is easy but cpos
368 * must be updated if 'p' is wound back (backward scanning).
369 */
370
371 firstbyte = q_start[0]; /* leading byte of match string */
372 while (p <= p_end && p >= p_start) {
373 t = *p;
374
375 /* For Ecmascript strings, this check can only match for
376 * initial UTF-8 bytes (not continuation bytes). For other
377 * strings all bets are off.
378 */
379
380 if ((t == firstbyte) && ((duk_size_t) (p_end - p) >= (duk_size_t) q_blen)) {
381 DUK_ASSERT(q_blen > 0); /* no issues with memcmp() zero size, even if broken */
382 if (DUK_MEMCMP(p, q_start, (duk_size_t) q_blen) == 0) {
383 duk_push_int(ctx, cpos);
384 return 1;
385 }
386 }
387
388 /* track cpos while scanning */
389 if (is_lastindexof) {
390 /* when going backwards, we decrement cpos 'early';
391 * 'p' may point to a continuation byte of the char
392 * at offset 'cpos', but that's OK because we'll
393 * backtrack all the way to the initial byte.
394 */
395 if ((t & 0xc0) != 0x80) {
396 cpos--;
397 }
398 p--;
399 } else {
400 if ((t & 0xc0) != 0x80) {
401 cpos++;
402 }
403 p++;
404 }
405 }
406
407 /* Not found. Empty string case is handled specially above. */
408 duk_push_int(ctx, -1);
409 return 1;
410 }
411
412 /*
413 * replace()
414 */
415
416 /* XXX: the current implementation works but is quite clunky; it compiles
417 * to almost 1.4kB of x86 code so it needs to be simplified (better approach,
418 * shared helpers, etc). Some ideas for refactoring:
419 *
420 * - a primitive to convert a string into a regexp matcher (reduces matching
421 * code at the cost of making matching much slower)
422 * - use replace() as a basic helper for match() and split(), which are both
423 * much simpler
424 * - API call to get_prop and to_boolean
425 */
426
427 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_replace(duk_context *ctx) {
428 duk_hthread *thr = (duk_hthread *) ctx;
429 duk_hstring *h_input;
430 duk_hstring *h_match;
431 duk_hstring *h_search;
432 duk_hobject *h_re;
433 duk_bufwriter_ctx bw_alloc;
434 duk_bufwriter_ctx *bw;
435 #ifdef DUK_USE_REGEXP_SUPPORT
436 duk_bool_t is_regexp;
437 duk_bool_t is_global;
438 #endif
439 duk_bool_t is_repl_func;
440 duk_uint32_t match_start_coff, match_start_boff;
441 #ifdef DUK_USE_REGEXP_SUPPORT
442 duk_int_t match_caps;
443 #endif
444 duk_uint32_t prev_match_end_boff;
445 const duk_uint8_t *r_start, *r_end, *r; /* repl string scan */
446 duk_size_t tmp_sz;
447
448 DUK_ASSERT_TOP(ctx, 2);
449 h_input = duk_push_this_coercible_to_string(ctx);
450 DUK_ASSERT(h_input != NULL);
451
452 bw = &bw_alloc;
453 DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input)); /* input size is good output starting point */
454
455 DUK_ASSERT_TOP(ctx, 4);
456
457 /* stack[0] = search value
458 * stack[1] = replace value
459 * stack[2] = input string
460 * stack[3] = result buffer
461 */
462
463 h_re = duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP);
464 if (h_re) {
465 #ifdef DUK_USE_REGEXP_SUPPORT
466 is_regexp = 1;
467 is_global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL);
468
469 if (is_global) {
470 /* start match from beginning */
471 duk_push_int(ctx, 0);
472 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
473 }
474 #else /* DUK_USE_REGEXP_SUPPORT */
475 return DUK_RET_UNSUPPORTED_ERROR;
476 #endif /* DUK_USE_REGEXP_SUPPORT */
477 } else {
478 duk_to_string(ctx, 0);
479 #ifdef DUK_USE_REGEXP_SUPPORT
480 is_regexp = 0;
481 is_global = 0;
482 #endif
483 }
484
485 if (duk_is_function(ctx, 1)) {
486 is_repl_func = 1;
487 r_start = NULL;
488 r_end = NULL;
489 } else {
490 duk_hstring *h_repl;
491
492 is_repl_func = 0;
493 h_repl = duk_to_hstring(ctx, 1);
494 DUK_ASSERT(h_repl != NULL);
495 r_start = DUK_HSTRING_GET_DATA(h_repl);
496 r_end = r_start + DUK_HSTRING_GET_BYTELEN(h_repl);
497 }
498
499 prev_match_end_boff = 0;
500
501 for (;;) {
502 /*
503 * If matching with a regexp:
504 * - non-global RegExp: lastIndex not touched on a match, zeroed
505 * on a non-match
506 * - global RegExp: on match, lastIndex will be updated by regexp
507 * executor to point to next char after the matching part (so that
508 * characters in the matching part are not matched again)
509 *
510 * If matching with a string:
511 * - always non-global match, find first occurrence
512 *
513 * We need:
514 * - The character offset of start-of-match for the replacer function
515 * - The byte offsets for start-of-match and end-of-match to implement
516 * the replacement values $&, $`, and $', and to copy non-matching
517 * input string portions (including header and trailer) verbatim.
518 *
519 * NOTE: the E5.1 specification is a bit vague how the RegExp should
520 * behave in the replacement process; e.g. is matching done first for
521 * all matches (in the global RegExp case) before any replacer calls
522 * are made? See: test-bi-string-proto-replace.js for discussion.
523 */
524
525 DUK_ASSERT_TOP(ctx, 4);
526
527 #ifdef DUK_USE_REGEXP_SUPPORT
528 if (is_regexp) {
529 duk_dup(ctx, 0);
530 duk_dup(ctx, 2);
531 duk_regexp_match(thr); /* [ ... regexp input ] -> [ res_obj ] */
532 if (!duk_is_object(ctx, -1)) {
533 duk_pop(ctx);
534 break;
535 }
536
537 duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
538 DUK_ASSERT(duk_is_number(ctx, -1));
539 match_start_coff = duk_get_int(ctx, -1);
540 duk_pop(ctx);
541
542 duk_get_prop_index(ctx, -1, 0);
543 DUK_ASSERT(duk_is_string(ctx, -1));
544 h_match = duk_get_hstring(ctx, -1);
545 DUK_ASSERT(h_match != NULL);
546 duk_pop(ctx); /* h_match is borrowed, remains reachable through match_obj */
547
548 if (DUK_HSTRING_GET_BYTELEN(h_match) == 0) {
549 /* This should be equivalent to match() algorithm step 8.f.iii.2:
550 * detect an empty match and allow it, but don't allow it twice.
551 */
552 duk_uint32_t last_index;
553
554 duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
555 last_index = (duk_uint32_t) duk_get_uint(ctx, -1);
556 DUK_DDD(DUK_DDDPRINT("empty match, bump lastIndex: %ld -> %ld",
557 (long) last_index, (long) (last_index + 1)));
558 duk_pop(ctx);
559 duk_push_int(ctx, last_index + 1);
560 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
561 }
562
563 DUK_ASSERT(duk_get_length(ctx, -1) <= DUK_INT_MAX); /* string limits */
564 match_caps = (duk_int_t) duk_get_length(ctx, -1);
565 } else {
566 #else /* DUK_USE_REGEXP_SUPPORT */
567 { /* unconditionally */
568 #endif /* DUK_USE_REGEXP_SUPPORT */
569 const duk_uint8_t *p_start, *p_end, *p; /* input string scan */
570 const duk_uint8_t *q_start; /* match string */
571 duk_size_t q_blen;
572
573 #ifdef DUK_USE_REGEXP_SUPPORT
574 DUK_ASSERT(!is_global); /* single match always */
575 #endif
576
577 p_start = DUK_HSTRING_GET_DATA(h_input);
578 p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
579 p = p_start;
580
581 h_search = duk_get_hstring(ctx, 0);
582 DUK_ASSERT(h_search != NULL);
583 q_start = DUK_HSTRING_GET_DATA(h_search);
584 q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_search);
585
586 p_end -= q_blen; /* ensure full memcmp() fits in while */
587
588 match_start_coff = 0;
589
590 while (p <= p_end) {
591 DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input));
592 if (DUK_MEMCMP((void *) p, (void *) q_start, (size_t) q_blen) == 0) {
593 duk_dup(ctx, 0);
594 h_match = duk_get_hstring(ctx, -1);
595 DUK_ASSERT(h_match != NULL);
596 #ifdef DUK_USE_REGEXP_SUPPORT
597 match_caps = 0;
598 #endif
599 goto found;
600 }
601
602 /* track utf-8 non-continuation bytes */
603 if ((p[0] & 0xc0) != 0x80) {
604 match_start_coff++;
605 }
606 p++;
607 }
608
609 /* not found */
610 break;
611 }
612 found:
613
614 /* stack[0] = search value
615 * stack[1] = replace value
616 * stack[2] = input string
617 * stack[3] = result buffer
618 * stack[4] = regexp match OR match string
619 */
620
621 match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff);
622
623 tmp_sz = (duk_size_t) (match_start_boff - prev_match_end_boff);
624 DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
625
626 prev_match_end_boff = match_start_boff + DUK_HSTRING_GET_BYTELEN(h_match);
627
628 if (is_repl_func) {
629 duk_idx_t idx_args;
630 duk_hstring *h_repl;
631
632 /* regexp res_obj is at index 4 */
633
634 duk_dup(ctx, 1);
635 idx_args = duk_get_top(ctx);
636
637 #ifdef DUK_USE_REGEXP_SUPPORT
638 if (is_regexp) {
639 duk_int_t idx;
640 duk_require_stack(ctx, match_caps + 2);
641 for (idx = 0; idx < match_caps; idx++) {
642 /* match followed by capture(s) */
643 duk_get_prop_index(ctx, 4, idx);
644 }
645 } else {
646 #else /* DUK_USE_REGEXP_SUPPORT */
647 { /* unconditionally */
648 #endif /* DUK_USE_REGEXP_SUPPORT */
649 /* match == search string, by definition */
650 duk_dup(ctx, 0);
651 }
652 duk_push_int(ctx, match_start_coff);
653 duk_dup(ctx, 2);
654
655 /* [ ... replacer match [captures] match_char_offset input ] */
656
657 duk_call(ctx, duk_get_top(ctx) - idx_args);
658 h_repl = duk_to_hstring(ctx, -1); /* -> [ ... repl_value ] */
659 DUK_ASSERT(h_repl != NULL);
660
661 DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_repl);
662
663 duk_pop(ctx); /* repl_value */
664 } else {
665 r = r_start;
666
667 while (r < r_end) {
668 duk_int_t ch1;
669 duk_int_t ch2;
670 #ifdef DUK_USE_REGEXP_SUPPORT
671 duk_int_t ch3;
672 #endif
673 duk_size_t left;
674
675 ch1 = *r++;
676 if (ch1 != DUK_ASC_DOLLAR) {
677 goto repl_write;
678 }
679 left = r_end - r;
680
681 if (left <= 0) {
682 goto repl_write;
683 }
684
685 ch2 = r[0];
686 switch ((int) ch2) {
687 case DUK_ASC_DOLLAR: {
688 ch1 = (1 << 8) + DUK_ASC_DOLLAR;
689 goto repl_write;
690 }
691 case DUK_ASC_AMP: {
692 DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_match);
693 r++;
694 continue;
695 }
696 case DUK_ASC_GRAVE: {
697 tmp_sz = (duk_size_t) match_start_boff;
698 DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input), tmp_sz);
699 r++;
700 continue;
701 }
702 case DUK_ASC_SINGLEQUOTE: {
703 duk_uint32_t match_end_boff;
704
705 /* Use match charlen instead of bytelen, just in case the input and
706 * match codepoint encodings would have different lengths.
707 */
708 match_end_boff = duk_heap_strcache_offset_char2byte(thr,
709 h_input,
710 match_start_coff + DUK_HSTRING_GET_CHARLEN(h_match));
711
712 tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - match_end_boff);
713 DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + match_end_boff, tmp_sz);
714 r++;
715 continue;
716 }
717 default: {
718 #ifdef DUK_USE_REGEXP_SUPPORT
719 duk_int_t capnum, captmp, capadv;
720 /* XXX: optional check, match_caps is zero if no regexp,
721 * so dollar will be interpreted literally anyway.
722 */
723
724 if (!is_regexp) {
725 goto repl_write;
726 }
727
728 if (!(ch2 >= DUK_ASC_0 && ch2 <= DUK_ASC_9)) {
729 goto repl_write;
730 }
731 capnum = ch2 - DUK_ASC_0;
732 capadv = 1;
733
734 if (left >= 2) {
735 ch3 = r[1];
736 if (ch3 >= DUK_ASC_0 && ch3 <= DUK_ASC_9) {
737 captmp = capnum * 10 + (ch3 - DUK_ASC_0);
738 if (captmp < match_caps) {
739 capnum = captmp;
740 capadv = 2;
741 }
742 }
743 }
744
745 if (capnum > 0 && capnum < match_caps) {
746 DUK_ASSERT(is_regexp != 0); /* match_caps == 0 without regexps */
747
748 /* regexp res_obj is at offset 4 */
749 duk_get_prop_index(ctx, 4, (duk_uarridx_t) capnum);
750 if (duk_is_string(ctx, -1)) {
751 duk_hstring *h_tmp_str;
752
753 h_tmp_str = duk_get_hstring(ctx, -1);
754 DUK_ASSERT(h_tmp_str != NULL);
755
756 DUK_BW_WRITE_ENSURE_HSTRING(thr, bw, h_tmp_str);
757 } else {
758 /* undefined -> skip (replaced with empty) */
759 }
760 duk_pop(ctx);
761 r += capadv;
762 continue;
763 } else {
764 goto repl_write;
765 }
766 #else /* DUK_USE_REGEXP_SUPPORT */
767 goto repl_write; /* unconditionally */
768 #endif /* DUK_USE_REGEXP_SUPPORT */
769 } /* default case */
770 } /* switch (ch2) */
771
772 repl_write:
773 /* ch1 = (r_increment << 8) + byte */
774
775 DUK_BW_WRITE_ENSURE_U8(thr, bw, (duk_uint8_t) (ch1 & 0xff));
776 r += ch1 >> 8;
777 } /* while repl */
778 } /* if (is_repl_func) */
779
780 duk_pop(ctx); /* pop regexp res_obj or match string */
781
782 #ifdef DUK_USE_REGEXP_SUPPORT
783 if (!is_global) {
784 #else
785 { /* unconditionally; is_global==0 */
786 #endif
787 break;
788 }
789 }
790
791 /* trailer */
792 tmp_sz = (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff);
793 DUK_BW_WRITE_ENSURE_BYTES(thr, bw, DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff, tmp_sz);
794
795 DUK_ASSERT_TOP(ctx, 4);
796 DUK_BW_COMPACT(thr, bw);
797 duk_to_string(ctx, -1);
798 return 1;
799 }
800
801 /*
802 * split()
803 */
804
805 /* XXX: very messy now, but works; clean up, remove unused variables (nominally
806 * used so compiler doesn't complain).
807 */
808
809 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_split(duk_context *ctx) {
810 duk_hthread *thr = (duk_hthread *) ctx;
811 duk_hstring *h_input;
812 duk_hstring *h_sep;
813 duk_uint32_t limit;
814 duk_uint32_t arr_idx;
815 #ifdef DUK_USE_REGEXP_SUPPORT
816 duk_bool_t is_regexp;
817 #endif
818 duk_bool_t matched; /* set to 1 if any match exists (needed for empty input special case) */
819 duk_uint32_t prev_match_end_coff, prev_match_end_boff;
820 duk_uint32_t match_start_boff, match_start_coff;
821 duk_uint32_t match_end_boff, match_end_coff;
822
823 DUK_UNREF(thr);
824
825 h_input = duk_push_this_coercible_to_string(ctx);
826 DUK_ASSERT(h_input != NULL);
827
828 duk_push_array(ctx);
829
830 if (duk_is_undefined(ctx, 1)) {
831 limit = 0xffffffffUL;
832 } else {
833 limit = duk_to_uint32(ctx, 1);
834 }
835
836 if (limit == 0) {
837 return 1;
838 }
839
840 /* If the separator is a RegExp, make a "clone" of it. The specification
841 * algorithm calls [[Match]] directly for specific indices; we emulate this
842 * by tweaking lastIndex and using a "force global" variant of duk_regexp_match()
843 * which will use global-style matching even when the RegExp itself is non-global.
844 */
845
846 if (duk_is_undefined(ctx, 0)) {
847 /* The spec algorithm first does "R = ToString(separator)" before checking
848 * whether separator is undefined. Since this is side effect free, we can
849 * skip the ToString() here.
850 */
851 duk_dup(ctx, 2);
852 duk_put_prop_index(ctx, 3, 0);
853 return 1;
854 } else if (duk_get_hobject_with_class(ctx, 0, DUK_HOBJECT_CLASS_REGEXP) != NULL) {
855 #ifdef DUK_USE_REGEXP_SUPPORT
856 duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
857 duk_dup(ctx, 0);
858 duk_new(ctx, 1); /* [ ... RegExp val ] -> [ ... res ] */
859 duk_replace(ctx, 0);
860 /* lastIndex is initialized to zero by new RegExp() */
861 is_regexp = 1;
862 #else
863 return DUK_RET_UNSUPPORTED_ERROR;
864 #endif
865 } else {
866 duk_to_string(ctx, 0);
867 #ifdef DUK_USE_REGEXP_SUPPORT
868 is_regexp = 0;
869 #endif
870 }
871
872 /* stack[0] = separator (string or regexp)
873 * stack[1] = limit
874 * stack[2] = input string
875 * stack[3] = result array
876 */
877
878 prev_match_end_boff = 0;
879 prev_match_end_coff = 0;
880 arr_idx = 0;
881 matched = 0;
882
883 for (;;) {
884 /*
885 * The specification uses RegExp [[Match]] to attempt match at specific
886 * offsets. We don't have such a primitive, so we use an actual RegExp
887 * and tweak lastIndex. Since the RegExp may be non-global, we use a
888 * special variant which forces global-like behavior for matching.
889 */
890
891 DUK_ASSERT_TOP(ctx, 4);
892
893 #ifdef DUK_USE_REGEXP_SUPPORT
894 if (is_regexp) {
895 duk_dup(ctx, 0);
896 duk_dup(ctx, 2);
897 duk_regexp_match_force_global(thr); /* [ ... regexp input ] -> [ res_obj ] */
898 if (!duk_is_object(ctx, -1)) {
899 duk_pop(ctx);
900 break;
901 }
902 matched = 1;
903
904 duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
905 DUK_ASSERT(duk_is_number(ctx, -1));
906 match_start_coff = duk_get_int(ctx, -1);
907 match_start_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_start_coff);
908 duk_pop(ctx);
909
910 if (match_start_coff == DUK_HSTRING_GET_CHARLEN(h_input)) {
911 /* don't allow an empty match at the end of the string */
912 duk_pop(ctx);
913 break;
914 }
915
916 duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
917 DUK_ASSERT(duk_is_number(ctx, -1));
918 match_end_coff = duk_get_int(ctx, -1);
919 match_end_boff = duk_heap_strcache_offset_char2byte(thr, h_input, match_end_coff);
920 duk_pop(ctx);
921
922 /* empty match -> bump and continue */
923 if (prev_match_end_boff == match_end_boff) {
924 duk_push_int(ctx, match_end_coff + 1);
925 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
926 duk_pop(ctx);
927 continue;
928 }
929 } else {
930 #else /* DUK_USE_REGEXP_SUPPORT */
931 { /* unconditionally */
932 #endif /* DUK_USE_REGEXP_SUPPORT */
933 const duk_uint8_t *p_start, *p_end, *p; /* input string scan */
934 const duk_uint8_t *q_start; /* match string */
935 duk_size_t q_blen, q_clen;
936
937 p_start = DUK_HSTRING_GET_DATA(h_input);
938 p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
939 p = p_start + prev_match_end_boff;
940
941 h_sep = duk_get_hstring(ctx, 0);
942 DUK_ASSERT(h_sep != NULL);
943 q_start = DUK_HSTRING_GET_DATA(h_sep);
944 q_blen = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h_sep);
945 q_clen = (duk_size_t) DUK_HSTRING_GET_CHARLEN(h_sep);
946
947 p_end -= q_blen; /* ensure full memcmp() fits in while */
948
949 match_start_coff = prev_match_end_coff;
950
951 if (q_blen == 0) {
952 /* Handle empty separator case: it will always match, and always
953 * triggers the check in step 13.c.iii initially. Note that we
954 * must skip to either end of string or start of first codepoint,
955 * skipping over any continuation bytes!
956 *
957 * Don't allow an empty string to match at the end of the input.
958 */
959
960 matched = 1; /* empty separator can always match */
961
962 match_start_coff++;
963 p++;
964 while (p < p_end) {
965 if ((p[0] & 0xc0) != 0x80) {
966 goto found;
967 }
968 p++;
969 }
970 goto not_found;
971 }
972
973 DUK_ASSERT(q_blen > 0 && q_clen > 0);
974 while (p <= p_end) {
975 DUK_ASSERT(p + q_blen <= DUK_HSTRING_GET_DATA(h_input) + DUK_HSTRING_GET_BYTELEN(h_input));
976 DUK_ASSERT(q_blen > 0); /* no issues with empty memcmp() */
977 if (DUK_MEMCMP((void *) p, (void *) q_start, (duk_size_t) q_blen) == 0) {
978 /* never an empty match, so step 13.c.iii can't be triggered */
979 goto found;
980 }
981
982 /* track utf-8 non-continuation bytes */
983 if ((p[0] & 0xc0) != 0x80) {
984 match_start_coff++;
985 }
986 p++;
987 }
988
989 not_found:
990 /* not found */
991 break;
992
993 found:
994 matched = 1;
995 match_start_boff = (duk_uint32_t) (p - p_start);
996 match_end_coff = (duk_uint32_t) (match_start_coff + q_clen); /* constrained by string length */
997 match_end_boff = (duk_uint32_t) (match_start_boff + q_blen); /* ditto */
998
999 /* empty match (may happen with empty separator) -> bump and continue */
1000 if (prev_match_end_boff == match_end_boff) {
1001 prev_match_end_boff++;
1002 prev_match_end_coff++;
1003 continue;
1004 }
1005 } /* if (is_regexp) */
1006
1007 /* stack[0] = separator (string or regexp)
1008 * stack[1] = limit
1009 * stack[2] = input string
1010 * stack[3] = result array
1011 * stack[4] = regexp res_obj (if is_regexp)
1012 */
1013
1014 DUK_DDD(DUK_DDDPRINT("split; match_start b=%ld,c=%ld, match_end b=%ld,c=%ld, prev_end b=%ld,c=%ld",
1015 (long) match_start_boff, (long) match_start_coff,
1016 (long) match_end_boff, (long) match_end_coff,
1017 (long) prev_match_end_boff, (long) prev_match_end_coff));
1018
1019 duk_push_lstring(ctx,
1020 (const char *) (DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff),
1021 (duk_size_t) (match_start_boff - prev_match_end_boff));
1022 duk_put_prop_index(ctx, 3, arr_idx);
1023 arr_idx++;
1024 if (arr_idx >= limit) {
1025 goto hit_limit;
1026 }
1027
1028 #ifdef DUK_USE_REGEXP_SUPPORT
1029 if (is_regexp) {
1030 duk_size_t i, len;
1031
1032 len = duk_get_length(ctx, 4);
1033 for (i = 1; i < len; i++) {
1034 DUK_ASSERT(i <= DUK_UARRIDX_MAX); /* cannot have >4G captures */
1035 duk_get_prop_index(ctx, 4, (duk_uarridx_t) i);
1036 duk_put_prop_index(ctx, 3, arr_idx);
1037 arr_idx++;
1038 if (arr_idx >= limit) {
1039 goto hit_limit;
1040 }
1041 }
1042
1043 duk_pop(ctx);
1044 /* lastIndex already set up for next match */
1045 } else {
1046 #else /* DUK_USE_REGEXP_SUPPORT */
1047 { /* unconditionally */
1048 #endif /* DUK_USE_REGEXP_SUPPORT */
1049 /* no action */
1050 }
1051
1052 prev_match_end_boff = match_end_boff;
1053 prev_match_end_coff = match_end_coff;
1054 continue;
1055 } /* for */
1056
1057 /* Combined step 11 (empty string special case) and 14-15. */
1058
1059 DUK_DDD(DUK_DDDPRINT("split trailer; prev_end b=%ld,c=%ld",
1060 (long) prev_match_end_boff, (long) prev_match_end_coff));
1061
1062 if (DUK_HSTRING_GET_CHARLEN(h_input) > 0 || !matched) {
1063 /* Add trailer if:
1064 * a) non-empty input
1065 * b) empty input and no (zero size) match found (step 11)
1066 */
1067
1068 duk_push_lstring(ctx,
1069 (const char *) DUK_HSTRING_GET_DATA(h_input) + prev_match_end_boff,
1070 (duk_size_t) (DUK_HSTRING_GET_BYTELEN(h_input) - prev_match_end_boff));
1071 duk_put_prop_index(ctx, 3, arr_idx);
1072 /* No arr_idx update or limit check */
1073 }
1074
1075 return 1;
1076
1077 hit_limit:
1078 #ifdef DUK_USE_REGEXP_SUPPORT
1079 if (is_regexp) {
1080 duk_pop(ctx);
1081 }
1082 #endif
1083
1084 return 1;
1085 }
1086
1087 /*
1088 * Various
1089 */
1090
#ifdef DUK_USE_REGEXP_SUPPORT
/*
 *  Shared helper for match() steps 3-4 and search() steps 3-4: make sure
 *  the value at stack position 'index' is a RegExp instance.
 *
 *  ctx:        Duktape context whose value stack is operated on.
 *  index:      non-negative stack index of the value to coerce (asserted).
 *  force_new:  when non-zero, always construct a new RegExp from the value,
 *              even if it already is one.
 *
 *  When 'force_new' is zero and duk_get_hobject_with_class() finds an object
 *  of class DUK_HOBJECT_CLASS_REGEXP at 'index', the stack is left untouched.
 *  Otherwise the built-in RegExp constructor is called with the value as its
 *  only argument and the result replaces the value at 'index'.
 */
DUK_LOCAL void duk__to_regexp_helper(duk_context *ctx, duk_idx_t index, duk_bool_t force_new) {
	DUK_ASSERT(index >= 0);

	if (!force_new &&
	    duk_get_hobject_with_class(ctx, index, DUK_HOBJECT_CLASS_REGEXP) != NULL) {
		/* Existing RegExp is acceptable as is. */
		return;
	}

	/* Equivalent of 'new RegExp(val)', stored back over the original value. */
	duk_push_hobject_bidx(ctx, DUK_BIDX_REGEXP_CONSTRUCTOR);
	duk_dup(ctx, index);
	duk_new(ctx, 1);  /* [ ... RegExp val ] -> [ ... res ] */
	duk_replace(ctx, index);
}
#endif  /* DUK_USE_REGEXP_SUPPORT */
1116
1117 #ifdef DUK_USE_REGEXP_SUPPORT
1118 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) {
1119 duk_hthread *thr = (duk_hthread *) ctx;
1120
1121 /* Easiest way to implement the search required by the specification
1122 * is to do a RegExp test() with lastIndex forced to zero. To avoid
1123 * side effects on the argument, "clone" the RegExp if a RegExp was
1124 * given as input.
1125 *
1126 * The global flag of the RegExp should be ignored; setting lastIndex
1127 * to zero (which happens when "cloning" the RegExp) should have an
1128 * equivalent effect.
1129 */
1130
1131 DUK_ASSERT_TOP(ctx, 1);
1132 (void) duk_push_this_coercible_to_string(ctx); /* at index 1 */
1133 duk__to_regexp_helper(ctx, 0 /*index*/, 1 /*force_new*/);
1134
1135 /* stack[0] = regexp
1136 * stack[1] = string
1137 */
1138
1139 /* Avoid using RegExp.prototype methods, as they're writable and
1140 * configurable and may have been changed.
1141 */
1142
1143 duk_dup(ctx, 0);
1144 duk_dup(ctx, 1); /* [ ... re_obj input ] */
1145 duk_regexp_match(thr); /* -> [ ... res_obj ] */
1146
1147 if (!duk_is_object(ctx, -1)) {
1148 duk_push_int(ctx, -1);
1149 return 1;
1150 }
1151
1152 duk_get_prop_stridx(ctx, -1, DUK_STRIDX_INDEX);
1153 DUK_ASSERT(duk_is_number(ctx, -1));
1154 return 1;
1155 }
1156 #else /* DUK_USE_REGEXP_SUPPORT */
1157 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_search(duk_context *ctx) {
1158 DUK_UNREF(ctx);
1159 return DUK_RET_UNSUPPORTED_ERROR;
1160 }
1161 #endif /* DUK_USE_REGEXP_SUPPORT */
1162
1163 #ifdef DUK_USE_REGEXP_SUPPORT
1164 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) {
1165 duk_hthread *thr = (duk_hthread *) ctx;
1166 duk_bool_t global;
1167 duk_int_t prev_last_index;
1168 duk_int_t this_index;
1169 duk_int_t arr_idx;
1170
1171 DUK_ASSERT_TOP(ctx, 1);
1172 (void) duk_push_this_coercible_to_string(ctx);
1173 duk__to_regexp_helper(ctx, 0 /*index*/, 0 /*force_new*/);
1174 global = duk_get_prop_stridx_boolean(ctx, 0, DUK_STRIDX_GLOBAL, NULL);
1175 DUK_ASSERT_TOP(ctx, 2);
1176
1177 /* stack[0] = regexp
1178 * stack[1] = string
1179 */
1180
1181 if (!global) {
1182 duk_regexp_match(thr); /* -> [ res_obj ] */
1183 return 1; /* return 'res_obj' */
1184 }
1185
1186 /* Global case is more complex. */
1187
1188 /* [ regexp string ] */
1189
1190 duk_push_int(ctx, 0);
1191 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1192 duk_push_array(ctx);
1193
1194 /* [ regexp string res_arr ] */
1195
1196 prev_last_index = 0;
1197 arr_idx = 0;
1198
1199 for (;;) {
1200 DUK_ASSERT_TOP(ctx, 3);
1201
1202 duk_dup(ctx, 0);
1203 duk_dup(ctx, 1);
1204 duk_regexp_match(thr); /* -> [ ... regexp string ] -> [ ... res_obj ] */
1205
1206 if (!duk_is_object(ctx, -1)) {
1207 duk_pop(ctx);
1208 break;
1209 }
1210
1211 duk_get_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1212 DUK_ASSERT(duk_is_number(ctx, -1));
1213 this_index = duk_get_int(ctx, -1);
1214 duk_pop(ctx);
1215
1216 if (this_index == prev_last_index) {
1217 this_index++;
1218 duk_push_int(ctx, this_index);
1219 duk_put_prop_stridx(ctx, 0, DUK_STRIDX_LAST_INDEX);
1220 }
1221 prev_last_index = this_index;
1222
1223 duk_get_prop_index(ctx, -1, 0); /* match string */
1224 duk_put_prop_index(ctx, 2, arr_idx);
1225 arr_idx++;
1226 duk_pop(ctx); /* res_obj */
1227 }
1228
1229 if (arr_idx == 0) {
1230 duk_push_null(ctx);
1231 }
1232
1233 return 1; /* return 'res_arr' or 'null' */
1234 }
1235 #else /* DUK_USE_REGEXP_SUPPORT */
1236 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_match(duk_context *ctx) {
1237 DUK_UNREF(ctx);
1238 return DUK_RET_UNSUPPORTED_ERROR;
1239 }
1240 #endif /* DUK_USE_REGEXP_SUPPORT */
1241
1242 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_concat(duk_context *ctx) {
1243 /* duk_concat() coerces arguments with ToString() in correct order */
1244 (void) duk_push_this_coercible_to_string(ctx);
1245 duk_insert(ctx, 0); /* this is relatively expensive */
1246 duk_concat(ctx, duk_get_top(ctx));
1247 return 1;
1248 }
1249
1250 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_trim(duk_context *ctx) {
1251 DUK_ASSERT_TOP(ctx, 0);
1252 (void) duk_push_this_coercible_to_string(ctx);
1253 duk_trim(ctx, 0);
1254 DUK_ASSERT_TOP(ctx, 1);
1255 return 1;
1256 }
1257
1258 DUK_INTERNAL duk_ret_t duk_bi_string_prototype_locale_compare(duk_context *ctx) {
1259 duk_hstring *h1;
1260 duk_hstring *h2;
1261 duk_size_t h1_len, h2_len, prefix_len;
1262 duk_small_int_t ret = 0;
1263 duk_small_int_t rc;
1264
1265 /* The current implementation of localeCompare() is simply a codepoint
1266 * by codepoint comparison, implemented with a simple string compare
1267 * because UTF-8 should preserve codepoint ordering (assuming valid
1268 * shortest UTF-8 encoding).
1269 *
1270 * The specification requires that the return value must be related
1271 * to the sort order: e.g. negative means that 'this' comes before
1272 * 'that' in sort order. We assume an ascending sort order.
1273 */
1274
1275 /* XXX: could share code with duk_js_ops.c, duk_js_compare_helper */
1276
1277 h1 = duk_push_this_coercible_to_string(ctx);
1278 DUK_ASSERT(h1 != NULL);
1279
1280 h2 = duk_to_hstring(ctx, 0);
1281 DUK_ASSERT(h2 != NULL);
1282
1283 h1_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h1);
1284 h2_len = (duk_size_t) DUK_HSTRING_GET_BYTELEN(h2);
1285 prefix_len = (h1_len <= h2_len ? h1_len : h2_len);
1286
1287 /* Zero size compare not an issue with DUK_MEMCMP. */
1288 rc = (duk_small_int_t) DUK_MEMCMP((const char *) DUK_HSTRING_GET_DATA(h1),
1289 (const char *) DUK_HSTRING_GET_DATA(h2),
1290 prefix_len);
1291
1292 if (rc < 0) {
1293 ret = -1;
1294 goto done;
1295 } else if (rc > 0) {
1296 ret = 1;
1297 goto done;
1298 }
1299
1300 /* prefix matches, lengths matter now */
1301 if (h1_len > h2_len) {
1302 ret = 1;
1303 goto done;
1304 } else if (h1_len == h2_len) {
1305 DUK_ASSERT(ret == 0);
1306 goto done;
1307 }
1308 ret = -1;
1309 goto done;
1310
1311 done:
1312 duk_push_int(ctx, (duk_int_t) ret);
1313 return 1;
1314 }