5 #include "duk_internal.h"
/*
 *  Shared worker behind duk_concat() and duk_join(); the two wrappers call
 *  it with is_join set to 0 and 1 respectively.
 *
 *  Visible behavior:
 *    - count_in <= 0 is special-cased: an API error (invalid count) is
 *      raised in that branch, and a count of exactly zero pushes the empty
 *      string.
 *    - The separator (value at -count - 1) is coerced to a string and its
 *      byte length times (count - 1) seeds the total length, guarded by an
 *      overflow test against DUK_HSTRING_MAX_BYTELEN.
 *    - Each of the 'count' topmost values is coerced to a string and its
 *      byte length is added to the total, checking for wrap-around and for
 *      the maximum string byte length.
 *    - The data is copied into a fixed buffer pushed on the value stack,
 *      the inputs are replaced by the buffer, and the buffer is coerced to
 *      a string.
 *    - A range error (concat result too long) is raised at the end of the
 *      function body.
 *
 *  NOTE(review): this listing has gaps (local declarations, closing braces,
 *  the is_join / else structure and the jump to the range error are not
 *  visible); confirm exact control flow against the full file.
 */
7 DUK_LOCAL
void duk__concat_and_join_helper(duk_context
*ctx
, duk_idx_t count_in
, duk_bool_t is_join
) {
8 duk_hthread
*thr
= (duk_hthread
*) ctx
;
16 DUK_ASSERT_CTX_VALID(ctx
);
/* Non-positive counts are dealt with before any string coercion. */
18 if (DUK_UNLIKELY(count_in
<= 0)) {
20 DUK_ERROR(thr
, DUK_ERR_API_ERROR
, DUK_STR_INVALID_COUNT
);
/* With count_in == 0 the empty string is pushed. */
23 DUK_ASSERT(count_in
== 0);
24 duk_push_hstring_stridx(ctx
, DUK_STRIDX_EMPTY_STRING
);
27 count
= (duk_uint_t
) count_in
;
30 duk_size_t t1
, t2
, limit
;
/* The slot just below the 'count' inputs holds the separator (see the
 * stack layout comments further down); coerce it to a string.
 */
31 h
= duk_to_hstring(ctx
, -((duk_idx_t
) count
) - 1);
32 DUK_ASSERT(h
!= NULL
);
34 /* A bit tricky overflow test, see doc/code-issues.rst. */
/* t1 = separator byte length, t2 = number of separators needed. */
35 t1
= (duk_size_t
) DUK_HSTRING_GET_BYTELEN(h
);
36 t2
= (duk_size_t
) (count
- 1);
37 limit
= (duk_size_t
) DUK_HSTRING_MAX_BYTELEN
;
/* Dividing the limit by t2 detects t1 * t2 > limit without performing a
 * multiplication that could wrap; t2 == 0 is excluded to avoid a division
 * by zero.
 */
38 if (DUK_UNLIKELY(t2
!= 0 && t1
> limit
/ t2
)) {
39 /* Combined size of separators already overflows */
/* Total bytes contributed by separators. */
42 len
= (duk_size_t
) (t1
* t2
);
/* First pass: coerce every input to a string and accumulate byte lengths. */
47 for (i
= count
; i
>= 1; i
--) {
49 duk_to_string(ctx
, -((duk_idx_t
) i
));
50 h
= duk_require_hstring(ctx
, -((duk_idx_t
) i
));
51 new_len
= len
+ (duk_size_t
) DUK_HSTRING_GET_BYTELEN(h
);
53 /* Impose a string maximum length, need to handle overflow
56 if (new_len
< len
|| /* wrapped */
57 new_len
> (duk_size_t
) DUK_HSTRING_MAX_BYTELEN
) {
63 DUK_DDD(DUK_DDDPRINT("join/concat %lu strings, total length %lu bytes",
64 (unsigned long) count
, (unsigned long) len
));
66 /* use stack allocated buffer to ensure reachability in errors (e.g. intern error) */
67 buf
= (duk_uint8_t
*) duk_push_fixed_buffer(ctx
, len
);
68 DUK_ASSERT(buf
!= NULL
);
70 /* [... (sep) str1 str2 ... strN buf] */
/* Second pass: copy string data into the buffer, deepest input (i == count)
 * first.  When joining, the separator is copied before every string except
 * that first one.
 */
73 for (i
= count
; i
>= 1; i
--) {
74 if (is_join
&& i
!= count
) {
75 h
= duk_require_hstring(ctx
, -((duk_idx_t
) count
) - 2); /* extra -1 for buffer */
76 DUK_MEMCPY(buf
+ idx
, DUK_HSTRING_GET_DATA(h
), DUK_HSTRING_GET_BYTELEN(h
));
77 idx
+= DUK_HSTRING_GET_BYTELEN(h
);
79 h
= duk_require_hstring(ctx
, -((duk_idx_t
) i
) - 1); /* extra -1 for buffer */
80 DUK_MEMCPY(buf
+ idx
, DUK_HSTRING_GET_DATA(h
), DUK_HSTRING_GET_BYTELEN(h
));
81 idx
+= DUK_HSTRING_GET_BYTELEN(h
);
/* Every byte accounted for in the first pass must have been written. */
84 DUK_ASSERT(idx
== len
);
86 /* [... (sep) str1 str2 ... strN buf] */
88 /* get rid of the strings early to minimize memory use before intern */
/* NOTE(review): the two replace/pop pairs below presumably belong to the
 * join and non-join branches respectively; the branch itself is not visible
 * in this listing.
 */
91 duk_replace(ctx
, -((duk_idx_t
) count
) - 2); /* overwrite sep */
92 duk_pop_n(ctx
, count
);
94 duk_replace(ctx
, -((duk_idx_t
) count
) - 1); /* overwrite str1 */
95 duk_pop_n(ctx
, count
-1);
/* Coerce the buffer left on the stack top into the result string. */
100 (void) duk_to_string(ctx
, -1);
/* NOTE(review): presumably the target of the overflow checks above; the
 * jump to this point is not visible in this listing.
 */
106 DUK_ERROR(thr
, DUK_ERR_RANGE_ERROR
, DUK_STR_CONCAT_RESULT_TOO_LONG
);
/*
 *  Public entry point for concatenation: validates the context (assertion)
 *  and delegates to duk__concat_and_join_helper() with is_join = 0, passing
 *  'count' through unchanged.
 */
109 DUK_EXTERNAL
void duk_concat(duk_context
*ctx
, duk_idx_t count
) {
110 DUK_ASSERT_CTX_VALID(ctx
);
112 duk__concat_and_join_helper(ctx
, count
, 0 /*is_join*/);
/*
 *  Public entry point for joining with a separator: validates the context
 *  (assertion) and delegates to duk__concat_and_join_helper() with
 *  is_join = 1, passing 'count' through unchanged.
 */
115 DUK_EXTERNAL
void duk_join(duk_context
*ctx
, duk_idx_t count
) {
116 DUK_ASSERT_CTX_VALID(ctx
);
118 duk__concat_and_join_helper(ctx
, count
, 1 /*is_join*/);
/* XXX: could map/decode be unified with duk_unicode_support.c code?
 * Case conversion also needs the surrounding characters, though.
 */
/*
 *  Decode the string at 'index' one code point at a time.
 *
 *  The value at 'index' must be a string (duk_require_hstring()).  The
 *  string's byte range [p_start, p_end) is decoded with
 *  duk_unicode_decode_xutf8_checked() and each result is stored in 'cp' as
 *  an int.
 *
 *  NOTE(review): the loop header and the use of 'callback' / 'udata' are
 *  not visible in this listing; presumably each decoded code point is
 *  passed to callback(udata, cp) -- confirm against the full file.
 */
125 DUK_EXTERNAL
void duk_decode_string(duk_context
*ctx
, duk_idx_t index
, duk_decode_char_function callback
, void *udata
) {
126 duk_hthread
*thr
= (duk_hthread
*) ctx
;
127 duk_hstring
*h_input
;
128 const duk_uint8_t
*p
, *p_start
, *p_end
;
131 DUK_ASSERT_CTX_VALID(ctx
);
133 h_input
= duk_require_hstring(ctx
, index
);
134 DUK_ASSERT(h_input
!= NULL
);
/* Byte range of the input string data: [p_start, p_end). */
136 p_start
= (duk_uint8_t
*) DUK_HSTRING_GET_DATA(h_input
);
137 p_end
= p_start
+ DUK_HSTRING_GET_BYTELEN(h_input
);
/* The decoder receives &p, so it can advance the read pointer itself. */
144 cp
= (int) duk_unicode_decode_xutf8_checked(thr
, &p
, p_start
, p_end
);
/*
 *  Map the string at 'index' through 'callback', one code point at a time,
 *  and replace the value at 'index' with the mapped string.
 *
 *  Visible steps: normalize 'index'; require a string there; initialize a
 *  buffer writer backed by a pushed buffer, sized to the input byte length
 *  as an initial estimate; decode each code point, replace it with
 *  callback(udata, cp), and append the result in extended UTF-8; compact
 *  the buffer; coerce the stack top to a string; duk_replace() it into
 *  'index'.
 *
 *  NOTE(review): the loop header and the assignment of 'bw' are not visible
 *  in this listing.
 */
149 DUK_EXTERNAL
void duk_map_string(duk_context
*ctx
, duk_idx_t index
, duk_map_char_function callback
, void *udata
) {
150 duk_hthread
*thr
= (duk_hthread
*) ctx
;
151 duk_hstring
*h_input
;
152 duk_bufwriter_ctx bw_alloc
;
153 duk_bufwriter_ctx
*bw
;
154 const duk_uint8_t
*p
, *p_start
, *p_end
;
157 DUK_ASSERT_CTX_VALID(ctx
);
/* 'index' is normalized first and is still used by the final duk_replace()
 * after the buffer has been pushed.
 */
159 index
= duk_normalize_index(ctx
, index
);
161 h_input
= duk_require_hstring(ctx
, index
);
162 DUK_ASSERT(h_input
!= NULL
);
165 DUK_BW_INIT_PUSHBUF(thr
, bw
, DUK_HSTRING_GET_BYTELEN(h_input
)); /* reasonable output estimate */
/* Byte range of the input string data: [p_start, p_end). */
167 p_start
= (duk_uint8_t
*) DUK_HSTRING_GET_DATA(h_input
);
168 p_end
= p_start
+ DUK_HSTRING_GET_BYTELEN(h_input
);
172 /* XXX: could write output in chunks with fewer ensure calls,
173 * but relative benefit would be small here.
179 cp
= (int) duk_unicode_decode_xutf8_checked(thr
, &p
, p_start
, p_end
);
180 cp
= callback(udata
, cp
);
182 DUK_BW_WRITE_ENSURE_XUTF8(thr
, bw
, cp
);
185 DUK_BW_COMPACT(thr
, bw
);
186 duk_to_string(ctx
, -1);
187 duk_replace(ctx
, index
);
/*
 *  Replace the string at 'index' with the substring selected by
 *  [start_offset, end_offset).
 *
 *  The offsets are compared against the string's character length (not its
 *  byte length).  end_offset is clamped to the character length and
 *  start_offset is clamped to end_offset, so out-of-range arguments select
 *  a shorter (possibly empty) range.  The character offsets are converted
 *  to byte offsets, that byte range is interned as a new string, and the
 *  result is pushed and duk_replace()'d into 'index'.
 *
 *  'index' must be valid (duk_require_normalize_index()) and must hold a
 *  string (duk_require_hstring()).
 *
 *  NOTE(review): the declarations of 'h' and 'res' and the closing braces
 *  are not visible in this listing.
 */
190 DUK_EXTERNAL
void duk_substring(duk_context
*ctx
, duk_idx_t index
, duk_size_t start_offset
, duk_size_t end_offset
) {
191 duk_hthread
*thr
= (duk_hthread
*) ctx
;
194 duk_size_t start_byte_offset
;
195 duk_size_t end_byte_offset
;
197 DUK_ASSERT_CTX_VALID(ctx
);
199 index
= duk_require_normalize_index(ctx
, index
);
200 h
= duk_require_hstring(ctx
, index
);
201 DUK_ASSERT(h
!= NULL
);
/* Clamp the end offset to the string's character length. */
203 if (end_offset
>= DUK_HSTRING_GET_CHARLEN(h
)) {
204 end_offset
= DUK_HSTRING_GET_CHARLEN(h
);
/* Clamp the start offset so the range is never inverted. */
206 if (start_offset
> end_offset
) {
207 start_offset
= end_offset
;
/* The ">= 0" checks are disabled asserts: the offsets are duk_size_t. */
210 DUK_ASSERT_DISABLE(start_offset
>= 0);
211 DUK_ASSERT(start_offset
<= end_offset
&& start_offset
<= DUK_HSTRING_GET_CHARLEN(h
));
212 DUK_ASSERT_DISABLE(end_offset
>= 0);
213 DUK_ASSERT(end_offset
>= start_offset
&& end_offset
<= DUK_HSTRING_GET_CHARLEN(h
));
215 /* guaranteed by string limits */
216 DUK_ASSERT(start_offset
<= DUK_UINT32_MAX
);
217 DUK_ASSERT(end_offset
<= DUK_UINT32_MAX
);
/* Translate character offsets into byte offsets within the string data. */
219 start_byte_offset
= (duk_size_t
) duk_heap_strcache_offset_char2byte(thr
, h
, (duk_uint_fast32_t
) start_offset
);
220 end_byte_offset
= (duk_size_t
) duk_heap_strcache_offset_char2byte(thr
, h
, (duk_uint_fast32_t
) end_offset
);
222 DUK_ASSERT(end_byte_offset
>= start_byte_offset
);
223 DUK_ASSERT(end_byte_offset
- start_byte_offset
<= DUK_UINT32_MAX
); /* guaranteed by string limits */
225 /* no size check is necessary */
/* Intern the selected byte range as the result string. */
226 res
= duk_heap_string_intern_checked(thr
,
227 DUK_HSTRING_GET_DATA(h
) + start_byte_offset
,
228 (duk_uint32_t
) (end_byte_offset
- start_byte_offset
));
/* Push the result and move it into the original slot. */
230 duk_push_hstring(ctx
, res
);
231 duk_replace(ctx
, index
);
/* XXX: this is quite clunky.  Add Unicode helpers to scan backwards and
 * forwards with a callback to process codepoints?
 */
/*
 *  Trim the string at 'index' and replace it with the trimmed result.
 *
 *  A code point counts as trimmable when duk_unicode_is_whitespace() or
 *  duk_unicode_is_line_terminator() accepts it.  The visible code decodes
 *  code points to find the first non-trimmable one, then walks backwards
 *  from the end, using the (byte & 0xc0) != 0x80 test to find code point
 *  start bytes, to find the last non-trimmable one.  [q_start, q_end)
 *  delimits the trimmed part.  If nothing was trimmed, interning is
 *  avoided; otherwise the trimmed range is pushed with duk_push_lstring()
 *  and duk_replace()'d into 'index'.
 *
 *  'index' must be valid (duk_require_normalize_index()) and must hold a
 *  string (duk_require_hstring()).
 *
 *  NOTE(review): most loop bodies, the declarations of 'h' and 'cp', and
 *  the early-exit paths are not visible in this listing; confirm details
 *  against the full file.
 */
237 DUK_EXTERNAL
void duk_trim(duk_context
*ctx
, duk_idx_t index
) {
238 duk_hthread
*thr
= (duk_hthread
*) ctx
;
240 const duk_uint8_t
*p
, *p_start
, *p_end
, *p_tmp1
, *p_tmp2
; /* pointers for scanning */
241 const duk_uint8_t
*q_start
, *q_end
; /* start (incl) and end (excl) of trimmed part */
244 DUK_ASSERT_CTX_VALID(ctx
);
246 index
= duk_require_normalize_index(ctx
, index
);
247 h
= duk_require_hstring(ctx
, index
);
248 DUK_ASSERT(h
!= NULL
);
/* Byte range of the input string data: [p_start, p_end). */
250 p_start
= DUK_HSTRING_GET_DATA(h
);
251 p_end
= p_start
+ DUK_HSTRING_GET_BYTELEN(h
);
/* Forward scan: decode through the temporary pointer p_tmp1 and test the
 * code point for whitespace / line terminator.
 */
256 cp
= (duk_codepoint_t
) duk_unicode_decode_xutf8_checked(thr
, &p_tmp1
, p_start
, p_end
);
257 if (!(duk_unicode_is_whitespace(cp
) || duk_unicode_is_line_terminator(cp
))) {
264 /* entire string is whitespace */
/* Backward scan: the inner loop looks for a byte that is not a
 * continuation byte (top bits not 10xxxxxx), i.e. a code point start.
 */
270 while (p
> p_start
) {
272 while (p
> p_start
) {
274 if (((*p
) & 0xc0) != 0x80) {
280 cp
= (duk_codepoint_t
) duk_unicode_decode_xutf8_checked(thr
, &p_tmp2
, p_start
, p_end
);
281 if (!(duk_unicode_is_whitespace(cp
) || duk_unicode_is_line_terminator(cp
))) {
289 /* This may happen when forward and backward scanning disagree
290 * (possible for non-extended-UTF-8 strings).
292 if (q_end
< q_start
) {
296 DUK_ASSERT(q_start
>= p_start
&& q_start
<= p_end
);
297 DUK_ASSERT(q_end
>= p_start
&& q_end
<= p_end
);
298 DUK_ASSERT(q_end
>= q_start
);
300 DUK_DDD(DUK_DDDPRINT("trim: p_start=%p, p_end=%p, q_start=%p, q_end=%p",
301 (void *) p_start
, (void *) p_end
, (void *) q_start
, (void *) q_end
));
303 if (q_start
== p_start
&& q_end
== p_end
) {
304 DUK_DDD(DUK_DDDPRINT("nothing was trimmed: avoid interning (hashing etc)"));
308 duk_push_lstring(ctx
, (const char *) q_start
, (duk_size_t
) (q_end
- q_start
));
309 duk_replace(ctx
, index
);
/*
 *  Return the code point at character offset 'char_offset' of the string
 *  at 'index'.
 *
 *  The value at 'index' must be a string (duk_require_hstring()).  Offsets
 *  at or beyond the string's character length are handled by a separate
 *  branch; in-range offsets are looked up with
 *  duk_hstring_char_code_at_raw() and the result is returned as a
 *  duk_codepoint_t.
 *
 *  NOTE(review): the body of the out-of-range branch (and thus the value
 *  returned for such offsets) and the declarations of 'h' and 'cp' are not
 *  visible in this listing.
 */
312 DUK_EXTERNAL duk_codepoint_t
duk_char_code_at(duk_context
*ctx
, duk_idx_t index
, duk_size_t char_offset
) {
313 duk_hthread
*thr
= (duk_hthread
*) ctx
;
317 DUK_ASSERT_CTX_VALID(ctx
);
319 h
= duk_require_hstring(ctx
, index
);
320 DUK_ASSERT(h
!= NULL
);
322 DUK_ASSERT_DISABLE(char_offset
>= 0); /* always true, arg is unsigned */
/* Out-of-range offsets are filtered out before the raw lookup. */
323 if (char_offset
>= DUK_HSTRING_GET_CHARLEN(h
)) {
327 DUK_ASSERT(char_offset
<= DUK_UINT_MAX
); /* guaranteed by string limits */
328 cp
= duk_hstring_char_code_at_raw(thr
, h
, (duk_uint_t
) char_offset
);
329 return (duk_codepoint_t
) cp
;