/*
 *  Encoding and decoding basic formats: hex, base64.
 *
 *  These are in-place operations which may allow an optimized implementation.
 *
 *  Base-64: https://tools.ietf.org/html/rfc4648#section-4
 */
9 #include "duk_internal.h"
11 /* Shared handling for encode/decode argument. Fast path handling for
12 * buffer and string values because they're the most common. In particular,
13 * avoid creating a temporary string or buffer when possible.
15 DUK_LOCAL
const duk_uint8_t
*duk__prep_codec_arg(duk_context
*ctx
, duk_idx_t index
, duk_size_t
*out_len
) {
16 DUK_ASSERT(duk_is_valid_index(ctx
, index
)); /* checked by caller */
17 if (duk_is_buffer(ctx
, index
)) {
18 return (const duk_uint8_t
*) duk_get_buffer(ctx
, index
, out_len
);
20 return (const duk_uint8_t
*) duk_to_lstring(ctx
, index
, out_len
);
24 #if defined(DUK_USE_BASE64_FASTPATH)
25 DUK_LOCAL
void duk__base64_encode_helper(const duk_uint8_t
*src
, duk_size_t srclen
, duk_uint8_t
*dst
) {
27 duk_size_t n_full
, n_full3
, n_final
;
28 const duk_uint8_t
*src_end_fast
;
30 n_full
= srclen
/ 3; /* full 3-byte -> 4-char conversions */
32 n_final
= srclen
- n_full3
;
33 DUK_ASSERT_DISABLE(n_final
>= 0);
34 DUK_ASSERT(n_final
<= 2);
36 src_end_fast
= src
+ n_full3
;
37 while (DUK_UNLIKELY(src
!= src_end_fast
)) {
38 t
= (duk_uint_t
) (*src
++);
39 t
= (t
<< 8) + (duk_uint_t
) (*src
++);
40 t
= (t
<< 8) + (duk_uint_t
) (*src
++);
42 *dst
++ = duk_base64_enctab
[t
>> 18];
43 *dst
++ = duk_base64_enctab
[(t
>> 12) & 0x3f];
44 *dst
++ = duk_base64_enctab
[(t
>> 6) & 0x3f];
45 *dst
++ = duk_base64_enctab
[t
& 0x3f];
47 #if 0 /* Tested: not faster on x64 */
48 /* aaaaaabb bbbbcccc ccdddddd */
49 dst
[0] = duk_base64_enctab
[(src
[0] >> 2) & 0x3f];
50 dst
[1] = duk_base64_enctab
[((src
[0] << 4) & 0x30) | ((src
[1] >> 4) & 0x0f)];
51 dst
[2] = duk_base64_enctab
[((src
[1] << 2) & 0x3f) | ((src
[2] >> 6) & 0x03)];
52 dst
[3] = duk_base64_enctab
[src
[2] & 0x3f];
61 t
= (duk_uint_t
) (*src
++);
62 *dst
++ = duk_base64_enctab
[t
>> 2]; /* XXXXXX-- */
63 *dst
++ = duk_base64_enctab
[(t
<< 4) & 0x3f]; /* ------XX */
64 *dst
++ = DUK_ASC_EQUALS
;
65 *dst
++ = DUK_ASC_EQUALS
;
70 t
= (duk_uint_t
) (*src
++);
71 t
= (t
<< 8) + (duk_uint_t
) (*src
++);
72 *dst
++ = duk_base64_enctab
[t
>> 10]; /* XXXXXX-- -------- */
73 *dst
++ = duk_base64_enctab
[(t
>> 4) & 0x3f]; /* ------XX XXXX---- */
74 *dst
++ = duk_base64_enctab
[(t
<< 2) & 0x3f]; /* -------- ----XXXX */
75 *dst
++ = DUK_ASC_EQUALS
;
80 #else /* DUK_USE_BASE64_FASTPATH */
81 DUK_LOCAL
void duk__base64_encode_helper(const duk_uint8_t
*src
, duk_size_t srclen
, duk_uint8_t
*dst
) {
82 duk_small_uint_t i
, snip
;
84 duk_uint_fast8_t x
, y
;
85 const duk_uint8_t
*src_end
;
87 src_end
= src
+ srclen
;
89 while (src
< src_end
) {
90 /* read 3 bytes into 't', padded by zero */
93 for (i
= 0; i
< 3; i
++) {
98 t
+= (duk_uint_t
) (*src
++);
103 * Missing bytes snip base64 example
109 DUK_ASSERT(snip
>= 2 && snip
<= 4);
111 for (i
= 0; i
< 4; i
++) {
112 x
= (duk_uint_fast8_t
) ((t
>> 18) & 0x3f);
115 /* A straightforward 64-byte lookup would be faster
116 * and cleaner, but this is shorter.
120 } else if (x
<= 25) {
122 } else if (x
<= 51) {
124 } else if (x
<= 61) {
126 } else if (x
== 62) {
132 *dst
++ = (duk_uint8_t
) y
;
136 #endif /* DUK_USE_BASE64_FASTPATH */
138 #if defined(DUK_USE_BASE64_FASTPATH)
139 DUK_LOCAL duk_bool_t
duk__base64_decode_helper(const duk_uint8_t
*src
, duk_size_t srclen
, duk_uint8_t
*dst
, duk_uint8_t
**out_dst_final
) {
142 duk_small_uint_t n_equal
;
143 duk_small_uint_t n_chars
;
144 const duk_uint8_t
*src_end
;
145 const duk_uint8_t
*src_end_safe
;
147 src_end
= src
+ srclen
;
148 src_end_safe
= src_end
- 4; /* if 'src < src_end_safe', safe to read 4 bytes */
150 /* Innermost fast path processes 4 valid base-64 characters at a time
151 * but bails out on whitespace, padding chars ('=') and invalid chars.
152 * Once the slow path segment has been processed, we return to the
153 * inner fast path again. This handles e.g. base64 with newlines
154 * reasonably well because the majority of a line is in the fast path.
157 /* Fast path, handle units with just actual encoding characters. */
159 while (src
<= src_end_safe
) {
160 /* The lookup byte is intentionally sign extended to (at least)
161 * 32 bits and then ORed. This ensures that is at least 1 byte
162 * is negative, the highest bit of 't' will be set at the end
163 * and we don't need to check every byte.
165 DUK_DDD(DUK_DDDPRINT("fast loop: src=%p, src_end_safe=%p, src_end=%p",
166 (const void *) src
, (const void *) src_end_safe
, (const void *) src_end
));
168 t
= (duk_int_t
) duk_base64_dectab
[*src
++];
169 t
= (t
<< 6) | (duk_int_t
) duk_base64_dectab
[*src
++];
170 t
= (t
<< 6) | (duk_int_t
) duk_base64_dectab
[*src
++];
171 t
= (t
<< 6) | (duk_int_t
) duk_base64_dectab
[*src
++];
173 if (DUK_UNLIKELY(t
< 0)) {
174 DUK_DDD(DUK_DDDPRINT("fast loop unit was not clean, process one slow path unit"));
179 DUK_ASSERT(t
<= 0xffffffL
);
180 DUK_ASSERT((t
>> 24) == 0);
181 *dst
++ = (duk_uint8_t
) (t
>> 16);
182 *dst
++ = (duk_uint8_t
) ((t
>> 8) & 0xff);
183 *dst
++ = (duk_uint8_t
) (t
& 0xff);
186 /* Handle one slow path unit (or finish if we're done). */
192 DUK_DDD(DUK_DDDPRINT("slow loop: src=%p, src_end=%p, n_chars=%ld, n_equal=%ld, t=%ld",
193 (const void *) src
, (const void *) src_end
, (long) n_chars
, (long) n_equal
, (long) t
));
195 if (DUK_UNLIKELY(src
>= src_end
)) {
196 goto done
; /* two level break */
199 x
= duk_base64_dectab
[*src
++];
200 if (DUK_UNLIKELY(x
< 0)) {
202 continue; /* allowed ascii whitespace */
203 } else if (x
== -3) {
211 DUK_ASSERT(x
>= 0 && x
<= 63);
213 /* Don't allow actual chars after equal sign. */
219 if (DUK_UNLIKELY(n_chars
== 3)) {
220 /* Emit 3 bytes and backtrack if there was padding. There's
221 * always space for the whole 3 bytes so no check needed.
223 DUK_ASSERT(t
<= 0xffffffL
);
224 DUK_ASSERT((t
>> 24) == 0);
225 *dst
++ = (duk_uint8_t
) (t
>> 16);
226 *dst
++ = (duk_uint8_t
) ((t
>> 8) & 0xff);
227 *dst
++ = (duk_uint8_t
) (t
& 0xff);
229 if (DUK_UNLIKELY(n_equal
> 0)) {
230 DUK_ASSERT(n_equal
<= 4);
232 /* There may be whitespace between the equal signs. */
236 } else if (n_equal
== 2) {
240 goto error
; /* invalid padding */
243 /* Continue parsing after padding, allows concatenated,
247 break; /* back to fast loop */
254 DUK_DDD(DUK_DDDPRINT("done; src=%p, src_end=%p, n_chars=%ld",
255 (const void *) src
, (const void *) src_end
, (long) n_chars
));
257 DUK_ASSERT(src
== src_end
);
260 /* Here we'd have the option of decoding unpadded base64
261 * (e.g. "xxxxyy" instead of "xxxxyy==". Currently not
267 *out_dst_final
= dst
;
273 #else /* DUK_USE_BASE64_FASTPATH */
274 DUK_LOCAL duk_bool_t
duk__base64_decode_helper(const duk_uint8_t
*src
, duk_size_t srclen
, duk_uint8_t
*dst
, duk_uint8_t
**out_dst_final
) {
276 duk_uint_fast8_t x
, y
;
277 duk_small_uint_t group_idx
;
278 duk_small_uint_t n_equal
;
279 const duk_uint8_t
*src_end
;
281 src_end
= src
+ srclen
;
286 while (src
< src_end
) {
289 if (x
>= 'A' && x
<= 'Z') {
291 } else if (x
>= 'a' && x
<= 'z') {
293 } else if (x
>= '0' && x
<= '9') {
295 } else if (x
== '+') {
297 } else if (x
== '/') {
299 } else if (x
== '=') {
300 /* We don't check the zero padding bytes here right now
301 * (that they're actually zero). This seems to be common
302 * behavior for base-64 decoders.
306 t
<<= 6; /* shift in zeroes */
308 } else if (x
== 0x09 || x
== 0x0a || x
== 0x0d || x
== 0x20) {
309 /* allow basic ASCII whitespace */
316 /* Don't allow mixed padding and actual chars. */
322 if (group_idx
== 3) {
323 /* output 3 bytes from 't' */
324 *dst
++ = (duk_uint8_t
) ((t
>> 16) & 0xff);
325 *dst
++ = (duk_uint8_t
) ((t
>> 8) & 0xff);
326 *dst
++ = (duk_uint8_t
) (t
& 0xff);
328 if (DUK_UNLIKELY(n_equal
> 0)) {
330 DUK_ASSERT(n_equal
<= 4);
333 } else if (n_equal
== 2) {
336 goto error
; /* invalid padding */
339 /* Here we can choose either to end parsing and ignore
340 * whatever follows, or to continue parsing in case
341 * multiple (possibly padded) base64 strings have been
342 * concatenated. Currently, keep on parsing.
354 if (group_idx
!= 0) {
355 /* Here we'd have the option of decoding unpadded base64
356 * (e.g. "xxxxyy" instead of "xxxxyy==". Currently not
362 *out_dst_final
= dst
;
368 #endif /* DUK_USE_BASE64_FASTPATH */
370 DUK_EXTERNAL
const char *duk_base64_encode(duk_context
*ctx
, duk_idx_t index
) {
371 duk_hthread
*thr
= (duk_hthread
*) ctx
;
372 const duk_uint8_t
*src
;
378 DUK_ASSERT_CTX_VALID(ctx
);
380 /* XXX: optimize for string inputs: no need to coerce to a buffer
381 * which makes a copy of the input.
384 index
= duk_require_normalize_index(ctx
, index
);
385 src
= duk__prep_codec_arg(ctx
, index
, &srclen
);
386 /* Note: for srclen=0, src may be NULL */
388 /* Computation must not wrap; this limit works for 32-bit size_t:
389 * >>> srclen = 3221225469
390 * >>> '%x' % ((srclen + 2) / 3 * 4)
393 if (srclen
> 3221225469UL) {
396 dstlen
= (srclen
+ 2) / 3 * 4;
397 dst
= (duk_uint8_t
*) duk_push_fixed_buffer(ctx
, dstlen
);
399 duk__base64_encode_helper((const duk_uint8_t
*) src
, srclen
, dst
);
401 ret
= duk_to_string(ctx
, -1);
402 duk_replace(ctx
, index
);
406 DUK_ERROR_TYPE(thr
, DUK_STR_ENCODE_FAILED
);
407 return NULL
; /* never here */
410 DUK_EXTERNAL
void duk_base64_decode(duk_context
*ctx
, duk_idx_t index
) {
411 duk_hthread
*thr
= (duk_hthread
*) ctx
;
412 const duk_uint8_t
*src
;
416 duk_uint8_t
*dst_final
;
419 DUK_ASSERT_CTX_VALID(ctx
);
421 /* XXX: optimize for buffer inputs: no need to coerce to a string
422 * which causes an unnecessary interning.
425 index
= duk_require_normalize_index(ctx
, index
);
426 src
= duk__prep_codec_arg(ctx
, index
, &srclen
);
428 /* Computation must not wrap, only srclen + 3 is at risk of
429 * wrapping because after that the number gets smaller.
430 * This limit works for 32-bit size_t:
431 * 0x100000000 - 3 - 1 = 4294967292
433 if (srclen
> 4294967292UL) {
436 dstlen
= (srclen
+ 3) / 4 * 3; /* upper limit, assuming no whitespace etc */
437 dst
= (duk_uint8_t
*) duk_push_dynamic_buffer(ctx
, dstlen
);
438 /* Note: for dstlen=0, dst may be NULL */
440 retval
= duk__base64_decode_helper((const duk_uint8_t
*) src
, srclen
, dst
, &dst_final
);
445 /* XXX: convert to fixed buffer? */
446 (void) duk_resize_buffer(ctx
, -1, (duk_size_t
) (dst_final
- dst
));
447 duk_replace(ctx
, index
);
451 DUK_ERROR_TYPE(thr
, DUK_STR_DECODE_FAILED
);
454 DUK_EXTERNAL
const char *duk_hex_encode(duk_context
*ctx
, duk_idx_t index
) {
455 const duk_uint8_t
*inp
;
460 #if defined(DUK_USE_HEX_FASTPATH)
465 DUK_ASSERT_CTX_VALID(ctx
);
467 index
= duk_require_normalize_index(ctx
, index
);
468 inp
= duk__prep_codec_arg(ctx
, index
, &len
);
469 DUK_ASSERT(inp
!= NULL
|| len
== 0);
471 /* Fixed buffer, no zeroing because we'll fill all the data. */
472 buf
= (duk_uint8_t
*) duk_push_buffer_raw(ctx
, len
* 2, DUK_BUF_FLAG_NOZERO
/*flags*/);
473 DUK_ASSERT(buf
!= NULL
);
475 #if defined(DUK_USE_HEX_FASTPATH)
476 DUK_ASSERT((((duk_size_t
) buf
) & 0x01U
) == 0); /* pointer is aligned, guaranteed for fixed buffer */
477 p16
= (duk_uint16_t
*) (void *) buf
;
478 len_safe
= len
& ~0x03U
;
479 for (i
= 0; i
< len_safe
; i
+= 4) {
480 p16
[0] = duk_hex_enctab
[inp
[i
]];
481 p16
[1] = duk_hex_enctab
[inp
[i
+ 1]];
482 p16
[2] = duk_hex_enctab
[inp
[i
+ 2]];
483 p16
[3] = duk_hex_enctab
[inp
[i
+ 3]];
486 for (; i
< len
; i
++) {
487 *p16
++ = duk_hex_enctab
[inp
[i
]];
489 #else /* DUK_USE_HEX_FASTPATH */
490 for (i
= 0; i
< len
; i
++) {
492 t
= (duk_small_uint_t
) inp
[i
];
493 buf
[i
*2 + 0] = duk_lc_digits
[t
>> 4];
494 buf
[i
*2 + 1] = duk_lc_digits
[t
& 0x0f];
496 #endif /* DUK_USE_HEX_FASTPATH */
498 /* XXX: Using a string return value forces a string intern which is
499 * not always necessary. As a rough performance measure, hex encode
500 * time for tests/perf/test-hex-encode.js dropped from ~35s to ~15s
501 * without string coercion. Change to returning a buffer and let the
502 * caller coerce to string if necessary?
505 ret
= duk_to_string(ctx
, -1);
506 duk_replace(ctx
, index
);
510 DUK_EXTERNAL
void duk_hex_decode(duk_context
*ctx
, duk_idx_t index
) {
511 duk_hthread
*thr
= (duk_hthread
*) ctx
;
512 const duk_uint8_t
*inp
;
517 #if defined(DUK_USE_HEX_FASTPATH)
523 DUK_ASSERT_CTX_VALID(ctx
);
525 index
= duk_require_normalize_index(ctx
, index
);
526 inp
= duk__prep_codec_arg(ctx
, index
, &len
);
527 DUK_ASSERT(inp
!= NULL
|| len
== 0);
533 /* Fixed buffer, no zeroing because we'll fill all the data. */
534 buf
= (duk_uint8_t
*) duk_push_buffer_raw(ctx
, len
/ 2, DUK_BUF_FLAG_NOZERO
/*flags*/);
535 DUK_ASSERT(buf
!= NULL
);
537 #if defined(DUK_USE_HEX_FASTPATH)
539 len_safe
= len
& ~0x07U
;
540 for (i
= 0; i
< len_safe
; i
+= 8) {
541 t
= ((duk_int_t
) duk_hex_dectab_shift4
[inp
[i
]]) |
542 ((duk_int_t
) duk_hex_dectab
[inp
[i
+ 1]]);
544 p
[0] = (duk_uint8_t
) t
;
545 t
= ((duk_int_t
) duk_hex_dectab_shift4
[inp
[i
+ 2]]) |
546 ((duk_int_t
) duk_hex_dectab
[inp
[i
+ 3]]);
548 p
[1] = (duk_uint8_t
) t
;
549 t
= ((duk_int_t
) duk_hex_dectab_shift4
[inp
[i
+ 4]]) |
550 ((duk_int_t
) duk_hex_dectab
[inp
[i
+ 5]]);
552 p
[2] = (duk_uint8_t
) t
;
553 t
= ((duk_int_t
) duk_hex_dectab_shift4
[inp
[i
+ 6]]) |
554 ((duk_int_t
) duk_hex_dectab
[inp
[i
+ 7]]);
556 p
[3] = (duk_uint8_t
) t
;
559 /* Check if any lookup above had a negative result. */
560 if (DUK_UNLIKELY(chk
< 0)) {
564 for (; i
< len
; i
+= 2) {
565 t
= (((duk_int_t
) duk_hex_dectab
[inp
[i
]]) << 4) |
566 ((duk_int_t
) duk_hex_dectab
[inp
[i
+ 1]]);
567 if (DUK_UNLIKELY(t
< 0)) {
570 *p
++ = (duk_uint8_t
) t
;
572 #else /* DUK_USE_HEX_FASTPATH */
573 for (i
= 0; i
< len
; i
+= 2) {
574 /* For invalid characters the value -1 gets extended to
575 * at least 16 bits. If either nybble is invalid, the
576 * resulting 't' will be < 0.
578 t
= (((duk_int_t
) duk_hex_dectab
[inp
[i
]]) << 4) |
579 ((duk_int_t
) duk_hex_dectab
[inp
[i
+ 1]]);
580 if (DUK_UNLIKELY(t
< 0)) {
583 buf
[i
>> 1] = (duk_uint8_t
) t
;
585 #endif /* DUK_USE_HEX_FASTPATH */
587 duk_replace(ctx
, index
);
591 DUK_ERROR_TYPE(thr
, DUK_STR_DECODE_FAILED
);
594 DUK_EXTERNAL
const char *duk_json_encode(duk_context
*ctx
, duk_idx_t index
) {
595 #ifdef DUK_USE_ASSERTIONS
596 duk_idx_t top_at_entry
;
600 DUK_ASSERT_CTX_VALID(ctx
);
601 #ifdef DUK_USE_ASSERTIONS
602 top_at_entry
= duk_get_top(ctx
);
605 index
= duk_require_normalize_index(ctx
, index
);
606 duk_bi_json_stringify_helper(ctx
,
608 DUK_INVALID_INDEX
/*idx_replacer*/,
609 DUK_INVALID_INDEX
/*idx_space*/,
611 DUK_ASSERT(duk_is_string(ctx
, -1));
612 duk_replace(ctx
, index
);
613 ret
= duk_get_string(ctx
, index
);
615 DUK_ASSERT(duk_get_top(ctx
) == top_at_entry
);
620 DUK_EXTERNAL
void duk_json_decode(duk_context
*ctx
, duk_idx_t index
) {
621 #ifdef DUK_USE_ASSERTIONS
622 duk_idx_t top_at_entry
;
625 DUK_ASSERT_CTX_VALID(ctx
);
626 #ifdef DUK_USE_ASSERTIONS
627 top_at_entry
= duk_get_top(ctx
);
630 index
= duk_require_normalize_index(ctx
, index
);
631 duk_bi_json_parse_helper(ctx
,
633 DUK_INVALID_INDEX
/*idx_reviver*/,
635 duk_replace(ctx
, index
);
637 DUK_ASSERT(duk_get_top(ctx
) == top_at_entry
);