]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Encoding and decoding basic formats: hex, base64. | |
3 | * | |
4 | * These are in-place operations which may allow an optimized implementation. | |
5 | */ | |
6 | ||
7 | #include "duk_internal.h" | |
8 | ||
9 | /* dst length must be exactly ceil(len/3)*4 */ | |
10 | DUK_LOCAL void duk__base64_encode_helper(const duk_uint8_t *src, const duk_uint8_t *src_end, | |
11 | duk_uint8_t *dst, duk_uint8_t *dst_end) { | |
12 | duk_small_uint_t i, snip; | |
13 | duk_uint_fast32_t t; | |
14 | duk_uint_fast8_t x, y; | |
15 | ||
16 | DUK_UNREF(dst_end); | |
17 | ||
18 | while (src < src_end) { | |
19 | /* read 3 bytes into 't', padded by zero */ | |
20 | snip = 4; | |
21 | t = 0; | |
22 | for (i = 0; i < 3; i++) { | |
23 | t = t << 8; | |
24 | if (src >= src_end) { | |
25 | snip--; | |
26 | } else { | |
27 | t += (duk_uint_fast32_t) (*src++); | |
28 | } | |
29 | } | |
30 | ||
31 | /* | |
32 | * Missing bytes snip base64 example | |
33 | * 0 4 XXXX | |
34 | * 1 3 XXX= | |
35 | * 2 2 XX== | |
36 | */ | |
37 | ||
38 | DUK_ASSERT(snip >= 2 && snip <= 4); | |
39 | ||
40 | for (i = 0; i < 4; i++) { | |
41 | x = (duk_uint_fast8_t) ((t >> 18) & 0x3f); | |
42 | t = t << 6; | |
43 | ||
44 | /* A straightforward 64-byte lookup would be faster | |
45 | * and cleaner, but this is shorter. | |
46 | */ | |
47 | if (i >= snip) { | |
48 | y = '='; | |
49 | } else if (x <= 25) { | |
50 | y = x + 'A'; | |
51 | } else if (x <= 51) { | |
52 | y = x - 26 + 'a'; | |
53 | } else if (x <= 61) { | |
54 | y = x - 52 + '0'; | |
55 | } else if (x == 62) { | |
56 | y = '+'; | |
57 | } else { | |
58 | y = '/'; | |
59 | } | |
60 | ||
61 | DUK_ASSERT(dst < dst_end); | |
62 | *dst++ = (duk_uint8_t) y; | |
63 | } | |
64 | } | |
65 | } | |
66 | ||
67 | DUK_LOCAL duk_bool_t duk__base64_decode_helper(const duk_uint8_t *src, const duk_uint8_t *src_end, | |
68 | duk_uint8_t *dst, duk_uint8_t *dst_end, duk_uint8_t **out_dst_final) { | |
69 | duk_uint_fast32_t t; | |
70 | duk_uint_fast8_t x, y; | |
71 | duk_small_uint_t group_idx; | |
72 | ||
73 | DUK_UNREF(dst_end); | |
74 | ||
75 | t = 0; | |
76 | group_idx = 0; | |
77 | ||
78 | while (src < src_end) { | |
79 | x = *src++; | |
80 | ||
81 | if (x >= 'A' && x <= 'Z') { | |
82 | y = x - 'A' + 0; | |
83 | } else if (x >= 'a' && x <= 'z') { | |
84 | y = x - 'a' + 26; | |
85 | } else if (x >= '0' && x <= '9') { | |
86 | y = x - '0' + 52; | |
87 | } else if (x == '+') { | |
88 | y = 62; | |
89 | } else if (x == '/') { | |
90 | y = 63; | |
91 | } else if (x == '=') { | |
92 | /* We don't check the zero padding bytes here right now. | |
93 | * This seems to be common behavior for base-64 decoders. | |
94 | */ | |
95 | ||
96 | if (group_idx == 2) { | |
97 | /* xx== -> 1 byte, t contains 12 bits, 4 on right are zero */ | |
98 | t = t >> 4; | |
99 | DUK_ASSERT(dst < dst_end); | |
100 | *dst++ = (duk_uint8_t) t; | |
101 | ||
102 | if (src >= src_end) { | |
103 | goto error; | |
104 | } | |
105 | x = *src++; | |
106 | if (x != '=') { | |
107 | goto error; | |
108 | } | |
109 | } else if (group_idx == 3) { | |
110 | /* xxx= -> 2 bytes, t contains 18 bits, 2 on right are zero */ | |
111 | t = t >> 2; | |
112 | DUK_ASSERT(dst < dst_end); | |
113 | *dst++ = (duk_uint8_t) ((t >> 8) & 0xff); | |
114 | DUK_ASSERT(dst < dst_end); | |
115 | *dst++ = (duk_uint8_t) (t & 0xff); | |
116 | } else { | |
117 | goto error; | |
118 | } | |
119 | ||
120 | /* Here we can choose either to end parsing and ignore | |
121 | * whatever follows, or to continue parsing in case | |
122 | * multiple (possibly padded) base64 strings have been | |
123 | * concatenated. Currently, keep on parsing. | |
124 | */ | |
125 | t = 0; | |
126 | group_idx = 0; | |
127 | continue; | |
128 | } else if (x == 0x09 || x == 0x0a || x == 0x0d || x == 0x20) { | |
129 | /* allow basic ASCII whitespace */ | |
130 | continue; | |
131 | } else { | |
132 | goto error; | |
133 | } | |
134 | ||
135 | t = (t << 6) + y; | |
136 | ||
137 | if (group_idx == 3) { | |
138 | /* output 3 bytes from 't' */ | |
139 | DUK_ASSERT(dst < dst_end); | |
140 | *dst++ = (duk_uint8_t) ((t >> 16) & 0xff); | |
141 | DUK_ASSERT(dst < dst_end); | |
142 | *dst++ = (duk_uint8_t) ((t >> 8) & 0xff); | |
143 | DUK_ASSERT(dst < dst_end); | |
144 | *dst++ = (duk_uint8_t) (t & 0xff); | |
145 | t = 0; | |
146 | group_idx = 0; | |
147 | } else { | |
148 | group_idx++; | |
149 | } | |
150 | } | |
151 | ||
152 | if (group_idx != 0) { | |
153 | /* Here we'd have the option of decoding unpadded base64 | |
154 | * (e.g. "xxxxyy" instead of "xxxxyy==". Currently not | |
155 | * accepted. | |
156 | */ | |
157 | goto error; | |
158 | } | |
159 | ||
160 | *out_dst_final = dst; | |
161 | return 1; | |
162 | ||
163 | error: | |
164 | return 0; | |
165 | } | |
166 | ||
167 | /* Shared handling for encode/decode argument. Fast path handling for | |
168 | * buffer and string values because they're the most common. In particular, | |
169 | * avoid creating a temporary string or buffer when possible. | |
170 | */ | |
171 | DUK_LOCAL const duk_uint8_t *duk__prep_codec_arg(duk_context *ctx, duk_idx_t index, duk_size_t *out_len) { | |
172 | DUK_ASSERT(duk_is_valid_index(ctx, index)); /* checked by caller */ | |
173 | if (duk_is_buffer(ctx, index)) { | |
174 | return (const duk_uint8_t *) duk_get_buffer(ctx, index, out_len); | |
175 | } else { | |
176 | return (const duk_uint8_t *) duk_to_lstring(ctx, index, out_len); | |
177 | } | |
178 | } | |
179 | ||
180 | DUK_EXTERNAL const char *duk_base64_encode(duk_context *ctx, duk_idx_t index) { | |
181 | duk_hthread *thr = (duk_hthread *) ctx; | |
182 | duk_uint8_t *src; | |
183 | duk_size_t srclen; | |
184 | duk_size_t dstlen; | |
185 | duk_uint8_t *dst; | |
186 | const char *ret; | |
187 | ||
188 | DUK_ASSERT_CTX_VALID(ctx); | |
189 | ||
190 | /* XXX: optimize for string inputs: no need to coerce to a buffer | |
191 | * which makes a copy of the input. | |
192 | */ | |
193 | ||
194 | index = duk_require_normalize_index(ctx, index); | |
195 | src = (duk_uint8_t *) duk_to_buffer(ctx, index, &srclen); | |
196 | /* Note: for srclen=0, src may be NULL */ | |
197 | ||
198 | /* Computation must not wrap; this limit works for 32-bit size_t: | |
199 | * >>> srclen = 3221225469 | |
200 | * >>> '%x' % ((srclen + 2) / 3 * 4) | |
201 | * 'fffffffc' | |
202 | */ | |
203 | if (srclen > 3221225469UL) { | |
204 | goto type_error; | |
205 | } | |
206 | dstlen = (srclen + 2) / 3 * 4; | |
207 | dst = (duk_uint8_t *) duk_push_fixed_buffer(ctx, dstlen); | |
208 | ||
209 | duk__base64_encode_helper((const duk_uint8_t *) src, (const duk_uint8_t *) (src + srclen), | |
210 | dst, (dst + dstlen)); | |
211 | ||
212 | ret = duk_to_string(ctx, -1); | |
213 | duk_replace(ctx, index); | |
214 | return ret; | |
215 | ||
216 | type_error: | |
217 | DUK_ERROR(thr, DUK_ERR_TYPE_ERROR, DUK_STR_ENCODE_FAILED); | |
218 | return NULL; /* never here */ | |
219 | } | |
220 | ||
221 | DUK_EXTERNAL void duk_base64_decode(duk_context *ctx, duk_idx_t index) { | |
222 | duk_hthread *thr = (duk_hthread *) ctx; | |
223 | const duk_uint8_t *src; | |
224 | duk_size_t srclen; | |
225 | duk_size_t dstlen; | |
226 | duk_uint8_t *dst; | |
227 | duk_uint8_t *dst_final; | |
228 | duk_bool_t retval; | |
229 | ||
230 | DUK_ASSERT_CTX_VALID(ctx); | |
231 | ||
232 | /* XXX: optimize for buffer inputs: no need to coerce to a string | |
233 | * which causes an unnecessary interning. | |
234 | */ | |
235 | ||
236 | index = duk_require_normalize_index(ctx, index); | |
237 | src = (const duk_uint8_t *) duk_to_lstring(ctx, index, &srclen); | |
238 | ||
239 | /* Computation must not wrap, only srclen + 3 is at risk of | |
240 | * wrapping because after that the number gets smaller. | |
241 | * This limit works for 32-bit size_t: | |
242 | * 0x100000000 - 3 - 1 = 4294967292 | |
243 | */ | |
244 | if (srclen > 4294967292UL) { | |
245 | goto type_error; | |
246 | } | |
247 | dstlen = (srclen + 3) / 4 * 3; /* upper limit */ | |
248 | dst = (duk_uint8_t *) duk_push_dynamic_buffer(ctx, dstlen); | |
249 | /* Note: for dstlen=0, dst may be NULL */ | |
250 | ||
251 | retval = duk__base64_decode_helper((const duk_uint8_t *) src, (const duk_uint8_t *) (src + srclen), | |
252 | dst, dst + dstlen, &dst_final); | |
253 | if (!retval) { | |
254 | goto type_error; | |
255 | } | |
256 | ||
257 | /* XXX: convert to fixed buffer? */ | |
258 | (void) duk_resize_buffer(ctx, -1, (duk_size_t) (dst_final - dst)); | |
259 | duk_replace(ctx, index); | |
260 | return; | |
261 | ||
262 | type_error: | |
263 | DUK_ERROR(thr, DUK_ERR_TYPE_ERROR, DUK_STR_DECODE_FAILED); | |
264 | } | |
265 | ||
266 | DUK_EXTERNAL const char *duk_hex_encode(duk_context *ctx, duk_idx_t index) { | |
267 | const duk_uint8_t *inp; | |
268 | duk_size_t len; | |
269 | duk_size_t i; | |
270 | duk_small_uint_t t; | |
271 | duk_uint8_t *buf; | |
272 | const char *ret; | |
273 | ||
274 | DUK_ASSERT_CTX_VALID(ctx); | |
275 | ||
276 | index = duk_require_normalize_index(ctx, index); | |
277 | inp = duk__prep_codec_arg(ctx, index, &len); | |
278 | DUK_ASSERT(inp != NULL || len == 0); | |
279 | ||
280 | /* Fixed buffer, no zeroing because we'll fill all the data. */ | |
281 | buf = (duk_uint8_t *) duk_push_buffer_raw(ctx, len * 2, DUK_BUF_FLAG_NOZERO /*flags*/); | |
282 | DUK_ASSERT(buf != NULL); | |
283 | ||
284 | for (i = 0; i < len; i++) { | |
285 | /* XXX: by using two 256-entry tables could avoid shifting and masking. */ | |
286 | t = (duk_small_uint_t) inp[i]; | |
287 | buf[i*2 + 0] = duk_lc_digits[t >> 4]; | |
288 | buf[i*2 + 1] = duk_lc_digits[t & 0x0f]; | |
289 | } | |
290 | ||
291 | /* XXX: Using a string return value forces a string intern which is | |
292 | * not always necessary. As a rough performance measure, hex encode | |
293 | * time for tests/perf/test-hex-encode.js dropped from ~35s to ~15s | |
294 | * without string coercion. Change to returning a buffer and let the | |
295 | * caller coerce to string if necessary? | |
296 | */ | |
297 | ||
298 | ret = duk_to_string(ctx, -1); | |
299 | duk_replace(ctx, index); | |
300 | return ret; | |
301 | } | |
302 | ||
303 | DUK_EXTERNAL void duk_hex_decode(duk_context *ctx, duk_idx_t index) { | |
304 | duk_hthread *thr = (duk_hthread *) ctx; | |
305 | const duk_uint8_t *inp; | |
306 | duk_size_t len; | |
307 | duk_size_t i; | |
308 | duk_small_int_t t; | |
309 | duk_uint8_t *buf; | |
310 | ||
311 | DUK_ASSERT_CTX_VALID(ctx); | |
312 | ||
313 | index = duk_require_normalize_index(ctx, index); | |
314 | inp = duk__prep_codec_arg(ctx, index, &len); | |
315 | DUK_ASSERT(inp != NULL || len == 0); | |
316 | ||
317 | if (len & 0x01) { | |
318 | goto type_error; | |
319 | } | |
320 | ||
321 | /* Fixed buffer, no zeroing because we'll fill all the data. */ | |
322 | buf = (duk_uint8_t *) duk_push_buffer_raw(ctx, len / 2, DUK_BUF_FLAG_NOZERO /*flags*/); | |
323 | DUK_ASSERT(buf != NULL); | |
324 | ||
325 | for (i = 0; i < len; i += 2) { | |
326 | /* For invalid characters the value -1 gets extended to | |
327 | * at least 16 bits. If either nybble is invalid, the | |
328 | * resulting 't' will be < 0. | |
329 | */ | |
330 | t = (((duk_small_int_t) duk_hex_dectab[inp[i]]) << 4) | | |
331 | ((duk_small_int_t) duk_hex_dectab[inp[i + 1]]); | |
332 | if (DUK_UNLIKELY(t < 0)) { | |
333 | goto type_error; | |
334 | } | |
335 | buf[i >> 1] = (duk_uint8_t) t; | |
336 | } | |
337 | ||
338 | duk_replace(ctx, index); | |
339 | return; | |
340 | ||
341 | type_error: | |
342 | DUK_ERROR(thr, DUK_ERR_TYPE_ERROR, DUK_STR_DECODE_FAILED); | |
343 | } | |
344 | ||
345 | DUK_EXTERNAL const char *duk_json_encode(duk_context *ctx, duk_idx_t index) { | |
346 | #ifdef DUK_USE_ASSERTIONS | |
347 | duk_idx_t top_at_entry; | |
348 | #endif | |
349 | const char *ret; | |
350 | ||
351 | DUK_ASSERT_CTX_VALID(ctx); | |
352 | #ifdef DUK_USE_ASSERTIONS | |
353 | top_at_entry = duk_get_top(ctx); | |
354 | #endif | |
355 | ||
356 | index = duk_require_normalize_index(ctx, index); | |
357 | duk_bi_json_stringify_helper(ctx, | |
358 | index /*idx_value*/, | |
359 | DUK_INVALID_INDEX /*idx_replacer*/, | |
360 | DUK_INVALID_INDEX /*idx_space*/, | |
361 | 0 /*flags*/); | |
362 | DUK_ASSERT(duk_is_string(ctx, -1)); | |
363 | duk_replace(ctx, index); | |
364 | ret = duk_get_string(ctx, index); | |
365 | ||
366 | DUK_ASSERT(duk_get_top(ctx) == top_at_entry); | |
367 | ||
368 | return ret; | |
369 | } | |
370 | ||
371 | DUK_EXTERNAL void duk_json_decode(duk_context *ctx, duk_idx_t index) { | |
372 | #ifdef DUK_USE_ASSERTIONS | |
373 | duk_idx_t top_at_entry; | |
374 | #endif | |
375 | ||
376 | DUK_ASSERT_CTX_VALID(ctx); | |
377 | #ifdef DUK_USE_ASSERTIONS | |
378 | top_at_entry = duk_get_top(ctx); | |
379 | #endif | |
380 | ||
381 | index = duk_require_normalize_index(ctx, index); | |
382 | duk_bi_json_parse_helper(ctx, | |
383 | index /*idx_value*/, | |
384 | DUK_INVALID_INDEX /*idx_reviver*/, | |
385 | 0 /*flags*/); | |
386 | duk_replace(ctx, index); | |
387 | ||
388 | DUK_ASSERT(duk_get_top(ctx) == top_at_entry); | |
389 | } |