]> git.proxmox.com Git - ceph.git/blob - ceph/src/civetweb/src/third_party/duktape-1.3.0/src-separate/duk_lexer.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / civetweb / src / third_party / duktape-1.3.0 / src-separate / duk_lexer.h
/*
 *  Lexer defines.
 */
4
5 #ifndef DUK_LEXER_H_INCLUDED
6 #define DUK_LEXER_H_INCLUDED
7
8 typedef void (*duk_re_range_callback)(void *user, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct);
9
/*
 *  A token is interpreted as any possible production of InputElementDiv
 *  and InputElementRegExp, see E5 Section 7 in its entirety.  Note that
 *  the E5 "Token" production does not cover all actual tokens of the
 *  language (which is explicitly stated in the specification, Section 7.5).
 *  Null and boolean literals are defined as part of both ReservedWord
 *  (E5 Section 7.6.1) and Literal (E5 Section 7.8) productions.  Here,
 *  null and boolean values have literal tokens, and are not reserved
 *  words.
 *
 *  Decimal literal negative/positive sign is -not- part of DUK_TOK_NUMBER.
 *  The number tokens always have a non-negative value.  The unary minus
 *  operator in "-1.0" is optimized during compilation to yield a single
 *  negative constant.
 *
 *  Token numbering is free except that reserved words are required to be
 *  in a continuous range and in a particular order.  See genstrings.py.
 */
28
/* Thin wrapper around duk_lexer_initctx(). */
#define DUK_LEXER_INITCTX(ctx)        duk_lexer_initctx((ctx))

/* Thin wrapper around duk_lexer_setpoint(). */
#define DUK_LEXER_SETPOINT(ctx,pt)    duk_lexer_setpoint((ctx), (pt))

/* Copy the offset and line of the next code point (window[0]) of 'ctx'
 * into the point structure 'pt'.  Both arguments are evaluated twice, so
 * they must be free of side effects.
 */
#define DUK_LEXER_GETPOINT(ctx,pt)    do { (pt)->offset = (ctx)->window[0].offset; \
                                           (pt)->line = (ctx)->window[0].line; } while (0)
35
/* Currently 6 characters of lookahead are actually needed (duk_lexer.c). */
#define DUK_LEXER_WINDOW_SIZE                     6
/* The buffer backing the window only exists in the sliding window variant. */
#if defined(DUK_USE_LEXER_SLIDING_WINDOW)
#define DUK_LEXER_BUFFER_SIZE                     64
#endif
41
/* Token type values.  These stay preprocessor defines (not an enum) because
 * they are compared in #if sanity checks.
 */
#define DUK_TOK_MINVAL                            0

/* returned after EOF (infinite amount) */
#define DUK_TOK_EOF                               0

/* identifier names (E5 Section 7.6) */
#define DUK_TOK_IDENTIFIER                        1

/* reserved words: keywords */
#define DUK_TOK_START_RESERVED                    2
#define DUK_TOK_BREAK                             2
#define DUK_TOK_CASE                              3
#define DUK_TOK_CATCH                             4
#define DUK_TOK_CONTINUE                          5
#define DUK_TOK_DEBUGGER                          6
#define DUK_TOK_DEFAULT                           7
#define DUK_TOK_DELETE                            8
#define DUK_TOK_DO                                9
#define DUK_TOK_ELSE                              10
#define DUK_TOK_FINALLY                           11
#define DUK_TOK_FOR                               12
#define DUK_TOK_FUNCTION                          13
#define DUK_TOK_IF                                14
#define DUK_TOK_IN                                15
#define DUK_TOK_INSTANCEOF                        16
#define DUK_TOK_NEW                               17
#define DUK_TOK_RETURN                            18
#define DUK_TOK_SWITCH                            19
#define DUK_TOK_THIS                              20
#define DUK_TOK_THROW                             21
#define DUK_TOK_TRY                               22
#define DUK_TOK_TYPEOF                            23
#define DUK_TOK_VAR                               24
#define DUK_TOK_VOID                              25
#define DUK_TOK_WHILE                             26
#define DUK_TOK_WITH                              27

/* reserved words: future reserved words */
#define DUK_TOK_CLASS                             28
#define DUK_TOK_CONST                             29
#define DUK_TOK_ENUM                              30
#define DUK_TOK_EXPORT                            31
#define DUK_TOK_EXTENDS                           32
#define DUK_TOK_IMPORT                            33
#define DUK_TOK_SUPER                             34

/* "null", "true", and "false" are always reserved words.
 * Note that "get" and "set" are not!
 */
#define DUK_TOK_NULL                              35
#define DUK_TOK_TRUE                              36
#define DUK_TOK_FALSE                             37

/* reserved words: additional future reserved words in strict mode */
#define DUK_TOK_START_STRICT_RESERVED             38  /* inclusive */
#define DUK_TOK_IMPLEMENTS                        38
#define DUK_TOK_INTERFACE                         39
#define DUK_TOK_LET                               40
#define DUK_TOK_PACKAGE                           41
#define DUK_TOK_PRIVATE                           42
#define DUK_TOK_PROTECTED                         43
#define DUK_TOK_PUBLIC                            44
#define DUK_TOK_STATIC                            45
#define DUK_TOK_YIELD                             46

#define DUK_TOK_END_RESERVED                      47  /* exclusive */

/* "get" and "set" are tokens but NOT ReservedWords.  They are currently
 * parsed as identifiers and these defines are actually now unused.
 */
#define DUK_TOK_GET                               47
#define DUK_TOK_SET                               48

/* punctuators (unlike the spec, also includes "/" and "/=") */
#define DUK_TOK_LCURLY                            49
#define DUK_TOK_RCURLY                            50
#define DUK_TOK_LBRACKET                          51
#define DUK_TOK_RBRACKET                          52
#define DUK_TOK_LPAREN                            53
#define DUK_TOK_RPAREN                            54
#define DUK_TOK_PERIOD                            55
#define DUK_TOK_SEMICOLON                         56
#define DUK_TOK_COMMA                             57
#define DUK_TOK_LT                                58
#define DUK_TOK_GT                                59
#define DUK_TOK_LE                                60
#define DUK_TOK_GE                                61
#define DUK_TOK_EQ                                62
#define DUK_TOK_NEQ                               63
#define DUK_TOK_SEQ                               64
#define DUK_TOK_SNEQ                              65
#define DUK_TOK_ADD                               66
#define DUK_TOK_SUB                               67
#define DUK_TOK_MUL                               68
#define DUK_TOK_DIV                               69
#define DUK_TOK_MOD                               70
#define DUK_TOK_INCREMENT                         71
#define DUK_TOK_DECREMENT                         72
#define DUK_TOK_ALSHIFT                           73  /* named "arithmetic" because result is signed */
#define DUK_TOK_ARSHIFT                           74
#define DUK_TOK_RSHIFT                            75
#define DUK_TOK_BAND                              76
#define DUK_TOK_BOR                               77
#define DUK_TOK_BXOR                              78
#define DUK_TOK_LNOT                              79
#define DUK_TOK_BNOT                              80
#define DUK_TOK_LAND                              81
#define DUK_TOK_LOR                               82
#define DUK_TOK_QUESTION                          83
#define DUK_TOK_COLON                             84
#define DUK_TOK_EQUALSIGN                         85
#define DUK_TOK_ADD_EQ                            86
#define DUK_TOK_SUB_EQ                            87
#define DUK_TOK_MUL_EQ                            88
#define DUK_TOK_DIV_EQ                            89
#define DUK_TOK_MOD_EQ                            90
#define DUK_TOK_ALSHIFT_EQ                        91
#define DUK_TOK_ARSHIFT_EQ                        92
#define DUK_TOK_RSHIFT_EQ                         93
#define DUK_TOK_BAND_EQ                           94
#define DUK_TOK_BOR_EQ                            95
#define DUK_TOK_BXOR_EQ                           96

/* literals (E5 Section 7.8), except null, true, false, which are treated
 * like reserved words (above).
 */
#define DUK_TOK_NUMBER                            97
#define DUK_TOK_STRING                            98
#define DUK_TOK_REGEXP                            99

#define DUK_TOK_MAXVAL                            99  /* inclusive */
173
/* Convert heap string index to a token (reserved words).  Relies on the
 * reserved word strings and the reserved word tokens being numbered in the
 * same order, which the checks below verify at compile time.
 */
#define DUK_STRIDX_TO_TOK(x)                        ((x) - DUK_STRIDX_START_RESERVED + DUK_TOK_START_RESERVED)

/* Sanity check */
#if (DUK_TOK_MAXVAL > 255)
#error DUK_TOK_MAXVAL too large, code assumes it fits into 8 bits
#endif

/* Sanity checks for string and token defines */
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_BREAK) != DUK_TOK_BREAK)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CASE) != DUK_TOK_CASE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CATCH) != DUK_TOK_CATCH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONTINUE) != DUK_TOK_CONTINUE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEBUGGER) != DUK_TOK_DEBUGGER)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEFAULT) != DUK_TOK_DEFAULT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DELETE) != DUK_TOK_DELETE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DO) != DUK_TOK_DO)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ELSE) != DUK_TOK_ELSE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FINALLY) != DUK_TOK_FINALLY)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FOR) != DUK_TOK_FOR)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_FUNCTION) != DUK_TOK_FUNCTION)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IF) != DUK_TOK_IF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IN) != DUK_TOK_IN)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INSTANCEOF) != DUK_TOK_INSTANCEOF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_NEW) != DUK_TOK_NEW)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_RETURN) != DUK_TOK_RETURN)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SWITCH) != DUK_TOK_SWITCH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THIS) != DUK_TOK_THIS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THROW) != DUK_TOK_THROW)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRY) != DUK_TOK_TRY)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TYPEOF) != DUK_TOK_TYPEOF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VAR) != DUK_TOK_VAR)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VOID) != DUK_TOK_VOID)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WHILE) != DUK_TOK_WHILE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WITH) != DUK_TOK_WITH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CLASS) != DUK_TOK_CLASS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONST) != DUK_TOK_CONST)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ENUM) != DUK_TOK_ENUM)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXPORT) != DUK_TOK_EXPORT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXTENDS) != DUK_TOK_EXTENDS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPORT) != DUK_TOK_IMPORT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SUPER) != DUK_TOK_SUPER)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_NULL) != DUK_TOK_NULL)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRUE) != DUK_TOK_TRUE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FALSE) != DUK_TOK_FALSE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPLEMENTS) != DUK_TOK_IMPLEMENTS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INTERFACE) != DUK_TOK_INTERFACE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LET) != DUK_TOK_LET)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PACKAGE) != DUK_TOK_PACKAGE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PRIVATE) != DUK_TOK_PRIVATE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PROTECTED) != DUK_TOK_PROTECTED)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PUBLIC) != DUK_TOK_PUBLIC)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_STATIC) != DUK_TOK_STATIC)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_YIELD) != DUK_TOK_YIELD)
#error mismatch in token defines
#endif
318
/* Regexp tokens */
#define DUK_RETOK_EOF                              0
#define DUK_RETOK_DISJUNCTION                      1
#define DUK_RETOK_QUANTIFIER                       2
#define DUK_RETOK_ASSERT_START                     3
#define DUK_RETOK_ASSERT_END                       4
#define DUK_RETOK_ASSERT_WORD_BOUNDARY             5
#define DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY         6
#define DUK_RETOK_ASSERT_START_POS_LOOKAHEAD       7
#define DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD       8
#define DUK_RETOK_ATOM_PERIOD                      9
#define DUK_RETOK_ATOM_CHAR                        10
#define DUK_RETOK_ATOM_DIGIT                       11
#define DUK_RETOK_ATOM_NOT_DIGIT                   12
#define DUK_RETOK_ATOM_WHITE                       13
#define DUK_RETOK_ATOM_NOT_WHITE                   14
#define DUK_RETOK_ATOM_WORD_CHAR                   15
#define DUK_RETOK_ATOM_NOT_WORD_CHAR               16
#define DUK_RETOK_ATOM_BACKREFERENCE               17
#define DUK_RETOK_ATOM_START_CAPTURE_GROUP         18
#define DUK_RETOK_ATOM_START_NONCAPTURE_GROUP      19
#define DUK_RETOK_ATOM_START_CHARCLASS             20
#define DUK_RETOK_ATOM_START_CHARCLASS_INVERTED    21
#define DUK_RETOK_ATOM_END_GROUP                   22
343
/* Constants for duk_lexer_ctx.buf. */
#define DUK_LEXER_TEMP_BUF_LIMIT                   256
346
347 /* A token value. Can be memcpy()'d, but note that slot1/slot2 values are on the valstack.
348 * Some fields (like num, str1, str2) are only valid for specific token types and may have
349 * stale values otherwise.
350 */
351 struct duk_token {
352 duk_small_int_t t; /* token type (with reserved word identification) */
353 duk_small_int_t t_nores; /* token type (with reserved words as DUK_TOK_IDENTIFER) */
354 duk_double_t num; /* numeric value of token */
355 duk_hstring *str1; /* string 1 of token (borrowed, stored to ctx->slot1_idx) */
356 duk_hstring *str2; /* string 2 of token (borrowed, stored to ctx->slot2_idx) */
357 duk_size_t start_offset; /* start byte offset of token in lexer input */
358 duk_int_t start_line; /* start line of token (first char) */
359 duk_int_t num_escapes; /* number of escapes and line continuations (for directive prologue) */
360 duk_bool_t lineterm; /* token was preceded by a lineterm */
361 duk_bool_t allow_auto_semi; /* token allows automatic semicolon insertion (eof or preceded by newline) */
362 };
363
/* All-ones 32-bit value; presumably the "no upper bound" marker for regexp
 * quantifier counts -- NOTE(review): confirm usage in the regexp compiler.
 */
#define DUK_RE_QUANTIFIER_INFINITE         ((duk_uint32_t) 0xffffffffUL)
365
366 /* A regexp token value. */
367 struct duk_re_token {
368 duk_small_int_t t; /* token type */
369 duk_small_int_t greedy;
370 duk_uint_fast32_t num; /* numeric value (character, count) */
371 duk_uint_fast32_t qmin;
372 duk_uint_fast32_t qmax;
373 };
374
375 /* A structure for 'snapshotting' a point for rewinding */
376 struct duk_lexer_point {
377 duk_size_t offset;
378 duk_int_t line;
379 };
380
381 /* Lexer codepoint with additional info like offset/line number */
382 struct duk_lexer_codepoint {
383 duk_codepoint_t codepoint;
384 duk_size_t offset;
385 duk_int_t line;
386 };
387
388 /* Lexer context. Same context is used for Ecmascript and Regexp parsing. */
389 struct duk_lexer_ctx {
390 #if defined(DUK_USE_LEXER_SLIDING_WINDOW)
391 duk_lexer_codepoint *window; /* unicode code points, window[0] is always next, points to 'buffer' */
392 duk_lexer_codepoint buffer[DUK_LEXER_BUFFER_SIZE];
393 #else
394 duk_lexer_codepoint window[DUK_LEXER_WINDOW_SIZE]; /* unicode code points, window[0] is always next */
395 #endif
396
397 duk_hthread *thr; /* thread; minimizes argument passing */
398
399 const duk_uint8_t *input; /* input string (may be a user pointer) */
400 duk_size_t input_length; /* input byte length */
401 duk_size_t input_offset; /* input offset for window leading edge (not window[0]) */
402 duk_int_t input_line; /* input linenumber at input_offset (not window[0]), init to 1 */
403
404 duk_idx_t slot1_idx; /* valstack slot for 1st token value */
405 duk_idx_t slot2_idx; /* valstack slot for 2nd token value */
406 duk_idx_t buf_idx; /* valstack slot for temp buffer */
407 duk_hbuffer_dynamic *buf; /* temp accumulation buffer */
408 duk_bufwriter_ctx bw; /* bufwriter for temp accumulation */
409
410 duk_int_t token_count; /* number of tokens parsed */
411 duk_int_t token_limit; /* maximum token count before error (sanity backstop) */
412 };
413
414 /*
415 * Prototypes
416 */
417
418 DUK_INTERNAL_DECL void duk_lexer_initctx(duk_lexer_ctx *lex_ctx);
419
420 DUK_INTERNAL_DECL void duk_lexer_setpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt);
421
422 DUK_INTERNAL_DECL
423 void duk_lexer_parse_js_input_element(duk_lexer_ctx *lex_ctx,
424 duk_token *out_token,
425 duk_bool_t strict_mode,
426 duk_bool_t regexp_mode);
427 #ifdef DUK_USE_REGEXP_SUPPORT
428 DUK_INTERNAL_DECL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token);
429 DUK_INTERNAL_DECL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata);
430 #endif /* DUK_USE_REGEXP_SUPPORT */
431
432 #endif /* DUK_LEXER_H_INCLUDED */