5 #ifndef DUK_LEXER_H_INCLUDED
6 #define DUK_LEXER_H_INCLUDED
8 typedef void (*duk_re_range_callback
)(void *user
, duk_codepoint_t r1
, duk_codepoint_t r2
, duk_bool_t direct
);
/*
 *  A token is interpreted as any possible production of InputElementDiv
 *  and InputElementRegExp, see E5 Section 7 in its entirety.  Note that
 *  the E5 "Token" production does not cover all actual tokens of the
 *  language (which is explicitly stated in the specification, Section 7.5).
 *  Null and boolean literals are defined as part of both ReservedWord
 *  (E5 Section 7.6.1) and Literal (E5 Section 7.8) productions.  Here,
 *  null and boolean values have literal tokens, and are not reserved
 *  words.
 *
 *  Decimal literal negative/positive sign is -not- part of DUK_TOK_NUMBER.
 *  The number tokens always have a non-negative value.  The unary minus
 *  operator in "-1.0" is optimized during compilation to yield a single
 *  negative constant.
 *
 *  Token numbering is free except that reserved words are required to be
 *  in a continuous range and in a particular order.  See genstrings.py.
 */
/* Macro front-ends for the lexer context helpers declared in this header. */
#define DUK_LEXER_INITCTX(ctx) \
    duk_lexer_initctx((ctx))

#define DUK_LEXER_SETPOINT(ctx,pt) \
    duk_lexer_setpoint((ctx), (pt))

/* Copy the offset and line of window[0] into *pt.  Note that both macro
 * arguments are evaluated twice.
 */
#define DUK_LEXER_GETPOINT(ctx,pt) \
    do { \
        (pt)->offset = (ctx)->window[0].offset; \
        (pt)->line = (ctx)->window[0].line; \
    } while (0)
/* currently 6 characters of lookup are actually needed (duk_lexer.c) */
#define DUK_LEXER_WINDOW_SIZE                     6

/* The buffer size is only defined when the sliding window variant is
 * enabled; the conditional must be closed here so that the rest of the
 * header is not made conditional on DUK_USE_LEXER_SLIDING_WINDOW.
 */
#if defined(DUK_USE_LEXER_SLIDING_WINDOW)
#define DUK_LEXER_BUFFER_SIZE                     64
#endif
/* Smallest token value. */
#define DUK_TOK_MINVAL                            0

/* returned after EOF (infinite amount) */
/* NOTE(review): the define belonging to the comment above was missing and
 * value 0 was otherwise unassigned; name chosen by analogy with
 * DUK_RETOK_EOF -- confirm against the lexer implementation.
 */
#define DUK_TOK_EOF                               0

/* identifier names (E5 Section 7.6) */
#define DUK_TOK_IDENTIFIER                        1
/* reserved words: keywords
 *
 * Values must stay contiguous: the string-index sanity checks later in
 * this header reference every one of these, including DUK_TOK_DO,
 * DUK_TOK_IF and DUK_TOK_IN.
 */
#define DUK_TOK_START_RESERVED                    2
#define DUK_TOK_BREAK                             2
#define DUK_TOK_CASE                              3
#define DUK_TOK_CATCH                             4
#define DUK_TOK_CONTINUE                          5
#define DUK_TOK_DEBUGGER                          6
#define DUK_TOK_DEFAULT                           7
#define DUK_TOK_DELETE                            8
#define DUK_TOK_DO                                9
#define DUK_TOK_ELSE                              10
#define DUK_TOK_FINALLY                           11
#define DUK_TOK_FOR                               12
#define DUK_TOK_FUNCTION                          13
#define DUK_TOK_IF                                14
#define DUK_TOK_IN                                15
#define DUK_TOK_INSTANCEOF                        16
#define DUK_TOK_NEW                               17
#define DUK_TOK_RETURN                            18
#define DUK_TOK_SWITCH                            19
#define DUK_TOK_THIS                              20
#define DUK_TOK_THROW                             21
#define DUK_TOK_TRY                               22
#define DUK_TOK_TYPEOF                            23
#define DUK_TOK_VAR                               24
#define DUK_TOK_CONST                             25
#define DUK_TOK_VOID                              26
#define DUK_TOK_WHILE                             27
#define DUK_TOK_WITH                              28
/* reserved words: future reserved words (values 29..34) */
#define DUK_TOK_CLASS                             29
#define DUK_TOK_ENUM                              30
#define DUK_TOK_EXPORT                            31
#define DUK_TOK_EXTENDS                           32
#define DUK_TOK_IMPORT                            33
#define DUK_TOK_SUPER                             34
/* "null", "true", and "false" are always reserved words.
 * Note that "get" and "set" are not!
 */
#define DUK_TOK_NULL                              35
#define DUK_TOK_TRUE                              36
#define DUK_TOK_FALSE                             37
/* reserved words: additional future reserved words in strict mode */
#define DUK_TOK_START_STRICT_RESERVED             38  /* inclusive */
#define DUK_TOK_IMPLEMENTS                        38
#define DUK_TOK_INTERFACE                         39
#define DUK_TOK_LET                               40
#define DUK_TOK_PACKAGE                           41
#define DUK_TOK_PRIVATE                           42
#define DUK_TOK_PROTECTED                         43
#define DUK_TOK_PUBLIC                            44
#define DUK_TOK_STATIC                            45
#define DUK_TOK_YIELD                             46

/* One past the last reserved word token. */
#define DUK_TOK_END_RESERVED                      47  /* exclusive */
/* "get" and "set" are tokens but NOT ReservedWords.  They are currently
 * parsed as identifiers and these defines are actually now unused.
 */
#define DUK_TOK_GET                               47
#define DUK_TOK_SET                               48
/* punctuators (unlike the spec, also includes "/" and "/=") */

/* brackets and separators */
#define DUK_TOK_LCURLY                            49
#define DUK_TOK_RCURLY                            50
#define DUK_TOK_LBRACKET                          51
#define DUK_TOK_RBRACKET                          52
#define DUK_TOK_LPAREN                            53
#define DUK_TOK_RPAREN                            54
#define DUK_TOK_PERIOD                            55
#define DUK_TOK_SEMICOLON                         56
#define DUK_TOK_COMMA                             57

/* comparison */
#define DUK_TOK_LT                                58
#define DUK_TOK_GT                                59
#define DUK_TOK_LE                                60
#define DUK_TOK_GE                                61
#define DUK_TOK_EQ                                62
#define DUK_TOK_NEQ                               63
#define DUK_TOK_SEQ                               64
#define DUK_TOK_SNEQ                              65

/* arithmetic, shifts, bitwise and logical operators */
#define DUK_TOK_ADD                               66
#define DUK_TOK_SUB                               67
#define DUK_TOK_MUL                               68
#define DUK_TOK_DIV                               69
#define DUK_TOK_MOD                               70
#define DUK_TOK_INCREMENT                         71
#define DUK_TOK_DECREMENT                         72
#define DUK_TOK_ALSHIFT                           73  /* named "arithmetic" because result is signed */
#define DUK_TOK_ARSHIFT                           74
#define DUK_TOK_RSHIFT                            75
#define DUK_TOK_BAND                              76
#define DUK_TOK_BOR                               77
#define DUK_TOK_BXOR                              78
#define DUK_TOK_LNOT                              79
#define DUK_TOK_BNOT                              80
#define DUK_TOK_LAND                              81
#define DUK_TOK_LOR                               82
#define DUK_TOK_QUESTION                          83
#define DUK_TOK_COLON                             84

/* assignment operators */
#define DUK_TOK_EQUALSIGN                         85
#define DUK_TOK_ADD_EQ                            86
#define DUK_TOK_SUB_EQ                            87
#define DUK_TOK_MUL_EQ                            88
#define DUK_TOK_DIV_EQ                            89
#define DUK_TOK_MOD_EQ                            90
#define DUK_TOK_ALSHIFT_EQ                        91
#define DUK_TOK_ARSHIFT_EQ                        92
#define DUK_TOK_RSHIFT_EQ                         93
#define DUK_TOK_BAND_EQ                           94
#define DUK_TOK_BOR_EQ                            95
#define DUK_TOK_BXOR_EQ                           96
/* literals (E5 Section 7.8), except null, true, false, which are treated
 * like reserved words (above).
 */
#define DUK_TOK_NUMBER                            97
#define DUK_TOK_STRING                            98
#define DUK_TOK_REGEXP                            99

/* Largest token value. */
#define DUK_TOK_MAXVAL                            99  /* inclusive */
/* Convert heap string index to a token (reserved words) */
#define DUK_STRIDX_TO_TOK(x)                        ((x) - DUK_STRIDX_START_RESERVED + DUK_TOK_START_RESERVED)

/* Sanity check */
#if (DUK_TOK_MAXVAL > 255)
#error DUK_TOK_MAXVAL too large, code assumes it fits into 8 bits
#endif

/* Sanity checks for string and token defines: every reserved word string
 * index must map to its token value through DUK_STRIDX_TO_TOK().  Each
 * check is a self-contained #if/#error/#endif so the checks do not nest.
 */
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_BREAK) != DUK_TOK_BREAK)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CASE) != DUK_TOK_CASE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CATCH) != DUK_TOK_CATCH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONTINUE) != DUK_TOK_CONTINUE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEBUGGER) != DUK_TOK_DEBUGGER)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEFAULT) != DUK_TOK_DEFAULT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DELETE) != DUK_TOK_DELETE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DO) != DUK_TOK_DO)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ELSE) != DUK_TOK_ELSE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FINALLY) != DUK_TOK_FINALLY)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FOR) != DUK_TOK_FOR)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_FUNCTION) != DUK_TOK_FUNCTION)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IF) != DUK_TOK_IF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IN) != DUK_TOK_IN)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INSTANCEOF) != DUK_TOK_INSTANCEOF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_NEW) != DUK_TOK_NEW)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_RETURN) != DUK_TOK_RETURN)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SWITCH) != DUK_TOK_SWITCH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THIS) != DUK_TOK_THIS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THROW) != DUK_TOK_THROW)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRY) != DUK_TOK_TRY)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TYPEOF) != DUK_TOK_TYPEOF)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VAR) != DUK_TOK_VAR)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VOID) != DUK_TOK_VOID)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WHILE) != DUK_TOK_WHILE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WITH) != DUK_TOK_WITH)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CLASS) != DUK_TOK_CLASS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONST) != DUK_TOK_CONST)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ENUM) != DUK_TOK_ENUM)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXPORT) != DUK_TOK_EXPORT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXTENDS) != DUK_TOK_EXTENDS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPORT) != DUK_TOK_IMPORT)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SUPER) != DUK_TOK_SUPER)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_NULL) != DUK_TOK_NULL)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRUE) != DUK_TOK_TRUE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FALSE) != DUK_TOK_FALSE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPLEMENTS) != DUK_TOK_IMPLEMENTS)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INTERFACE) != DUK_TOK_INTERFACE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LET) != DUK_TOK_LET)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PACKAGE) != DUK_TOK_PACKAGE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PRIVATE) != DUK_TOK_PRIVATE)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PROTECTED) != DUK_TOK_PROTECTED)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PUBLIC) != DUK_TOK_PUBLIC)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_STATIC) != DUK_TOK_STATIC)
#error mismatch in token defines
#endif
#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_YIELD) != DUK_TOK_YIELD)
#error mismatch in token defines
#endif
/* Regexp token types (the 't' field of duk_re_token). */
#define DUK_RETOK_EOF                              0
#define DUK_RETOK_DISJUNCTION                      1
#define DUK_RETOK_QUANTIFIER                       2

/* assertions */
#define DUK_RETOK_ASSERT_START                     3
#define DUK_RETOK_ASSERT_END                       4
#define DUK_RETOK_ASSERT_WORD_BOUNDARY             5
#define DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY         6
#define DUK_RETOK_ASSERT_START_POS_LOOKAHEAD       7
#define DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD       8

/* atoms */
#define DUK_RETOK_ATOM_PERIOD                      9
#define DUK_RETOK_ATOM_CHAR                        10
#define DUK_RETOK_ATOM_DIGIT                       11
#define DUK_RETOK_ATOM_NOT_DIGIT                   12
#define DUK_RETOK_ATOM_WHITE                       13
#define DUK_RETOK_ATOM_NOT_WHITE                   14
#define DUK_RETOK_ATOM_WORD_CHAR                   15
#define DUK_RETOK_ATOM_NOT_WORD_CHAR               16
#define DUK_RETOK_ATOM_BACKREFERENCE               17
#define DUK_RETOK_ATOM_START_CAPTURE_GROUP         18
#define DUK_RETOK_ATOM_START_NONCAPTURE_GROUP      19
#define DUK_RETOK_ATOM_START_CHARCLASS             20
#define DUK_RETOK_ATOM_START_CHARCLASS_INVERTED    21
#define DUK_RETOK_ATOM_END_GROUP                   22
/* Constants for duk_lexer_ctx.buf (the temp accumulation buffer). */
#define DUK_LEXER_TEMP_BUF_LIMIT                   256
347 /* A token value. Can be memcpy()'d, but note that slot1/slot2 values are on the valstack.
348 * Some fields (like num, str1, str2) are only valid for specific token types and may have
349 * stale values otherwise.
352 duk_small_int_t t
; /* token type (with reserved word identification) */
353 duk_small_int_t t_nores
; /* token type (with reserved words as DUK_TOK_IDENTIFER) */
354 duk_double_t num
; /* numeric value of token */
355 duk_hstring
*str1
; /* string 1 of token (borrowed, stored to ctx->slot1_idx) */
356 duk_hstring
*str2
; /* string 2 of token (borrowed, stored to ctx->slot2_idx) */
357 duk_size_t start_offset
; /* start byte offset of token in lexer input */
358 duk_int_t start_line
; /* start line of token (first char) */
359 duk_int_t num_escapes
; /* number of escapes and line continuations (for directive prologue) */
360 duk_bool_t lineterm
; /* token was preceded by a lineterm */
361 duk_bool_t allow_auto_semi
; /* token allows automatic semicolon insertion (eof or preceded by newline) */
364 #define DUK_RE_QUANTIFIER_INFINITE ((duk_uint32_t) 0xffffffffUL)
366 /* A regexp token value. */
367 struct duk_re_token
{
368 duk_small_int_t t
; /* token type */
369 duk_small_int_t greedy
;
370 duk_uint_fast32_t num
; /* numeric value (character, count) */
371 duk_uint_fast32_t qmin
;
372 duk_uint_fast32_t qmax
;
375 /* A structure for 'snapshotting' a point for rewinding */
376 struct duk_lexer_point
{
381 /* Lexer codepoint with additional info like offset/line number */
382 struct duk_lexer_codepoint
{
383 duk_codepoint_t codepoint
;
388 /* Lexer context. Same context is used for Ecmascript and Regexp parsing. */
389 struct duk_lexer_ctx
{
390 #if defined(DUK_USE_LEXER_SLIDING_WINDOW)
391 duk_lexer_codepoint
*window
; /* unicode code points, window[0] is always next, points to 'buffer' */
392 duk_lexer_codepoint buffer
[DUK_LEXER_BUFFER_SIZE
];
394 duk_lexer_codepoint window
[DUK_LEXER_WINDOW_SIZE
]; /* unicode code points, window[0] is always next */
397 duk_hthread
*thr
; /* thread; minimizes argument passing */
399 const duk_uint8_t
*input
; /* input string (may be a user pointer) */
400 duk_size_t input_length
; /* input byte length */
401 duk_size_t input_offset
; /* input offset for window leading edge (not window[0]) */
402 duk_int_t input_line
; /* input linenumber at input_offset (not window[0]), init to 1 */
404 duk_idx_t slot1_idx
; /* valstack slot for 1st token value */
405 duk_idx_t slot2_idx
; /* valstack slot for 2nd token value */
406 duk_idx_t buf_idx
; /* valstack slot for temp buffer */
407 duk_hbuffer_dynamic
*buf
; /* temp accumulation buffer */
408 duk_bufwriter_ctx bw
; /* bufwriter for temp accumulation */
410 duk_int_t token_count
; /* number of tokens parsed */
411 duk_int_t token_limit
; /* maximum token count before error (sanity backstop) */
418 DUK_INTERNAL_DECL
void duk_lexer_initctx(duk_lexer_ctx
*lex_ctx
);
420 DUK_INTERNAL_DECL
void duk_lexer_setpoint(duk_lexer_ctx
*lex_ctx
, duk_lexer_point
*pt
);
423 void duk_lexer_parse_js_input_element(duk_lexer_ctx
*lex_ctx
,
424 duk_token
*out_token
,
425 duk_bool_t strict_mode
,
426 duk_bool_t regexp_mode
);
/* Regexp lexing entry points; only declared when regexp support is enabled. */
#if defined(DUK_USE_REGEXP_SUPPORT)
DUK_INTERNAL_DECL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token);
DUK_INTERNAL_DECL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata);
#endif  /* DUK_USE_REGEXP_SUPPORT */
432 #endif /* DUK_LEXER_H_INCLUDED */