]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | ** $Id: llex.c,v 2.20.1.2 2009/11/23 14:58:22 roberto Exp $ | |
3 | ** Lexical Analyzer | |
4 | ** See Copyright Notice in lua.h | |
5 | */ | |
6 | ||
7 | ||
8 | #include <ctype.h> | |
9 | #include <locale.h> | |
10 | #include <string.h> | |
11 | ||
12 | #define llex_c | |
13 | #define LUA_CORE | |
14 | ||
15 | #include "lua.h" | |
16 | ||
17 | #include "ldo.h" | |
18 | #include "llex.h" | |
19 | #include "lobject.h" | |
20 | #include "lparser.h" | |
21 | #include "lstate.h" | |
22 | #include "lstring.h" | |
23 | #include "ltable.h" | |
24 | #include "lzio.h" | |
25 | ||
26 | ||
27 | ||
/*
** Advance the lexer by one character: read the next character from the
** input stream `ls->z' with `zgetc' and store it in `ls->current'.
** The expression yields the character just stored.
** The parameter is parenthesized so that any pointer expression
** (e.g. `&state') can be used as the argument.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
29 | ||
30 | ||
31 | ||
32 | ||
/*
** True when the current character is a line terminator (`\n' or `\r').
** The parameter is parenthesized so that any pointer expression
** (e.g. `&state') can be used as the argument.
*/
#define currIsNewline(ls) \
  ((ls)->current == '\n' || (ls)->current == '\r')
34 | ||
35 | ||
/*
** Printable names of the multi-character tokens, terminated by NULL.
** luaX_token2str indexes this table with `token - FIRST_RESERVED', and
** luaX_init registers the first NUM_RESERVED entries as reserved words.
** ORDER RESERVED (presumably the order must stay in sync with the token
** enumeration declared elsewhere -- confirm before reordering).
*/
const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do",
    "else", "elseif", "end",
    "false", "for", "function",
    "if", "in", "local",
    "nil", "not", "or",
    "repeat", "return", "then",
    "true", "until", "while",
    /* other multi-character terminal symbols */
    "..", "...",
    "==", ">=", "<=", "~=",
    "<number>", "<name>", "<string>", "<eof>",
    NULL
};
46 | ||
47 | ||
/*
** Append the current character to the token buffer (via `save') and then
** advance to the next input character (via `next').  The expression yields
** the value of `next', i.e. the newly read character.
** The parameter is parenthesized so that any pointer expression can be
** used as the argument.
*/
#define save_and_next(ls) (save((ls), (ls)->current), next(ls))
49 | ||
50 | ||
51 | static void save (LexState *ls, int c) { | |
52 | Mbuffer *b = ls->buff; | |
53 | if (b->n + 1 > b->buffsize) { | |
54 | size_t newsize; | |
55 | if (b->buffsize >= MAX_SIZET/2) | |
56 | luaX_lexerror(ls, "lexical element too long", 0); | |
57 | newsize = b->buffsize * 2; | |
58 | luaZ_resizebuffer(ls->L, b, newsize); | |
59 | } | |
60 | b->buffer[b->n++] = cast(char, c); | |
61 | } | |
62 | ||
63 | ||
64 | void luaX_init (lua_State *L) { | |
65 | int i; | |
66 | for (i=0; i<NUM_RESERVED; i++) { | |
67 | TString *ts = luaS_new(L, luaX_tokens[i]); | |
68 | luaS_fix(ts); /* reserved words are never collected */ | |
69 | lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); | |
70 | ts->tsv.reserved = cast_byte(i+1); /* reserved word */ | |
71 | } | |
72 | } | |
73 | ||
74 | ||
/*
** Size, in bytes, of the stack buffer that luaX_lexerror uses to hold
** the chunk identification produced by luaO_chunkid.
*/
#define MAXSRC 80
76 | ||
77 | ||
78 | const char *luaX_token2str (LexState *ls, int token) { | |
79 | if (token < FIRST_RESERVED) { | |
80 | lua_assert(token == cast(unsigned char, token)); | |
81 | return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : | |
82 | luaO_pushfstring(ls->L, "%c", token); | |
83 | } | |
84 | else | |
85 | return luaX_tokens[token-FIRST_RESERVED]; | |
86 | } | |
87 | ||
88 | ||
89 | static const char *txtToken (LexState *ls, int token) { | |
90 | switch (token) { | |
91 | case TK_NAME: | |
92 | case TK_STRING: | |
93 | case TK_NUMBER: | |
94 | save(ls, '\0'); | |
95 | return luaZ_buffer(ls->buff); | |
96 | default: | |
97 | return luaX_token2str(ls, token); | |
98 | } | |
99 | } | |
100 | ||
101 | ||
102 | void luaX_lexerror (LexState *ls, const char *msg, int token) { | |
103 | char buff[MAXSRC]; | |
104 | luaO_chunkid(buff, getstr(ls->source), MAXSRC); | |
105 | msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); | |
106 | if (token) | |
107 | luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); | |
108 | luaD_throw(ls->L, LUA_ERRSYNTAX); | |
109 | } | |
110 | ||
111 | ||
112 | void luaX_syntaxerror (LexState *ls, const char *msg) { | |
113 | luaX_lexerror(ls, msg, ls->t.token); | |
114 | } | |
115 | ||
116 | ||
117 | TString *luaX_newstring (LexState *ls, const char *str, size_t l) { | |
118 | lua_State *L = ls->L; | |
119 | TString *ts = luaS_newlstr(L, str, l); | |
120 | TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ | |
121 | if (ttisnil(o)) { | |
122 | setbvalue(o, 1); /* make sure `str' will not be collected */ | |
123 | luaC_checkGC(L); | |
124 | } | |
125 | return ts; | |
126 | } | |
127 | ||
128 | ||
129 | static void inclinenumber (LexState *ls) { | |
130 | int old = ls->current; | |
131 | lua_assert(currIsNewline(ls)); | |
132 | next(ls); /* skip `\n' or `\r' */ | |
133 | if (currIsNewline(ls) && ls->current != old) | |
134 | next(ls); /* skip `\n\r' or `\r\n' */ | |
135 | if (++ls->linenumber >= MAX_INT) | |
136 | luaX_syntaxerror(ls, "chunk has too many lines"); | |
137 | } | |
138 | ||
139 | ||
140 | void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { | |
141 | ls->decpoint = '.'; | |
142 | ls->L = L; | |
143 | ls->lookahead.token = TK_EOS; /* no look-ahead token */ | |
144 | ls->z = z; | |
145 | ls->fs = NULL; | |
146 | ls->linenumber = 1; | |
147 | ls->lastline = 1; | |
148 | ls->source = source; | |
149 | luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ | |
150 | next(ls); /* read first char */ | |
151 | } | |
152 | ||
153 | ||
154 | ||
155 | /* | |
156 | ** ======================================================= | |
157 | ** LEXICAL ANALYZER | |
158 | ** ======================================================= | |
159 | */ | |
160 | ||
161 | ||
162 | ||
163 | static int check_next (LexState *ls, const char *set) { | |
164 | if (!strchr(set, ls->current)) | |
165 | return 0; | |
166 | save_and_next(ls); | |
167 | return 1; | |
168 | } | |
169 | ||
170 | ||
171 | static void buffreplace (LexState *ls, char from, char to) { | |
172 | size_t n = luaZ_bufflen(ls->buff); | |
173 | char *p = luaZ_buffer(ls->buff); | |
174 | while (n--) | |
175 | if (p[n] == from) p[n] = to; | |
176 | } | |
177 | ||
178 | ||
179 | static void trydecpoint (LexState *ls, SemInfo *seminfo) { | |
180 | /* format error: try to update decimal point separator */ | |
181 | struct lconv *cv = localeconv(); | |
182 | char old = ls->decpoint; | |
183 | ls->decpoint = (cv ? cv->decimal_point[0] : '.'); | |
184 | buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ | |
185 | if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { | |
186 | /* format error with correct decimal point: no more options */ | |
187 | buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ | |
188 | luaX_lexerror(ls, "malformed number", TK_NUMBER); | |
189 | } | |
190 | } | |
191 | ||
192 | ||
193 | /* LUA_NUMBER */ | |
194 | static void read_numeral (LexState *ls, SemInfo *seminfo) { | |
195 | lua_assert(isdigit(ls->current)); | |
196 | do { | |
197 | save_and_next(ls); | |
198 | } while (isdigit(ls->current) || ls->current == '.'); | |
199 | if (check_next(ls, "Ee")) /* `E'? */ | |
200 | check_next(ls, "+-"); /* optional exponent sign */ | |
201 | while (isalnum(ls->current) || ls->current == '_') | |
202 | save_and_next(ls); | |
203 | save(ls, '\0'); | |
204 | buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ | |
205 | if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ | |
206 | trydecpoint(ls, seminfo); /* try to update decimal point separator */ | |
207 | } | |
208 | ||
209 | ||
210 | static int skip_sep (LexState *ls) { | |
211 | int count = 0; | |
212 | int s = ls->current; | |
213 | lua_assert(s == '[' || s == ']'); | |
214 | save_and_next(ls); | |
215 | while (ls->current == '=') { | |
216 | save_and_next(ls); | |
217 | count++; | |
218 | } | |
219 | return (ls->current == s) ? count : (-count) - 1; | |
220 | } | |
221 | ||
222 | ||
223 | static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { | |
224 | int cont = 0; | |
225 | (void)(cont); /* avoid warnings when `cont' is not used */ | |
226 | save_and_next(ls); /* skip 2nd `[' */ | |
227 | if (currIsNewline(ls)) /* string starts with a newline? */ | |
228 | inclinenumber(ls); /* skip it */ | |
229 | for (;;) { | |
230 | switch (ls->current) { | |
231 | case EOZ: | |
232 | luaX_lexerror(ls, (seminfo) ? "unfinished long string" : | |
233 | "unfinished long comment", TK_EOS); | |
234 | break; /* to avoid warnings */ | |
235 | #if defined(LUA_COMPAT_LSTR) | |
236 | case '[': { | |
237 | if (skip_sep(ls) == sep) { | |
238 | save_and_next(ls); /* skip 2nd `[' */ | |
239 | cont++; | |
240 | #if LUA_COMPAT_LSTR == 1 | |
241 | if (sep == 0) | |
242 | luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); | |
243 | #endif | |
244 | } | |
245 | break; | |
246 | } | |
247 | #endif | |
248 | case ']': { | |
249 | if (skip_sep(ls) == sep) { | |
250 | save_and_next(ls); /* skip 2nd `]' */ | |
251 | #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 | |
252 | cont--; | |
253 | if (sep == 0 && cont >= 0) break; | |
254 | #endif | |
255 | goto endloop; | |
256 | } | |
257 | break; | |
258 | } | |
259 | case '\n': | |
260 | case '\r': { | |
261 | save(ls, '\n'); | |
262 | inclinenumber(ls); | |
263 | if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ | |
264 | break; | |
265 | } | |
266 | default: { | |
267 | if (seminfo) save_and_next(ls); | |
268 | else next(ls); | |
269 | } | |
270 | } | |
271 | } endloop: | |
272 | if (seminfo) | |
273 | seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), | |
274 | luaZ_bufflen(ls->buff) - 2*(2 + sep)); | |
275 | } | |
276 | ||
277 | ||
278 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { | |
279 | save_and_next(ls); | |
280 | while (ls->current != del) { | |
281 | switch (ls->current) { | |
282 | case EOZ: | |
283 | luaX_lexerror(ls, "unfinished string", TK_EOS); | |
284 | continue; /* to avoid warnings */ | |
285 | case '\n': | |
286 | case '\r': | |
287 | luaX_lexerror(ls, "unfinished string", TK_STRING); | |
288 | continue; /* to avoid warnings */ | |
289 | case '\\': { | |
290 | int c; | |
291 | next(ls); /* do not save the `\' */ | |
292 | switch (ls->current) { | |
293 | case 'a': c = '\a'; break; | |
294 | case 'b': c = '\b'; break; | |
295 | case 'f': c = '\f'; break; | |
296 | case 'n': c = '\n'; break; | |
297 | case 'r': c = '\r'; break; | |
298 | case 't': c = '\t'; break; | |
299 | case 'v': c = '\v'; break; | |
300 | case '\n': /* go through */ | |
301 | case '\r': save(ls, '\n'); inclinenumber(ls); continue; | |
302 | case EOZ: continue; /* will raise an error next loop */ | |
303 | default: { | |
304 | if (!isdigit(ls->current)) | |
305 | save_and_next(ls); /* handles \\, \", \', and \? */ | |
306 | else { /* \xxx */ | |
307 | int i = 0; | |
308 | c = 0; | |
309 | do { | |
310 | c = 10*c + (ls->current-'0'); | |
311 | next(ls); | |
312 | } while (++i<3 && isdigit(ls->current)); | |
313 | if (c > UCHAR_MAX) | |
314 | luaX_lexerror(ls, "escape sequence too large", TK_STRING); | |
315 | save(ls, c); | |
316 | } | |
317 | continue; | |
318 | } | |
319 | } | |
320 | save(ls, c); | |
321 | next(ls); | |
322 | continue; | |
323 | } | |
324 | default: | |
325 | save_and_next(ls); | |
326 | } | |
327 | } | |
328 | save_and_next(ls); /* skip delimiter */ | |
329 | seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, | |
330 | luaZ_bufflen(ls->buff) - 2); | |
331 | } | |
332 | ||
333 | ||
334 | static int llex (LexState *ls, SemInfo *seminfo) { | |
335 | luaZ_resetbuffer(ls->buff); | |
336 | for (;;) { | |
337 | switch (ls->current) { | |
338 | case '\n': | |
339 | case '\r': { | |
340 | inclinenumber(ls); | |
341 | continue; | |
342 | } | |
343 | case '-': { | |
344 | next(ls); | |
345 | if (ls->current != '-') return '-'; | |
346 | /* else is a comment */ | |
347 | next(ls); | |
348 | if (ls->current == '[') { | |
349 | int sep = skip_sep(ls); | |
350 | luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ | |
351 | if (sep >= 0) { | |
352 | read_long_string(ls, NULL, sep); /* long comment */ | |
353 | luaZ_resetbuffer(ls->buff); | |
354 | continue; | |
355 | } | |
356 | } | |
357 | /* else short comment */ | |
358 | while (!currIsNewline(ls) && ls->current != EOZ) | |
359 | next(ls); | |
360 | continue; | |
361 | } | |
362 | case '[': { | |
363 | int sep = skip_sep(ls); | |
364 | if (sep >= 0) { | |
365 | read_long_string(ls, seminfo, sep); | |
366 | return TK_STRING; | |
367 | } | |
368 | else if (sep == -1) return '['; | |
369 | else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); | |
370 | } | |
371 | case '=': { | |
372 | next(ls); | |
373 | if (ls->current != '=') return '='; | |
374 | else { next(ls); return TK_EQ; } | |
375 | } | |
376 | case '<': { | |
377 | next(ls); | |
378 | if (ls->current != '=') return '<'; | |
379 | else { next(ls); return TK_LE; } | |
380 | } | |
381 | case '>': { | |
382 | next(ls); | |
383 | if (ls->current != '=') return '>'; | |
384 | else { next(ls); return TK_GE; } | |
385 | } | |
386 | case '~': { | |
387 | next(ls); | |
388 | if (ls->current != '=') return '~'; | |
389 | else { next(ls); return TK_NE; } | |
390 | } | |
391 | case '"': | |
392 | case '\'': { | |
393 | read_string(ls, ls->current, seminfo); | |
394 | return TK_STRING; | |
395 | } | |
396 | case '.': { | |
397 | save_and_next(ls); | |
398 | if (check_next(ls, ".")) { | |
399 | if (check_next(ls, ".")) | |
400 | return TK_DOTS; /* ... */ | |
401 | else return TK_CONCAT; /* .. */ | |
402 | } | |
403 | else if (!isdigit(ls->current)) return '.'; | |
404 | else { | |
405 | read_numeral(ls, seminfo); | |
406 | return TK_NUMBER; | |
407 | } | |
408 | } | |
409 | case EOZ: { | |
410 | return TK_EOS; | |
411 | } | |
412 | default: { | |
413 | if (isspace(ls->current)) { | |
414 | lua_assert(!currIsNewline(ls)); | |
415 | next(ls); | |
416 | continue; | |
417 | } | |
418 | else if (isdigit(ls->current)) { | |
419 | read_numeral(ls, seminfo); | |
420 | return TK_NUMBER; | |
421 | } | |
422 | else if (isalpha(ls->current) || ls->current == '_') { | |
423 | /* identifier or reserved word */ | |
424 | TString *ts; | |
425 | do { | |
426 | save_and_next(ls); | |
427 | } while (isalnum(ls->current) || ls->current == '_'); | |
428 | ts = luaX_newstring(ls, luaZ_buffer(ls->buff), | |
429 | luaZ_bufflen(ls->buff)); | |
430 | if (ts->tsv.reserved > 0) /* reserved word? */ | |
431 | return ts->tsv.reserved - 1 + FIRST_RESERVED; | |
432 | else { | |
433 | seminfo->ts = ts; | |
434 | return TK_NAME; | |
435 | } | |
436 | } | |
437 | else { | |
438 | int c = ls->current; | |
439 | next(ls); | |
440 | return c; /* single-char tokens (+ - / ...) */ | |
441 | } | |
442 | } | |
443 | } | |
444 | } | |
445 | } | |
446 | ||
447 | ||
448 | void luaX_next (LexState *ls) { | |
449 | ls->lastline = ls->linenumber; | |
450 | if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ | |
451 | ls->t = ls->lookahead; /* use this one */ | |
452 | ls->lookahead.token = TK_EOS; /* and discharge it */ | |
453 | } | |
454 | else | |
455 | ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ | |
456 | } | |
457 | ||
458 | ||
459 | void luaX_lookahead (LexState *ls) { | |
460 | lua_assert(ls->lookahead.token == TK_EOS); | |
461 | ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); | |
462 | } | |
463 |