]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This class implements the lexer for assembly files. | |
11 | // | |
12 | //===----------------------------------------------------------------------===// | |
13 | ||
14 | #include "llvm/MC/MCParser/AsmLexer.h" | |
223e47cc | 15 | #include "llvm/MC/MCAsmInfo.h" |
970d7e83 LB |
16 | #include "llvm/Support/MemoryBuffer.h" |
17 | #include "llvm/Support/SMLoc.h" | |
223e47cc LB |
18 | #include <cctype> |
19 | #include <cerrno> | |
20 | #include <cstdio> | |
21 | #include <cstdlib> | |
22 | using namespace llvm; | |
23 | ||
24 | AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { | |
1a4d82fc | 25 | CurPtr = nullptr; |
223e47cc | 26 | isAtStartOfLine = true; |
1a4d82fc | 27 | AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); |
223e47cc LB |
28 | } |
29 | ||
30 | AsmLexer::~AsmLexer() { | |
31 | } | |
32 | ||
1a4d82fc JJ |
33 | void AsmLexer::setBuffer(StringRef Buf, const char *ptr) { |
34 | CurBuf = Buf; | |
223e47cc LB |
35 | |
36 | if (ptr) | |
37 | CurPtr = ptr; | |
38 | else | |
1a4d82fc | 39 | CurPtr = CurBuf.begin(); |
223e47cc | 40 | |
1a4d82fc | 41 | TokStart = nullptr; |
223e47cc LB |
42 | } |
43 | ||
44 | /// ReturnError - Set the error to the specified string at the specified | |
45 | /// location. This is defined to always return AsmToken::Error. | |
46 | AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { | |
47 | SetError(SMLoc::getFromPointer(Loc), Msg); | |
48 | ||
49 | return AsmToken(AsmToken::Error, StringRef(Loc, 0)); | |
50 | } | |
51 | ||
52 | int AsmLexer::getNextChar() { | |
53 | char CurChar = *CurPtr++; | |
54 | switch (CurChar) { | |
55 | default: | |
56 | return (unsigned char)CurChar; | |
57 | case 0: | |
58 | // A nul character in the stream is either the end of the current buffer or | |
59 | // a random nul in the file. Disambiguate that here. | |
1a4d82fc | 60 | if (CurPtr - 1 != CurBuf.end()) |
223e47cc LB |
61 | return 0; // Just whitespace. |
62 | ||
63 | // Otherwise, return end of file. | |
64 | --CurPtr; // Another call to lex will return EOF again. | |
65 | return EOF; | |
66 | } | |
67 | } | |
68 | ||
69 | /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? | |
70 | /// | |
71 | /// The leading integral digit sequence and dot should have already been | |
72 | /// consumed, some or all of the fractional digit sequence *can* have been | |
73 | /// consumed. | |
74 | AsmToken AsmLexer::LexFloatLiteral() { | |
75 | // Skip the fractional digit sequence. | |
76 | while (isdigit(*CurPtr)) | |
77 | ++CurPtr; | |
78 | ||
79 | // Check for exponent; we intentionally accept a slighlty wider set of | |
80 | // literals here and rely on the upstream client to reject invalid ones (e.g., | |
81 | // "1e+"). | |
82 | if (*CurPtr == 'e' || *CurPtr == 'E') { | |
83 | ++CurPtr; | |
84 | if (*CurPtr == '-' || *CurPtr == '+') | |
85 | ++CurPtr; | |
86 | while (isdigit(*CurPtr)) | |
87 | ++CurPtr; | |
88 | } | |
89 | ||
90 | return AsmToken(AsmToken::Real, | |
91 | StringRef(TokStart, CurPtr - TokStart)); | |
92 | } | |
93 | ||
1a4d82fc JJ |
94 | /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ |
95 | /// while making sure there are enough actual digits around for the constant to | |
96 | /// be valid. | |
97 | /// | |
98 | /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed | |
99 | /// before we get here. | |
100 | AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { | |
101 | assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && | |
102 | "unexpected parse state in floating hex"); | |
103 | bool NoFracDigits = true; | |
104 | ||
105 | // Skip the fractional part if there is one | |
106 | if (*CurPtr == '.') { | |
107 | ++CurPtr; | |
108 | ||
109 | const char *FracStart = CurPtr; | |
110 | while (isxdigit(*CurPtr)) | |
111 | ++CurPtr; | |
112 | ||
113 | NoFracDigits = CurPtr == FracStart; | |
114 | } | |
115 | ||
116 | if (NoIntDigits && NoFracDigits) | |
117 | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " | |
118 | "expected at least one significand digit"); | |
119 | ||
120 | // Make sure we do have some kind of proper exponent part | |
121 | if (*CurPtr != 'p' && *CurPtr != 'P') | |
122 | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " | |
123 | "expected exponent part 'p'"); | |
124 | ++CurPtr; | |
125 | ||
126 | if (*CurPtr == '+' || *CurPtr == '-') | |
127 | ++CurPtr; | |
128 | ||
129 | // N.b. exponent digits are *not* hex | |
130 | const char *ExpStart = CurPtr; | |
131 | while (isdigit(*CurPtr)) | |
132 | ++CurPtr; | |
133 | ||
134 | if (CurPtr == ExpStart) | |
135 | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " | |
136 | "expected at least one exponent digit"); | |
137 | ||
138 | return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); | |
139 | } | |
140 | ||
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
///
/// IsIdentifierChar - Return true if \p c may appear after the first
/// character of an identifier. '@' only counts when \p AllowAt is true.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // <cctype> functions require a value representable as unsigned char (or
  // EOF); cast so bytes >= 0x80 are not passed as negative values.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || (c == '@' && AllowAt) || c == '?';
}
146 | AsmToken AsmLexer::LexIdentifier() { | |
147 | // Check for floating point literals. | |
148 | if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { | |
149 | // Disambiguate a .1243foo identifier from a floating literal. | |
150 | while (isdigit(*CurPtr)) | |
151 | ++CurPtr; | |
1a4d82fc JJ |
152 | if (*CurPtr == 'e' || *CurPtr == 'E' || |
153 | !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) | |
223e47cc LB |
154 | return LexFloatLiteral(); |
155 | } | |
156 | ||
1a4d82fc | 157 | while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) |
223e47cc LB |
158 | ++CurPtr; |
159 | ||
160 | // Handle . as a special case. | |
161 | if (CurPtr == TokStart+1 && TokStart[0] == '.') | |
162 | return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); | |
163 | ||
164 | return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); | |
165 | } | |
166 | ||
167 | /// LexSlash: Slash: / | |
168 | /// C-Style Comment: /* ... */ | |
169 | AsmToken AsmLexer::LexSlash() { | |
170 | switch (*CurPtr) { | |
171 | case '*': break; // C style comment. | |
172 | case '/': return ++CurPtr, LexLineComment(); | |
173 | default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); | |
174 | } | |
175 | ||
176 | // C Style comment. | |
177 | ++CurPtr; // skip the star. | |
178 | while (1) { | |
179 | int CurChar = getNextChar(); | |
180 | switch (CurChar) { | |
181 | case EOF: | |
182 | return ReturnError(TokStart, "unterminated comment"); | |
183 | case '*': | |
184 | // End of the comment? | |
185 | if (CurPtr[0] != '/') break; | |
186 | ||
187 | ++CurPtr; // End the */. | |
188 | return LexToken(); | |
189 | } | |
190 | } | |
191 | } | |
192 | ||
193 | /// LexLineComment: Comment: #[^\n]* | |
194 | /// : //[^\n]* | |
195 | AsmToken AsmLexer::LexLineComment() { | |
196 | // FIXME: This is broken if we happen to a comment at the end of a file, which | |
197 | // was .included, and which doesn't end with a newline. | |
198 | int CurChar = getNextChar(); | |
199 | while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) | |
200 | CurChar = getNextChar(); | |
201 | ||
202 | if (CurChar == EOF) | |
1a4d82fc JJ |
203 | return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
204 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); | |
223e47cc LB |
205 | } |
206 | ||
/// Advance \p CurPtr past an optional integer type suffix: at most one 'U'
/// followed by at most two 'L's (i.e. U, L, LL, UL or ULL).
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  // Walk the longest possible suffix; each letter is optional but the order
  // is fixed.
  for (const char *Expected = "ULL"; *Expected != '\0'; ++Expected)
    if (*CurPtr == *Expected)
      ++CurPtr;
}
216 | ||
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// On return CurPtr points at:
//   - the [hH] suffix when one was found (the result is then 16);
//   - otherwise the first non-decimal hex digit [a-fA-F], if any was seen;
//   - otherwise the first character that is not a digit.
// In the last two cases DefaultRadix is returned.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // <cctype> functions are undefined for negative arguments other than EOF,
    // so never hand them a plain (possibly signed) char.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // Remember where the decimal prefix ends.
    if (!FirstHex)
      FirstHex = LookAhead;
  }
  bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
  if (isHex)
    return 16;
  return DefaultRadix;
}
239 | ||
1a4d82fc JJ |
240 | static AsmToken intToken(StringRef Ref, APInt &Value) |
241 | { | |
242 | if (Value.isIntN(64)) | |
243 | return AsmToken(AsmToken::Integer, Ref, Value); | |
244 | return AsmToken(AsmToken::BigNum, Ref, Value); | |
245 | } | |
246 | ||
223e47cc LB |
247 | /// LexDigit: First character is [0-9]. |
248 | /// Local Label: [0-9][:] | |
249 | /// Forward/Backward Label: [0-9][fb] | |
250 | /// Binary integer: 0b[01]+ | |
251 | /// Octal integer: 0[0-7]+ | |
970d7e83 | 252 | /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] |
223e47cc LB |
253 | /// Decimal integer: [1-9][0-9]* |
254 | AsmToken AsmLexer::LexDigit() { | |
255 | // Decimal integer: [1-9][0-9]* | |
256 | if (CurPtr[-1] != '0' || CurPtr[0] == '.') { | |
970d7e83 LB |
257 | unsigned Radix = doLookAhead(CurPtr, 10); |
258 | bool isHex = Radix == 16; | |
223e47cc | 259 | // Check for floating point literals. |
970d7e83 | 260 | if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { |
223e47cc LB |
261 | ++CurPtr; |
262 | return LexFloatLiteral(); | |
263 | } | |
264 | ||
265 | StringRef Result(TokStart, CurPtr - TokStart); | |
266 | ||
1a4d82fc JJ |
267 | APInt Value(128, 0, true); |
268 | if (Result.getAsInteger(Radix, Value)) | |
269 | return ReturnError(TokStart, !isHex ? "invalid decimal number" : | |
970d7e83 | 270 | "invalid hexdecimal number"); |
223e47cc | 271 | |
970d7e83 LB |
272 | // Consume the [bB][hH]. |
273 | if (Radix == 2 || Radix == 16) | |
274 | ++CurPtr; | |
275 | ||
276 | // The darwin/x86 (and x86-64) assembler accepts and ignores type | |
277 | // suffices on integer literals. | |
223e47cc LB |
278 | SkipIgnoredIntegerSuffix(CurPtr); |
279 | ||
1a4d82fc | 280 | return intToken(Result, Value); |
223e47cc LB |
281 | } |
282 | ||
283 | if (*CurPtr == 'b') { | |
284 | ++CurPtr; | |
285 | // See if we actually have "0b" as part of something like "jmp 0b\n" | |
286 | if (!isdigit(CurPtr[0])) { | |
287 | --CurPtr; | |
288 | StringRef Result(TokStart, CurPtr - TokStart); | |
289 | return AsmToken(AsmToken::Integer, Result, 0); | |
290 | } | |
291 | const char *NumStart = CurPtr; | |
292 | while (CurPtr[0] == '0' || CurPtr[0] == '1') | |
293 | ++CurPtr; | |
294 | ||
295 | // Requires at least one binary digit. | |
296 | if (CurPtr == NumStart) | |
297 | return ReturnError(TokStart, "invalid binary number"); | |
298 | ||
299 | StringRef Result(TokStart, CurPtr - TokStart); | |
300 | ||
1a4d82fc | 301 | APInt Value(128, 0, true); |
223e47cc LB |
302 | if (Result.substr(2).getAsInteger(2, Value)) |
303 | return ReturnError(TokStart, "invalid binary number"); | |
304 | ||
305 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL | |
306 | // suffixes on integer literals. | |
307 | SkipIgnoredIntegerSuffix(CurPtr); | |
308 | ||
1a4d82fc | 309 | return intToken(Result, Value); |
223e47cc LB |
310 | } |
311 | ||
312 | if (*CurPtr == 'x') { | |
313 | ++CurPtr; | |
314 | const char *NumStart = CurPtr; | |
315 | while (isxdigit(CurPtr[0])) | |
316 | ++CurPtr; | |
317 | ||
1a4d82fc JJ |
318 | // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be |
319 | // diagnosed by LexHexFloatLiteral). | |
320 | if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') | |
321 | return LexHexFloatLiteral(NumStart == CurPtr); | |
322 | ||
323 | // Otherwise requires at least one hex digit. | |
223e47cc LB |
324 | if (CurPtr == NumStart) |
325 | return ReturnError(CurPtr-2, "invalid hexadecimal number"); | |
326 | ||
1a4d82fc | 327 | APInt Result(128, 0); |
223e47cc LB |
328 | if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) |
329 | return ReturnError(TokStart, "invalid hexadecimal number"); | |
330 | ||
970d7e83 LB |
331 | // Consume the optional [hH]. |
332 | if (*CurPtr == 'h' || *CurPtr == 'H') | |
333 | ++CurPtr; | |
334 | ||
223e47cc LB |
335 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
336 | // suffixes on integer literals. | |
337 | SkipIgnoredIntegerSuffix(CurPtr); | |
338 | ||
1a4d82fc | 339 | return intToken(StringRef(TokStart, CurPtr - TokStart), Result); |
223e47cc LB |
340 | } |
341 | ||
970d7e83 | 342 | // Either octal or hexadecimal. |
1a4d82fc | 343 | APInt Value(128, 0, true); |
970d7e83 LB |
344 | unsigned Radix = doLookAhead(CurPtr, 8); |
345 | bool isHex = Radix == 16; | |
346 | StringRef Result(TokStart, CurPtr - TokStart); | |
347 | if (Result.getAsInteger(Radix, Value)) | |
348 | return ReturnError(TokStart, !isHex ? "invalid octal number" : | |
349 | "invalid hexdecimal number"); | |
350 | ||
351 | // Consume the [hH]. | |
352 | if (Radix == 16) | |
353 | ++CurPtr; | |
223e47cc LB |
354 | |
355 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL | |
356 | // suffixes on integer literals. | |
357 | SkipIgnoredIntegerSuffix(CurPtr); | |
358 | ||
1a4d82fc | 359 | return intToken(Result, Value); |
223e47cc LB |
360 | } |
361 | ||
362 | /// LexSingleQuote: Integer: 'b' | |
363 | AsmToken AsmLexer::LexSingleQuote() { | |
364 | int CurChar = getNextChar(); | |
365 | ||
366 | if (CurChar == '\\') | |
367 | CurChar = getNextChar(); | |
368 | ||
369 | if (CurChar == EOF) | |
370 | return ReturnError(TokStart, "unterminated single quote"); | |
371 | ||
372 | CurChar = getNextChar(); | |
373 | ||
374 | if (CurChar != '\'') | |
375 | return ReturnError(TokStart, "single quote way too long"); | |
376 | ||
377 | // The idea here being that 'c' is basically just an integral | |
378 | // constant. | |
379 | StringRef Res = StringRef(TokStart,CurPtr - TokStart); | |
380 | long long Value; | |
381 | ||
382 | if (Res.startswith("\'\\")) { | |
383 | char theChar = Res[2]; | |
384 | switch (theChar) { | |
385 | default: Value = theChar; break; | |
386 | case '\'': Value = '\''; break; | |
387 | case 't': Value = '\t'; break; | |
388 | case 'n': Value = '\n'; break; | |
389 | case 'b': Value = '\b'; break; | |
390 | } | |
391 | } else | |
392 | Value = TokStart[1]; | |
393 | ||
394 | return AsmToken(AsmToken::Integer, Res, Value); | |
395 | } | |
396 | ||
397 | ||
398 | /// LexQuote: String: "..." | |
399 | AsmToken AsmLexer::LexQuote() { | |
400 | int CurChar = getNextChar(); | |
401 | // TODO: does gas allow multiline string constants? | |
402 | while (CurChar != '"') { | |
403 | if (CurChar == '\\') { | |
404 | // Allow \", etc. | |
405 | CurChar = getNextChar(); | |
406 | } | |
407 | ||
408 | if (CurChar == EOF) | |
409 | return ReturnError(TokStart, "unterminated string constant"); | |
410 | ||
411 | CurChar = getNextChar(); | |
412 | } | |
413 | ||
414 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); | |
415 | } | |
416 | ||
417 | StringRef AsmLexer::LexUntilEndOfStatement() { | |
418 | TokStart = CurPtr; | |
419 | ||
1a4d82fc | 420 | while (!isAtStartOfComment(CurPtr) && // Start of line comment. |
223e47cc | 421 | !isAtStatementSeparator(CurPtr) && // End of statement marker. |
1a4d82fc JJ |
422 | *CurPtr != '\n' && *CurPtr != '\r' && |
423 | (*CurPtr != 0 || CurPtr != CurBuf.end())) { | |
223e47cc LB |
424 | ++CurPtr; |
425 | } | |
426 | return StringRef(TokStart, CurPtr-TokStart); | |
427 | } | |
428 | ||
429 | StringRef AsmLexer::LexUntilEndOfLine() { | |
430 | TokStart = CurPtr; | |
431 | ||
1a4d82fc JJ |
432 | while (*CurPtr != '\n' && *CurPtr != '\r' && |
433 | (*CurPtr != 0 || CurPtr != CurBuf.end())) { | |
223e47cc LB |
434 | ++CurPtr; |
435 | } | |
436 | return StringRef(TokStart, CurPtr-TokStart); | |
437 | } | |
438 | ||
1a4d82fc JJ |
439 | const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { |
440 | const char *SavedTokStart = TokStart; | |
441 | const char *SavedCurPtr = CurPtr; | |
442 | bool SavedAtStartOfLine = isAtStartOfLine; | |
443 | bool SavedSkipSpace = SkipSpace; | |
444 | ||
445 | std::string SavedErr = getErr(); | |
446 | SMLoc SavedErrLoc = getErrLoc(); | |
447 | ||
448 | SkipSpace = ShouldSkipSpace; | |
449 | AsmToken Token = LexToken(); | |
450 | ||
451 | SetError(SavedErrLoc, SavedErr); | |
452 | ||
453 | SkipSpace = SavedSkipSpace; | |
454 | isAtStartOfLine = SavedAtStartOfLine; | |
455 | CurPtr = SavedCurPtr; | |
456 | TokStart = SavedTokStart; | |
457 | ||
458 | return Token; | |
459 | } | |
460 | ||
461 | bool AsmLexer::isAtStartOfComment(const char *Ptr) { | |
462 | const char *CommentString = MAI.getCommentString(); | |
463 | ||
464 | if (CommentString[1] == '\0') | |
465 | return CommentString[0] == Ptr[0]; | |
466 | ||
467 | // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin | |
468 | if (CommentString[1] == '#') | |
469 | return CommentString[0] == Ptr[0]; | |
470 | ||
471 | return strncmp(Ptr, CommentString, strlen(CommentString)) == 0; | |
223e47cc LB |
472 | } |
473 | ||
474 | bool AsmLexer::isAtStatementSeparator(const char *Ptr) { | |
475 | return strncmp(Ptr, MAI.getSeparatorString(), | |
476 | strlen(MAI.getSeparatorString())) == 0; | |
477 | } | |
478 | ||
479 | AsmToken AsmLexer::LexToken() { | |
480 | TokStart = CurPtr; | |
481 | // This always consumes at least one character. | |
482 | int CurChar = getNextChar(); | |
483 | ||
1a4d82fc | 484 | if (isAtStartOfComment(TokStart)) { |
223e47cc LB |
485 | // If this comment starts with a '#', then return the Hash token and let |
486 | // the assembler parser see if it can be parsed as a cpp line filename | |
487 | // comment. We do this only if we are at the start of a line. | |
488 | if (CurChar == '#' && isAtStartOfLine) | |
489 | return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); | |
490 | isAtStartOfLine = true; | |
491 | return LexLineComment(); | |
492 | } | |
493 | if (isAtStatementSeparator(TokStart)) { | |
494 | CurPtr += strlen(MAI.getSeparatorString()) - 1; | |
495 | return AsmToken(AsmToken::EndOfStatement, | |
496 | StringRef(TokStart, strlen(MAI.getSeparatorString()))); | |
497 | } | |
498 | ||
499 | // If we're missing a newline at EOF, make sure we still get an | |
500 | // EndOfStatement token before the Eof token. | |
501 | if (CurChar == EOF && !isAtStartOfLine) { | |
502 | isAtStartOfLine = true; | |
503 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); | |
504 | } | |
505 | ||
506 | isAtStartOfLine = false; | |
507 | switch (CurChar) { | |
508 | default: | |
509 | // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* | |
510 | if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') | |
511 | return LexIdentifier(); | |
512 | ||
513 | // Unknown character, emit an error. | |
514 | return ReturnError(TokStart, "invalid character in input"); | |
515 | case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); | |
516 | case 0: | |
517 | case ' ': | |
518 | case '\t': | |
519 | if (SkipSpace) { | |
520 | // Ignore whitespace. | |
521 | return LexToken(); | |
522 | } else { | |
523 | int len = 1; | |
524 | while (*CurPtr==' ' || *CurPtr=='\t') { | |
525 | CurPtr++; | |
526 | len++; | |
527 | } | |
528 | return AsmToken(AsmToken::Space, StringRef(TokStart, len)); | |
529 | } | |
530 | case '\n': // FALL THROUGH. | |
531 | case '\r': | |
532 | isAtStartOfLine = true; | |
533 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); | |
534 | case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); | |
535 | case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); | |
536 | case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); | |
537 | case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); | |
538 | case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); | |
539 | case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); | |
540 | case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); | |
541 | case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); | |
542 | case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); | |
543 | case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); | |
544 | case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); | |
545 | case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); | |
546 | case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); | |
547 | case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); | |
548 | case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); | |
549 | case '=': | |
550 | if (*CurPtr == '=') | |
551 | return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); | |
552 | return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); | |
553 | case '|': | |
554 | if (*CurPtr == '|') | |
555 | return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); | |
556 | return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); | |
557 | case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); | |
558 | case '&': | |
559 | if (*CurPtr == '&') | |
560 | return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); | |
561 | return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); | |
562 | case '!': | |
563 | if (*CurPtr == '=') | |
564 | return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); | |
565 | return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); | |
566 | case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); | |
567 | case '/': return LexSlash(); | |
568 | case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); | |
569 | case '\'': return LexSingleQuote(); | |
570 | case '"': return LexQuote(); | |
571 | case '0': case '1': case '2': case '3': case '4': | |
572 | case '5': case '6': case '7': case '8': case '9': | |
573 | return LexDigit(); | |
574 | case '<': | |
575 | switch (*CurPtr) { | |
576 | case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, | |
577 | StringRef(TokStart, 2)); | |
578 | case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, | |
579 | StringRef(TokStart, 2)); | |
580 | case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, | |
581 | StringRef(TokStart, 2)); | |
582 | default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); | |
583 | } | |
584 | case '>': | |
585 | switch (*CurPtr) { | |
586 | case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, | |
587 | StringRef(TokStart, 2)); | |
588 | case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, | |
589 | StringRef(TokStart, 2)); | |
590 | default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); | |
591 | } | |
592 | ||
593 | // TODO: Quoted identifiers (objc methods etc) | |
594 | // local labels: [0-9][:] | |
595 | // Forward/backward labels: [0-9][fb] | |
596 | // Integers, fp constants, character constants. | |
597 | } | |
598 | } |