src/llvm/lib/AsmParser/LLLexer.cpp

   1 //===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // Implement the Lexer for .ll files.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "LLLexer.h"
  15 #include "llvm/ADT/StringExtras.h"
  16 #include "llvm/ADT/Twine.h"
  17 #include "llvm/AsmParser/Parser.h"
  18 #include "llvm/IR/DerivedTypes.h"
  19 #include "llvm/IR/Instruction.h"
  20 #include "llvm/IR/LLVMContext.h"
  21 #include "llvm/Support/ErrorHandling.h"
  22 #include "llvm/Support/MathExtras.h"
  23 #include "llvm/Support/MemoryBuffer.h"
  24 #include "llvm/Support/SourceMgr.h"
  25 #include "llvm/Support/raw_ostream.h"
  26 #include <cctype>
  27 #include <cstdio>
  28 #include <cstdlib>
  29 #include <cstring>
  30 using namespace llvm;
  31
  32 bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
  33   ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
  34   return true;
  35 }
  36
  37 void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
  38   SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
  39 }
  40
  41 //===----------------------------------------------------------------------===//
  42 // Helper functions.
  43 //===----------------------------------------------------------------------===//
  44
  45 // atoull - Convert an ascii string of decimal digits into the unsigned long
  46 // long representation... this does not have to do input error checking,
  47 // because we know that the input will be matched by a suitable regex...
  48 //
  49 uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
  50   uint64_t Result = 0;
  51   for (; Buffer != End; Buffer++) {
  52     uint64_t OldRes = Result;
  53     Result *= 10;
  54     Result += *Buffer-'0';
  55     if (Result < OldRes) {  // Uh, oh, overflow detected!!!
  56       Error("constant bigger than 64 bits detected!");
  57       return 0;
  58     }
  59   }
  60   return Result;
  61 }
  62
  63 uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
  64   uint64_t Result = 0;
  65   for (; Buffer != End; ++Buffer) {
  66     uint64_t OldRes = Result;
  67     Result *= 16;
  68     Result += hexDigitValue(*Buffer);
  69
  70     if (Result < OldRes) {   // Uh, oh, overflow detected!!!
  71       Error("constant bigger than 64 bits detected!");
  72       return 0;
  73     }
  74   }
  75   return Result;
  76 }
  77
  78 void LLLexer::HexToIntPair(const char *Buffer, const char *End,
  79                            uint64_t Pair[2]) {
  80   Pair[0] = 0;
  81   if (End - Buffer >= 16) {
  82     for (int i = 0; i < 16; i++, Buffer++) {
  83       assert(Buffer != End);
  84       Pair[0] *= 16;
  85       Pair[0] += hexDigitValue(*Buffer);
  86     }
  87   }
  88   Pair[1] = 0;
  89   for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
  90     Pair[1] *= 16;
  91     Pair[1] += hexDigitValue(*Buffer);
  92   }
  93   if (Buffer != End)
  94     Error("constant bigger than 128 bits detected!");
  95 }
  96
  97 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
  98 /// { low64, high16 } as usual for an APInt.
  99 void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
 100                            uint64_t Pair[2]) {
 101   Pair[1] = 0;
 102   for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
 103     assert(Buffer != End);
 104     Pair[1] *= 16;
 105     Pair[1] += hexDigitValue(*Buffer);
 106   }
 107   Pair[0] = 0;
 108   for (int i=0; i<16; i++, Buffer++) {
 109     Pair[0] *= 16;
 110     Pair[0] += hexDigitValue(*Buffer);
 111   }
 112   if (Buffer != End)
 113     Error("constant bigger than 128 bits detected!");
 114 }
 115
 116 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
 117 // appropriate character.
 118 static void UnEscapeLexed(std::string &Str) {
 119   if (Str.empty()) return;
 120
 121   char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
 122   char *BOut = Buffer;
 123   for (char *BIn = Buffer; BIn != EndBuffer; ) {
 124     if (BIn[0] == '\\') {
 125       if (BIn < EndBuffer-1 && BIn[1] == '\\') {
 126         *BOut++ = '\\'; // Two \ becomes one
 127         BIn += 2;
 128       } else if (BIn < EndBuffer-2 &&
 129                  isxdigit(static_cast<unsigned char>(BIn[1])) &&
 130                  isxdigit(static_cast<unsigned char>(BIn[2]))) {
 131         *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
 132         BIn += 3;                           // Skip over handled chars
 133         ++BOut;
 134       } else {
 135         *BOut++ = *BIn++;
 136       }
 137     } else {
 138       *BOut++ = *BIn++;
 139     }
 140   }
 141   Str.resize(BOut-Buffer);
 142 }
 143
 144 /// isLabelChar - Return true for [-a-zA-Z$._0-9].
 145 static bool isLabelChar(char C) {
 146   return isalnum(static_cast<unsigned char>(C)) || C == '-' || C == '$' ||
 147          C == '.' || C == '_';
 148 }
 149
 150
 151 /// isLabelTail - Return true if this pointer points to a valid end of a label.
 152 static const char *isLabelTail(const char *CurPtr) {
 153   while (1) {
 154     if (CurPtr[0] == ':') return CurPtr+1;
 155     if (!isLabelChar(CurPtr[0])) return nullptr;
 156     ++CurPtr;
 157   }
 158 }
 159
 160
 161
 162 //===----------------------------------------------------------------------===//
 163 // Lexer definition.
 164 //===----------------------------------------------------------------------===//
 165
 166 LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err,
 167                  LLVMContext &C)
 168   : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
 169   CurPtr = CurBuf.begin();
 170 }
 171
 172 int LLLexer::getNextChar() {
 173   char CurChar = *CurPtr++;
 174   switch (CurChar) {
 175   default: return (unsigned char)CurChar;
 176   case 0:
 177     // A nul character in the stream is either the end of the current buffer or
 178     // a random nul in the file.  Disambiguate that here.
 179     if (CurPtr-1 != CurBuf.end())
 180       return 0;  // Just whitespace.
 181
 182     // Otherwise, return end of file.
 183     --CurPtr;  // Another call to lex will return EOF again.
 184     return EOF;
 185   }
 186 }
 187
 188
 189 lltok::Kind LLLexer::LexToken() {
 190   TokStart = CurPtr;
 191
 192   int CurChar = getNextChar();
 193   switch (CurChar) {
 194   default:
 195     // Handle letters: [a-zA-Z_]
 196     if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
 197       return LexIdentifier();
 198
 199     return lltok::Error;
 200   case EOF: return lltok::Eof;
 201   case 0:
 202   case ' ':
 203   case '\t':
 204   case '\n':
 205   case '\r':
 206     // Ignore whitespace.
 207     return LexToken();
 208   case '+': return LexPositive();
 209   case '@': return LexAt();
 210   case '$': return LexDollar();
 211   case '%': return LexPercent();
 212   case '"': return LexQuote();
 213   case '.':
 214     if (const char *Ptr = isLabelTail(CurPtr)) {
 215       CurPtr = Ptr;
 216       StrVal.assign(TokStart, CurPtr-1);
 217       return lltok::LabelStr;
 218     }
 219     if (CurPtr[0] == '.' && CurPtr[1] == '.') {
 220       CurPtr += 2;
 221       return lltok::dotdotdot;
 222     }
 223     return lltok::Error;
 224   case ';':
 225     SkipLineComment();
 226     return LexToken();
 227   case '!': return LexExclaim();
 228   case '#': return LexHash();
 229   case '0': case '1': case '2': case '3': case '4':
 230   case '5': case '6': case '7': case '8': case '9':
 231   case '-':
 232     return LexDigitOrNegative();
 233   case '=': return lltok::equal;
 234   case '[': return lltok::lsquare;
 235   case ']': return lltok::rsquare;
 236   case '{': return lltok::lbrace;
 237   case '}': return lltok::rbrace;
 238   case '<': return lltok::less;
 239   case '>': return lltok::greater;
 240   case '(': return lltok::lparen;
 241   case ')': return lltok::rparen;
 242   case ',': return lltok::comma;
 243   case '*': return lltok::star;
 244   case '\\': return lltok::backslash;
 245   }
 246 }
 247
 248 void LLLexer::SkipLineComment() {
 249   while (1) {
 250     if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
 251       return;
 252   }
 253 }
 254
 255 /// LexAt - Lex all tokens that start with an @ character:
 256 ///   GlobalVar   @\"[^\"]*\"
 257 ///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
 258 ///   GlobalVarID @[0-9]+
 259 lltok::Kind LLLexer::LexAt() {
 260   return LexVar(lltok::GlobalVar, lltok::GlobalID);
 261 }
 262
 263 lltok::Kind LLLexer::LexDollar() {
 264   if (const char *Ptr = isLabelTail(TokStart)) {
 265     CurPtr = Ptr;
 266     StrVal.assign(TokStart, CurPtr - 1);
 267     return lltok::LabelStr;
 268   }
 269
 270   // Handle DollarStringConstant: $\"[^\"]*\"
 271   if (CurPtr[0] == '"') {
 272     ++CurPtr;
 273
 274     while (1) {
 275       int CurChar = getNextChar();
 276
 277       if (CurChar == EOF) {
 278         Error("end of file in COMDAT variable name");
 279         return lltok::Error;
 280       }
 281       if (CurChar == '"') {
 282         StrVal.assign(TokStart + 2, CurPtr - 1);
 283         UnEscapeLexed(StrVal);
 284         if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
 285           Error("Null bytes are not allowed in names");
 286           return lltok::Error;
 287         }
 288         return lltok::ComdatVar;
 289       }
 290     }
 291   }
 292
 293   // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
 294   if (ReadVarName())
 295     return lltok::ComdatVar;
 296
 297   return lltok::Error;
 298 }
 299
 300 /// ReadString - Read a string until the closing quote.
 301 lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
 302   const char *Start = CurPtr;
 303   while (1) {
 304     int CurChar = getNextChar();
 305
 306     if (CurChar == EOF) {
 307       Error("end of file in string constant");
 308       return lltok::Error;
 309     }
 310     if (CurChar == '"') {
 311       StrVal.assign(Start, CurPtr-1);
 312       UnEscapeLexed(StrVal);
 313       return kind;
 314     }
 315   }
 316 }
 317
 318 /// ReadVarName - Read the rest of a token containing a variable name.
 319 bool LLLexer::ReadVarName() {
 320   const char *NameStart = CurPtr;
 321   if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
 322       CurPtr[0] == '-' || CurPtr[0] == '$' ||
 323       CurPtr[0] == '.' || CurPtr[0] == '_') {
 324     ++CurPtr;
 325     while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
 326            CurPtr[0] == '-' || CurPtr[0] == '$' ||
 327            CurPtr[0] == '.' || CurPtr[0] == '_')
 328       ++CurPtr;
 329
 330     StrVal.assign(NameStart, CurPtr);
 331     return true;
 332   }
 333   return false;
 334 }
 335
 336 lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
 337   // Handle StringConstant: \"[^\"]*\"
 338   if (CurPtr[0] == '"') {
 339     ++CurPtr;
 340
 341     while (1) {
 342       int CurChar = getNextChar();
 343
 344       if (CurChar == EOF) {
 345         Error("end of file in global variable name");
 346         return lltok::Error;
 347       }
 348       if (CurChar == '"') {
 349         StrVal.assign(TokStart+2, CurPtr-1);
 350         UnEscapeLexed(StrVal);
 351         if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
 352           Error("Null bytes are not allowed in names");
 353           return lltok::Error;
 354         }
 355         return Var;
 356       }
 357     }
 358   }
 359
 360   // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
 361   if (ReadVarName())
 362     return Var;
 363
 364   // Handle VarID: [0-9]+
 365   if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
 366     for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
 367       /*empty*/;
 368
 369     uint64_t Val = atoull(TokStart+1, CurPtr);
 370     if ((unsigned)Val != Val)
 371       Error("invalid value number (too large)!");
 372     UIntVal = unsigned(Val);
 373     return VarID;
 374   }
 375   return lltok::Error;
 376 }
 377
 378 /// LexPercent - Lex all tokens that start with a % character:
 379 ///   LocalVar   ::= %\"[^\"]*\"
 380 ///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
 381 ///   LocalVarID ::= %[0-9]+
 382 lltok::Kind LLLexer::LexPercent() {
 383   return LexVar(lltok::LocalVar, lltok::LocalVarID);
 384 }
 385
 386 /// LexQuote - Lex all tokens that start with a " character:
 387 ///   QuoteLabel        "[^"]+":
 388 ///   StringConstant    "[^"]*"
 389 lltok::Kind LLLexer::LexQuote() {
 390   lltok::Kind kind = ReadString(lltok::StringConstant);
 391   if (kind == lltok::Error || kind == lltok::Eof)
 392     return kind;
 393
 394   if (CurPtr[0] == ':') {
 395     ++CurPtr;
 396     if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
 397       Error("Null bytes are not allowed in names");
 398       kind = lltok::Error;
 399     } else {
 400       kind = lltok::LabelStr;
 401     }
 402   }
 403
 404   return kind;
 405 }
 406
 407 /// LexExclaim:
 408 ///    !foo
 409 ///    !
 410 lltok::Kind LLLexer::LexExclaim() {
 411   // Lex a metadata name as a MetadataVar.
 412   if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
 413       CurPtr[0] == '-' || CurPtr[0] == '$' ||
 414       CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
 415     ++CurPtr;
 416     while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
 417            CurPtr[0] == '-' || CurPtr[0] == '$' ||
 418            CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
 419       ++CurPtr;
 420
 421     StrVal.assign(TokStart+1, CurPtr);   // Skip !
 422     UnEscapeLexed(StrVal);
 423     return lltok::MetadataVar;
 424   }
 425   return lltok::exclaim;
 426 }
 427
 428 /// LexHash - Lex all tokens that start with a # character:
 429 ///    AttrGrpID ::= #[0-9]+
 430 lltok::Kind LLLexer::LexHash() {
 431   // Handle AttrGrpID: #[0-9]+
 432   if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
 433     for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
 434       /*empty*/;
 435
 436     uint64_t Val = atoull(TokStart+1, CurPtr);
 437     if ((unsigned)Val != Val)
 438       Error("invalid value number (too large)!");
 439     UIntVal = unsigned(Val);
 440     return lltok::AttrGrpID;
 441   }
 442
 443   return lltok::Error;
 444 }
 445
 446 /// LexIdentifier: Handle several related productions:
 447 ///    Label           [-a-zA-Z$._0-9]+:
 448 ///    IntegerType     i[0-9]+
 449 ///    Keyword         sdiv, float, ...
 450 ///    HexIntConstant  [us]0x[0-9A-Fa-f]+
 451 lltok::Kind LLLexer::LexIdentifier() {
 452   const char *StartChar = CurPtr;
 453   const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
 454   const char *KeywordEnd = nullptr;
 455
 456   for (; isLabelChar(*CurPtr); ++CurPtr) {
 457     // If we decide this is an integer, remember the end of the sequence.
 458     if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
 459       IntEnd = CurPtr;
 460     if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
 461         *CurPtr != '_')
 462       KeywordEnd = CurPtr;
 463   }
 464
 465   // If we stopped due to a colon, this really is a label.
 466   if (*CurPtr == ':') {
 467     StrVal.assign(StartChar-1, CurPtr++);
 468     return lltok::LabelStr;
 469   }
 470
 471   // Otherwise, this wasn't a label.  If this was valid as an integer type,
 472   // return it.
 473   if (!IntEnd) IntEnd = CurPtr;
 474   if (IntEnd != StartChar) {
 475     CurPtr = IntEnd;
 476     uint64_t NumBits = atoull(StartChar, CurPtr);
 477     if (NumBits < IntegerType::MIN_INT_BITS ||
 478         NumBits > IntegerType::MAX_INT_BITS) {
 479       Error("bitwidth for integer type out of range!");
 480       return lltok::Error;
 481     }
 482     TyVal = IntegerType::get(Context, NumBits);
 483     return lltok::Type;
 484   }
 485
 486   // Otherwise, this was a letter sequence.  See which keyword this is.
 487   if (!KeywordEnd) KeywordEnd = CurPtr;
 488   CurPtr = KeywordEnd;
 489   --StartChar;
 490   unsigned Len = CurPtr-StartChar;
 491 #define KEYWORD(STR)                                                    \
 492   do {                                                                  \
 493     if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR)))  \
 494       return lltok::kw_##STR;                                           \
 495   } while (0)
 496
 497   KEYWORD(true);    KEYWORD(false);
 498   KEYWORD(declare); KEYWORD(define);
 499   KEYWORD(global);  KEYWORD(constant);
 500
 501   KEYWORD(private);
 502   KEYWORD(internal);
 503   KEYWORD(available_externally);
 504   KEYWORD(linkonce);
 505   KEYWORD(linkonce_odr);
 506   KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
 507   KEYWORD(weak_odr);
 508   KEYWORD(appending);
 509   KEYWORD(dllimport);
 510   KEYWORD(dllexport);
 511   KEYWORD(common);
 512   KEYWORD(default);
 513   KEYWORD(hidden);
 514   KEYWORD(protected);
 515   KEYWORD(unnamed_addr);
 516   KEYWORD(externally_initialized);
 517   KEYWORD(extern_weak);
 518   KEYWORD(external);
 519   KEYWORD(thread_local);
 520   KEYWORD(localdynamic);
 521   KEYWORD(initialexec);
 522   KEYWORD(localexec);
 523   KEYWORD(zeroinitializer);
 524   KEYWORD(undef);
 525   KEYWORD(null);
 526   KEYWORD(to);
 527   KEYWORD(tail);
 528   KEYWORD(musttail);
 529   KEYWORD(target);
 530   KEYWORD(triple);
 531   KEYWORD(unwind);
 532   KEYWORD(deplibs);             // FIXME: Remove in 4.0.
 533   KEYWORD(datalayout);
 534   KEYWORD(volatile);
 535   KEYWORD(atomic);
 536   KEYWORD(unordered);
 537   KEYWORD(monotonic);
 538   KEYWORD(acquire);
 539   KEYWORD(release);
 540   KEYWORD(acq_rel);
 541   KEYWORD(seq_cst);
 542   KEYWORD(singlethread);
 543
 544   KEYWORD(nnan);
 545   KEYWORD(ninf);
 546   KEYWORD(nsz);
 547   KEYWORD(arcp);
 548   KEYWORD(fast);
 549   KEYWORD(nuw);
 550   KEYWORD(nsw);
 551   KEYWORD(exact);
 552   KEYWORD(inbounds);
 553   KEYWORD(align);
 554   KEYWORD(addrspace);
 555   KEYWORD(section);
 556   KEYWORD(alias);
 557   KEYWORD(module);
 558   KEYWORD(asm);
 559   KEYWORD(sideeffect);
 560   KEYWORD(alignstack);
 561   KEYWORD(inteldialect);
 562   KEYWORD(gc);
 563   KEYWORD(prefix);
 564   KEYWORD(prologue);
 565
 566   KEYWORD(ccc);
 567   KEYWORD(fastcc);
 568   KEYWORD(coldcc);
 569   KEYWORD(x86_stdcallcc);
 570   KEYWORD(x86_fastcallcc);
 571   KEYWORD(x86_thiscallcc);
 572   KEYWORD(x86_vectorcallcc);
 573   KEYWORD(arm_apcscc);
 574   KEYWORD(arm_aapcscc);
 575   KEYWORD(arm_aapcs_vfpcc);
 576   KEYWORD(msp430_intrcc);
 577   KEYWORD(ptx_kernel);
 578   KEYWORD(ptx_device);
 579   KEYWORD(spir_kernel);
 580   KEYWORD(spir_func);
 581   KEYWORD(intel_ocl_bicc);
 582   KEYWORD(x86_64_sysvcc);
 583   KEYWORD(x86_64_win64cc);
 584   KEYWORD(webkit_jscc);
 585   KEYWORD(anyregcc);
 586   KEYWORD(preserve_mostcc);
 587   KEYWORD(preserve_allcc);
 588   KEYWORD(ghccc);
 589
 590   KEYWORD(cc);
 591   KEYWORD(c);
 592
 593   KEYWORD(attributes);
 594
 595   KEYWORD(alwaysinline);
 596   KEYWORD(builtin);
 597   KEYWORD(byval);
 598   KEYWORD(inalloca);
 599   KEYWORD(cold);
 600   KEYWORD(dereferenceable);
 601   KEYWORD(inlinehint);
 602   KEYWORD(inreg);
 603   KEYWORD(jumptable);
 604   KEYWORD(minsize);
 605   KEYWORD(naked);
 606   KEYWORD(nest);
 607   KEYWORD(noalias);
 608   KEYWORD(nobuiltin);
 609   KEYWORD(nocapture);
 610   KEYWORD(noduplicate);
 611   KEYWORD(noimplicitfloat);
 612   KEYWORD(noinline);
 613   KEYWORD(nonlazybind);
 614   KEYWORD(nonnull);
 615   KEYWORD(noredzone);
 616   KEYWORD(noreturn);
 617   KEYWORD(nounwind);
 618   KEYWORD(optnone);
 619   KEYWORD(optsize);
 620   KEYWORD(readnone);
 621   KEYWORD(readonly);
 622   KEYWORD(returned);
 623   KEYWORD(returns_twice);
 624   KEYWORD(signext);
 625   KEYWORD(sret);
 626   KEYWORD(ssp);
 627   KEYWORD(sspreq);
 628   KEYWORD(sspstrong);
 629   KEYWORD(sanitize_address);
 630   KEYWORD(sanitize_thread);
 631   KEYWORD(sanitize_memory);
 632   KEYWORD(uwtable);
 633   KEYWORD(zeroext);
 634
 635   KEYWORD(type);
 636   KEYWORD(opaque);
 637
 638   KEYWORD(comdat);
 639
 640   // Comdat types
 641   KEYWORD(any);
 642   KEYWORD(exactmatch);
 643   KEYWORD(largest);
 644   KEYWORD(noduplicates);
 645   KEYWORD(samesize);
 646
 647   KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
 648   KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
 649   KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
 650   KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
 651
 652   KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
 653   KEYWORD(umin);
 654
 655   KEYWORD(x);
 656   KEYWORD(blockaddress);
 657
 658   // Metadata types.
 659   KEYWORD(distinct);
 660
 661   // Use-list order directives.
 662   KEYWORD(uselistorder);
 663   KEYWORD(uselistorder_bb);
 664
 665   KEYWORD(personality);
 666   KEYWORD(cleanup);
 667   KEYWORD(catch);
 668   KEYWORD(filter);
 669 #undef KEYWORD
 670
 671   // Keywords for types.
 672 #define TYPEKEYWORD(STR, LLVMTY) \
 673   if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
 674     TyVal = LLVMTY; return lltok::Type; }
 675   TYPEKEYWORD("void",      Type::getVoidTy(Context));
 676   TYPEKEYWORD("half",      Type::getHalfTy(Context));
 677   TYPEKEYWORD("float",     Type::getFloatTy(Context));
 678   TYPEKEYWORD("double",    Type::getDoubleTy(Context));
 679   TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
 680   TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
 681   TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
 682   TYPEKEYWORD("label",     Type::getLabelTy(Context));
 683   TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
 684   TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
 685 #undef TYPEKEYWORD
 686
 687   // Keywords for instructions.
 688 #define INSTKEYWORD(STR, Enum) \
 689   if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
 690     UIntVal = Instruction::Enum; return lltok::kw_##STR; }
 691
 692   INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
 693   INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
 694   INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
 695   INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
 696   INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
 697   INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
 698   INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
 699   INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
 700
 701   INSTKEYWORD(phi,         PHI);
 702   INSTKEYWORD(call,        Call);
 703   INSTKEYWORD(trunc,       Trunc);
 704   INSTKEYWORD(zext,        ZExt);
 705   INSTKEYWORD(sext,        SExt);
 706   INSTKEYWORD(fptrunc,     FPTrunc);
 707   INSTKEYWORD(fpext,       FPExt);
 708   INSTKEYWORD(uitofp,      UIToFP);
 709   INSTKEYWORD(sitofp,      SIToFP);
 710   INSTKEYWORD(fptoui,      FPToUI);
 711   INSTKEYWORD(fptosi,      FPToSI);
 712   INSTKEYWORD(inttoptr,    IntToPtr);
 713   INSTKEYWORD(ptrtoint,    PtrToInt);
 714   INSTKEYWORD(bitcast,     BitCast);
 715   INSTKEYWORD(addrspacecast, AddrSpaceCast);
 716   INSTKEYWORD(select,      Select);
 717   INSTKEYWORD(va_arg,      VAArg);
 718   INSTKEYWORD(ret,         Ret);
 719   INSTKEYWORD(br,          Br);
 720   INSTKEYWORD(switch,      Switch);
 721   INSTKEYWORD(indirectbr,  IndirectBr);
 722   INSTKEYWORD(invoke,      Invoke);
 723   INSTKEYWORD(resume,      Resume);
 724   INSTKEYWORD(unreachable, Unreachable);
 725
 726   INSTKEYWORD(alloca,      Alloca);
 727   INSTKEYWORD(load,        Load);
 728   INSTKEYWORD(store,       Store);
 729   INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
 730   INSTKEYWORD(atomicrmw,   AtomicRMW);
 731   INSTKEYWORD(fence,       Fence);
 732   INSTKEYWORD(getelementptr, GetElementPtr);
 733
 734   INSTKEYWORD(extractelement, ExtractElement);
 735   INSTKEYWORD(insertelement,  InsertElement);
 736   INSTKEYWORD(shufflevector,  ShuffleVector);
 737   INSTKEYWORD(extractvalue,   ExtractValue);
 738   INSTKEYWORD(insertvalue,    InsertValue);
 739   INSTKEYWORD(landingpad,     LandingPad);
 740 #undef INSTKEYWORD
 741
 742   // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
 743   // the CFE to avoid forcing it to deal with 64-bit numbers.
 744   if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
 745       TokStart[1] == '0' && TokStart[2] == 'x' &&
 746       isxdigit(static_cast<unsigned char>(TokStart[3]))) {
 747     int len = CurPtr-TokStart-3;
 748     uint32_t bits = len * 4;
 749     StringRef HexStr(TokStart + 3, len);
 750     if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) {
 751       // Bad token, return it as an error.
 752       CurPtr = TokStart+3;
 753       return lltok::Error;
 754     }
 755     APInt Tmp(bits, HexStr, 16);
 756     uint32_t activeBits = Tmp.getActiveBits();
 757     if (activeBits > 0 && activeBits < bits)
 758       Tmp = Tmp.trunc(activeBits);
 759     APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
 760     return lltok::APSInt;
 761   }
 762
 763   // If this is "cc1234", return this as just "cc".
 764   if (TokStart[0] == 'c' && TokStart[1] == 'c') {
 765     CurPtr = TokStart+2;
 766     return lltok::kw_cc;
 767   }
 768
 769   // Finally, if this isn't known, return an error.
 770   CurPtr = TokStart+1;
 771   return lltok::Error;
 772 }
 773
 774
 775 /// Lex0x: Handle productions that start with 0x, knowing that it matches and
 776 /// that this is not a label:
 777 ///    HexFPConstant     0x[0-9A-Fa-f]+
 778 ///    HexFP80Constant   0xK[0-9A-Fa-f]+
 779 ///    HexFP128Constant  0xL[0-9A-Fa-f]+
 780 ///    HexPPC128Constant 0xM[0-9A-Fa-f]+
 781 ///    HexHalfConstant   0xH[0-9A-Fa-f]+
 782 lltok::Kind LLLexer::Lex0x() {
 783   CurPtr = TokStart + 2;
 784
 785   char Kind;
 786   if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
 787     Kind = *CurPtr++;
 788   } else {
 789     Kind = 'J';
 790   }
 791
 792   if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
 793     // Bad token, return it as an error.
 794     CurPtr = TokStart+1;
 795     return lltok::Error;
 796   }
 797
 798   while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
 799     ++CurPtr;
 800
 801   if (Kind == 'J') {
 802     // HexFPConstant - Floating point constant represented in IEEE format as a
 803     // hexadecimal number for when exponential notation is not precise enough.
 804     // Half, Float, and double only.
 805     APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
 806     return lltok::APFloat;
 807   }
 808
 809   uint64_t Pair[2];
 810   switch (Kind) {
 811   default: llvm_unreachable("Unknown kind!");
 812   case 'K':
 813     // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
 814     FP80HexToIntPair(TokStart+3, CurPtr, Pair);
 815     APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
 816     return lltok::APFloat;
 817   case 'L':
 818     // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
 819     HexToIntPair(TokStart+3, CurPtr, Pair);
 820     APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
 821     return lltok::APFloat;
 822   case 'M':
 823     // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
 824     HexToIntPair(TokStart+3, CurPtr, Pair);
 825     APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
 826     return lltok::APFloat;
 827   case 'H':
 828     APFloatVal = APFloat(APFloat::IEEEhalf,
 829                          APInt(16,HexIntToVal(TokStart+3, CurPtr)));
 830     return lltok::APFloat;
 831   }
 832 }
 833
 834 /// LexIdentifier: Handle several related productions:
 835 ///    Label             [-a-zA-Z$._0-9]+:
 836 ///    NInteger          -[0-9]+
 837 ///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
 838 ///    PInteger          [0-9]+
 839 ///    HexFPConstant     0x[0-9A-Fa-f]+
 840 ///    HexFP80Constant   0xK[0-9A-Fa-f]+
 841 ///    HexFP128Constant  0xL[0-9A-Fa-f]+
 842 ///    HexPPC128Constant 0xM[0-9A-Fa-f]+
 843 lltok::Kind LLLexer::LexDigitOrNegative() {
 844   // If the letter after the negative is not a number, this is probably a label.
 845   if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
 846       !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
 847     // Okay, this is not a number after the -, it's probably a label.
 848     if (const char *End = isLabelTail(CurPtr)) {
 849       StrVal.assign(TokStart, End-1);
 850       CurPtr = End;
 851       return lltok::LabelStr;
 852     }
 853
 854     return lltok::Error;
 855   }
 856
 857   // At this point, it is either a label, int or fp constant.
 858
 859   // Skip digits, we have at least one.
 860   for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
 861     /*empty*/;
 862
 863   // Check to see if this really is a label afterall, e.g. "-1:".
 864   if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
 865     if (const char *End = isLabelTail(CurPtr)) {
 866       StrVal.assign(TokStart, End-1);
 867       CurPtr = End;
 868       return lltok::LabelStr;
 869     }
 870   }
 871
 872   // If the next character is a '.', then it is a fp value, otherwise its
 873   // integer.
 874   if (CurPtr[0] != '.') {
 875     if (TokStart[0] == '0' && TokStart[1] == 'x')
 876       return Lex0x();
 877     unsigned Len = CurPtr-TokStart;
 878     uint32_t numBits = ((Len * 64) / 19) + 2;
 879     APInt Tmp(numBits, StringRef(TokStart, Len), 10);
 880     if (TokStart[0] == '-') {
 881       uint32_t minBits = Tmp.getMinSignedBits();
 882       if (minBits > 0 && minBits < numBits)
 883         Tmp = Tmp.trunc(minBits);
 884       APSIntVal = APSInt(Tmp, false);
 885     } else {
 886       uint32_t activeBits = Tmp.getActiveBits();
 887       if (activeBits > 0 && activeBits < numBits)
 888         Tmp = Tmp.trunc(activeBits);
 889       APSIntVal = APSInt(Tmp, true);
 890     }
 891     return lltok::APSInt;
 892   }
 893
 894   ++CurPtr;
 895
 896   // Skip over [0-9]*([eE][-+]?[0-9]+)?
 897   while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
 898
 899   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
 900     if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
 901         ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
 902           isdigit(static_cast<unsigned char>(CurPtr[2])))) {
 903       CurPtr += 2;
 904       while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
 905     }
 906   }
 907
 908   APFloatVal = APFloat(std::atof(TokStart));
 909   return lltok::APFloat;
 910 }
 911
 912 ///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
 913 lltok::Kind LLLexer::LexPositive() {
 914   // If the letter after the negative is a number, this is probably not a
 915   // label.
 916   if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
 917     return lltok::Error;
 918
 919   // Skip digits.
 920   for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
 921     /*empty*/;
 922
 923   // At this point, we need a '.'.
 924   if (CurPtr[0] != '.') {
 925     CurPtr = TokStart+1;
 926     return lltok::Error;
 927   }
 928
 929   ++CurPtr;
 930
 931   // Skip over [0-9]*([eE][-+]?[0-9]+)?
 932   while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
 933
 934   if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
 935     if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
 936         ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
 937         isdigit(static_cast<unsigned char>(CurPtr[2])))) {
 938       CurPtr += 2;
 939       while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
 940     }
 941   }
 942
 943   APFloatVal = APFloat(std::atof(TokStart));
 944   return lltok::APFloat;
 945 }