]> git.proxmox.com Git - rustc.git/blame - src/llvm/utils/FileCheck/FileCheck.cpp
Imported Upstream version 1.0.0+dfsg1
[rustc.git] / src / llvm / utils / FileCheck / FileCheck.cpp
CommitLineData
223e47cc
LB
1//===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// FileCheck does a line-by line check of a file that validates whether it
11// contains the expected content. This is useful for regression tests etc.
12//
13// This program exits with an error status of 2 on error, exit status of 0 if
14// the file matched the expected contents, and exit status of 1 if it did not
15// contain the expected contents.
16//
17//===----------------------------------------------------------------------===//
18
970d7e83
LB
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringExtras.h"
21#include "llvm/ADT/StringMap.h"
1a4d82fc 22#include "llvm/ADT/StringSet.h"
223e47cc
LB
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/MemoryBuffer.h"
25#include "llvm/Support/PrettyStackTrace.h"
26#include "llvm/Support/Regex.h"
970d7e83 27#include "llvm/Support/Signals.h"
223e47cc
LB
28#include "llvm/Support/SourceMgr.h"
29#include "llvm/Support/raw_ostream.h"
223e47cc 30#include <algorithm>
1a4d82fc 31#include <cctype>
970d7e83
LB
32#include <map>
33#include <string>
1a4d82fc 34#include <system_error>
970d7e83 35#include <vector>
223e47cc
LB
36using namespace llvm;
37
38static cl::opt<std::string>
39CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41static cl::opt<std::string>
42InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
44
1a4d82fc
JJ
45static cl::list<std::string>
46CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
223e47cc
LB
48
49static cl::opt<bool>
50NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
52
1a4d82fc
JJ
53static cl::list<std::string> ImplicitCheckNot(
54 "implicit-check-not",
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
59
60static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
64
65typedef cl::list<std::string>::const_iterator prefix_iterator;
66
223e47cc
LB
67//===----------------------------------------------------------------------===//
68// Pattern Handling Code.
69//===----------------------------------------------------------------------===//
70
1a4d82fc
JJ
namespace Check {
  /// The kinds of check directive a Pattern or CheckString can carry.
  enum CheckType {
    CheckNone = 0,  ///< No directive.
    CheckPlain,     ///< "PREFIX:" directive.
    CheckNext,      ///< "PREFIX-NEXT:" directive.
    CheckNot,       ///< "PREFIX-NOT:" directive.
    CheckDAG,       ///< "PREFIX-DAG:" directive.
    CheckLabel,     ///< "PREFIX-LABEL:" directive.

    /// CheckEOF - When set, this pattern only matches the end of file. This is
    /// used for trailing CHECK-NOTs.
    CheckEOF
  };
}
85
223e47cc
LB
86class Pattern {
87 SMLoc PatternLoc;
88
1a4d82fc 89 Check::CheckType CheckTy;
223e47cc
LB
90
91 /// FixedStr - If non-empty, this pattern is a fixed string match with the
92 /// specified fixed string.
93 StringRef FixedStr;
94
95 /// RegEx - If non-empty, this is a regex pattern.
96 std::string RegExStr;
97
970d7e83
LB
98 /// \brief Contains the number of line this pattern is in.
99 unsigned LineNumber;
100
223e47cc
LB
101 /// VariableUses - Entries in this vector map to uses of a variable in the
102 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
103 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
104 /// value of bar at offset 3.
105 std::vector<std::pair<StringRef, unsigned> > VariableUses;
106
970d7e83
LB
107 /// VariableDefs - Maps definitions of variables to their parenthesized
108 /// capture numbers.
109 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
110 std::map<StringRef, unsigned> VariableDefs;
223e47cc
LB
111
112public:
113
1a4d82fc
JJ
114 Pattern(Check::CheckType Ty)
115 : CheckTy(Ty) { }
116
117 /// getLoc - Return the location in source code.
118 SMLoc getLoc() const { return PatternLoc; }
223e47cc 119
1a4d82fc
JJ
120 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
121 /// which prefix is being matched, SM provides the SourceMgr used for error
122 /// reports, and LineNumber is the line number in the input file from which
123 /// the pattern string was read. Returns true in case of an error, false
124 /// otherwise.
125 bool ParsePattern(StringRef PatternStr,
126 StringRef Prefix,
127 SourceMgr &SM,
128 unsigned LineNumber);
223e47cc
LB
129
130 /// Match - Match the pattern string against the input buffer Buffer. This
131 /// returns the position that is matched or npos if there is no match. If
132 /// there is a match, the size of the matched string is returned in MatchLen.
133 ///
134 /// The VariableTable StringMap provides the current values of filecheck
135 /// variables and is updated if this match defines new values.
136 size_t Match(StringRef Buffer, size_t &MatchLen,
137 StringMap<StringRef> &VariableTable) const;
138
139 /// PrintFailureInfo - Print additional information about a failure to match
140 /// involving this pattern.
141 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
142 const StringMap<StringRef> &VariableTable) const;
143
1a4d82fc
JJ
144 bool hasVariable() const { return !(VariableUses.empty() &&
145 VariableDefs.empty()); }
146
147 Check::CheckType getCheckTy() const { return CheckTy; }
148
223e47cc 149private:
970d7e83
LB
150 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
151 void AddBackrefToRegEx(unsigned BackrefNum);
223e47cc
LB
152
153 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
154 /// matching this pattern at the start of \arg Buffer; a distance of zero
155 /// should correspond to a perfect match.
156 unsigned ComputeMatchDistance(StringRef Buffer,
157 const StringMap<StringRef> &VariableTable) const;
970d7e83
LB
158
159 /// \brief Evaluates expression and stores the result to \p Value.
160 /// \return true on success. false when the expression has invalid syntax.
161 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
162
163 /// \brief Finds the closing sequence of a regex variable usage or
164 /// definition. Str has to point in the beginning of the definition
165 /// (right after the opening sequence).
166 /// \return offset of the closing sequence within Str, or npos if it was not
167 /// found.
1a4d82fc 168 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
223e47cc
LB
169};
170
171
1a4d82fc
JJ
172bool Pattern::ParsePattern(StringRef PatternStr,
173 StringRef Prefix,
174 SourceMgr &SM,
970d7e83
LB
175 unsigned LineNumber) {
176 this->LineNumber = LineNumber;
223e47cc
LB
177 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
178
179 // Ignore trailing whitespace.
180 while (!PatternStr.empty() &&
181 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
182 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
183
184 // Check that there is something on the line.
185 if (PatternStr.empty()) {
186 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
187 "found empty check string with prefix '" +
1a4d82fc 188 Prefix + ":'");
223e47cc
LB
189 return true;
190 }
191
192 // Check to see if this is a fixed string, or if it has regex pieces.
193 if (PatternStr.size() < 2 ||
194 (PatternStr.find("{{") == StringRef::npos &&
195 PatternStr.find("[[") == StringRef::npos)) {
196 FixedStr = PatternStr;
197 return false;
198 }
199
200 // Paren value #0 is for the fully matched string. Any new parenthesized
201 // values add from there.
202 unsigned CurParen = 1;
203
204 // Otherwise, there is at least one regex piece. Build up the regex pattern
205 // by escaping scary characters in fixed strings, building up one big regex.
206 while (!PatternStr.empty()) {
207 // RegEx matches.
208 if (PatternStr.startswith("{{")) {
970d7e83 209 // This is the start of a regex match. Scan for the }}.
223e47cc
LB
210 size_t End = PatternStr.find("}}");
211 if (End == StringRef::npos) {
212 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
213 SourceMgr::DK_Error,
214 "found start of regex string with no end '}}'");
215 return true;
216 }
217
218 // Enclose {{}} patterns in parens just like [[]] even though we're not
219 // capturing the result for any purpose. This is required in case the
220 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
221 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
222 RegExStr += '(';
223 ++CurParen;
224
225 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
226 return true;
227 RegExStr += ')';
228
229 PatternStr = PatternStr.substr(End+2);
230 continue;
231 }
232
233 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
234 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
235 // second form is [[foo]] which is a reference to foo. The variable name
236 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
237 // it. This is to catch some common errors.
238 if (PatternStr.startswith("[[")) {
970d7e83
LB
239 // Find the closing bracket pair ending the match. End is going to be an
240 // offset relative to the beginning of the match string.
1a4d82fc 241 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
970d7e83 242
223e47cc
LB
243 if (End == StringRef::npos) {
244 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
245 SourceMgr::DK_Error,
246 "invalid named regex reference, no ]] found");
247 return true;
248 }
249
970d7e83
LB
250 StringRef MatchStr = PatternStr.substr(2, End);
251 PatternStr = PatternStr.substr(End+4);
223e47cc
LB
252
253 // Get the regex name (e.g. "foo").
254 size_t NameEnd = MatchStr.find(':');
255 StringRef Name = MatchStr.substr(0, NameEnd);
256
257 if (Name.empty()) {
258 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
259 "invalid name in named regex: empty name");
260 return true;
261 }
262
970d7e83
LB
263 // Verify that the name/expression is well formed. FileCheck currently
264 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
265 // is relaxed, more strict check is performed in \c EvaluateExpression.
266 bool IsExpression = false;
267 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
268 if (i == 0 && Name[i] == '@') {
269 if (NameEnd != StringRef::npos) {
270 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
271 SourceMgr::DK_Error,
272 "invalid name in named regex definition");
273 return true;
274 }
275 IsExpression = true;
276 continue;
277 }
278 if (Name[i] != '_' && !isalnum(Name[i]) &&
279 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
223e47cc
LB
280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
281 SourceMgr::DK_Error, "invalid name in named regex");
282 return true;
283 }
970d7e83 284 }
223e47cc
LB
285
286 // Name can't start with a digit.
970d7e83 287 if (isdigit(static_cast<unsigned char>(Name[0]))) {
223e47cc
LB
288 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
289 "invalid name in named regex");
290 return true;
291 }
292
293 // Handle [[foo]].
294 if (NameEnd == StringRef::npos) {
970d7e83
LB
295 // Handle variables that were defined earlier on the same line by
296 // emitting a backreference.
297 if (VariableDefs.find(Name) != VariableDefs.end()) {
298 unsigned VarParenNum = VariableDefs[Name];
299 if (VarParenNum < 1 || VarParenNum > 9) {
300 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
301 SourceMgr::DK_Error,
302 "Can't back-reference more than 9 variables");
303 return true;
304 }
305 AddBackrefToRegEx(VarParenNum);
306 } else {
307 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
308 }
223e47cc
LB
309 continue;
310 }
311
312 // Handle [[foo:.*]].
970d7e83 313 VariableDefs[Name] = CurParen;
223e47cc
LB
314 RegExStr += '(';
315 ++CurParen;
316
317 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
318 return true;
319
320 RegExStr += ')';
321 }
322
323 // Handle fixed string matches.
324 // Find the end, which is the start of the next regex.
325 size_t FixedMatchEnd = PatternStr.find("{{");
326 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
1a4d82fc 327 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
223e47cc 328 PatternStr = PatternStr.substr(FixedMatchEnd);
223e47cc
LB
329 }
330
331 return false;
332}
333
970d7e83 334bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
223e47cc 335 SourceMgr &SM) {
970d7e83 336 Regex R(RS);
223e47cc
LB
337 std::string Error;
338 if (!R.isValid(Error)) {
970d7e83 339 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
223e47cc
LB
340 "invalid regex: " + Error);
341 return true;
342 }
343
970d7e83 344 RegExStr += RS.str();
223e47cc
LB
345 CurParen += R.getNumMatches();
346 return false;
347}
348
970d7e83
LB
349void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
350 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
351 std::string Backref = std::string("\\") +
352 std::string(1, '0' + BackrefNum);
353 RegExStr += Backref;
354}
355
356bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
357 // The only supported expression is @LINE([\+-]\d+)?
358 if (!Expr.startswith("@LINE"))
359 return false;
360 Expr = Expr.substr(StringRef("@LINE").size());
361 int Offset = 0;
362 if (!Expr.empty()) {
363 if (Expr[0] == '+')
364 Expr = Expr.substr(1);
365 else if (Expr[0] != '-')
366 return false;
367 if (Expr.getAsInteger(10, Offset))
368 return false;
369 }
370 Value = llvm::itostr(LineNumber + Offset);
371 return true;
372}
373
223e47cc
LB
374/// Match - Match the pattern string against the input buffer Buffer. This
375/// returns the position that is matched or npos if there is no match. If
376/// there is a match, the size of the matched string is returned in MatchLen.
377size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
378 StringMap<StringRef> &VariableTable) const {
379 // If this is the EOF pattern, match it immediately.
1a4d82fc 380 if (CheckTy == Check::CheckEOF) {
223e47cc
LB
381 MatchLen = 0;
382 return Buffer.size();
383 }
384
385 // If this is a fixed string pattern, just match it now.
386 if (!FixedStr.empty()) {
387 MatchLen = FixedStr.size();
388 return Buffer.find(FixedStr);
389 }
390
391 // Regex match.
392
393 // If there are variable uses, we need to create a temporary string with the
394 // actual value.
395 StringRef RegExToMatch = RegExStr;
396 std::string TmpStr;
397 if (!VariableUses.empty()) {
398 TmpStr = RegExStr;
399
400 unsigned InsertOffset = 0;
401 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
223e47cc 402 std::string Value;
970d7e83
LB
403
404 if (VariableUses[i].first[0] == '@') {
405 if (!EvaluateExpression(VariableUses[i].first, Value))
406 return StringRef::npos;
407 } else {
408 StringMap<StringRef>::iterator it =
409 VariableTable.find(VariableUses[i].first);
410 // If the variable is undefined, return an error.
411 if (it == VariableTable.end())
412 return StringRef::npos;
413
1a4d82fc
JJ
414 // Look up the value and escape it so that we can put it into the regex.
415 Value += Regex::escape(it->second);
970d7e83 416 }
223e47cc
LB
417
418 // Plop it into the regex at the adjusted offset.
419 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
420 Value.begin(), Value.end());
421 InsertOffset += Value.size();
422 }
423
424 // Match the newly constructed regex.
425 RegExToMatch = TmpStr;
426 }
427
428
429 SmallVector<StringRef, 4> MatchInfo;
430 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
431 return StringRef::npos;
432
433 // Successful regex match.
434 assert(!MatchInfo.empty() && "Didn't get any match");
435 StringRef FullMatch = MatchInfo[0];
436
437 // If this defines any variables, remember their values.
970d7e83
LB
438 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
439 E = VariableDefs.end();
440 I != E; ++I) {
441 assert(I->second < MatchInfo.size() && "Internal paren error");
442 VariableTable[I->first] = MatchInfo[I->second];
223e47cc
LB
443 }
444
445 MatchLen = FullMatch.size();
446 return FullMatch.data()-Buffer.data();
447}
448
449unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
450 const StringMap<StringRef> &VariableTable) const {
451 // Just compute the number of matching characters. For regular expressions, we
452 // just compare against the regex itself and hope for the best.
453 //
454 // FIXME: One easy improvement here is have the regex lib generate a single
455 // example regular expression which matches, and use that as the example
456 // string.
457 StringRef ExampleString(FixedStr);
458 if (ExampleString.empty())
459 ExampleString = RegExStr;
460
461 // Only compare up to the first line in the buffer, or the string size.
462 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
463 BufferPrefix = BufferPrefix.split('\n').first;
464 return BufferPrefix.edit_distance(ExampleString);
465}
466
467void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
468 const StringMap<StringRef> &VariableTable) const{
469 // If this was a regular expression using variables, print the current
470 // variable values.
471 if (!VariableUses.empty()) {
472 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
223e47cc
LB
473 SmallString<256> Msg;
474 raw_svector_ostream OS(Msg);
970d7e83
LB
475 StringRef Var = VariableUses[i].first;
476 if (Var[0] == '@') {
477 std::string Value;
478 if (EvaluateExpression(Var, Value)) {
479 OS << "with expression \"";
480 OS.write_escaped(Var) << "\" equal to \"";
481 OS.write_escaped(Value) << "\"";
482 } else {
483 OS << "uses incorrect expression \"";
484 OS.write_escaped(Var) << "\"";
485 }
223e47cc 486 } else {
970d7e83
LB
487 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
488
489 // Check for undefined variable references.
490 if (it == VariableTable.end()) {
491 OS << "uses undefined variable \"";
492 OS.write_escaped(Var) << "\"";
493 } else {
494 OS << "with variable \"";
495 OS.write_escaped(Var) << "\" equal to \"";
496 OS.write_escaped(it->second) << "\"";
497 }
223e47cc
LB
498 }
499
500 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
501 OS.str());
502 }
503 }
504
505 // Attempt to find the closest/best fuzzy match. Usually an error happens
506 // because some string in the output didn't exactly match. In these cases, we
507 // would like to show the user a best guess at what "should have" matched, to
508 // save them having to actually check the input manually.
509 size_t NumLinesForward = 0;
510 size_t Best = StringRef::npos;
511 double BestQuality = 0;
512
513 // Use an arbitrary 4k limit on how far we will search.
514 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
515 if (Buffer[i] == '\n')
516 ++NumLinesForward;
517
518 // Patterns have leading whitespace stripped, so skip whitespace when
519 // looking for something which looks like a pattern.
520 if (Buffer[i] == ' ' || Buffer[i] == '\t')
521 continue;
522
523 // Compute the "quality" of this match as an arbitrary combination of the
524 // match distance and the number of lines skipped to get to this match.
525 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
526 double Quality = Distance + (NumLinesForward / 100.);
527
528 if (Quality < BestQuality || Best == StringRef::npos) {
529 Best = i;
530 BestQuality = Quality;
531 }
532 }
533
534 // Print the "possible intended match here" line if we found something
535 // reasonable and not equal to what we showed in the "scanning from here"
536 // line.
537 if (Best && Best != StringRef::npos && BestQuality < 50) {
538 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
539 SourceMgr::DK_Note, "possible intended match here");
540
541 // FIXME: If we wanted to be really friendly we would show why the match
542 // failed, as it can be hard to spot simple one character differences.
543 }
544}
545
1a4d82fc 546size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
970d7e83
LB
547 // Offset keeps track of the current offset within the input Str
548 size_t Offset = 0;
549 // [...] Nesting depth
550 size_t BracketDepth = 0;
551
552 while (!Str.empty()) {
553 if (Str.startswith("]]") && BracketDepth == 0)
554 return Offset;
555 if (Str[0] == '\\') {
556 // Backslash escapes the next char within regexes, so skip them both.
557 Str = Str.substr(2);
558 Offset += 2;
559 } else {
560 switch (Str[0]) {
561 default:
562 break;
563 case '[':
564 BracketDepth++;
565 break;
566 case ']':
1a4d82fc
JJ
567 if (BracketDepth == 0) {
568 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
569 SourceMgr::DK_Error,
570 "missing closing \"]\" for regex variable");
571 exit(1);
572 }
970d7e83
LB
573 BracketDepth--;
574 break;
575 }
576 Str = Str.substr(1);
577 Offset++;
578 }
579 }
580
581 return StringRef::npos;
582}
583
584
223e47cc
LB
585//===----------------------------------------------------------------------===//
586// Check Strings.
587//===----------------------------------------------------------------------===//
588
589/// CheckString - This is a check that we found in the input file.
590struct CheckString {
591 /// Pat - The pattern to match.
592 Pattern Pat;
593
1a4d82fc
JJ
594 /// Prefix - Which prefix name this check matched.
595 StringRef Prefix;
596
223e47cc
LB
597 /// Loc - The location in the match file that the check string was specified.
598 SMLoc Loc;
599
1a4d82fc
JJ
600 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
601 /// as opposed to a CHECK: directive.
602 Check::CheckType CheckTy;
223e47cc 603
1a4d82fc 604 /// DagNotStrings - These are all of the strings that are disallowed from
223e47cc
LB
605 /// occurring between this match string and the previous one (or start of
606 /// file).
1a4d82fc
JJ
607 std::vector<Pattern> DagNotStrings;
608
609
610 CheckString(const Pattern &P,
611 StringRef S,
612 SMLoc L,
613 Check::CheckType Ty)
614 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
223e47cc 615
1a4d82fc
JJ
616 /// Check - Match check string and its "not strings" and/or "dag strings".
617 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
618 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
619
620 /// CheckNext - Verify there is a single line in the given buffer.
621 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
622
623 /// CheckNot - Verify there's no "not strings" in the given buffer.
624 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
625 const std::vector<const Pattern *> &NotStrings,
626 StringMap<StringRef> &VariableTable) const;
627
628 /// CheckDag - Match "dag strings" and their mixed "not strings".
629 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
630 std::vector<const Pattern *> &NotStrings,
631 StringMap<StringRef> &VariableTable) const;
223e47cc
LB
632};
633
970d7e83
LB
634/// Canonicalize whitespaces in the input file. Line endings are replaced
635/// with UNIX-style '\n'.
636///
637/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
638/// characters to a single space.
1a4d82fc
JJ
639static std::unique_ptr<MemoryBuffer>
640CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
641 bool PreserveHorizontal) {
223e47cc
LB
642 SmallString<128> NewFile;
643 NewFile.reserve(MB->getBufferSize());
644
645 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
646 Ptr != End; ++Ptr) {
647 // Eliminate trailing dosish \r.
648 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
649 continue;
650 }
651
1a4d82fc 652 // If current char is not a horizontal whitespace or if horizontal
970d7e83
LB
653 // whitespace canonicalization is disabled, dump it to output as is.
654 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
223e47cc
LB
655 NewFile.push_back(*Ptr);
656 continue;
657 }
658
659 // Otherwise, add one space and advance over neighboring space.
660 NewFile.push_back(' ');
661 while (Ptr+1 != End &&
662 (Ptr[1] == ' ' || Ptr[1] == '\t'))
663 ++Ptr;
664 }
665
1a4d82fc
JJ
666 return std::unique_ptr<MemoryBuffer>(
667 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
668}
669
/// Return true if \p c can be part of a word: an alphanumeric character,
/// '-' or '_'.
static bool IsPartOfWord(char c) {
  // isalnum requires a value representable as unsigned char (or EOF); plain
  // char may be negative for non-ASCII bytes, so cast before classifying.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
673
674// Get the size of the prefix extension.
675static size_t CheckTypeSize(Check::CheckType Ty) {
676 switch (Ty) {
677 case Check::CheckNone:
678 return 0;
679
680 case Check::CheckPlain:
681 return sizeof(":") - 1;
682
683 case Check::CheckNext:
684 return sizeof("-NEXT:") - 1;
685
686 case Check::CheckNot:
687 return sizeof("-NOT:") - 1;
688
689 case Check::CheckDAG:
690 return sizeof("-DAG:") - 1;
691
692 case Check::CheckLabel:
693 return sizeof("-LABEL:") - 1;
694
695 case Check::CheckEOF:
696 llvm_unreachable("Should not be using EOF size");
697 }
698
699 llvm_unreachable("Bad check type");
700}
701
702static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
703 char NextChar = Buffer[Prefix.size()];
704
705 // Verify that the : is present after the prefix.
706 if (NextChar == ':')
707 return Check::CheckPlain;
708
709 if (NextChar != '-')
710 return Check::CheckNone;
711
712 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
713 if (Rest.startswith("NEXT:"))
714 return Check::CheckNext;
715
716 if (Rest.startswith("NOT:"))
717 return Check::CheckNot;
718
719 if (Rest.startswith("DAG:"))
720 return Check::CheckDAG;
721
722 if (Rest.startswith("LABEL:"))
723 return Check::CheckLabel;
724
725 return Check::CheckNone;
726}
727
728// From the given position, find the next character after the word.
729static size_t SkipWord(StringRef Str, size_t Loc) {
730 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
731 ++Loc;
732 return Loc;
733}
734
735// Try to find the first match in buffer for any prefix. If a valid match is
736// found, return that prefix and set its type and location. If there are almost
737// matches (e.g. the actual prefix string is found, but is not an actual check
738// string), but no valid match, return an empty string and set the position to
739// resume searching from. If no partial matches are found, return an empty
740// string and the location will be StringRef::npos. If one prefix is a substring
741// of another, the maximal match should be found. e.g. if "A" and "AA" are
742// prefixes then AA-CHECK: should match the second one.
743static StringRef FindFirstCandidateMatch(StringRef &Buffer,
744 Check::CheckType &CheckTy,
745 size_t &CheckLoc) {
746 StringRef FirstPrefix;
747 size_t FirstLoc = StringRef::npos;
748 size_t SearchLoc = StringRef::npos;
749 Check::CheckType FirstTy = Check::CheckNone;
750
751 CheckTy = Check::CheckNone;
752 CheckLoc = StringRef::npos;
753
754 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
755 I != E; ++I) {
756 StringRef Prefix(*I);
757 size_t PrefixLoc = Buffer.find(Prefix);
758
759 if (PrefixLoc == StringRef::npos)
760 continue;
761
762 // Track where we are searching for invalid prefixes that look almost right.
763 // We need to only advance to the first partial match on the next attempt
764 // since a partial match could be a substring of a later, valid prefix.
765 // Need to skip to the end of the word, otherwise we could end up
766 // matching a prefix in a substring later.
767 if (PrefixLoc < SearchLoc)
768 SearchLoc = SkipWord(Buffer, PrefixLoc);
769
770 // We only want to find the first match to avoid skipping some.
771 if (PrefixLoc > FirstLoc)
772 continue;
773 // If one matching check-prefix is a prefix of another, choose the
774 // longer one.
775 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
776 continue;
777
778 StringRef Rest = Buffer.drop_front(PrefixLoc);
779 // Make sure we have actually found the prefix, and not a word containing
780 // it. This should also prevent matching the wrong prefix when one is a
781 // substring of another.
782 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
783 FirstTy = Check::CheckNone;
784 else
785 FirstTy = FindCheckType(Rest, Prefix);
786
787 FirstLoc = PrefixLoc;
788 FirstPrefix = Prefix;
789 }
790
791 // If the first prefix is invalid, we should continue the search after it.
792 if (FirstTy == Check::CheckNone) {
793 CheckLoc = SearchLoc;
794 return "";
795 }
223e47cc 796
1a4d82fc
JJ
797 CheckTy = FirstTy;
798 CheckLoc = FirstLoc;
799 return FirstPrefix;
223e47cc
LB
800}
801
1a4d82fc
JJ
802static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
803 unsigned &LineNumber,
804 Check::CheckType &CheckTy,
805 size_t &CheckLoc) {
806 while (!Buffer.empty()) {
807 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
808 // If we found a real match, we are done.
809 if (!Prefix.empty()) {
810 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
811 return Prefix;
812 }
813
814 // We didn't find any almost matches either, we are also done.
815 if (CheckLoc == StringRef::npos)
816 return StringRef();
817
818 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
819
820 // Advance to the last possible match we found and try again.
821 Buffer = Buffer.drop_front(CheckLoc + 1);
822 }
823
824 return StringRef();
825}
223e47cc
LB
826
827/// ReadCheckFile - Read the check file, which specifies the sequence of
828/// expected strings. The strings are added to the CheckStrings vector.
970d7e83 829/// Returns true in case of an error, false otherwise.
223e47cc
LB
830static bool ReadCheckFile(SourceMgr &SM,
831 std::vector<CheckString> &CheckStrings) {
1a4d82fc
JJ
832 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
833 MemoryBuffer::getFileOrSTDIN(CheckFilename);
834 if (std::error_code EC = FileOrErr.getError()) {
835 errs() << "Could not open check file '" << CheckFilename
836 << "': " << EC.message() << '\n';
223e47cc
LB
837 return true;
838 }
223e47cc
LB
839
840 // If we want to canonicalize whitespace, strip excess whitespace from the
970d7e83 841 // buffer containing the CHECK lines. Remove DOS style line endings.
1a4d82fc
JJ
842 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
843 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
223e47cc
LB
844
845 // Find all instances of CheckPrefix followed by : in the file.
846 StringRef Buffer = F->getBuffer();
1a4d82fc
JJ
847
848 SM.AddNewSourceBuffer(std::move(F), SMLoc());
849
850 std::vector<Pattern> ImplicitNegativeChecks;
851 for (const auto &PatternString : ImplicitCheckNot) {
852 // Create a buffer with fake command line content in order to display the
853 // command line option responsible for the specific implicit CHECK-NOT.
854 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
855 std::string Suffix = "'";
856 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
857 Prefix + PatternString + Suffix, "command line");
858
859 StringRef PatternInBuffer =
860 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
861 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
862
863 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
864 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
865 "IMPLICIT-CHECK", SM, 0);
866 }
867
868
869 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
223e47cc 870
970d7e83
LB
871 // LineNumber keeps track of the line on which CheckPrefix instances are
872 // found.
873 unsigned LineNumber = 1;
874
223e47cc 875 while (1) {
1a4d82fc
JJ
876 Check::CheckType CheckTy;
877 size_t PrefixLoc;
878
879 // See if a prefix occurs in the memory buffer.
880 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
881 LineNumber,
882 CheckTy,
883 PrefixLoc);
884 if (UsedPrefix.empty())
223e47cc
LB
885 break;
886
1a4d82fc 887 Buffer = Buffer.drop_front(PrefixLoc);
223e47cc 888
1a4d82fc
JJ
889 // Location to use for error messages.
890 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
223e47cc 891
1a4d82fc
JJ
892 // PrefixLoc is to the start of the prefix. Skip to the end.
893 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
223e47cc 894
1a4d82fc
JJ
895 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
896 // leading and trailing whitespace.
223e47cc
LB
897 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
898
899 // Scan ahead to the end of line.
900 size_t EOL = Buffer.find_first_of("\n\r");
901
902 // Remember the location of the start of the pattern, for diagnostics.
903 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
904
905 // Parse the pattern.
1a4d82fc
JJ
906 Pattern P(CheckTy);
907 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
223e47cc
LB
908 return true;
909
1a4d82fc
JJ
910 // Verify that CHECK-LABEL lines do not define or use variables
911 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
912 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
913 SourceMgr::DK_Error,
914 "found '" + UsedPrefix + "-LABEL:'"
915 " with variable definition or use");
916 return true;
917 }
918
223e47cc
LB
919 Buffer = Buffer.substr(EOL);
920
223e47cc 921 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
1a4d82fc
JJ
922 if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) {
923 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
223e47cc 924 SourceMgr::DK_Error,
1a4d82fc
JJ
925 "found '" + UsedPrefix + "-NEXT:' without previous '"
926 + UsedPrefix + ": line");
223e47cc
LB
927 return true;
928 }
929
1a4d82fc
JJ
930 // Handle CHECK-DAG/-NOT.
931 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
932 DagNotMatches.push_back(P);
223e47cc
LB
933 continue;
934 }
935
223e47cc
LB
936 // Okay, add the string we captured to the output vector and move on.
937 CheckStrings.push_back(CheckString(P,
1a4d82fc 938 UsedPrefix,
223e47cc 939 PatternLoc,
1a4d82fc
JJ
940 CheckTy));
941 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
942 DagNotMatches = ImplicitNegativeChecks;
223e47cc
LB
943 }
944
1a4d82fc
JJ
945 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
946 // prefix as a filler for the error message.
947 if (!DagNotMatches.empty()) {
948 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
949 CheckPrefixes[0],
223e47cc 950 SMLoc::getFromPointer(Buffer.data()),
1a4d82fc
JJ
951 Check::CheckEOF));
952 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
223e47cc
LB
953 }
954
955 if (CheckStrings.empty()) {
1a4d82fc
JJ
956 errs() << "error: no check strings found with prefix"
957 << (CheckPrefixes.size() > 1 ? "es " : " ");
958 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
959 StringRef Prefix(CheckPrefixes[I]);
960 errs() << '\'' << Prefix << ":'";
961 if (I != N - 1)
962 errs() << ", ";
963 }
964
965 errs() << '\n';
223e47cc
LB
966 return true;
967 }
968
969 return false;
970}
971
1a4d82fc
JJ
972static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
973 const Pattern &Pat, StringRef Buffer,
223e47cc
LB
974 StringMap<StringRef> &VariableTable) {
975 // Otherwise, we have an error, emit an error message.
1a4d82fc 976 SM.PrintMessage(Loc, SourceMgr::DK_Error,
223e47cc
LB
977 "expected string not found in input");
978
979 // Print the "scanning from here" line. If the current position is at the
980 // end of a line, advance to the start of the next line.
981 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
982
983 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
984 "scanning from here");
985
986 // Allow the pattern to print additional information if desired.
1a4d82fc
JJ
987 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
988}
989
990static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
991 StringRef Buffer,
992 StringMap<StringRef> &VariableTable) {
993 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
223e47cc
LB
994}
995
996/// CountNumNewlinesBetween - Count the number of newlines in the specified
997/// range.
1a4d82fc
JJ
998static unsigned CountNumNewlinesBetween(StringRef Range,
999 const char *&FirstNewLine) {
223e47cc
LB
1000 unsigned NumNewLines = 0;
1001 while (1) {
1002 // Scan for newline.
1003 Range = Range.substr(Range.find_first_of("\n\r"));
1004 if (Range.empty()) return NumNewLines;
1005
1006 ++NumNewLines;
1007
1008 // Handle \n\r and \r\n as a single newline.
1009 if (Range.size() > 1 &&
1010 (Range[1] == '\n' || Range[1] == '\r') &&
1011 (Range[0] != Range[1]))
1012 Range = Range.substr(1);
1013 Range = Range.substr(1);
1a4d82fc
JJ
1014
1015 if (NumNewLines == 1)
1016 FirstNewLine = Range.begin();
1017 }
1018}
1019
1020size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1021 bool IsLabelScanMode, size_t &MatchLen,
1022 StringMap<StringRef> &VariableTable) const {
1023 size_t LastPos = 0;
1024 std::vector<const Pattern *> NotStrings;
1025
1026 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1027 // bounds; we have not processed variable definitions within the bounded block
1028 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1029 // over the block again (including the last CHECK-LABEL) in normal mode.
1030 if (!IsLabelScanMode) {
1031 // Match "dag strings" (with mixed "not strings" if any).
1032 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1033 if (LastPos == StringRef::npos)
1034 return StringRef::npos;
1035 }
1036
1037 // Match itself from the last position after matching CHECK-DAG.
1038 StringRef MatchBuffer = Buffer.substr(LastPos);
1039 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1040 if (MatchPos == StringRef::npos) {
1041 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1042 return StringRef::npos;
1043 }
1044 MatchPos += LastPos;
1045
1046 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1047 // or CHECK-NOT
1048 if (!IsLabelScanMode) {
1049 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1050
1051 // If this check is a "CHECK-NEXT", verify that the previous match was on
1052 // the previous line (i.e. that there is one newline between them).
1053 if (CheckNext(SM, SkippedRegion))
1054 return StringRef::npos;
1055
1056 // If this match had "not strings", verify that they don't exist in the
1057 // skipped region.
1058 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1059 return StringRef::npos;
1060 }
1061
1062 return MatchPos;
1063}
1064
1065bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1066 if (CheckTy != Check::CheckNext)
1067 return false;
1068
1069 // Count the number of newlines between the previous match and this one.
1070 assert(Buffer.data() !=
1071 SM.getMemoryBuffer(
1072 SM.FindBufferContainingLoc(
1073 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1074 "CHECK-NEXT can't be the first check in a file");
1075
1076 const char *FirstNewLine = nullptr;
1077 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1078
1079 if (NumNewLines == 0) {
1080 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1081 "-NEXT: is on the same line as previous match");
1082 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1083 SourceMgr::DK_Note, "'next' match was here");
1084 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1085 "previous match ended here");
1086 return true;
1087 }
1088
1089 if (NumNewLines != 1) {
1090 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1091 "-NEXT: is not on the line after the previous match");
1092 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1093 SourceMgr::DK_Note, "'next' match was here");
1094 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1095 "previous match ended here");
1096 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1097 "non-matching line after previous match is here");
1098 return true;
1099 }
1100
1101 return false;
1102}
1103
1104bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1105 const std::vector<const Pattern *> &NotStrings,
1106 StringMap<StringRef> &VariableTable) const {
1107 for (unsigned ChunkNo = 0, e = NotStrings.size();
1108 ChunkNo != e; ++ChunkNo) {
1109 const Pattern *Pat = NotStrings[ChunkNo];
1110 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1111
1112 size_t MatchLen = 0;
1113 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1114
1115 if (Pos == StringRef::npos) continue;
1116
1117 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1118 SourceMgr::DK_Error,
1119 Prefix + "-NOT: string occurred!");
1120 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1121 Prefix + "-NOT: pattern specified here");
1122 return true;
1123 }
1124
1125 return false;
1126}
1127
1128size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1129 std::vector<const Pattern *> &NotStrings,
1130 StringMap<StringRef> &VariableTable) const {
1131 if (DagNotStrings.empty())
1132 return 0;
1133
1134 size_t LastPos = 0;
1135 size_t StartPos = LastPos;
1136
1137 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1138 ChunkNo != e; ++ChunkNo) {
1139 const Pattern &Pat = DagNotStrings[ChunkNo];
1140
1141 assert((Pat.getCheckTy() == Check::CheckDAG ||
1142 Pat.getCheckTy() == Check::CheckNot) &&
1143 "Invalid CHECK-DAG or CHECK-NOT!");
1144
1145 if (Pat.getCheckTy() == Check::CheckNot) {
1146 NotStrings.push_back(&Pat);
1147 continue;
1148 }
1149
1150 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1151
1152 size_t MatchLen = 0, MatchPos;
1153
1154 // CHECK-DAG always matches from the start.
1155 StringRef MatchBuffer = Buffer.substr(StartPos);
1156 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1157 // With a group of CHECK-DAGs, a single mismatching means the match on
1158 // that group of CHECK-DAGs fails immediately.
1159 if (MatchPos == StringRef::npos) {
1160 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1161 return StringRef::npos;
1162 }
1163 // Re-calc it as the offset relative to the start of the original string.
1164 MatchPos += StartPos;
1165
1166 if (!NotStrings.empty()) {
1167 if (MatchPos < LastPos) {
1168 // Reordered?
1169 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1170 SourceMgr::DK_Error,
1171 Prefix + "-DAG: found a match of CHECK-DAG"
1172 " reordering across a CHECK-NOT");
1173 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1174 SourceMgr::DK_Note,
1175 Prefix + "-DAG: the farthest match of CHECK-DAG"
1176 " is found here");
1177 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1178 Prefix + "-NOT: the crossed pattern specified"
1179 " here");
1180 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1181 Prefix + "-DAG: the reordered pattern specified"
1182 " here");
1183 return StringRef::npos;
1184 }
1185 // All subsequent CHECK-DAGs should be matched from the farthest
1186 // position of all precedent CHECK-DAGs (including this one.)
1187 StartPos = LastPos;
1188 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1189 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1190 // region.
1191 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1192 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1193 return StringRef::npos;
1194 // Clear "not strings".
1195 NotStrings.clear();
1196 }
1197
1198 // Update the last position with CHECK-DAG matches.
1199 LastPos = std::max(MatchPos + MatchLen, LastPos);
223e47cc 1200 }
1a4d82fc
JJ
1201
1202 return LastPos;
1203}
1204
1205// A check prefix must contain only alphanumeric, hyphens and underscores.
1206static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1207 Regex Validator("^[a-zA-Z0-9_-]*$");
1208 return Validator.match(CheckPrefix);
1209}
1210
1211static bool ValidateCheckPrefixes() {
1212 StringSet<> PrefixSet;
1213
1214 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1215 I != E; ++I) {
1216 StringRef Prefix(*I);
1217
1218 // Reject empty prefixes.
1219 if (Prefix == "")
1220 return false;
1221
85aaf69f 1222 if (!PrefixSet.insert(Prefix).second)
1a4d82fc
JJ
1223 return false;
1224
1225 if (!ValidateCheckPrefix(Prefix))
1226 return false;
1227 }
1228
1229 return true;
1230}
1231
1232// I don't think there's a way to specify an initial value for cl::list,
1233// so if nothing was specified, add the default
1234static void AddCheckPrefixIfNeeded() {
1235 if (CheckPrefixes.empty())
1236 CheckPrefixes.push_back("CHECK");
223e47cc
LB
1237}
1238
1239int main(int argc, char **argv) {
1240 sys::PrintStackTraceOnErrorSignal();
1241 PrettyStackTraceProgram X(argc, argv);
1242 cl::ParseCommandLineOptions(argc, argv);
1243
1a4d82fc
JJ
1244 if (!ValidateCheckPrefixes()) {
1245 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1246 "start with a letter and contain only alphanumeric characters, "
1247 "hyphens and underscores\n";
1248 return 2;
1249 }
1250
1251 AddCheckPrefixIfNeeded();
1252
223e47cc
LB
1253 SourceMgr SM;
1254
1255 // Read the expected strings from the check file.
1256 std::vector<CheckString> CheckStrings;
1257 if (ReadCheckFile(SM, CheckStrings))
1258 return 2;
1259
1260 // Open the file to check and add it to SourceMgr.
1a4d82fc
JJ
1261 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1262 MemoryBuffer::getFileOrSTDIN(InputFilename);
1263 if (std::error_code EC = FileOrErr.getError()) {
1264 errs() << "Could not open input file '" << InputFilename
1265 << "': " << EC.message() << '\n';
970d7e83 1266 return 2;
223e47cc 1267 }
1a4d82fc 1268 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
223e47cc 1269
1a4d82fc 1270 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
223e47cc 1271 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
970d7e83 1272 return 2;
223e47cc 1273 }
1a4d82fc 1274
223e47cc 1275 // Remove duplicate spaces in the input file if requested.
970d7e83 1276 // Remove DOS style line endings.
1a4d82fc
JJ
1277 std::unique_ptr<MemoryBuffer> F =
1278 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
223e47cc
LB
1279
1280 // Check that we have all of the expected strings, in order, in the input
1281 // file.
1282 StringRef Buffer = F->getBuffer();
1283
1a4d82fc 1284 SM.AddNewSourceBuffer(std::move(F), SMLoc());
223e47cc 1285
1a4d82fc
JJ
1286 /// VariableTable - This holds all the current filecheck variables.
1287 StringMap<StringRef> VariableTable;
223e47cc 1288
1a4d82fc 1289 bool hasError = false;
223e47cc 1290
1a4d82fc 1291 unsigned i = 0, j = 0, e = CheckStrings.size();
223e47cc 1292
1a4d82fc
JJ
1293 while (true) {
1294 StringRef CheckRegion;
1295 if (j == e) {
1296 CheckRegion = Buffer;
1297 } else {
1298 const CheckString &CheckLabelStr = CheckStrings[j];
1299 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1300 ++j;
1301 continue;
223e47cc
LB
1302 }
1303
1a4d82fc
JJ
1304 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1305 size_t MatchLabelLen = 0;
1306 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1307 MatchLabelLen, VariableTable);
1308 if (MatchLabelPos == StringRef::npos) {
1309 hasError = true;
1310 break;
223e47cc 1311 }
1a4d82fc
JJ
1312
1313 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1314 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1315 ++j;
223e47cc
LB
1316 }
1317
1a4d82fc
JJ
1318 for ( ; i != j; ++i) {
1319 const CheckString &CheckStr = CheckStrings[i];
1320
1321 // Check each string within the scanned region, including a second check
1322 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
223e47cc 1323 size_t MatchLen = 0;
1a4d82fc
JJ
1324 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1325 VariableTable);
1326
1327 if (MatchPos == StringRef::npos) {
1328 hasError = true;
1329 i = j;
1330 break;
1331 }
223e47cc 1332
1a4d82fc
JJ
1333 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1334 }
223e47cc 1335
1a4d82fc
JJ
1336 if (j == e)
1337 break;
223e47cc
LB
1338 }
1339
1a4d82fc 1340 return hasError ? 1 : 0;
223e47cc 1341}