]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===- FileCheck.cpp - Check that File's Contents match what is expected --===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // FileCheck does a line-by-line check of a file that validates whether it | |
11 | // contains the expected content. This is useful for regression tests etc. | |
12 | // | |
13 | // This program exits with an error status of 2 on error, exit status of 0 if | |
14 | // the file matched the expected contents, and exit status of 1 if it did not | |
15 | // contain the expected contents. | |
16 | // | |
17 | //===----------------------------------------------------------------------===// | |
18 | ||
970d7e83 LB |
19 | #include "llvm/ADT/SmallString.h" |
20 | #include "llvm/ADT/StringExtras.h" | |
21 | #include "llvm/ADT/StringMap.h" | |
1a4d82fc | 22 | #include "llvm/ADT/StringSet.h" |
223e47cc LB |
23 | #include "llvm/Support/CommandLine.h" |
24 | #include "llvm/Support/MemoryBuffer.h" | |
25 | #include "llvm/Support/PrettyStackTrace.h" | |
26 | #include "llvm/Support/Regex.h" | |
970d7e83 | 27 | #include "llvm/Support/Signals.h" |
223e47cc LB |
28 | #include "llvm/Support/SourceMgr.h" |
29 | #include "llvm/Support/raw_ostream.h" | |
223e47cc | 30 | #include <algorithm> |
1a4d82fc | 31 | #include <cctype> |
970d7e83 LB |
32 | #include <map> |
33 | #include <string> | |
1a4d82fc | 34 | #include <system_error> |
970d7e83 | 35 | #include <vector> |
223e47cc LB |
36 | using namespace llvm; |
37 | ||
38 | static cl::opt<std::string> | |
39 | CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required); | |
40 | ||
41 | static cl::opt<std::string> | |
42 | InputFilename("input-file", cl::desc("File to check (defaults to stdin)"), | |
43 | cl::init("-"), cl::value_desc("filename")); | |
44 | ||
1a4d82fc JJ |
45 | static cl::list<std::string> |
46 | CheckPrefixes("check-prefix", | |
47 | cl::desc("Prefix to use from check file (defaults to 'CHECK')")); | |
223e47cc LB |
48 | |
49 | static cl::opt<bool> | |
50 | NoCanonicalizeWhiteSpace("strict-whitespace", | |
51 | cl::desc("Do not treat all horizontal whitespace as equivalent")); | |
52 | ||
1a4d82fc JJ |
53 | static cl::list<std::string> ImplicitCheckNot( |
54 | "implicit-check-not", | |
55 | cl::desc("Add an implicit negative check with this pattern to every\n" | |
56 | "positive check. This can be used to ensure that no instances of\n" | |
57 | "this pattern occur which are not matched by a positive pattern"), | |
58 | cl::value_desc("pattern")); | |
59 | ||
60 | static cl::opt<bool> AllowEmptyInput( | |
61 | "allow-empty", cl::init(false), | |
62 | cl::desc("Allow the input file to be empty. This is useful when making\n" | |
63 | "checks that some error message does not occur, for example.")); | |
64 | ||
65 | typedef cl::list<std::string>::const_iterator prefix_iterator; | |
66 | ||
223e47cc LB |
67 | //===----------------------------------------------------------------------===// |
68 | // Pattern Handling Code. | |
69 | //===----------------------------------------------------------------------===// | |
70 | ||
1a4d82fc JJ |
namespace Check {
/// CheckType - The kind of check a pattern or check string represents.
enum CheckType {
  /// No (valid) check.
  CheckNone = 0,
  /// "<prefix>:"
  CheckPlain,
  /// "<prefix>-NEXT:"
  CheckNext,
  /// "<prefix>-NOT:"
  CheckNot,
  /// "<prefix>-DAG:"
  CheckDAG,
  /// "<prefix>-LABEL:"
  CheckLabel,

  /// CheckEOF - A pattern of this kind only matches the end of file. This is
  /// used for trailing CHECK-NOTs.
  CheckEOF
};
} // end namespace Check
85 | ||
223e47cc LB |
86 | class Pattern { |
87 | SMLoc PatternLoc; | |
88 | ||
1a4d82fc | 89 | Check::CheckType CheckTy; |
223e47cc LB |
90 | |
91 | /// FixedStr - If non-empty, this pattern is a fixed string match with the | |
92 | /// specified fixed string. | |
93 | StringRef FixedStr; | |
94 | ||
95 | /// RegEx - If non-empty, this is a regex pattern. | |
96 | std::string RegExStr; | |
97 | ||
970d7e83 LB |
98 | /// \brief Contains the number of line this pattern is in. |
99 | unsigned LineNumber; | |
100 | ||
223e47cc LB |
101 | /// VariableUses - Entries in this vector map to uses of a variable in the |
102 | /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain | |
103 | /// "foobaz" and we'll get an entry in this vector that tells us to insert the | |
104 | /// value of bar at offset 3. | |
105 | std::vector<std::pair<StringRef, unsigned> > VariableUses; | |
106 | ||
970d7e83 LB |
107 | /// VariableDefs - Maps definitions of variables to their parenthesized |
108 | /// capture numbers. | |
109 | /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. | |
110 | std::map<StringRef, unsigned> VariableDefs; | |
223e47cc LB |
111 | |
112 | public: | |
113 | ||
1a4d82fc JJ |
114 | Pattern(Check::CheckType Ty) |
115 | : CheckTy(Ty) { } | |
116 | ||
117 | /// getLoc - Return the location in source code. | |
118 | SMLoc getLoc() const { return PatternLoc; } | |
223e47cc | 119 | |
1a4d82fc JJ |
120 | /// ParsePattern - Parse the given string into the Pattern. Prefix provides |
121 | /// which prefix is being matched, SM provides the SourceMgr used for error | |
122 | /// reports, and LineNumber is the line number in the input file from which | |
123 | /// the pattern string was read. Returns true in case of an error, false | |
124 | /// otherwise. | |
125 | bool ParsePattern(StringRef PatternStr, | |
126 | StringRef Prefix, | |
127 | SourceMgr &SM, | |
128 | unsigned LineNumber); | |
223e47cc LB |
129 | |
130 | /// Match - Match the pattern string against the input buffer Buffer. This | |
131 | /// returns the position that is matched or npos if there is no match. If | |
132 | /// there is a match, the size of the matched string is returned in MatchLen. | |
133 | /// | |
134 | /// The VariableTable StringMap provides the current values of filecheck | |
135 | /// variables and is updated if this match defines new values. | |
136 | size_t Match(StringRef Buffer, size_t &MatchLen, | |
137 | StringMap<StringRef> &VariableTable) const; | |
138 | ||
139 | /// PrintFailureInfo - Print additional information about a failure to match | |
140 | /// involving this pattern. | |
141 | void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, | |
142 | const StringMap<StringRef> &VariableTable) const; | |
143 | ||
1a4d82fc JJ |
144 | bool hasVariable() const { return !(VariableUses.empty() && |
145 | VariableDefs.empty()); } | |
146 | ||
147 | Check::CheckType getCheckTy() const { return CheckTy; } | |
148 | ||
223e47cc | 149 | private: |
970d7e83 LB |
150 | bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); |
151 | void AddBackrefToRegEx(unsigned BackrefNum); | |
223e47cc LB |
152 | |
153 | /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of | |
154 | /// matching this pattern at the start of \arg Buffer; a distance of zero | |
155 | /// should correspond to a perfect match. | |
156 | unsigned ComputeMatchDistance(StringRef Buffer, | |
157 | const StringMap<StringRef> &VariableTable) const; | |
970d7e83 LB |
158 | |
159 | /// \brief Evaluates expression and stores the result to \p Value. | |
160 | /// \return true on success. false when the expression has invalid syntax. | |
161 | bool EvaluateExpression(StringRef Expr, std::string &Value) const; | |
162 | ||
163 | /// \brief Finds the closing sequence of a regex variable usage or | |
164 | /// definition. Str has to point in the beginning of the definition | |
165 | /// (right after the opening sequence). | |
166 | /// \return offset of the closing sequence within Str, or npos if it was not | |
167 | /// found. | |
1a4d82fc | 168 | size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); |
223e47cc LB |
169 | }; |
170 | ||
171 | ||
1a4d82fc JJ |
172 | bool Pattern::ParsePattern(StringRef PatternStr, |
173 | StringRef Prefix, | |
174 | SourceMgr &SM, | |
970d7e83 LB |
175 | unsigned LineNumber) { |
176 | this->LineNumber = LineNumber; | |
223e47cc LB |
177 | PatternLoc = SMLoc::getFromPointer(PatternStr.data()); |
178 | ||
179 | // Ignore trailing whitespace. | |
180 | while (!PatternStr.empty() && | |
181 | (PatternStr.back() == ' ' || PatternStr.back() == '\t')) | |
182 | PatternStr = PatternStr.substr(0, PatternStr.size()-1); | |
183 | ||
184 | // Check that there is something on the line. | |
185 | if (PatternStr.empty()) { | |
186 | SM.PrintMessage(PatternLoc, SourceMgr::DK_Error, | |
187 | "found empty check string with prefix '" + | |
1a4d82fc | 188 | Prefix + ":'"); |
223e47cc LB |
189 | return true; |
190 | } | |
191 | ||
192 | // Check to see if this is a fixed string, or if it has regex pieces. | |
193 | if (PatternStr.size() < 2 || | |
194 | (PatternStr.find("{{") == StringRef::npos && | |
195 | PatternStr.find("[[") == StringRef::npos)) { | |
196 | FixedStr = PatternStr; | |
197 | return false; | |
198 | } | |
199 | ||
200 | // Paren value #0 is for the fully matched string. Any new parenthesized | |
201 | // values add from there. | |
202 | unsigned CurParen = 1; | |
203 | ||
204 | // Otherwise, there is at least one regex piece. Build up the regex pattern | |
205 | // by escaping scary characters in fixed strings, building up one big regex. | |
206 | while (!PatternStr.empty()) { | |
207 | // RegEx matches. | |
208 | if (PatternStr.startswith("{{")) { | |
970d7e83 | 209 | // This is the start of a regex match. Scan for the }}. |
223e47cc LB |
210 | size_t End = PatternStr.find("}}"); |
211 | if (End == StringRef::npos) { | |
212 | SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), | |
213 | SourceMgr::DK_Error, | |
214 | "found start of regex string with no end '}}'"); | |
215 | return true; | |
216 | } | |
217 | ||
218 | // Enclose {{}} patterns in parens just like [[]] even though we're not | |
219 | // capturing the result for any purpose. This is required in case the | |
220 | // expression contains an alternation like: CHECK: abc{{x|z}}def. We | |
221 | // want this to turn into: "abc(x|z)def" not "abcx|zdef". | |
222 | RegExStr += '('; | |
223 | ++CurParen; | |
224 | ||
225 | if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) | |
226 | return true; | |
227 | RegExStr += ')'; | |
228 | ||
229 | PatternStr = PatternStr.substr(End+2); | |
230 | continue; | |
231 | } | |
232 | ||
233 | // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* | |
234 | // (or some other regex) and assigns it to the FileCheck variable 'foo'. The | |
235 | // second form is [[foo]] which is a reference to foo. The variable name | |
236 | // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject | |
237 | // it. This is to catch some common errors. | |
238 | if (PatternStr.startswith("[[")) { | |
970d7e83 LB |
239 | // Find the closing bracket pair ending the match. End is going to be an |
240 | // offset relative to the beginning of the match string. | |
1a4d82fc | 241 | size_t End = FindRegexVarEnd(PatternStr.substr(2), SM); |
970d7e83 | 242 | |
223e47cc LB |
243 | if (End == StringRef::npos) { |
244 | SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), | |
245 | SourceMgr::DK_Error, | |
246 | "invalid named regex reference, no ]] found"); | |
247 | return true; | |
248 | } | |
249 | ||
970d7e83 LB |
250 | StringRef MatchStr = PatternStr.substr(2, End); |
251 | PatternStr = PatternStr.substr(End+4); | |
223e47cc LB |
252 | |
253 | // Get the regex name (e.g. "foo"). | |
254 | size_t NameEnd = MatchStr.find(':'); | |
255 | StringRef Name = MatchStr.substr(0, NameEnd); | |
256 | ||
257 | if (Name.empty()) { | |
258 | SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, | |
259 | "invalid name in named regex: empty name"); | |
260 | return true; | |
261 | } | |
262 | ||
970d7e83 LB |
263 | // Verify that the name/expression is well formed. FileCheck currently |
264 | // supports @LINE, @LINE+number, @LINE-number expressions. The check here | |
265 | // is relaxed, more strict check is performed in \c EvaluateExpression. | |
266 | bool IsExpression = false; | |
267 | for (unsigned i = 0, e = Name.size(); i != e; ++i) { | |
268 | if (i == 0 && Name[i] == '@') { | |
269 | if (NameEnd != StringRef::npos) { | |
270 | SM.PrintMessage(SMLoc::getFromPointer(Name.data()), | |
271 | SourceMgr::DK_Error, | |
272 | "invalid name in named regex definition"); | |
273 | return true; | |
274 | } | |
275 | IsExpression = true; | |
276 | continue; | |
277 | } | |
278 | if (Name[i] != '_' && !isalnum(Name[i]) && | |
279 | (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { | |
223e47cc LB |
280 | SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), |
281 | SourceMgr::DK_Error, "invalid name in named regex"); | |
282 | return true; | |
283 | } | |
970d7e83 | 284 | } |
223e47cc LB |
285 | |
286 | // Name can't start with a digit. | |
970d7e83 | 287 | if (isdigit(static_cast<unsigned char>(Name[0]))) { |
223e47cc LB |
288 | SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, |
289 | "invalid name in named regex"); | |
290 | return true; | |
291 | } | |
292 | ||
293 | // Handle [[foo]]. | |
294 | if (NameEnd == StringRef::npos) { | |
970d7e83 LB |
295 | // Handle variables that were defined earlier on the same line by |
296 | // emitting a backreference. | |
297 | if (VariableDefs.find(Name) != VariableDefs.end()) { | |
298 | unsigned VarParenNum = VariableDefs[Name]; | |
299 | if (VarParenNum < 1 || VarParenNum > 9) { | |
300 | SM.PrintMessage(SMLoc::getFromPointer(Name.data()), | |
301 | SourceMgr::DK_Error, | |
302 | "Can't back-reference more than 9 variables"); | |
303 | return true; | |
304 | } | |
305 | AddBackrefToRegEx(VarParenNum); | |
306 | } else { | |
307 | VariableUses.push_back(std::make_pair(Name, RegExStr.size())); | |
308 | } | |
223e47cc LB |
309 | continue; |
310 | } | |
311 | ||
312 | // Handle [[foo:.*]]. | |
970d7e83 | 313 | VariableDefs[Name] = CurParen; |
223e47cc LB |
314 | RegExStr += '('; |
315 | ++CurParen; | |
316 | ||
317 | if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) | |
318 | return true; | |
319 | ||
320 | RegExStr += ')'; | |
321 | } | |
322 | ||
323 | // Handle fixed string matches. | |
324 | // Find the end, which is the start of the next regex. | |
325 | size_t FixedMatchEnd = PatternStr.find("{{"); | |
326 | FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); | |
1a4d82fc | 327 | RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd)); |
223e47cc | 328 | PatternStr = PatternStr.substr(FixedMatchEnd); |
223e47cc LB |
329 | } |
330 | ||
331 | return false; | |
332 | } | |
333 | ||
970d7e83 | 334 | bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, |
223e47cc | 335 | SourceMgr &SM) { |
970d7e83 | 336 | Regex R(RS); |
223e47cc LB |
337 | std::string Error; |
338 | if (!R.isValid(Error)) { | |
970d7e83 | 339 | SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, |
223e47cc LB |
340 | "invalid regex: " + Error); |
341 | return true; | |
342 | } | |
343 | ||
970d7e83 | 344 | RegExStr += RS.str(); |
223e47cc LB |
345 | CurParen += R.getNumMatches(); |
346 | return false; | |
347 | } | |
348 | ||
970d7e83 LB |
349 | void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { |
350 | assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); | |
351 | std::string Backref = std::string("\\") + | |
352 | std::string(1, '0' + BackrefNum); | |
353 | RegExStr += Backref; | |
354 | } | |
355 | ||
356 | bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { | |
357 | // The only supported expression is @LINE([\+-]\d+)? | |
358 | if (!Expr.startswith("@LINE")) | |
359 | return false; | |
360 | Expr = Expr.substr(StringRef("@LINE").size()); | |
361 | int Offset = 0; | |
362 | if (!Expr.empty()) { | |
363 | if (Expr[0] == '+') | |
364 | Expr = Expr.substr(1); | |
365 | else if (Expr[0] != '-') | |
366 | return false; | |
367 | if (Expr.getAsInteger(10, Offset)) | |
368 | return false; | |
369 | } | |
370 | Value = llvm::itostr(LineNumber + Offset); | |
371 | return true; | |
372 | } | |
373 | ||
223e47cc LB |
374 | /// Match - Match the pattern string against the input buffer Buffer. This |
375 | /// returns the position that is matched or npos if there is no match. If | |
376 | /// there is a match, the size of the matched string is returned in MatchLen. | |
377 | size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, | |
378 | StringMap<StringRef> &VariableTable) const { | |
379 | // If this is the EOF pattern, match it immediately. | |
1a4d82fc | 380 | if (CheckTy == Check::CheckEOF) { |
223e47cc LB |
381 | MatchLen = 0; |
382 | return Buffer.size(); | |
383 | } | |
384 | ||
385 | // If this is a fixed string pattern, just match it now. | |
386 | if (!FixedStr.empty()) { | |
387 | MatchLen = FixedStr.size(); | |
388 | return Buffer.find(FixedStr); | |
389 | } | |
390 | ||
391 | // Regex match. | |
392 | ||
393 | // If there are variable uses, we need to create a temporary string with the | |
394 | // actual value. | |
395 | StringRef RegExToMatch = RegExStr; | |
396 | std::string TmpStr; | |
397 | if (!VariableUses.empty()) { | |
398 | TmpStr = RegExStr; | |
399 | ||
400 | unsigned InsertOffset = 0; | |
401 | for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { | |
223e47cc | 402 | std::string Value; |
970d7e83 LB |
403 | |
404 | if (VariableUses[i].first[0] == '@') { | |
405 | if (!EvaluateExpression(VariableUses[i].first, Value)) | |
406 | return StringRef::npos; | |
407 | } else { | |
408 | StringMap<StringRef>::iterator it = | |
409 | VariableTable.find(VariableUses[i].first); | |
410 | // If the variable is undefined, return an error. | |
411 | if (it == VariableTable.end()) | |
412 | return StringRef::npos; | |
413 | ||
1a4d82fc JJ |
414 | // Look up the value and escape it so that we can put it into the regex. |
415 | Value += Regex::escape(it->second); | |
970d7e83 | 416 | } |
223e47cc LB |
417 | |
418 | // Plop it into the regex at the adjusted offset. | |
419 | TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, | |
420 | Value.begin(), Value.end()); | |
421 | InsertOffset += Value.size(); | |
422 | } | |
423 | ||
424 | // Match the newly constructed regex. | |
425 | RegExToMatch = TmpStr; | |
426 | } | |
427 | ||
428 | ||
429 | SmallVector<StringRef, 4> MatchInfo; | |
430 | if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) | |
431 | return StringRef::npos; | |
432 | ||
433 | // Successful regex match. | |
434 | assert(!MatchInfo.empty() && "Didn't get any match"); | |
435 | StringRef FullMatch = MatchInfo[0]; | |
436 | ||
437 | // If this defines any variables, remember their values. | |
970d7e83 LB |
438 | for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), |
439 | E = VariableDefs.end(); | |
440 | I != E; ++I) { | |
441 | assert(I->second < MatchInfo.size() && "Internal paren error"); | |
442 | VariableTable[I->first] = MatchInfo[I->second]; | |
223e47cc LB |
443 | } |
444 | ||
445 | MatchLen = FullMatch.size(); | |
446 | return FullMatch.data()-Buffer.data(); | |
447 | } | |
448 | ||
449 | unsigned Pattern::ComputeMatchDistance(StringRef Buffer, | |
450 | const StringMap<StringRef> &VariableTable) const { | |
451 | // Just compute the number of matching characters. For regular expressions, we | |
452 | // just compare against the regex itself and hope for the best. | |
453 | // | |
454 | // FIXME: One easy improvement here is have the regex lib generate a single | |
455 | // example regular expression which matches, and use that as the example | |
456 | // string. | |
457 | StringRef ExampleString(FixedStr); | |
458 | if (ExampleString.empty()) | |
459 | ExampleString = RegExStr; | |
460 | ||
461 | // Only compare up to the first line in the buffer, or the string size. | |
462 | StringRef BufferPrefix = Buffer.substr(0, ExampleString.size()); | |
463 | BufferPrefix = BufferPrefix.split('\n').first; | |
464 | return BufferPrefix.edit_distance(ExampleString); | |
465 | } | |
466 | ||
467 | void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, | |
468 | const StringMap<StringRef> &VariableTable) const{ | |
469 | // If this was a regular expression using variables, print the current | |
470 | // variable values. | |
471 | if (!VariableUses.empty()) { | |
472 | for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { | |
223e47cc LB |
473 | SmallString<256> Msg; |
474 | raw_svector_ostream OS(Msg); | |
970d7e83 LB |
475 | StringRef Var = VariableUses[i].first; |
476 | if (Var[0] == '@') { | |
477 | std::string Value; | |
478 | if (EvaluateExpression(Var, Value)) { | |
479 | OS << "with expression \""; | |
480 | OS.write_escaped(Var) << "\" equal to \""; | |
481 | OS.write_escaped(Value) << "\""; | |
482 | } else { | |
483 | OS << "uses incorrect expression \""; | |
484 | OS.write_escaped(Var) << "\""; | |
485 | } | |
223e47cc | 486 | } else { |
970d7e83 LB |
487 | StringMap<StringRef>::const_iterator it = VariableTable.find(Var); |
488 | ||
489 | // Check for undefined variable references. | |
490 | if (it == VariableTable.end()) { | |
491 | OS << "uses undefined variable \""; | |
492 | OS.write_escaped(Var) << "\""; | |
493 | } else { | |
494 | OS << "with variable \""; | |
495 | OS.write_escaped(Var) << "\" equal to \""; | |
496 | OS.write_escaped(it->second) << "\""; | |
497 | } | |
223e47cc LB |
498 | } |
499 | ||
500 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, | |
501 | OS.str()); | |
502 | } | |
503 | } | |
504 | ||
505 | // Attempt to find the closest/best fuzzy match. Usually an error happens | |
506 | // because some string in the output didn't exactly match. In these cases, we | |
507 | // would like to show the user a best guess at what "should have" matched, to | |
508 | // save them having to actually check the input manually. | |
509 | size_t NumLinesForward = 0; | |
510 | size_t Best = StringRef::npos; | |
511 | double BestQuality = 0; | |
512 | ||
513 | // Use an arbitrary 4k limit on how far we will search. | |
514 | for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) { | |
515 | if (Buffer[i] == '\n') | |
516 | ++NumLinesForward; | |
517 | ||
518 | // Patterns have leading whitespace stripped, so skip whitespace when | |
519 | // looking for something which looks like a pattern. | |
520 | if (Buffer[i] == ' ' || Buffer[i] == '\t') | |
521 | continue; | |
522 | ||
523 | // Compute the "quality" of this match as an arbitrary combination of the | |
524 | // match distance and the number of lines skipped to get to this match. | |
525 | unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable); | |
526 | double Quality = Distance + (NumLinesForward / 100.); | |
527 | ||
528 | if (Quality < BestQuality || Best == StringRef::npos) { | |
529 | Best = i; | |
530 | BestQuality = Quality; | |
531 | } | |
532 | } | |
533 | ||
534 | // Print the "possible intended match here" line if we found something | |
535 | // reasonable and not equal to what we showed in the "scanning from here" | |
536 | // line. | |
537 | if (Best && Best != StringRef::npos && BestQuality < 50) { | |
538 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best), | |
539 | SourceMgr::DK_Note, "possible intended match here"); | |
540 | ||
541 | // FIXME: If we wanted to be really friendly we would show why the match | |
542 | // failed, as it can be hard to spot simple one character differences. | |
543 | } | |
544 | } | |
545 | ||
1a4d82fc | 546 | size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) { |
970d7e83 LB |
547 | // Offset keeps track of the current offset within the input Str |
548 | size_t Offset = 0; | |
549 | // [...] Nesting depth | |
550 | size_t BracketDepth = 0; | |
551 | ||
552 | while (!Str.empty()) { | |
553 | if (Str.startswith("]]") && BracketDepth == 0) | |
554 | return Offset; | |
555 | if (Str[0] == '\\') { | |
556 | // Backslash escapes the next char within regexes, so skip them both. | |
557 | Str = Str.substr(2); | |
558 | Offset += 2; | |
559 | } else { | |
560 | switch (Str[0]) { | |
561 | default: | |
562 | break; | |
563 | case '[': | |
564 | BracketDepth++; | |
565 | break; | |
566 | case ']': | |
1a4d82fc JJ |
567 | if (BracketDepth == 0) { |
568 | SM.PrintMessage(SMLoc::getFromPointer(Str.data()), | |
569 | SourceMgr::DK_Error, | |
570 | "missing closing \"]\" for regex variable"); | |
571 | exit(1); | |
572 | } | |
970d7e83 LB |
573 | BracketDepth--; |
574 | break; | |
575 | } | |
576 | Str = Str.substr(1); | |
577 | Offset++; | |
578 | } | |
579 | } | |
580 | ||
581 | return StringRef::npos; | |
582 | } | |
583 | ||
584 | ||
223e47cc LB |
585 | //===----------------------------------------------------------------------===// |
586 | // Check Strings. | |
587 | //===----------------------------------------------------------------------===// | |
588 | ||
589 | /// CheckString - This is a check that we found in the input file. | |
590 | struct CheckString { | |
591 | /// Pat - The pattern to match. | |
592 | Pattern Pat; | |
593 | ||
1a4d82fc JJ |
594 | /// Prefix - Which prefix name this check matched. |
595 | StringRef Prefix; | |
596 | ||
223e47cc LB |
597 | /// Loc - The location in the match file that the check string was specified. |
598 | SMLoc Loc; | |
599 | ||
1a4d82fc JJ |
600 | /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive, |
601 | /// as opposed to a CHECK: directive. | |
602 | Check::CheckType CheckTy; | |
223e47cc | 603 | |
1a4d82fc | 604 | /// DagNotStrings - These are all of the strings that are disallowed from |
223e47cc LB |
605 | /// occurring between this match string and the previous one (or start of |
606 | /// file). | |
1a4d82fc JJ |
607 | std::vector<Pattern> DagNotStrings; |
608 | ||
609 | ||
610 | CheckString(const Pattern &P, | |
611 | StringRef S, | |
612 | SMLoc L, | |
613 | Check::CheckType Ty) | |
614 | : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {} | |
223e47cc | 615 | |
1a4d82fc JJ |
616 | /// Check - Match check string and its "not strings" and/or "dag strings". |
617 | size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, | |
618 | size_t &MatchLen, StringMap<StringRef> &VariableTable) const; | |
619 | ||
620 | /// CheckNext - Verify there is a single line in the given buffer. | |
621 | bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; | |
622 | ||
623 | /// CheckNot - Verify there's no "not strings" in the given buffer. | |
624 | bool CheckNot(const SourceMgr &SM, StringRef Buffer, | |
625 | const std::vector<const Pattern *> &NotStrings, | |
626 | StringMap<StringRef> &VariableTable) const; | |
627 | ||
628 | /// CheckDag - Match "dag strings" and their mixed "not strings". | |
629 | size_t CheckDag(const SourceMgr &SM, StringRef Buffer, | |
630 | std::vector<const Pattern *> &NotStrings, | |
631 | StringMap<StringRef> &VariableTable) const; | |
223e47cc LB |
632 | }; |
633 | ||
970d7e83 LB |
634 | /// Canonicalize whitespaces in the input file. Line endings are replaced |
635 | /// with UNIX-style '\n'. | |
636 | /// | |
637 | /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace | |
638 | /// characters to a single space. | |
1a4d82fc JJ |
639 | static std::unique_ptr<MemoryBuffer> |
640 | CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB, | |
641 | bool PreserveHorizontal) { | |
223e47cc LB |
642 | SmallString<128> NewFile; |
643 | NewFile.reserve(MB->getBufferSize()); | |
644 | ||
645 | for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd(); | |
646 | Ptr != End; ++Ptr) { | |
647 | // Eliminate trailing dosish \r. | |
648 | if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') { | |
649 | continue; | |
650 | } | |
651 | ||
1a4d82fc | 652 | // If current char is not a horizontal whitespace or if horizontal |
970d7e83 LB |
653 | // whitespace canonicalization is disabled, dump it to output as is. |
654 | if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { | |
223e47cc LB |
655 | NewFile.push_back(*Ptr); |
656 | continue; | |
657 | } | |
658 | ||
659 | // Otherwise, add one space and advance over neighboring space. | |
660 | NewFile.push_back(' '); | |
661 | while (Ptr+1 != End && | |
662 | (Ptr[1] == ' ' || Ptr[1] == '\t')) | |
663 | ++Ptr; | |
664 | } | |
665 | ||
1a4d82fc JJ |
666 | return std::unique_ptr<MemoryBuffer>( |
667 | MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier())); | |
668 | } | |
669 | ||
/// IsPartOfWord - Return true if \p c is alphanumeric, '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
673 | ||
674 | // Get the size of the prefix extension. | |
675 | static size_t CheckTypeSize(Check::CheckType Ty) { | |
676 | switch (Ty) { | |
677 | case Check::CheckNone: | |
678 | return 0; | |
679 | ||
680 | case Check::CheckPlain: | |
681 | return sizeof(":") - 1; | |
682 | ||
683 | case Check::CheckNext: | |
684 | return sizeof("-NEXT:") - 1; | |
685 | ||
686 | case Check::CheckNot: | |
687 | return sizeof("-NOT:") - 1; | |
688 | ||
689 | case Check::CheckDAG: | |
690 | return sizeof("-DAG:") - 1; | |
691 | ||
692 | case Check::CheckLabel: | |
693 | return sizeof("-LABEL:") - 1; | |
694 | ||
695 | case Check::CheckEOF: | |
696 | llvm_unreachable("Should not be using EOF size"); | |
697 | } | |
698 | ||
699 | llvm_unreachable("Bad check type"); | |
700 | } | |
701 | ||
702 | static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) { | |
703 | char NextChar = Buffer[Prefix.size()]; | |
704 | ||
705 | // Verify that the : is present after the prefix. | |
706 | if (NextChar == ':') | |
707 | return Check::CheckPlain; | |
708 | ||
709 | if (NextChar != '-') | |
710 | return Check::CheckNone; | |
711 | ||
712 | StringRef Rest = Buffer.drop_front(Prefix.size() + 1); | |
713 | if (Rest.startswith("NEXT:")) | |
714 | return Check::CheckNext; | |
715 | ||
716 | if (Rest.startswith("NOT:")) | |
717 | return Check::CheckNot; | |
718 | ||
719 | if (Rest.startswith("DAG:")) | |
720 | return Check::CheckDAG; | |
721 | ||
722 | if (Rest.startswith("LABEL:")) | |
723 | return Check::CheckLabel; | |
724 | ||
725 | return Check::CheckNone; | |
726 | } | |
727 | ||
728 | // From the given position, find the next character after the word. | |
729 | static size_t SkipWord(StringRef Str, size_t Loc) { | |
730 | while (Loc < Str.size() && IsPartOfWord(Str[Loc])) | |
731 | ++Loc; | |
732 | return Loc; | |
733 | } | |
734 | ||
735 | // Try to find the first match in buffer for any prefix. If a valid match is | |
736 | // found, return that prefix and set its type and location. If there are almost | |
737 | // matches (e.g. the actual prefix string is found, but is not an actual check | |
738 | // string), but no valid match, return an empty string and set the position to | |
739 | // resume searching from. If no partial matches are found, return an empty | |
740 | // string and the location will be StringRef::npos. If one prefix is a substring | |
741 | // of another, the maximal match should be found. e.g. if "A" and "AA" are | |
742 | // prefixes then AA-CHECK: should match the second one. | |
743 | static StringRef FindFirstCandidateMatch(StringRef &Buffer, | |
744 | Check::CheckType &CheckTy, | |
745 | size_t &CheckLoc) { | |
746 | StringRef FirstPrefix; | |
747 | size_t FirstLoc = StringRef::npos; | |
748 | size_t SearchLoc = StringRef::npos; | |
749 | Check::CheckType FirstTy = Check::CheckNone; | |
750 | ||
751 | CheckTy = Check::CheckNone; | |
752 | CheckLoc = StringRef::npos; | |
753 | ||
754 | for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); | |
755 | I != E; ++I) { | |
756 | StringRef Prefix(*I); | |
757 | size_t PrefixLoc = Buffer.find(Prefix); | |
758 | ||
759 | if (PrefixLoc == StringRef::npos) | |
760 | continue; | |
761 | ||
762 | // Track where we are searching for invalid prefixes that look almost right. | |
763 | // We need to only advance to the first partial match on the next attempt | |
764 | // since a partial match could be a substring of a later, valid prefix. | |
765 | // Need to skip to the end of the word, otherwise we could end up | |
766 | // matching a prefix in a substring later. | |
767 | if (PrefixLoc < SearchLoc) | |
768 | SearchLoc = SkipWord(Buffer, PrefixLoc); | |
769 | ||
770 | // We only want to find the first match to avoid skipping some. | |
771 | if (PrefixLoc > FirstLoc) | |
772 | continue; | |
773 | // If one matching check-prefix is a prefix of another, choose the | |
774 | // longer one. | |
775 | if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size()) | |
776 | continue; | |
777 | ||
778 | StringRef Rest = Buffer.drop_front(PrefixLoc); | |
779 | // Make sure we have actually found the prefix, and not a word containing | |
780 | // it. This should also prevent matching the wrong prefix when one is a | |
781 | // substring of another. | |
782 | if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1])) | |
783 | FirstTy = Check::CheckNone; | |
784 | else | |
785 | FirstTy = FindCheckType(Rest, Prefix); | |
786 | ||
787 | FirstLoc = PrefixLoc; | |
788 | FirstPrefix = Prefix; | |
789 | } | |
790 | ||
791 | // If the first prefix is invalid, we should continue the search after it. | |
792 | if (FirstTy == Check::CheckNone) { | |
793 | CheckLoc = SearchLoc; | |
794 | return ""; | |
795 | } | |
223e47cc | 796 | |
1a4d82fc JJ |
797 | CheckTy = FirstTy; |
798 | CheckLoc = FirstLoc; | |
799 | return FirstPrefix; | |
223e47cc LB |
800 | } |
801 | ||
1a4d82fc JJ |
802 | static StringRef FindFirstMatchingPrefix(StringRef &Buffer, |
803 | unsigned &LineNumber, | |
804 | Check::CheckType &CheckTy, | |
805 | size_t &CheckLoc) { | |
806 | while (!Buffer.empty()) { | |
807 | StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc); | |
808 | // If we found a real match, we are done. | |
809 | if (!Prefix.empty()) { | |
810 | LineNumber += Buffer.substr(0, CheckLoc).count('\n'); | |
811 | return Prefix; | |
812 | } | |
813 | ||
814 | // We didn't find any almost matches either, we are also done. | |
815 | if (CheckLoc == StringRef::npos) | |
816 | return StringRef(); | |
817 | ||
818 | LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n'); | |
819 | ||
820 | // Advance to the last possible match we found and try again. | |
821 | Buffer = Buffer.drop_front(CheckLoc + 1); | |
822 | } | |
823 | ||
824 | return StringRef(); | |
825 | } | |
223e47cc LB |
826 | |
827 | /// ReadCheckFile - Read the check file, which specifies the sequence of | |
828 | /// expected strings. The strings are added to the CheckStrings vector. | |
970d7e83 | 829 | /// Returns true in case of an error, false otherwise. |
223e47cc LB |
830 | static bool ReadCheckFile(SourceMgr &SM, |
831 | std::vector<CheckString> &CheckStrings) { | |
1a4d82fc JJ |
832 | ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = |
833 | MemoryBuffer::getFileOrSTDIN(CheckFilename); | |
834 | if (std::error_code EC = FileOrErr.getError()) { | |
835 | errs() << "Could not open check file '" << CheckFilename | |
836 | << "': " << EC.message() << '\n'; | |
223e47cc LB |
837 | return true; |
838 | } | |
223e47cc LB |
839 | |
840 | // If we want to canonicalize whitespace, strip excess whitespace from the | |
970d7e83 | 841 | // buffer containing the CHECK lines. Remove DOS style line endings. |
1a4d82fc JJ |
842 | std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile( |
843 | std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace); | |
223e47cc LB |
844 | |
845 | // Find all instances of CheckPrefix followed by : in the file. | |
846 | StringRef Buffer = F->getBuffer(); | |
1a4d82fc JJ |
847 | |
848 | SM.AddNewSourceBuffer(std::move(F), SMLoc()); | |
849 | ||
850 | std::vector<Pattern> ImplicitNegativeChecks; | |
851 | for (const auto &PatternString : ImplicitCheckNot) { | |
852 | // Create a buffer with fake command line content in order to display the | |
853 | // command line option responsible for the specific implicit CHECK-NOT. | |
854 | std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='"; | |
855 | std::string Suffix = "'"; | |
856 | std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy( | |
857 | Prefix + PatternString + Suffix, "command line"); | |
858 | ||
859 | StringRef PatternInBuffer = | |
860 | CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); | |
861 | SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); | |
862 | ||
863 | ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot)); | |
864 | ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer, | |
865 | "IMPLICIT-CHECK", SM, 0); | |
866 | } | |
867 | ||
868 | ||
869 | std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks; | |
223e47cc | 870 | |
970d7e83 LB |
871 | // LineNumber keeps track of the line on which CheckPrefix instances are |
872 | // found. | |
873 | unsigned LineNumber = 1; | |
874 | ||
223e47cc | 875 | while (1) { |
1a4d82fc JJ |
876 | Check::CheckType CheckTy; |
877 | size_t PrefixLoc; | |
878 | ||
879 | // See if a prefix occurs in the memory buffer. | |
880 | StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer, | |
881 | LineNumber, | |
882 | CheckTy, | |
883 | PrefixLoc); | |
884 | if (UsedPrefix.empty()) | |
223e47cc LB |
885 | break; |
886 | ||
1a4d82fc | 887 | Buffer = Buffer.drop_front(PrefixLoc); |
223e47cc | 888 | |
1a4d82fc JJ |
889 | // Location to use for error messages. |
890 | const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1); | |
223e47cc | 891 | |
1a4d82fc JJ |
892 | // PrefixLoc is to the start of the prefix. Skip to the end. |
893 | Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy)); | |
223e47cc | 894 | |
1a4d82fc JJ |
895 | // Okay, we found the prefix, yay. Remember the rest of the line, but ignore |
896 | // leading and trailing whitespace. | |
223e47cc LB |
897 | Buffer = Buffer.substr(Buffer.find_first_not_of(" \t")); |
898 | ||
899 | // Scan ahead to the end of line. | |
900 | size_t EOL = Buffer.find_first_of("\n\r"); | |
901 | ||
902 | // Remember the location of the start of the pattern, for diagnostics. | |
903 | SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data()); | |
904 | ||
905 | // Parse the pattern. | |
1a4d82fc JJ |
906 | Pattern P(CheckTy); |
907 | if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber)) | |
223e47cc LB |
908 | return true; |
909 | ||
1a4d82fc JJ |
910 | // Verify that CHECK-LABEL lines do not define or use variables |
911 | if ((CheckTy == Check::CheckLabel) && P.hasVariable()) { | |
912 | SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), | |
913 | SourceMgr::DK_Error, | |
914 | "found '" + UsedPrefix + "-LABEL:'" | |
915 | " with variable definition or use"); | |
916 | return true; | |
917 | } | |
918 | ||
223e47cc LB |
919 | Buffer = Buffer.substr(EOL); |
920 | ||
223e47cc | 921 | // Verify that CHECK-NEXT lines have at least one CHECK line before them. |
1a4d82fc JJ |
922 | if ((CheckTy == Check::CheckNext) && CheckStrings.empty()) { |
923 | SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart), | |
223e47cc | 924 | SourceMgr::DK_Error, |
1a4d82fc JJ |
925 | "found '" + UsedPrefix + "-NEXT:' without previous '" |
926 | + UsedPrefix + ": line"); | |
223e47cc LB |
927 | return true; |
928 | } | |
929 | ||
1a4d82fc JJ |
930 | // Handle CHECK-DAG/-NOT. |
931 | if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) { | |
932 | DagNotMatches.push_back(P); | |
223e47cc LB |
933 | continue; |
934 | } | |
935 | ||
223e47cc LB |
936 | // Okay, add the string we captured to the output vector and move on. |
937 | CheckStrings.push_back(CheckString(P, | |
1a4d82fc | 938 | UsedPrefix, |
223e47cc | 939 | PatternLoc, |
1a4d82fc JJ |
940 | CheckTy)); |
941 | std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); | |
942 | DagNotMatches = ImplicitNegativeChecks; | |
223e47cc LB |
943 | } |
944 | ||
1a4d82fc JJ |
945 | // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first |
946 | // prefix as a filler for the error message. | |
947 | if (!DagNotMatches.empty()) { | |
948 | CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF), | |
949 | CheckPrefixes[0], | |
223e47cc | 950 | SMLoc::getFromPointer(Buffer.data()), |
1a4d82fc JJ |
951 | Check::CheckEOF)); |
952 | std::swap(DagNotMatches, CheckStrings.back().DagNotStrings); | |
223e47cc LB |
953 | } |
954 | ||
955 | if (CheckStrings.empty()) { | |
1a4d82fc JJ |
956 | errs() << "error: no check strings found with prefix" |
957 | << (CheckPrefixes.size() > 1 ? "es " : " "); | |
958 | for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) { | |
959 | StringRef Prefix(CheckPrefixes[I]); | |
960 | errs() << '\'' << Prefix << ":'"; | |
961 | if (I != N - 1) | |
962 | errs() << ", "; | |
963 | } | |
964 | ||
965 | errs() << '\n'; | |
223e47cc LB |
966 | return true; |
967 | } | |
968 | ||
969 | return false; | |
970 | } | |
971 | ||
1a4d82fc JJ |
972 | static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc, |
973 | const Pattern &Pat, StringRef Buffer, | |
223e47cc LB |
974 | StringMap<StringRef> &VariableTable) { |
975 | // Otherwise, we have an error, emit an error message. | |
1a4d82fc | 976 | SM.PrintMessage(Loc, SourceMgr::DK_Error, |
223e47cc LB |
977 | "expected string not found in input"); |
978 | ||
979 | // Print the "scanning from here" line. If the current position is at the | |
980 | // end of a line, advance to the start of the next line. | |
981 | Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r")); | |
982 | ||
983 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, | |
984 | "scanning from here"); | |
985 | ||
986 | // Allow the pattern to print additional information if desired. | |
1a4d82fc JJ |
987 | Pat.PrintFailureInfo(SM, Buffer, VariableTable); |
988 | } | |
989 | ||
990 | static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr, | |
991 | StringRef Buffer, | |
992 | StringMap<StringRef> &VariableTable) { | |
993 | PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable); | |
223e47cc LB |
994 | } |
995 | ||
996 | /// CountNumNewlinesBetween - Count the number of newlines in the specified | |
997 | /// range. | |
1a4d82fc JJ |
998 | static unsigned CountNumNewlinesBetween(StringRef Range, |
999 | const char *&FirstNewLine) { | |
223e47cc LB |
1000 | unsigned NumNewLines = 0; |
1001 | while (1) { | |
1002 | // Scan for newline. | |
1003 | Range = Range.substr(Range.find_first_of("\n\r")); | |
1004 | if (Range.empty()) return NumNewLines; | |
1005 | ||
1006 | ++NumNewLines; | |
1007 | ||
1008 | // Handle \n\r and \r\n as a single newline. | |
1009 | if (Range.size() > 1 && | |
1010 | (Range[1] == '\n' || Range[1] == '\r') && | |
1011 | (Range[0] != Range[1])) | |
1012 | Range = Range.substr(1); | |
1013 | Range = Range.substr(1); | |
1a4d82fc JJ |
1014 | |
1015 | if (NumNewLines == 1) | |
1016 | FirstNewLine = Range.begin(); | |
1017 | } | |
1018 | } | |
1019 | ||
1020 | size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer, | |
1021 | bool IsLabelScanMode, size_t &MatchLen, | |
1022 | StringMap<StringRef> &VariableTable) const { | |
1023 | size_t LastPos = 0; | |
1024 | std::vector<const Pattern *> NotStrings; | |
1025 | ||
1026 | // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL | |
1027 | // bounds; we have not processed variable definitions within the bounded block | |
1028 | // yet so cannot handle any final CHECK-DAG yet; this is handled when going | |
1029 | // over the block again (including the last CHECK-LABEL) in normal mode. | |
1030 | if (!IsLabelScanMode) { | |
1031 | // Match "dag strings" (with mixed "not strings" if any). | |
1032 | LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable); | |
1033 | if (LastPos == StringRef::npos) | |
1034 | return StringRef::npos; | |
1035 | } | |
1036 | ||
1037 | // Match itself from the last position after matching CHECK-DAG. | |
1038 | StringRef MatchBuffer = Buffer.substr(LastPos); | |
1039 | size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); | |
1040 | if (MatchPos == StringRef::npos) { | |
1041 | PrintCheckFailed(SM, *this, MatchBuffer, VariableTable); | |
1042 | return StringRef::npos; | |
1043 | } | |
1044 | MatchPos += LastPos; | |
1045 | ||
1046 | // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT | |
1047 | // or CHECK-NOT | |
1048 | if (!IsLabelScanMode) { | |
1049 | StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); | |
1050 | ||
1051 | // If this check is a "CHECK-NEXT", verify that the previous match was on | |
1052 | // the previous line (i.e. that there is one newline between them). | |
1053 | if (CheckNext(SM, SkippedRegion)) | |
1054 | return StringRef::npos; | |
1055 | ||
1056 | // If this match had "not strings", verify that they don't exist in the | |
1057 | // skipped region. | |
1058 | if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) | |
1059 | return StringRef::npos; | |
1060 | } | |
1061 | ||
1062 | return MatchPos; | |
1063 | } | |
1064 | ||
1065 | bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const { | |
1066 | if (CheckTy != Check::CheckNext) | |
1067 | return false; | |
1068 | ||
1069 | // Count the number of newlines between the previous match and this one. | |
1070 | assert(Buffer.data() != | |
1071 | SM.getMemoryBuffer( | |
1072 | SM.FindBufferContainingLoc( | |
1073 | SMLoc::getFromPointer(Buffer.data())))->getBufferStart() && | |
1074 | "CHECK-NEXT can't be the first check in a file"); | |
1075 | ||
1076 | const char *FirstNewLine = nullptr; | |
1077 | unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine); | |
1078 | ||
1079 | if (NumNewLines == 0) { | |
1080 | SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + | |
1081 | "-NEXT: is on the same line as previous match"); | |
1082 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), | |
1083 | SourceMgr::DK_Note, "'next' match was here"); | |
1084 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, | |
1085 | "previous match ended here"); | |
1086 | return true; | |
1087 | } | |
1088 | ||
1089 | if (NumNewLines != 1) { | |
1090 | SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix + | |
1091 | "-NEXT: is not on the line after the previous match"); | |
1092 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), | |
1093 | SourceMgr::DK_Note, "'next' match was here"); | |
1094 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, | |
1095 | "previous match ended here"); | |
1096 | SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note, | |
1097 | "non-matching line after previous match is here"); | |
1098 | return true; | |
1099 | } | |
1100 | ||
1101 | return false; | |
1102 | } | |
1103 | ||
1104 | bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer, | |
1105 | const std::vector<const Pattern *> &NotStrings, | |
1106 | StringMap<StringRef> &VariableTable) const { | |
1107 | for (unsigned ChunkNo = 0, e = NotStrings.size(); | |
1108 | ChunkNo != e; ++ChunkNo) { | |
1109 | const Pattern *Pat = NotStrings[ChunkNo]; | |
1110 | assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!"); | |
1111 | ||
1112 | size_t MatchLen = 0; | |
1113 | size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable); | |
1114 | ||
1115 | if (Pos == StringRef::npos) continue; | |
1116 | ||
1117 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos), | |
1118 | SourceMgr::DK_Error, | |
1119 | Prefix + "-NOT: string occurred!"); | |
1120 | SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note, | |
1121 | Prefix + "-NOT: pattern specified here"); | |
1122 | return true; | |
1123 | } | |
1124 | ||
1125 | return false; | |
1126 | } | |
1127 | ||
1128 | size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer, | |
1129 | std::vector<const Pattern *> &NotStrings, | |
1130 | StringMap<StringRef> &VariableTable) const { | |
1131 | if (DagNotStrings.empty()) | |
1132 | return 0; | |
1133 | ||
1134 | size_t LastPos = 0; | |
1135 | size_t StartPos = LastPos; | |
1136 | ||
1137 | for (unsigned ChunkNo = 0, e = DagNotStrings.size(); | |
1138 | ChunkNo != e; ++ChunkNo) { | |
1139 | const Pattern &Pat = DagNotStrings[ChunkNo]; | |
1140 | ||
1141 | assert((Pat.getCheckTy() == Check::CheckDAG || | |
1142 | Pat.getCheckTy() == Check::CheckNot) && | |
1143 | "Invalid CHECK-DAG or CHECK-NOT!"); | |
1144 | ||
1145 | if (Pat.getCheckTy() == Check::CheckNot) { | |
1146 | NotStrings.push_back(&Pat); | |
1147 | continue; | |
1148 | } | |
1149 | ||
1150 | assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!"); | |
1151 | ||
1152 | size_t MatchLen = 0, MatchPos; | |
1153 | ||
1154 | // CHECK-DAG always matches from the start. | |
1155 | StringRef MatchBuffer = Buffer.substr(StartPos); | |
1156 | MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable); | |
1157 | // With a group of CHECK-DAGs, a single mismatching means the match on | |
1158 | // that group of CHECK-DAGs fails immediately. | |
1159 | if (MatchPos == StringRef::npos) { | |
1160 | PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable); | |
1161 | return StringRef::npos; | |
1162 | } | |
1163 | // Re-calc it as the offset relative to the start of the original string. | |
1164 | MatchPos += StartPos; | |
1165 | ||
1166 | if (!NotStrings.empty()) { | |
1167 | if (MatchPos < LastPos) { | |
1168 | // Reordered? | |
1169 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos), | |
1170 | SourceMgr::DK_Error, | |
1171 | Prefix + "-DAG: found a match of CHECK-DAG" | |
1172 | " reordering across a CHECK-NOT"); | |
1173 | SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos), | |
1174 | SourceMgr::DK_Note, | |
1175 | Prefix + "-DAG: the farthest match of CHECK-DAG" | |
1176 | " is found here"); | |
1177 | SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note, | |
1178 | Prefix + "-NOT: the crossed pattern specified" | |
1179 | " here"); | |
1180 | SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note, | |
1181 | Prefix + "-DAG: the reordered pattern specified" | |
1182 | " here"); | |
1183 | return StringRef::npos; | |
1184 | } | |
1185 | // All subsequent CHECK-DAGs should be matched from the farthest | |
1186 | // position of all precedent CHECK-DAGs (including this one.) | |
1187 | StartPos = LastPos; | |
1188 | // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to | |
1189 | // CHECK-DAG, verify that there's no 'not' strings occurred in that | |
1190 | // region. | |
1191 | StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos); | |
1192 | if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable)) | |
1193 | return StringRef::npos; | |
1194 | // Clear "not strings". | |
1195 | NotStrings.clear(); | |
1196 | } | |
1197 | ||
1198 | // Update the last position with CHECK-DAG matches. | |
1199 | LastPos = std::max(MatchPos + MatchLen, LastPos); | |
223e47cc | 1200 | } |
1a4d82fc JJ |
1201 | |
1202 | return LastPos; | |
1203 | } | |
1204 | ||
1205 | // A check prefix must contain only alphanumeric, hyphens and underscores. | |
1206 | static bool ValidateCheckPrefix(StringRef CheckPrefix) { | |
1207 | Regex Validator("^[a-zA-Z0-9_-]*$"); | |
1208 | return Validator.match(CheckPrefix); | |
1209 | } | |
1210 | ||
1211 | static bool ValidateCheckPrefixes() { | |
1212 | StringSet<> PrefixSet; | |
1213 | ||
1214 | for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end(); | |
1215 | I != E; ++I) { | |
1216 | StringRef Prefix(*I); | |
1217 | ||
1218 | // Reject empty prefixes. | |
1219 | if (Prefix == "") | |
1220 | return false; | |
1221 | ||
85aaf69f | 1222 | if (!PrefixSet.insert(Prefix).second) |
1a4d82fc JJ |
1223 | return false; |
1224 | ||
1225 | if (!ValidateCheckPrefix(Prefix)) | |
1226 | return false; | |
1227 | } | |
1228 | ||
1229 | return true; | |
1230 | } | |
1231 | ||
1232 | // I don't think there's a way to specify an initial value for cl::list, | |
1233 | // so if nothing was specified, add the default | |
1234 | static void AddCheckPrefixIfNeeded() { | |
1235 | if (CheckPrefixes.empty()) | |
1236 | CheckPrefixes.push_back("CHECK"); | |
223e47cc LB |
1237 | } |
1238 | ||
1239 | int main(int argc, char **argv) { | |
1240 | sys::PrintStackTraceOnErrorSignal(); | |
1241 | PrettyStackTraceProgram X(argc, argv); | |
1242 | cl::ParseCommandLineOptions(argc, argv); | |
1243 | ||
1a4d82fc JJ |
1244 | if (!ValidateCheckPrefixes()) { |
1245 | errs() << "Supplied check-prefix is invalid! Prefixes must be unique and " | |
1246 | "start with a letter and contain only alphanumeric characters, " | |
1247 | "hyphens and underscores\n"; | |
1248 | return 2; | |
1249 | } | |
1250 | ||
1251 | AddCheckPrefixIfNeeded(); | |
1252 | ||
223e47cc LB |
1253 | SourceMgr SM; |
1254 | ||
1255 | // Read the expected strings from the check file. | |
1256 | std::vector<CheckString> CheckStrings; | |
1257 | if (ReadCheckFile(SM, CheckStrings)) | |
1258 | return 2; | |
1259 | ||
1260 | // Open the file to check and add it to SourceMgr. | |
1a4d82fc JJ |
1261 | ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = |
1262 | MemoryBuffer::getFileOrSTDIN(InputFilename); | |
1263 | if (std::error_code EC = FileOrErr.getError()) { | |
1264 | errs() << "Could not open input file '" << InputFilename | |
1265 | << "': " << EC.message() << '\n'; | |
970d7e83 | 1266 | return 2; |
223e47cc | 1267 | } |
1a4d82fc | 1268 | std::unique_ptr<MemoryBuffer> &File = FileOrErr.get(); |
223e47cc | 1269 | |
1a4d82fc | 1270 | if (File->getBufferSize() == 0 && !AllowEmptyInput) { |
223e47cc | 1271 | errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; |
970d7e83 | 1272 | return 2; |
223e47cc | 1273 | } |
1a4d82fc | 1274 | |
223e47cc | 1275 | // Remove duplicate spaces in the input file if requested. |
970d7e83 | 1276 | // Remove DOS style line endings. |
1a4d82fc JJ |
1277 | std::unique_ptr<MemoryBuffer> F = |
1278 | CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace); | |
223e47cc LB |
1279 | |
1280 | // Check that we have all of the expected strings, in order, in the input | |
1281 | // file. | |
1282 | StringRef Buffer = F->getBuffer(); | |
1283 | ||
1a4d82fc | 1284 | SM.AddNewSourceBuffer(std::move(F), SMLoc()); |
223e47cc | 1285 | |
1a4d82fc JJ |
1286 | /// VariableTable - This holds all the current filecheck variables. |
1287 | StringMap<StringRef> VariableTable; | |
223e47cc | 1288 | |
1a4d82fc | 1289 | bool hasError = false; |
223e47cc | 1290 | |
1a4d82fc | 1291 | unsigned i = 0, j = 0, e = CheckStrings.size(); |
223e47cc | 1292 | |
1a4d82fc JJ |
1293 | while (true) { |
1294 | StringRef CheckRegion; | |
1295 | if (j == e) { | |
1296 | CheckRegion = Buffer; | |
1297 | } else { | |
1298 | const CheckString &CheckLabelStr = CheckStrings[j]; | |
1299 | if (CheckLabelStr.CheckTy != Check::CheckLabel) { | |
1300 | ++j; | |
1301 | continue; | |
223e47cc LB |
1302 | } |
1303 | ||
1a4d82fc JJ |
1304 | // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG |
1305 | size_t MatchLabelLen = 0; | |
1306 | size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true, | |
1307 | MatchLabelLen, VariableTable); | |
1308 | if (MatchLabelPos == StringRef::npos) { | |
1309 | hasError = true; | |
1310 | break; | |
223e47cc | 1311 | } |
1a4d82fc JJ |
1312 | |
1313 | CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen); | |
1314 | Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen); | |
1315 | ++j; | |
223e47cc LB |
1316 | } |
1317 | ||
1a4d82fc JJ |
1318 | for ( ; i != j; ++i) { |
1319 | const CheckString &CheckStr = CheckStrings[i]; | |
1320 | ||
1321 | // Check each string within the scanned region, including a second check | |
1322 | // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG) | |
223e47cc | 1323 | size_t MatchLen = 0; |
1a4d82fc JJ |
1324 | size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen, |
1325 | VariableTable); | |
1326 | ||
1327 | if (MatchPos == StringRef::npos) { | |
1328 | hasError = true; | |
1329 | i = j; | |
1330 | break; | |
1331 | } | |
223e47cc | 1332 | |
1a4d82fc JJ |
1333 | CheckRegion = CheckRegion.substr(MatchPos + MatchLen); |
1334 | } | |
223e47cc | 1335 | |
1a4d82fc JJ |
1336 | if (j == e) |
1337 | break; | |
223e47cc LB |
1338 | } |
1339 | ||
1a4d82fc | 1340 | return hasError ? 1 : 0; |
223e47cc | 1341 | } |