]> git.proxmox.com Git - rustc.git/blame - vendor/pest_derive/src/validator.rs
New upstream version 1.33.0+dfsg1
[rustc.git] / vendor / pest_derive / src / validator.rs
CommitLineData
83c7162d
XL
1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoČ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use std::collections::{HashMap, HashSet};
11
12use pest::Error;
13use pest::Span;
14use pest::iterators::Pairs;
15
16use quote::Ident;
17
18use parser::{GrammarRule, ParserExpr, ParserNode, ParserRule};
19
20pub fn validate_pairs<'i>(pairs: Pairs<'i, GrammarRule>) -> Vec<Ident> {
21 let mut rust_keywords = HashSet::new();
22 rust_keywords.insert("abstract");
23 rust_keywords.insert("alignof");
24 rust_keywords.insert("as");
25 rust_keywords.insert("become");
26 rust_keywords.insert("box");
27 rust_keywords.insert("break");
28 rust_keywords.insert("const");
29 rust_keywords.insert("continue");
30 rust_keywords.insert("crate");
31 rust_keywords.insert("do");
32 rust_keywords.insert("else");
33 rust_keywords.insert("enum");
34 rust_keywords.insert("extern");
35 rust_keywords.insert("false");
36 rust_keywords.insert("final");
37 rust_keywords.insert("fn");
38 rust_keywords.insert("for");
39 rust_keywords.insert("if");
40 rust_keywords.insert("impl");
41 rust_keywords.insert("in");
42 rust_keywords.insert("let");
43 rust_keywords.insert("loop");
44 rust_keywords.insert("macro");
45 rust_keywords.insert("match");
46 rust_keywords.insert("mod");
47 rust_keywords.insert("move");
48 rust_keywords.insert("mut");
49 rust_keywords.insert("offsetof");
50 rust_keywords.insert("override");
51 rust_keywords.insert("priv");
52 rust_keywords.insert("proc");
53 rust_keywords.insert("pure");
54 rust_keywords.insert("pub");
55 rust_keywords.insert("ref");
56 rust_keywords.insert("return");
57 rust_keywords.insert("Self");
58 rust_keywords.insert("self");
59 rust_keywords.insert("sizeof");
60 rust_keywords.insert("static");
61 rust_keywords.insert("struct");
62 rust_keywords.insert("super");
63 rust_keywords.insert("trait");
64 rust_keywords.insert("true");
65 rust_keywords.insert("type");
66 rust_keywords.insert("typeof");
67 rust_keywords.insert("unsafe");
68 rust_keywords.insert("unsized");
69 rust_keywords.insert("use");
70 rust_keywords.insert("virtual");
71 rust_keywords.insert("where");
72 rust_keywords.insert("while");
73 rust_keywords.insert("yield");
74
75 let mut pest_keywords = HashSet::new();
76 pest_keywords.insert("any");
77 pest_keywords.insert("eoi");
78 pest_keywords.insert("peek");
79 pest_keywords.insert("pop");
80 pest_keywords.insert("push");
81 pest_keywords.insert("soi");
82
83 let mut predefined = HashSet::new();
84 predefined.insert("any");
85 predefined.insert("eoi");
86 predefined.insert("peek");
87 predefined.insert("pop");
88 predefined.insert("soi");
89
90 let definitions: Vec<_> = pairs
91 .clone()
92 .filter(|pair| pair.as_rule() == GrammarRule::grammar_rule)
93 .map(|pair| pair.into_inner().next().unwrap().into_span())
94 .collect();
95 let called_rules: Vec<_> = pairs
96 .clone()
97 .filter(|pair| pair.as_rule() == GrammarRule::grammar_rule)
98 .flat_map(|pair| {
99 pair.into_inner()
100 .flatten()
101 .skip(1)
102 .filter(|pair| pair.as_rule() == GrammarRule::identifier)
103 .map(|pair| pair.into_span())
104 })
105 .collect();
106
107 let mut errors = vec![];
108
109 errors.extend(validate_rust_keywords(&definitions, &rust_keywords));
110 errors.extend(validate_pest_keywords(&definitions, &pest_keywords));
111 errors.extend(validate_already_defined(&definitions));
112 errors.extend(validate_undefined(&definitions, &called_rules, &predefined));
113
114 let errors = errors
115 .into_iter()
116 .map(|error| format!("grammar error\n\n{}", error))
117 .collect::<Vec<_>>()
118 .join("\n\n");
119
120 if errors.len() > 0 {
121 panic!("{}", errors);
122 }
123
124 let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect();
125 let called_rules: HashSet<_> = called_rules.iter().map(|span| span.as_str()).collect();
126
127 let defaults = called_rules.difference(&definitions);
128
129 defaults
130 .into_iter()
131 .map(|string| Ident::new(*string))
132 .collect()
133}
134
135pub fn validate_rust_keywords<'i>(
136 definitions: &Vec<Span<'i>>,
137 rust_keywords: &HashSet<&str>
138) -> Vec<Error<'i, GrammarRule>> {
139 let mut errors = vec![];
140
141 for definition in definitions {
142 let name = definition.as_str();
143
144 if rust_keywords.contains(name) {
145 errors.push(Error::CustomErrorSpan {
146 message: format!("{} is a rust keyword", name),
147 span: definition.clone()
148 })
149 }
150 }
151
152 errors
153}
154
155pub fn validate_pest_keywords<'i>(
156 definitions: &Vec<Span<'i>>,
157 pest_keywords: &HashSet<&str>
158) -> Vec<Error<'i, GrammarRule>> {
159 let mut errors = vec![];
160
161 for definition in definitions {
162 let name = definition.as_str();
163
164 if pest_keywords.contains(name) {
165 errors.push(Error::CustomErrorSpan {
166 message: format!("{} is a pest keyword", name),
167 span: definition.clone()
168 })
169 }
170 }
171
172 errors
173}
174
175pub fn validate_already_defined<'i>(definitions: &Vec<Span<'i>>) -> Vec<Error<'i, GrammarRule>> {
176 let mut errors = vec![];
177 let mut defined = HashSet::new();
178
179 for definition in definitions {
180 let name = definition.as_str();
181
182 if defined.contains(&name) {
183 errors.push(Error::CustomErrorSpan {
184 message: format!("rule {} already defined", name),
185 span: definition.clone()
186 })
187 } else {
188 defined.insert(name);
189 }
190 }
191
192 errors
193}
194
195pub fn validate_undefined<'i>(
196 definitions: &Vec<Span<'i>>,
197 called_rules: &Vec<Span<'i>>,
198 predefined: &HashSet<&str>
199) -> Vec<Error<'i, GrammarRule>> {
200 let mut errors = vec![];
201 let definitions: HashSet<_> = definitions.iter().map(|span| span.as_str()).collect();
202
203 for rule in called_rules {
204 let name = rule.as_str();
205
206 if !definitions.contains(name) && !predefined.contains(name) {
207 errors.push(Error::CustomErrorSpan {
208 message: format!("rule {} is undefined", name),
209 span: rule.clone()
210 })
211 }
212 }
213
214 errors
215}
216
217pub fn validate_ast<'i>(rules: &Vec<ParserRule<'i>>) {
218 let mut errors = vec![];
219
220 errors.extend(validate_repetition(rules));
221 errors.extend(validate_whitespace_comment(rules));
222 errors.extend(validate_left_recursion(rules));
223
224 errors.sort_by_key(|error| match *error {
225 Error::CustomErrorSpan { ref span, .. } => span.clone(),
226 _ => unreachable!()
227 });
228
229 let errors = errors
230 .into_iter()
231 .map(|error| format!("{}", error))
232 .collect::<Vec<_>>()
233 .join("\n\n");
234
235 if errors.len() > 0 {
236 panic!("grammar error\n\n{}", errors);
237 }
238}
239
240fn is_non_progressing<'i>(expr: &ParserExpr<'i>) -> bool {
241 match *expr {
242 ParserExpr::Str(ref string) => string == "",
243 ParserExpr::Ident(ref ident) => ident == "soi" || ident == "eoi",
244 ParserExpr::PosPred(_) => true,
245 ParserExpr::NegPred(_) => true,
246 ParserExpr::Seq(ref lhs, ref rhs) => {
247 is_non_progressing(&lhs.expr) && is_non_progressing(&rhs.expr)
248 }
249 ParserExpr::Choice(ref lhs, ref rhs) => {
250 is_non_progressing(&lhs.expr) || is_non_progressing(&rhs.expr)
251 }
252 _ => false
253 }
254}
255
256fn is_non_failing<'i>(expr: &ParserExpr<'i>) -> bool {
257 match *expr {
258 ParserExpr::Str(ref string) => string == "",
259 ParserExpr::Opt(_) => true,
260 ParserExpr::Rep(_) => true,
261 ParserExpr::Seq(ref lhs, ref rhs) => is_non_failing(&lhs.expr) && is_non_failing(&rhs.expr),
262 ParserExpr::Choice(ref lhs, ref rhs) => {
263 is_non_failing(&lhs.expr) || is_non_failing(&rhs.expr)
264 }
265 _ => false
266 }
267}
268
269fn validate_repetition<'i>(rules: &Vec<ParserRule<'i>>) -> Vec<Error<'i, GrammarRule>> {
270 rules
271 .into_iter()
272 .filter_map(|rule| match rule.node.expr {
273 ParserExpr::Rep(ref other)
274 | ParserExpr::RepOnce(ref other)
275 | ParserExpr::RepMin(ref other, _) => {
276 if is_non_failing(&other.expr) {
277 Some(Error::CustomErrorSpan {
278 message: "expression inside repetition is non-failing and will repeat \
279 infinitely"
280 .to_owned(),
281 span: rule.node.span.clone()
282 })
283 } else if is_non_progressing(&other.expr) {
284 Some(Error::CustomErrorSpan {
285 message: "expression inside repetition is non-progressing and will repeat \
286 infinitely"
287 .to_owned(),
288 span: rule.node.span.clone()
289 })
290 } else {
291 None
292 }
293 }
294 _ => None
295 })
296 .collect()
297}
298
299fn validate_whitespace_comment<'i>(rules: &Vec<ParserRule<'i>>) -> Vec<Error<'i, GrammarRule>> {
300 rules
301 .into_iter()
302 .filter_map(|rule| {
303 if &rule.name == "whitespace" || &rule.name == "comment" {
304 if is_non_failing(&rule.node.expr) {
305 Some(Error::CustomErrorSpan {
306 message: format!(
307 "{} is non-failing and will repeat infinitely",
308 &rule.name
309 ),
310 span: rule.node.span.clone()
311 })
312 } else if is_non_progressing(&rule.node.expr) {
313 Some(Error::CustomErrorSpan {
314 message: format!(
315 "{} is non-progressing and will repeat infinitely",
316 &rule.name
317 ),
318 span: rule.node.span.clone()
319 })
320 } else {
321 None
322 }
323 } else {
324 None
325 }
326 })
327 .collect()
328}
329
330fn validate_left_recursion<'i>(rules: &Vec<ParserRule<'i>>) -> Vec<Error<'i, GrammarRule>> {
331 left_recursion(to_hash_map(rules))
332}
333
334fn to_hash_map<'a, 'i>(rules: &'a Vec<ParserRule<'i>>) -> HashMap<Ident, &'a ParserNode<'i>> {
335 let mut hash_map = HashMap::new();
336
337 for rule in rules {
338 hash_map.insert(rule.name.clone(), &rule.node);
339 }
340
341 hash_map
342}
343
344fn left_recursion<'i>(rules: HashMap<Ident, &ParserNode<'i>>) -> Vec<Error<'i, GrammarRule>> {
345 fn check_expr<'i>(
346 node: &ParserNode<'i>,
347 rules: &HashMap<Ident, &ParserNode<'i>>,
348 trace: &mut Vec<Ident>
349 ) -> Option<Error<'i, GrammarRule>> {
350 match node.expr {
351 ParserExpr::Ident(ref other) => {
352 if trace[0] == other {
353 trace.push(other.clone());
354 let chain = trace
355 .iter()
356 .map(|ident| ident.as_ref())
357 .collect::<Vec<_>>()
358 .join(" -> ");
359
360 return Some(Error::CustomErrorSpan {
361 message: format!(
362 "rule {} is left-recursive ({}); pest::prec_climber might \
363 be useful in this case",
364 node.span.as_str(),
365 chain
366 ),
367 span: node.span.clone()
368 });
369 }
370
371 if !trace.contains(other) {
372 if let Some(node) = rules.get(other) {
373 trace.push(other.clone());
374 let result = check_expr(node, rules, trace);
375 trace.pop().unwrap();
376
377 return result;
378 }
379 }
380
381 None
382 }
383 ParserExpr::Seq(ref lhs, ref rhs) => {
384 if is_non_failing(&lhs.expr) {
385 check_expr(rhs, rules, trace)
386 } else {
387 check_expr(lhs, rules, trace)
388 }
389 }
390 ParserExpr::Choice(ref lhs, ref rhs) => {
391 check_expr(&lhs, rules, trace).or(check_expr(&rhs, rules, trace))
392 }
393 ParserExpr::Rep(ref node) => check_expr(&node, rules, trace),
394 ParserExpr::RepOnce(ref node) => check_expr(&node, rules, trace),
395 ParserExpr::Opt(ref node) => check_expr(&node, rules, trace),
396 ParserExpr::PosPred(ref node) => check_expr(&node, rules, trace),
397 ParserExpr::NegPred(ref node) => check_expr(&node, rules, trace),
398 ParserExpr::Push(ref node) => check_expr(&node, rules, trace),
399 _ => None
400 }
401 }
402
403 let mut errors = vec![];
404
405 for (ref name, ref node) in &rules {
406 let name = (*name).clone();
407
408 if let Some(error) = check_expr(node, &rules, &mut vec![name]) {
409 errors.push(error);
410 }
411 }
412
413 errors
414}
415
416#[cfg(test)]
417mod tests {
418 use pest::Parser;
419
420 use super::*;
421 use super::super::parser::{consume_rules, GrammarParser};
422
423 #[test]
424 #[should_panic(expected = "grammar error
425
426 --> 1:1
427 |
4281 | let = { \"a\" }
429 | ^-^
430 |
431 = let is a rust keyword")]
432 fn rust_keyword() {
433 let input = "let = { \"a\" }";
434 validate_pairs(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
435 }
436
437 #[test]
438 #[should_panic(expected = "grammar error
439
440 --> 1:1
441 |
4421 | any = { \"a\" }
443 | ^-^
444 |
445 = any is a pest keyword")]
446 fn pest_keyword() {
447 let input = "any = { \"a\" }";
448 validate_pairs(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
449 }
450
451 #[test]
452 #[should_panic(expected = "grammar error
453
454 --> 1:13
455 |
4561 | a = { \"a\" } a = { \"a\" }
457 | ^
458 |
459 = rule a already defined")]
460 fn already_defined() {
461 let input = "a = { \"a\" } a = { \"a\" }";
462 validate_pairs(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
463 }
464
465 #[test]
466 #[should_panic(expected = "grammar error
467
468 --> 1:7
469 |
4701 | a = { b }
471 | ^
472 |
473 = rule b is undefined")]
474 fn undefined() {
475 let input = "a = { b }";
476 validate_pairs(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
477 }
478
479 #[test]
480 fn valid_recursion() {
481 let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"b\") ~ a }";
482 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
483 }
484
485 #[test]
486 #[should_panic(expected = "grammar error
487
488 --> 1:16
489 |
4901 | whitespace = { \"\" }
491 | ^^
492 |
493 = whitespace is non-failing and will repeat infinitely")]
494 fn non_failing_whitespace() {
495 let input = "whitespace = { \"\" }";
496 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
497 }
498
499 #[test]
500 #[should_panic(expected = "grammar error
501
502 --> 1:13
503 |
5041 | comment = { soi }
505 | ^-^
506 |
507 = comment is non-progressing and will repeat infinitely")]
508 fn non_progressing_comment() {
509 let input = "comment = { soi }";
510 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
511 }
512
513 #[test]
514 #[should_panic(expected = "grammar error
515
516 --> 1:7
517 |
5181 | a = { (\"\")* }
519 | ^---^
520 |
521 = expression inside repetition is non-failing and will repeat infinitely")]
522 fn non_failing_repetition() {
523 let input = "a = { (\"\")* }";
524 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
525 }
526
527 #[test]
528 #[should_panic(expected = "grammar error
529
530 --> 1:7
531 |
5321 | a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }
533 | ^-------------------------------^
534 |
535 = expression inside repetition is non-progressing and will repeat infinitely")]
536 fn non_progressing_repetition() {
537 let input = "a = { (\"\" ~ &\"a\" ~ !\"a\" ~ (soi | eoi))* }";
538 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
539 }
540
541 #[test]
542 #[should_panic(expected = "grammar error
543
544 --> 1:7
545 |
5461 | a = { a }
547 | ^
548 |
549 = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")]
550 fn simple_left_recursion() {
551 let input = "a = { a }";
552 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
553 }
554
555 #[test]
556 #[should_panic(expected = "grammar error
557
558 --> 1:7
559 |
5601 | a = { b } b = { a }
561 | ^
562 |
563 = rule b is left-recursive (b -> a -> b); pest::prec_climber might be useful in this case
564
565 --> 1:17
566 |
5671 | a = { b } b = { a }
568 | ^
569 |
570 = rule a is left-recursive (a -> b -> a); pest::prec_climber might be useful in this case")]
571 fn indirect_left_recursion() {
572 let input = "a = { b } b = { a }";
573 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
574 }
575
576 #[test]
577 #[should_panic(expected = "grammar error
578
579 --> 1:39
580 |
5811 | a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }
582 | ^
583 |
584 = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")]
585 fn non_failing_left_recursion() {
586 let input = "a = { \"\" ~ \"a\"? ~ \"a\"* ~ (\"a\" | \"\") ~ a }";
587 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
588 }
589
590 #[test]
591 #[should_panic(expected = "grammar error
592
593 --> 1:13
594 |
5951 | a = { \"a\" | a }
596 | ^
597 |
598 = rule a is left-recursive (a -> a); pest::prec_climber might be useful in this case")]
599 fn non_primary_choice_left_recursion() {
600 let input = "a = { \"a\" | a }";
601 consume_rules(GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap());
602 }
603}