]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 | |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT | |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. All files in the project carrying such notice may not be copied, | |
8 | // modified, or distributed except according to those terms. | |
9 | ||
10 | use std::iter::Peekable; | |
11 | ||
12 | use pest::{self, Error, Parser, ParserState}; | |
13 | use pest::Position; | |
14 | use pest::Span; | |
15 | use pest::iterators::{Pair, Pairs}; | |
16 | use pest::prec_climber::{Assoc, Operator, PrecClimber}; | |
17 | ||
18 | use quote::Ident; | |
19 | ||
20 | use ast::{Expr, Rule, RuleType}; | |
21 | use validator; | |
22 | ||
/// Rule identifiers for the hand-written grammar parser in this file.
///
/// Each variant shares its name with the parsing function that produces it,
/// which is why the names are lower-case. The declaration order is
/// significant for the derived `Ord`/`PartialOrd`, so do not reorder.
// `non_camel_case_types`: variants deliberately mirror the function names.
// `dead_code`: not every variant is constructed by callers of this module.
#[allow(dead_code, non_camel_case_types)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum GrammarRule {
    /// Whole input: `soi`, one or more `grammar_rule`s, then `eoi`.
    grammar_rules,
    /// Start of input.
    soi,
    /// End of input.
    eoi,
    /// `identifier = modifier? { expression }`.
    grammar_rule,
    /// The `=` literal.
    assignment_operator,
    /// The `_` modifier.
    silent_modifier,
    /// The `@` modifier.
    atomic_modifier,
    /// The `$` modifier.
    compound_atomic_modifier,
    /// The `!` modifier.
    non_atomic_modifier,
    /// The `{` literal.
    opening_brace,
    /// The `}` literal.
    closing_brace,
    /// The `(` literal.
    opening_paren,
    /// The `)` literal.
    closing_paren,
    /// A `term` followed by any number of `infix-operator term` pairs.
    expression,
    /// Prefix operators, then a parenthesised expression or a terminal,
    /// then postfix operators.
    term,
    /// The `&` prefix operator.
    positive_predicate_operator,
    /// The `!` prefix operator.
    negative_predicate_operator,
    /// The `~` infix operator.
    sequence_operator,
    /// The `|` infix operator.
    choice_operator,
    /// The `?` postfix operator.
    optional_operator,
    /// The `*` postfix operator.
    repeat_operator,
    /// The `+` postfix operator.
    repeat_once_operator,
    /// `{ number }`.
    repeat_exact,
    /// `{ number , }`.
    repeat_min,
    /// `{ , number }`.
    repeat_max,
    /// `{ number , number }`.
    repeat_min_max,
    /// The `,` literal.
    comma,
    /// `push ( expression )`.
    push,
    /// A name built from `_`, `a..z`, `A..Z` and (after the first character)
    /// `0..9`.
    identifier,
    /// A double-quoted string literal, escapes included.
    string,
    /// The `"` literal.
    quote,
    /// `^` followed by a `string`.
    insensitive_string,
    /// `character .. character`.
    range,
    /// The `..` literal.
    range_operator,
    /// A single-quoted character literal, escapes included.
    character,
    /// One or more `0..9` digits.
    number,
    /// The `'` literal.
    single_quote,
}
64 | ||
65 | pub struct GrammarParser; | |
66 | ||
67 | impl Parser<GrammarRule> for GrammarParser { | |
68 | fn parse<'i>( | |
69 | rule: GrammarRule, | |
70 | input: &'i str | |
71 | ) -> Result<Pairs<'i, GrammarRule>, Error<'i, GrammarRule>> { | |
72 | fn grammar_rules<'i>( | |
73 | pos: Position<'i>, | |
74 | state: &mut ParserState<'i, GrammarRule> | |
75 | ) -> Result<Position<'i>, Position<'i>> { | |
76 | pos.sequence(|pos| { | |
77 | soi(pos, state) | |
78 | .and_then(|pos| skip(pos, state)) | |
79 | .and_then(|pos| grammar_rule(pos, state)) | |
80 | .and_then(|pos| { | |
81 | pos.repeat(|pos| { | |
82 | state.sequence(move |state| { | |
83 | pos.sequence(|pos| { | |
84 | skip(pos, state).and_then(|pos| grammar_rule(pos, state)) | |
85 | }) | |
86 | }) | |
87 | }) | |
88 | }) | |
89 | .and_then(|pos| skip(pos, state)) | |
90 | .and_then(|pos| eoi(pos, state)) | |
91 | }) | |
92 | } | |
93 | ||
94 | fn soi<'i>( | |
95 | pos: Position<'i>, | |
96 | _: &mut ParserState<'i, GrammarRule> | |
97 | ) -> Result<Position<'i>, Position<'i>> { | |
98 | pos.at_start() | |
99 | } | |
100 | ||
101 | fn eoi<'i>( | |
102 | pos: Position<'i>, | |
103 | _: &mut ParserState<'i, GrammarRule> | |
104 | ) -> Result<Position<'i>, Position<'i>> { | |
105 | pos.at_end() | |
106 | } | |
107 | ||
108 | fn grammar_rule<'i>( | |
109 | pos: Position<'i>, | |
110 | state: &mut ParserState<'i, GrammarRule> | |
111 | ) -> Result<Position<'i>, Position<'i>> { | |
112 | state.rule(GrammarRule::grammar_rule, pos, |state, pos| { | |
113 | state.sequence(move |state| { | |
114 | pos.sequence(|pos| { | |
115 | identifier(pos, state) | |
116 | .and_then(|pos| skip(pos, state)) | |
117 | .and_then(|pos| assignment_operator(pos, state)) | |
118 | .and_then(|pos| skip(pos, state)) | |
119 | .and_then(|pos| pos.optional(|pos| modifier(pos, state))) | |
120 | .and_then(|pos| skip(pos, state)) | |
121 | .and_then(|pos| opening_brace(pos, state)) | |
122 | .and_then(|pos| skip(pos, state)) | |
123 | .and_then(|pos| expression(pos, state)) | |
124 | .and_then(|pos| skip(pos, state)) | |
125 | .and_then(|pos| closing_brace(pos, state)) | |
126 | }) | |
127 | }) | |
128 | }) | |
129 | } | |
130 | ||
131 | fn assignment_operator<'i>( | |
132 | pos: Position<'i>, | |
133 | state: &mut ParserState<'i, GrammarRule> | |
134 | ) -> Result<Position<'i>, Position<'i>> { | |
135 | state.rule(GrammarRule::assignment_operator, pos, |_, pos| { | |
136 | pos.match_string("=") | |
137 | }) | |
138 | } | |
139 | ||
140 | fn modifier<'i>( | |
141 | pos: Position<'i>, | |
142 | state: &mut ParserState<'i, GrammarRule> | |
143 | ) -> Result<Position<'i>, Position<'i>> { | |
144 | silent_modifier(pos, state) | |
145 | .or_else(|pos| atomic_modifier(pos, state)) | |
146 | .or_else(|pos| compound_atomic_modifier(pos, state)) | |
147 | .or_else(|pos| non_atomic_modifier(pos, state)) | |
148 | } | |
149 | ||
150 | fn silent_modifier<'i>( | |
151 | pos: Position<'i>, | |
152 | state: &mut ParserState<'i, GrammarRule> | |
153 | ) -> Result<Position<'i>, Position<'i>> { | |
154 | state.rule(GrammarRule::silent_modifier, pos, |_, pos| { | |
155 | pos.match_string("_") | |
156 | }) | |
157 | } | |
158 | ||
159 | fn atomic_modifier<'i>( | |
160 | pos: Position<'i>, | |
161 | state: &mut ParserState<'i, GrammarRule> | |
162 | ) -> Result<Position<'i>, Position<'i>> { | |
163 | state.rule(GrammarRule::atomic_modifier, pos, |_, pos| { | |
164 | pos.match_string("@") | |
165 | }) | |
166 | } | |
167 | ||
168 | fn compound_atomic_modifier<'i>( | |
169 | pos: Position<'i>, | |
170 | state: &mut ParserState<'i, GrammarRule> | |
171 | ) -> Result<Position<'i>, Position<'i>> { | |
172 | state.rule(GrammarRule::compound_atomic_modifier, pos, |_, pos| { | |
173 | pos.match_string("$") | |
174 | }) | |
175 | } | |
176 | ||
177 | fn non_atomic_modifier<'i>( | |
178 | pos: Position<'i>, | |
179 | state: &mut ParserState<'i, GrammarRule> | |
180 | ) -> Result<Position<'i>, Position<'i>> { | |
181 | state.rule(GrammarRule::non_atomic_modifier, pos, |_, pos| { | |
182 | pos.match_string("!") | |
183 | }) | |
184 | } | |
185 | ||
186 | fn opening_brace<'i>( | |
187 | pos: Position<'i>, | |
188 | state: &mut ParserState<'i, GrammarRule> | |
189 | ) -> Result<Position<'i>, Position<'i>> { | |
190 | state.rule(GrammarRule::opening_brace, pos, |_, pos| { | |
191 | pos.match_string("{") | |
192 | }) | |
193 | } | |
194 | ||
195 | fn closing_brace<'i>( | |
196 | pos: Position<'i>, | |
197 | state: &mut ParserState<'i, GrammarRule> | |
198 | ) -> Result<Position<'i>, Position<'i>> { | |
199 | state.rule(GrammarRule::closing_brace, pos, |_, pos| { | |
200 | pos.match_string("}") | |
201 | }) | |
202 | } | |
203 | ||
204 | fn opening_paren<'i>( | |
205 | pos: Position<'i>, | |
206 | state: &mut ParserState<'i, GrammarRule> | |
207 | ) -> Result<Position<'i>, Position<'i>> { | |
208 | state.rule(GrammarRule::opening_paren, pos, |_, pos| { | |
209 | pos.match_string("(") | |
210 | }) | |
211 | } | |
212 | ||
213 | fn closing_paren<'i>( | |
214 | pos: Position<'i>, | |
215 | state: &mut ParserState<'i, GrammarRule> | |
216 | ) -> Result<Position<'i>, Position<'i>> { | |
217 | state.rule(GrammarRule::closing_paren, pos, |_, pos| { | |
218 | pos.match_string(")") | |
219 | }) | |
220 | } | |
221 | ||
222 | fn expression<'i>( | |
223 | pos: Position<'i>, | |
224 | state: &mut ParserState<'i, GrammarRule> | |
225 | ) -> Result<Position<'i>, Position<'i>> { | |
226 | state.rule(GrammarRule::expression, pos, |state, pos| { | |
227 | state.sequence(move |state| { | |
228 | pos.sequence(|pos| { | |
229 | term(pos, state).and_then(|pos| { | |
230 | pos.repeat(|pos| { | |
231 | state.sequence(move |state| { | |
232 | pos.sequence(|pos| { | |
233 | skip(pos, state) | |
234 | .and_then(|pos| infix_operator(pos, state)) | |
235 | .and_then(|pos| skip(pos, state)) | |
236 | .and_then(|pos| term(pos, state)) | |
237 | }) | |
238 | }) | |
239 | }) | |
240 | }) | |
241 | }) | |
242 | }) | |
243 | }) | |
244 | } | |
245 | ||
246 | fn term<'i>( | |
247 | pos: Position<'i>, | |
248 | state: &mut ParserState<'i, GrammarRule> | |
249 | ) -> Result<Position<'i>, Position<'i>> { | |
250 | state.rule(GrammarRule::term, pos, |state, pos| { | |
251 | state.sequence(move |state| { | |
252 | pos.sequence(|pos| { | |
253 | pos.repeat(|pos| { | |
254 | pos.sequence(|pos| { | |
255 | prefix_operator(pos, state).and_then(|pos| skip(pos, state)) | |
256 | }) | |
257 | }).and_then(|pos| { | |
258 | state | |
259 | .sequence(move |state| { | |
260 | pos.sequence(|pos| { | |
261 | opening_paren(pos, state) | |
262 | .and_then(|pos| skip(pos, state)) | |
263 | .and_then(|pos| expression(pos, state)) | |
264 | .and_then(|pos| skip(pos, state)) | |
265 | .and_then(|pos| closing_paren(pos, state)) | |
266 | }) | |
267 | }) | |
268 | .or_else(|pos| terminal(pos, state)) | |
269 | }) | |
270 | .and_then(|pos| { | |
271 | pos.repeat(|pos| { | |
272 | pos.sequence(|pos| { | |
273 | skip(pos, state) | |
274 | .and_then(|pos| postfix_operator(pos, state)) | |
275 | }) | |
276 | }) | |
277 | }) | |
278 | }) | |
279 | }) | |
280 | }) | |
281 | } | |
282 | ||
283 | fn terminal<'i>( | |
284 | pos: Position<'i>, | |
285 | state: &mut ParserState<'i, GrammarRule> | |
286 | ) -> Result<Position<'i>, Position<'i>> { | |
287 | push(pos, state) | |
288 | .or_else(|pos| identifier(pos, state)) | |
289 | .or_else(|pos| string(pos, state)) | |
290 | .or_else(|pos| insensitive_string(pos, state)) | |
291 | .or_else(|pos| range(pos, state)) | |
292 | } | |
293 | ||
294 | fn prefix_operator<'i>( | |
295 | pos: Position<'i>, | |
296 | state: &mut ParserState<'i, GrammarRule> | |
297 | ) -> Result<Position<'i>, Position<'i>> { | |
298 | positive_predicate_operator(pos, state) | |
299 | .or_else(|pos| negative_predicate_operator(pos, state)) | |
300 | } | |
301 | ||
302 | fn infix_operator<'i>( | |
303 | pos: Position<'i>, | |
304 | state: &mut ParserState<'i, GrammarRule> | |
305 | ) -> Result<Position<'i>, Position<'i>> { | |
306 | sequence_operator(pos, state).or_else(|pos| choice_operator(pos, state)) | |
307 | } | |
308 | ||
309 | fn postfix_operator<'i>( | |
310 | pos: Position<'i>, | |
311 | state: &mut ParserState<'i, GrammarRule> | |
312 | ) -> Result<Position<'i>, Position<'i>> { | |
313 | optional_operator(pos, state) | |
314 | .or_else(|pos| repeat_operator(pos, state)) | |
315 | .or_else(|pos| repeat_once_operator(pos, state)) | |
316 | .or_else(|pos| repeat_exact(pos, state)) | |
317 | .or_else(|pos| repeat_min(pos, state)) | |
318 | .or_else(|pos| repeat_max(pos, state)) | |
319 | .or_else(|pos| repeat_min_max(pos, state)) | |
320 | } | |
321 | ||
322 | fn positive_predicate_operator<'i>( | |
323 | pos: Position<'i>, | |
324 | state: &mut ParserState<'i, GrammarRule> | |
325 | ) -> Result<Position<'i>, Position<'i>> { | |
326 | state.rule(GrammarRule::positive_predicate_operator, pos, |_, pos| { | |
327 | pos.match_string("&") | |
328 | }) | |
329 | } | |
330 | ||
331 | fn negative_predicate_operator<'i>( | |
332 | pos: Position<'i>, | |
333 | state: &mut ParserState<'i, GrammarRule> | |
334 | ) -> Result<Position<'i>, Position<'i>> { | |
335 | state.rule(GrammarRule::negative_predicate_operator, pos, |_, pos| { | |
336 | pos.match_string("!") | |
337 | }) | |
338 | } | |
339 | ||
340 | fn sequence_operator<'i>( | |
341 | pos: Position<'i>, | |
342 | state: &mut ParserState<'i, GrammarRule> | |
343 | ) -> Result<Position<'i>, Position<'i>> { | |
344 | state.rule(GrammarRule::sequence_operator, pos, |_, pos| { | |
345 | pos.match_string("~") | |
346 | }) | |
347 | } | |
348 | ||
349 | fn choice_operator<'i>( | |
350 | pos: Position<'i>, | |
351 | state: &mut ParserState<'i, GrammarRule> | |
352 | ) -> Result<Position<'i>, Position<'i>> { | |
353 | state.rule(GrammarRule::choice_operator, pos, |_, pos| { | |
354 | pos.match_string("|") | |
355 | }) | |
356 | } | |
357 | ||
358 | fn optional_operator<'i>( | |
359 | pos: Position<'i>, | |
360 | state: &mut ParserState<'i, GrammarRule> | |
361 | ) -> Result<Position<'i>, Position<'i>> { | |
362 | state.rule(GrammarRule::optional_operator, pos, |_, pos| { | |
363 | pos.match_string("?") | |
364 | }) | |
365 | } | |
366 | ||
367 | fn repeat_operator<'i>( | |
368 | pos: Position<'i>, | |
369 | state: &mut ParserState<'i, GrammarRule> | |
370 | ) -> Result<Position<'i>, Position<'i>> { | |
371 | state.rule(GrammarRule::repeat_operator, pos, |_, pos| { | |
372 | pos.match_string("*") | |
373 | }) | |
374 | } | |
375 | ||
376 | fn repeat_once_operator<'i>( | |
377 | pos: Position<'i>, | |
378 | state: &mut ParserState<'i, GrammarRule> | |
379 | ) -> Result<Position<'i>, Position<'i>> { | |
380 | state.rule(GrammarRule::repeat_once_operator, pos, |_, pos| { | |
381 | pos.match_string("+") | |
382 | }) | |
383 | } | |
384 | ||
385 | fn repeat_exact<'i>( | |
386 | pos: Position<'i>, | |
387 | state: &mut ParserState<'i, GrammarRule> | |
388 | ) -> Result<Position<'i>, Position<'i>> { | |
389 | state.rule(GrammarRule::repeat_exact, pos, |state, pos| { | |
390 | state.sequence(move |state| { | |
391 | pos.sequence(|pos| { | |
392 | opening_brace(pos, state) | |
393 | .and_then(|pos| skip(pos, state)) | |
394 | .and_then(|pos| number(pos, state)) | |
395 | .and_then(|pos| skip(pos, state)) | |
396 | .and_then(|pos| closing_brace(pos, state)) | |
397 | }) | |
398 | }) | |
399 | }) | |
400 | } | |
401 | ||
402 | fn repeat_min<'i>( | |
403 | pos: Position<'i>, | |
404 | state: &mut ParserState<'i, GrammarRule> | |
405 | ) -> Result<Position<'i>, Position<'i>> { | |
406 | state.rule(GrammarRule::repeat_min, pos, |state, pos| { | |
407 | state.sequence(move |state| { | |
408 | pos.sequence(|pos| { | |
409 | opening_brace(pos, state) | |
410 | .and_then(|pos| skip(pos, state)) | |
411 | .and_then(|pos| number(pos, state)) | |
412 | .and_then(|pos| skip(pos, state)) | |
413 | .and_then(|pos| comma(pos, state)) | |
414 | .and_then(|pos| skip(pos, state)) | |
415 | .and_then(|pos| closing_brace(pos, state)) | |
416 | }) | |
417 | }) | |
418 | }) | |
419 | } | |
420 | ||
421 | fn repeat_max<'i>( | |
422 | pos: Position<'i>, | |
423 | state: &mut ParserState<'i, GrammarRule> | |
424 | ) -> Result<Position<'i>, Position<'i>> { | |
425 | state.rule(GrammarRule::repeat_max, pos, |state, pos| { | |
426 | state.sequence(move |state| { | |
427 | pos.sequence(|pos| { | |
428 | opening_brace(pos, state) | |
429 | .and_then(|pos| skip(pos, state)) | |
430 | .and_then(|pos| comma(pos, state)) | |
431 | .and_then(|pos| skip(pos, state)) | |
432 | .and_then(|pos| number(pos, state)) | |
433 | .and_then(|pos| skip(pos, state)) | |
434 | .and_then(|pos| closing_brace(pos, state)) | |
435 | }) | |
436 | }) | |
437 | }) | |
438 | } | |
439 | ||
440 | fn repeat_min_max<'i>( | |
441 | pos: Position<'i>, | |
442 | state: &mut ParserState<'i, GrammarRule> | |
443 | ) -> Result<Position<'i>, Position<'i>> { | |
444 | state.rule(GrammarRule::repeat_min_max, pos, |state, pos| { | |
445 | state.sequence(move |state| { | |
446 | pos.sequence(|pos| { | |
447 | opening_brace(pos, state) | |
448 | .and_then(|pos| skip(pos, state)) | |
449 | .and_then(|pos| number(pos, state)) | |
450 | .and_then(|pos| skip(pos, state)) | |
451 | .and_then(|pos| comma(pos, state)) | |
452 | .and_then(|pos| skip(pos, state)) | |
453 | .and_then(|pos| number(pos, state)) | |
454 | .and_then(|pos| skip(pos, state)) | |
455 | .and_then(|pos| closing_brace(pos, state)) | |
456 | }) | |
457 | }) | |
458 | }) | |
459 | } | |
460 | ||
461 | fn comma<'i>( | |
462 | pos: Position<'i>, | |
463 | state: &mut ParserState<'i, GrammarRule> | |
464 | ) -> Result<Position<'i>, Position<'i>> { | |
465 | state.rule(GrammarRule::comma, pos, |_, pos| pos.match_string(",")) | |
466 | } | |
467 | ||
468 | fn push<'i>( | |
469 | pos: Position<'i>, | |
470 | state: &mut ParserState<'i, GrammarRule> | |
471 | ) -> Result<Position<'i>, Position<'i>> { | |
472 | state.rule(GrammarRule::push, pos, |state, pos| { | |
473 | pos.sequence(|pos| { | |
474 | pos.match_string("push") | |
475 | .and_then(|pos| skip(pos, state)) | |
476 | .and_then(|pos| opening_paren(pos, state)) | |
477 | .and_then(|pos| skip(pos, state)) | |
478 | .and_then(|pos| expression(pos, state)) | |
479 | .and_then(|pos| skip(pos, state)) | |
480 | .and_then(|pos| closing_paren(pos, state)) | |
481 | }) | |
482 | }) | |
483 | } | |
484 | ||
485 | fn identifier<'i>( | |
486 | pos: Position<'i>, | |
487 | state: &mut ParserState<'i, GrammarRule> | |
488 | ) -> Result<Position<'i>, Position<'i>> { | |
489 | state.rule(GrammarRule::identifier, pos, |state, pos| { | |
490 | pos.sequence(|pos| { | |
491 | pos.match_string("_") | |
492 | .or_else(|pos| alpha(pos, state)) | |
493 | .and_then(|pos| { | |
494 | pos.repeat(|pos| { | |
495 | pos.match_string("_").or_else(|pos| alpha_num(pos, state)) | |
496 | }) | |
497 | }) | |
498 | }) | |
499 | }) | |
500 | } | |
501 | ||
502 | fn alpha<'i>( | |
503 | pos: Position<'i>, | |
504 | _: &mut ParserState<'i, GrammarRule> | |
505 | ) -> Result<Position<'i>, Position<'i>> { | |
506 | pos.match_range('a'..'z') | |
507 | .or_else(|pos| pos.match_range('A'..'Z')) | |
508 | } | |
509 | ||
510 | fn alpha_num<'i>( | |
511 | pos: Position<'i>, | |
512 | state: &mut ParserState<'i, GrammarRule> | |
513 | ) -> Result<Position<'i>, Position<'i>> { | |
514 | alpha(pos, state).or_else(|pos| pos.match_range('0'..'9')) | |
515 | } | |
516 | ||
517 | fn string<'i>( | |
518 | pos: Position<'i>, | |
519 | state: &mut ParserState<'i, GrammarRule> | |
520 | ) -> Result<Position<'i>, Position<'i>> { | |
521 | state.rule(GrammarRule::string, pos, |state, pos| { | |
522 | pos.sequence(|pos| { | |
523 | quote(pos, state) | |
524 | .and_then(|pos| { | |
525 | pos.repeat(|pos| { | |
526 | pos.sequence(|pos| { | |
527 | pos.lookahead(false, |pos| { | |
528 | pos.match_string("\"").or_else(|pos| pos.match_string("\\")) | |
529 | }).and_then(|pos| pos.skip(1)) | |
530 | }).or_else(|pos| escape(pos, state)) | |
531 | }) | |
532 | }) | |
533 | .and_then(|pos| quote(pos, state)) | |
534 | }) | |
535 | }) | |
536 | } | |
537 | ||
538 | fn quote<'i>( | |
539 | pos: Position<'i>, | |
540 | state: &mut ParserState<'i, GrammarRule> | |
541 | ) -> Result<Position<'i>, Position<'i>> { | |
542 | state.rule(GrammarRule::quote, pos, |_, pos| pos.match_string("\"")) | |
543 | } | |
544 | ||
545 | fn insensitive_string<'i>( | |
546 | pos: Position<'i>, | |
547 | state: &mut ParserState<'i, GrammarRule> | |
548 | ) -> Result<Position<'i>, Position<'i>> { | |
549 | state.rule(GrammarRule::insensitive_string, pos, |state, pos| { | |
550 | pos.sequence(|pos| { | |
551 | pos.match_string("^") | |
552 | .and_then(|pos| skip(pos, state)) | |
553 | .and_then(|pos| string(pos, state)) | |
554 | }) | |
555 | }) | |
556 | } | |
557 | ||
558 | fn range<'i>( | |
559 | pos: Position<'i>, | |
560 | state: &mut ParserState<'i, GrammarRule> | |
561 | ) -> Result<Position<'i>, Position<'i>> { | |
562 | state.rule(GrammarRule::range, pos, |state, pos| { | |
563 | state.sequence(move |state| { | |
564 | pos.sequence(|pos| { | |
565 | character(pos, state) | |
566 | .and_then(|pos| skip(pos, state)) | |
567 | .and_then(|pos| range_operator(pos, state)) | |
568 | .and_then(|pos| skip(pos, state)) | |
569 | .and_then(|pos| character(pos, state)) | |
570 | }) | |
571 | }) | |
572 | }) | |
573 | } | |
574 | ||
575 | fn range_operator<'i>( | |
576 | pos: Position<'i>, | |
577 | state: &mut ParserState<'i, GrammarRule> | |
578 | ) -> Result<Position<'i>, Position<'i>> { | |
579 | state.rule(GrammarRule::range_operator, pos, |_, pos| { | |
580 | pos.match_string("..") | |
581 | }) | |
582 | } | |
583 | ||
584 | fn character<'i>( | |
585 | pos: Position<'i>, | |
586 | state: &mut ParserState<'i, GrammarRule> | |
587 | ) -> Result<Position<'i>, Position<'i>> { | |
588 | state.rule(GrammarRule::character, pos, |state, pos| { | |
589 | pos.sequence(|pos| { | |
590 | single_quote(pos, state) | |
591 | .and_then(|pos| { | |
592 | pos.sequence(|pos| { | |
593 | pos.lookahead(false, |pos| { | |
594 | pos.match_string("'").or_else(|pos| pos.match_string("\\")) | |
595 | }).and_then(|pos| pos.skip(1)) | |
596 | }).or_else(|pos| escape(pos, state)) | |
597 | }) | |
598 | .and_then(|pos| single_quote(pos, state)) | |
599 | }) | |
600 | }) | |
601 | } | |
602 | ||
603 | fn number<'i>( | |
604 | pos: Position<'i>, | |
605 | state: &mut ParserState<'i, GrammarRule> | |
606 | ) -> Result<Position<'i>, Position<'i>> { | |
607 | state.rule(GrammarRule::number, pos, |_, pos| { | |
608 | pos.sequence(|pos| { | |
609 | pos.match_range('0'..'9') | |
610 | .and_then(|pos| pos.repeat(|pos| pos.match_range('0'..'9'))) | |
611 | }) | |
612 | }) | |
613 | } | |
614 | ||
615 | fn single_quote<'i>( | |
616 | pos: Position<'i>, | |
617 | state: &mut ParserState<'i, GrammarRule> | |
618 | ) -> Result<Position<'i>, Position<'i>> { | |
619 | state.rule(GrammarRule::single_quote, pos, |_, pos| { | |
620 | pos.match_string("'") | |
621 | }) | |
622 | } | |
623 | ||
624 | fn escape<'i>( | |
625 | pos: Position<'i>, | |
626 | state: &mut ParserState<'i, GrammarRule> | |
627 | ) -> Result<Position<'i>, Position<'i>> { | |
628 | pos.sequence(|pos| { | |
629 | pos.match_string("\\").and_then(|pos| { | |
630 | pos.match_string("n") | |
631 | .or_else(|pos| pos.match_string("r")) | |
632 | .or_else(|pos| pos.match_string("t")) | |
633 | .or_else(|pos| pos.match_string("\\")) | |
634 | .or_else(|pos| pos.match_string("0")) | |
635 | .or_else(|pos| pos.match_string("'")) | |
636 | .or_else(|pos| pos.match_string("\"")) | |
637 | .or_else(|pos| unicode(pos, state)) | |
638 | .or_else(|pos| code(pos, state)) | |
639 | }) | |
640 | }) | |
641 | } | |
642 | ||
643 | fn unicode<'i>( | |
644 | pos: Position<'i>, | |
645 | state: &mut ParserState<'i, GrammarRule> | |
646 | ) -> Result<Position<'i>, Position<'i>> { | |
647 | pos.sequence(|pos| { | |
648 | pos.match_string("u") | |
649 | .and_then(|pos| pos.match_string("{")) | |
650 | .and_then(|pos| hex_digit(pos, state)) | |
651 | .and_then(|pos| hex_digit(pos, state)) | |
652 | .and_then(|pos| pos.optional(|pos| hex_digit(pos, state))) | |
653 | .and_then(|pos| pos.optional(|pos| hex_digit(pos, state))) | |
654 | .and_then(|pos| pos.optional(|pos| hex_digit(pos, state))) | |
655 | .and_then(|pos| pos.optional(|pos| hex_digit(pos, state))) | |
656 | .and_then(|pos| pos.match_string("}")) | |
657 | }) | |
658 | } | |
659 | ||
660 | fn code<'i>( | |
661 | pos: Position<'i>, | |
662 | state: &mut ParserState<'i, GrammarRule> | |
663 | ) -> Result<Position<'i>, Position<'i>> { | |
664 | pos.sequence(|pos| { | |
665 | pos.match_string("x") | |
666 | .and_then(|pos| hex_digit(pos, state)) | |
667 | .and_then(|pos| hex_digit(pos, state)) | |
668 | }) | |
669 | } | |
670 | ||
671 | fn hex_digit<'i>( | |
672 | pos: Position<'i>, | |
673 | _: &mut ParserState<'i, GrammarRule> | |
674 | ) -> Result<Position<'i>, Position<'i>> { | |
675 | pos.match_range('0'..'9') | |
676 | .or_else(|pos| pos.match_range('a'..'f')) | |
677 | .or_else(|pos| pos.match_range('A'..'F')) | |
678 | } | |
679 | ||
680 | fn skip<'i>( | |
681 | pos: Position<'i>, | |
682 | state: &mut ParserState<'i, GrammarRule> | |
683 | ) -> Result<Position<'i>, Position<'i>> { | |
684 | pos.sequence(|pos| { | |
685 | pos.repeat(|pos| whitespace(pos, state)).and_then(|pos| { | |
686 | pos.repeat(|pos| { | |
687 | pos.sequence(|pos| { | |
688 | pos.optional(|pos| comment(pos, state)).and_then(|pos| { | |
689 | pos.sequence(|pos| { | |
690 | whitespace(pos, state) | |
691 | .and_then(|pos| pos.repeat(|pos| whitespace(pos, state))) | |
692 | }) | |
693 | }) | |
694 | }) | |
695 | }) | |
696 | }) | |
697 | }) | |
698 | } | |
699 | ||
700 | fn whitespace<'i>( | |
701 | pos: Position<'i>, | |
702 | _: &mut ParserState<'i, GrammarRule> | |
703 | ) -> Result<Position<'i>, Position<'i>> { | |
704 | pos.match_string(" ") | |
705 | .or_else(|pos| pos.match_string("\t")) | |
706 | .or_else(|pos| pos.match_string("\r")) | |
707 | .or_else(|pos| pos.match_string("\n")) | |
708 | } | |
709 | ||
710 | fn comment<'i>( | |
711 | pos: Position<'i>, | |
712 | _: &mut ParserState<'i, GrammarRule> | |
713 | ) -> Result<Position<'i>, Position<'i>> { | |
714 | pos.sequence(|pos| { | |
715 | pos.match_string("//").and_then(|pos| { | |
716 | pos.repeat(|pos| { | |
717 | pos.sequence(|pos| { | |
718 | pos.lookahead(false, |pos| pos.match_string("\n")) | |
719 | .and_then(|pos| pos.skip(1)) | |
720 | }) | |
721 | }) | |
722 | }) | |
723 | }) | |
724 | } | |
725 | ||
726 | pest::state(input, move |mut state, pos: Position<'i>| match rule { | |
727 | GrammarRule::grammar_rules => grammar_rules(pos, &mut state), | |
728 | GrammarRule::soi => soi(pos, &mut state), | |
729 | GrammarRule::eoi => eoi(pos, &mut state), | |
730 | GrammarRule::grammar_rule => grammar_rule(pos, &mut state), | |
731 | GrammarRule::assignment_operator => assignment_operator(pos, &mut state), | |
732 | GrammarRule::silent_modifier => silent_modifier(pos, &mut state), | |
733 | GrammarRule::atomic_modifier => atomic_modifier(pos, &mut state), | |
734 | GrammarRule::compound_atomic_modifier => compound_atomic_modifier(pos, &mut state), | |
735 | GrammarRule::non_atomic_modifier => non_atomic_modifier(pos, &mut state), | |
736 | GrammarRule::opening_brace => opening_brace(pos, &mut state), | |
737 | GrammarRule::closing_brace => closing_brace(pos, &mut state), | |
738 | GrammarRule::opening_paren => opening_paren(pos, &mut state), | |
739 | GrammarRule::closing_paren => closing_paren(pos, &mut state), | |
740 | GrammarRule::expression => expression(pos, &mut state), | |
741 | GrammarRule::term => term(pos, &mut state), | |
742 | GrammarRule::positive_predicate_operator => { | |
743 | positive_predicate_operator(pos, &mut state) | |
744 | } | |
745 | GrammarRule::negative_predicate_operator => { | |
746 | negative_predicate_operator(pos, &mut state) | |
747 | } | |
748 | GrammarRule::sequence_operator => sequence_operator(pos, &mut state), | |
749 | GrammarRule::choice_operator => choice_operator(pos, &mut state), | |
750 | GrammarRule::optional_operator => optional_operator(pos, &mut state), | |
751 | GrammarRule::repeat_operator => repeat_operator(pos, &mut state), | |
752 | GrammarRule::repeat_once_operator => repeat_once_operator(pos, &mut state), | |
753 | GrammarRule::repeat_exact => repeat_exact(pos, &mut state), | |
754 | GrammarRule::repeat_min => repeat_min(pos, &mut state), | |
755 | GrammarRule::repeat_max => repeat_max(pos, &mut state), | |
756 | GrammarRule::repeat_min_max => repeat_min_max(pos, &mut state), | |
757 | GrammarRule::comma => comma(pos, &mut state), | |
758 | GrammarRule::push => push(pos, &mut state), | |
759 | GrammarRule::identifier => identifier(pos, &mut state), | |
760 | GrammarRule::string => string(pos, &mut state), | |
761 | GrammarRule::quote => quote(pos, &mut state), | |
762 | GrammarRule::insensitive_string => insensitive_string(pos, &mut state), | |
763 | GrammarRule::range => range(pos, &mut state), | |
764 | GrammarRule::range_operator => range_operator(pos, &mut state), | |
765 | GrammarRule::character => character(pos, &mut state), | |
766 | GrammarRule::number => number(pos, &mut state), | |
767 | GrammarRule::single_quote => single_quote(pos, &mut state) | |
768 | }) | |
769 | } | |
770 | } | |
771 | ||
772 | #[derive(Clone, Debug, Eq, PartialEq)] | |
773 | pub struct ParserRule<'i> { | |
774 | pub name: Ident, | |
775 | pub span: Span<'i>, | |
776 | pub ty: RuleType, | |
777 | pub node: ParserNode<'i> | |
778 | } | |
779 | ||
780 | #[derive(Clone, Debug, Eq, PartialEq)] | |
781 | pub struct ParserNode<'i> { | |
782 | pub expr: ParserExpr<'i>, | |
783 | pub span: Span<'i> | |
784 | } | |
785 | ||
786 | #[derive(Clone, Debug, Eq, PartialEq)] | |
787 | pub enum ParserExpr<'i> { | |
788 | Str(String), | |
789 | Insens(String), | |
790 | Range(String, String), | |
791 | Ident(Ident), | |
792 | PosPred(Box<ParserNode<'i>>), | |
793 | NegPred(Box<ParserNode<'i>>), | |
794 | Seq(Box<ParserNode<'i>>, Box<ParserNode<'i>>), | |
795 | Choice(Box<ParserNode<'i>>, Box<ParserNode<'i>>), | |
796 | Opt(Box<ParserNode<'i>>), | |
797 | Rep(Box<ParserNode<'i>>), | |
798 | RepOnce(Box<ParserNode<'i>>), | |
799 | RepExact(Box<ParserNode<'i>>, u32), | |
800 | RepMin(Box<ParserNode<'i>>, u32), | |
801 | RepMax(Box<ParserNode<'i>>, u32), | |
802 | RepMinMax(Box<ParserNode<'i>>, u32, u32), | |
803 | Push(Box<ParserNode<'i>>) | |
804 | } | |
805 | ||
806 | fn convert_rule<'i>(rule: ParserRule<'i>) -> Rule { | |
807 | match rule { | |
808 | ParserRule { name, ty, node, .. } => { | |
809 | let expr = convert_node(node); | |
810 | ||
811 | Rule { name, ty, expr } | |
812 | } | |
813 | } | |
814 | } | |
815 | ||
816 | fn convert_node<'i>(node: ParserNode<'i>) -> Expr { | |
817 | match node.expr { | |
818 | ParserExpr::Str(string) => Expr::Str(string), | |
819 | ParserExpr::Insens(string) => Expr::Insens(string), | |
820 | ParserExpr::Range(start, end) => Expr::Range(start, end), | |
821 | ParserExpr::Ident(ident) => Expr::Ident(ident), | |
822 | ParserExpr::PosPred(node) => Expr::PosPred(Box::new(convert_node(*node))), | |
823 | ParserExpr::NegPred(node) => Expr::NegPred(Box::new(convert_node(*node))), | |
824 | ParserExpr::Seq(node1, node2) => Expr::Seq( | |
825 | Box::new(convert_node(*node1)), | |
826 | Box::new(convert_node(*node2)) | |
827 | ), | |
828 | ParserExpr::Choice(node1, node2) => Expr::Choice( | |
829 | Box::new(convert_node(*node1)), | |
830 | Box::new(convert_node(*node2)) | |
831 | ), | |
832 | ParserExpr::Opt(node) => Expr::Opt(Box::new(convert_node(*node))), | |
833 | ParserExpr::Rep(node) => Expr::Rep(Box::new(convert_node(*node))), | |
834 | ParserExpr::RepOnce(node) => Expr::RepOnce(Box::new(convert_node(*node))), | |
835 | ParserExpr::RepExact(node, num) => Expr::RepExact(Box::new(convert_node(*node)), num), | |
836 | ParserExpr::RepMin(node, max) => Expr::RepMin(Box::new(convert_node(*node)), max), | |
837 | ParserExpr::RepMax(node, max) => Expr::RepMax(Box::new(convert_node(*node)), max), | |
838 | ParserExpr::RepMinMax(node, min, max) => { | |
839 | Expr::RepMinMax(Box::new(convert_node(*node)), min, max) | |
840 | } | |
841 | ParserExpr::Push(node) => Expr::Push(Box::new(convert_node(*node))) | |
842 | } | |
843 | } | |
844 | ||
845 | pub fn consume_rules<'i>(pairs: Pairs<'i, GrammarRule>) -> (Vec<Rule>, Vec<Ident>) { | |
846 | let defaults = validator::validate_pairs(pairs.clone()); | |
847 | let rules = consume_rules_with_spans(pairs); | |
848 | ||
849 | validator::validate_ast(&rules); | |
850 | ||
851 | ( | |
852 | rules.into_iter().map(|rule| convert_rule(rule)).collect(), | |
853 | defaults | |
854 | ) | |
855 | } | |
856 | ||
857 | fn consume_rules_with_spans<'i>(pairs: Pairs<'i, GrammarRule>) -> Vec<ParserRule<'i>> { | |
858 | let climber = PrecClimber::new(vec![ | |
859 | Operator::new(GrammarRule::choice_operator, Assoc::Left), | |
860 | Operator::new(GrammarRule::sequence_operator, Assoc::Left), | |
861 | ]); | |
862 | ||
863 | pairs | |
864 | .filter(|pair| pair.as_rule() == GrammarRule::grammar_rule) | |
865 | .map(|pair| { | |
866 | let mut pairs = pair.into_inner().peekable(); | |
867 | ||
868 | let span = pairs.next().unwrap().into_span(); | |
869 | let name = Ident::new(span.as_str()); | |
870 | ||
871 | pairs.next().unwrap(); // assignment_operator | |
872 | ||
873 | let ty = if pairs.peek().unwrap().as_rule() != GrammarRule::opening_brace { | |
874 | match pairs.next().unwrap().as_rule() { | |
875 | GrammarRule::silent_modifier => RuleType::Silent, | |
876 | GrammarRule::atomic_modifier => RuleType::Atomic, | |
877 | GrammarRule::compound_atomic_modifier => RuleType::CompoundAtomic, | |
878 | GrammarRule::non_atomic_modifier => RuleType::NonAtomic, | |
879 | _ => unreachable!() | |
880 | } | |
881 | } else { | |
882 | RuleType::Normal | |
883 | }; | |
884 | ||
885 | pairs.next().unwrap(); // opening_brace | |
886 | ||
887 | let node = consume_expr(pairs.next().unwrap().into_inner().peekable(), &climber); | |
888 | ||
889 | ParserRule { | |
890 | name, | |
891 | span, | |
892 | ty, | |
893 | node | |
894 | } | |
895 | }) | |
896 | .collect() | |
897 | } | |
898 | ||
899 | fn consume_expr<'i>( | |
900 | pairs: Peekable<Pairs<'i, GrammarRule>>, | |
901 | climber: &PrecClimber<GrammarRule> | |
902 | ) -> ParserNode<'i> { | |
903 | fn unaries<'i>( | |
904 | mut pairs: Peekable<Pairs<'i, GrammarRule>>, | |
905 | climber: &PrecClimber<GrammarRule> | |
906 | ) -> ParserNode<'i> { | |
907 | let pair = pairs.next().unwrap(); | |
908 | ||
909 | match pair.as_rule() { | |
910 | GrammarRule::opening_paren => { | |
911 | let node = unaries(pairs, climber); | |
912 | let end = node.span.end_pos(); | |
913 | ||
914 | ParserNode { | |
915 | expr: node.expr, | |
916 | span: pair.into_span().start_pos().span(&end) | |
917 | } | |
918 | } | |
919 | GrammarRule::positive_predicate_operator => { | |
920 | let node = unaries(pairs, climber); | |
921 | let end = node.span.end_pos(); | |
922 | ||
923 | ParserNode { | |
924 | expr: ParserExpr::PosPred(Box::new(node)), | |
925 | span: pair.into_span().start_pos().span(&end) | |
926 | } | |
927 | } | |
928 | GrammarRule::negative_predicate_operator => { | |
929 | let node = unaries(pairs, climber); | |
930 | let end = node.span.end_pos(); | |
931 | ||
932 | ParserNode { | |
933 | expr: ParserExpr::NegPred(Box::new(node)), | |
934 | span: pair.into_span().start_pos().span(&end) | |
935 | } | |
936 | } | |
937 | other_rule => { | |
938 | let node = match other_rule { | |
939 | GrammarRule::expression => consume_expr(pair.into_inner().peekable(), climber), | |
940 | GrammarRule::push => { | |
941 | let start = pair.clone().into_span().start_pos(); | |
942 | let mut pairs = pair.into_inner(); | |
943 | pairs.next().unwrap(); // opening_paren | |
944 | let pair = pairs.next().unwrap(); | |
945 | ||
946 | let node = consume_expr(pair.into_inner().peekable(), climber); | |
947 | let end = node.span.end_pos(); | |
948 | ||
949 | ParserNode { | |
950 | expr: ParserExpr::Push(Box::new(node)), | |
951 | span: start.span(&end) | |
952 | } | |
953 | } | |
954 | GrammarRule::identifier => ParserNode { | |
955 | expr: ParserExpr::Ident(Ident::new(pair.as_str())), | |
956 | span: pair.clone().into_span() | |
957 | }, | |
958 | GrammarRule::string => { | |
959 | let string = pair.as_str(); | |
960 | ParserNode { | |
961 | expr: ParserExpr::Str(string[1..string.len() - 1].to_owned()), | |
962 | span: pair.clone().into_span() | |
963 | } | |
964 | } | |
965 | GrammarRule::insensitive_string => { | |
966 | let string = pair.as_str(); | |
967 | ParserNode { | |
968 | expr: ParserExpr::Insens(string[2..string.len() - 1].to_owned()), | |
969 | span: pair.clone().into_span() | |
970 | } | |
971 | } | |
972 | GrammarRule::range => { | |
973 | let mut pairs = pair.into_inner(); | |
974 | let pair = pairs.next().unwrap(); | |
975 | let start = pair.as_str(); | |
976 | let start_pos = pair.clone().into_span().start_pos(); | |
977 | pairs.next(); | |
978 | let pair = pairs.next().unwrap(); | |
979 | let end = pair.as_str(); | |
980 | let end_pos = pair.clone().into_span().end_pos(); | |
981 | ||
982 | ParserNode { | |
983 | expr: ParserExpr::Range(start.to_owned(), end.to_owned()), | |
984 | span: start_pos.span(&end_pos) | |
985 | } | |
986 | } | |
987 | _ => unreachable!() | |
988 | }; | |
989 | ||
990 | pairs.fold(node, |node, pair| { | |
991 | match pair.as_rule() { | |
992 | GrammarRule::optional_operator => { | |
993 | let start = node.span.start_pos(); | |
994 | ParserNode { | |
995 | expr: ParserExpr::Opt(Box::new(node)), | |
996 | span: start.span(&pair.into_span().end_pos()) | |
997 | } | |
998 | } | |
999 | GrammarRule::repeat_operator => { | |
1000 | let start = node.span.start_pos(); | |
1001 | ParserNode { | |
1002 | expr: ParserExpr::Rep(Box::new(node)), | |
1003 | span: start.span(&pair.into_span().end_pos()) | |
1004 | } | |
1005 | } | |
1006 | GrammarRule::repeat_once_operator => { | |
1007 | let start = node.span.start_pos(); | |
1008 | ParserNode { | |
1009 | expr: ParserExpr::RepOnce(Box::new(node)), | |
1010 | span: start.span(&pair.into_span().end_pos()) | |
1011 | } | |
1012 | } | |
1013 | GrammarRule::repeat_exact => { | |
1014 | let overflow = |span| { | |
1015 | let error: Error<()> = Error::CustomErrorSpan { | |
1016 | message: "number cannot overflow u32".to_owned(), | |
1017 | span | |
1018 | }; | |
1019 | ||
1020 | format!("parsing error\n\n{}", error) | |
1021 | }; | |
1022 | ||
1023 | let mut inner = pair.clone().into_inner(); | |
1024 | ||
1025 | inner.next().unwrap(); // opening_brace | |
1026 | ||
1027 | let number = inner.next().unwrap(); | |
1028 | let num: u32 = number | |
1029 | .as_str() | |
1030 | .parse() | |
1031 | .expect(&overflow(number.into_span())); | |
1032 | ||
1033 | let start = node.span.start_pos(); | |
1034 | ParserNode { | |
1035 | expr: ParserExpr::RepExact(Box::new(node), num), | |
1036 | span: start.span(&pair.into_span().end_pos()) | |
1037 | } | |
1038 | } | |
1039 | GrammarRule::repeat_min => { | |
1040 | let overflow = |span| { | |
1041 | let error: Error<()> = Error::CustomErrorSpan { | |
1042 | message: "number cannot overflow u32".to_owned(), | |
1043 | span | |
1044 | }; | |
1045 | ||
1046 | format!("parsing error\n\n{}", error) | |
1047 | }; | |
1048 | ||
1049 | let mut inner = pair.clone().into_inner(); | |
1050 | ||
1051 | inner.next().unwrap(); // opening_brace | |
1052 | ||
1053 | let min_number = inner.next().unwrap(); | |
1054 | let min: u32 = min_number | |
1055 | .as_str() | |
1056 | .parse() | |
1057 | .expect(&overflow(min_number.into_span())); | |
1058 | ||
1059 | let start = node.span.start_pos(); | |
1060 | ParserNode { | |
1061 | expr: ParserExpr::RepMin(Box::new(node), min), | |
1062 | span: start.span(&pair.into_span().end_pos()) | |
1063 | } | |
1064 | } | |
1065 | GrammarRule::repeat_max => { | |
1066 | let overflow = |span| { | |
1067 | let error: Error<()> = Error::CustomErrorSpan { | |
1068 | message: "number cannot overflow u32".to_owned(), | |
1069 | span | |
1070 | }; | |
1071 | ||
1072 | format!("parsing error\n\n{}", error) | |
1073 | }; | |
1074 | ||
1075 | let mut inner = pair.clone().into_inner(); | |
1076 | ||
1077 | inner.next().unwrap(); // opening_brace | |
1078 | inner.next().unwrap(); // comma | |
1079 | ||
1080 | let max_number = inner.next().unwrap(); | |
1081 | let max: u32 = max_number | |
1082 | .as_str() | |
1083 | .parse() | |
1084 | .expect(&overflow(max_number.into_span())); | |
1085 | ||
1086 | let start = node.span.start_pos(); | |
1087 | ParserNode { | |
1088 | expr: ParserExpr::RepMax(Box::new(node), max), | |
1089 | span: start.span(&pair.into_span().end_pos()) | |
1090 | } | |
1091 | } | |
1092 | GrammarRule::repeat_min_max => { | |
1093 | let overflow = |span| { | |
1094 | let error: Error<()> = Error::CustomErrorSpan { | |
1095 | message: "number cannot overflow u32".to_owned(), | |
1096 | span | |
1097 | }; | |
1098 | ||
1099 | format!("parsing error\n\n{}", error) | |
1100 | }; | |
1101 | ||
1102 | let mut inner = pair.clone().into_inner(); | |
1103 | ||
1104 | inner.next().unwrap(); // opening_brace | |
1105 | ||
1106 | let min_number = inner.next().unwrap(); | |
1107 | let min: u32 = min_number | |
1108 | .as_str() | |
1109 | .parse() | |
1110 | .expect(&overflow(min_number.into_span())); | |
1111 | ||
1112 | inner.next().unwrap(); // comma | |
1113 | ||
1114 | let max_number = inner.next().unwrap(); | |
1115 | let max: u32 = max_number | |
1116 | .as_str() | |
1117 | .parse() | |
1118 | .expect(&overflow(max_number.into_span())); | |
1119 | ||
1120 | let start = node.span.start_pos(); | |
1121 | ParserNode { | |
1122 | expr: ParserExpr::RepMinMax(Box::new(node), min, max), | |
1123 | span: start.span(&pair.into_span().end_pos()) | |
1124 | } | |
1125 | } | |
1126 | GrammarRule::closing_paren => { | |
1127 | let start = node.span.start_pos(); | |
1128 | ||
1129 | ParserNode { | |
1130 | expr: node.expr, | |
1131 | span: start.span(&pair.into_span().end_pos()) | |
1132 | } | |
1133 | } | |
1134 | _ => unreachable!() | |
1135 | } | |
1136 | }) | |
1137 | } | |
1138 | } | |
1139 | } | |
1140 | ||
1141 | let term = |pair: Pair<'i, GrammarRule>| unaries(pair.into_inner().peekable(), climber); | |
1142 | let infix = | |
1143 | |lhs: ParserNode<'i>, op: Pair<'i, GrammarRule>, rhs: ParserNode<'i>| match op.as_rule() { | |
1144 | GrammarRule::sequence_operator => { | |
1145 | let start = lhs.span.start_pos(); | |
1146 | let end = rhs.span.end_pos(); | |
1147 | ||
1148 | ParserNode { | |
1149 | expr: ParserExpr::Seq(Box::new(lhs), Box::new(rhs)), | |
1150 | span: start.span(&end) | |
1151 | } | |
1152 | } | |
1153 | GrammarRule::choice_operator => { | |
1154 | let start = lhs.span.start_pos(); | |
1155 | let end = rhs.span.end_pos(); | |
1156 | ||
1157 | ParserNode { | |
1158 | expr: ParserExpr::Choice(Box::new(lhs), Box::new(rhs)), | |
1159 | span: start.span(&end) | |
1160 | } | |
1161 | } | |
1162 | _ => unreachable!() | |
1163 | }; | |
1164 | ||
1165 | climber.climb(pairs, term, infix) | |
1166 | } | |
1167 | ||
// Unit tests for the grammar parser: expected token trees for valid input, expected
// failure positions for invalid input, and one end-to-end check of the produced AST.
#[cfg(test)]
mod tests {
    use super::*;

    /// Two consecutive rules each yield their own `grammar_rule` token.
    #[test]
    fn rules() {
        parses_to! {
            parser: GrammarParser,
            input: "a = { b } c = { d }",
            rule: GrammarRule::grammar_rules,
            tokens: [
                grammar_rule(0, 9, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    opening_brace(4, 5),
                    expression(6, 7, [
                        term(6, 7, [
                            identifier(6, 7)
                        ])
                    ]),
                    closing_brace(8, 9)
                ]),
                grammar_rule(10, 19, [
                    identifier(10, 11),
                    assignment_operator(12, 13),
                    opening_brace(14, 15),
                    expression(16, 17, [
                        term(16, 17, [
                            identifier(16, 17)
                        ])
                    ]),
                    closing_brace(18, 19)
                ])
            ]
        };
    }

    /// A rule with the `!` modifier and a two-term sequence as its body.
    #[test]
    fn rule() {
        parses_to! {
            parser: GrammarParser,
            input: "a = ! { b ~ c }",
            rule: GrammarRule::grammar_rule,
            tokens: [
                grammar_rule(0, 15, [
                    identifier(0, 1),
                    assignment_operator(2, 3),
                    non_atomic_modifier(4, 5),
                    opening_brace(6, 7),
                    expression(8, 13, [
                        term(8, 9, [
                            identifier(8, 9)
                        ]),
                        sequence_operator(10, 11),
                        term(12, 13, [
                            identifier(12, 13)
                        ])
                    ]),
                    closing_brace(14, 15)
                ])
            ]
        };
    }

    /// An expression mixing choice, sequence, a range, a negated insensitive string and
    /// a parenthesised sub-expression with two postfix operators.
    #[test]
    fn expression() {
        parses_to! {
            parser: GrammarParser,
            input: "_a | 'a'..'b' ~ !^\"abc\" ~ (d | e)*?",
            rule: GrammarRule::expression,
            tokens: [
                expression(0, 35, [
                    term(0, 2, [
                        identifier(0, 2)
                    ]),
                    choice_operator(3, 4),
                    term(5, 13, [
                        range(5, 13, [
                            character(5, 8, [
                                single_quote(5, 6),
                                single_quote(7, 8)
                            ]),
                            range_operator(8, 10),
                            character(10, 13, [
                                single_quote(10, 11),
                                single_quote(12, 13)
                            ])
                        ])
                    ]),
                    sequence_operator(14, 15),
                    term(16, 23, [
                        negative_predicate_operator(16, 17),
                        insensitive_string(17, 23, [
                            string(18, 23, [
                                quote(18, 19),
                                quote(22, 23)
                            ])
                        ])
                    ]),
                    sequence_operator(24, 25),
                    term(26, 35, [
                        opening_paren(26, 27),
                        expression(27, 32, [
                            term(27, 28, [
                                identifier(27, 28)
                            ]),
                            choice_operator(29, 30),
                            term(31, 32, [
                                identifier(31, 32)
                            ])
                        ]),
                        closing_paren(32, 33),
                        repeat_operator(33, 34),
                        optional_operator(34, 35)
                    ])
                ])
            ]
        };
    }

    /// `{n}`: a single number between braces.
    #[test]
    fn repeat_exact() {
        parses_to! {
            parser: GrammarParser,
            input: "{1}",
            rule: GrammarRule::repeat_exact,
            tokens: [
                repeat_exact(0, 3, [
                    opening_brace(0, 1),
                    number(1, 2),
                    closing_brace(2, 3)
                ])
            ]
        };
    }

    /// `{n,}`: a number followed by a comma.
    #[test]
    fn repeat_min() {
        parses_to! {
            parser: GrammarParser,
            input: "{2,}",
            rule: GrammarRule::repeat_min,
            tokens: [
                repeat_min(0, 4, [
                    opening_brace(0,1),
                    number(1,2),
                    comma(2,3),
                    closing_brace(3,4)
                ])
            ]
        }
    }

    /// `{, n}`: a comma followed by a number; the space in between produces no token.
    #[test]
    fn repeat_max() {
        parses_to! {
            parser: GrammarParser,
            input: "{, 3}",
            rule: GrammarRule::repeat_max,
            tokens: [
                repeat_max(0, 5, [
                    opening_brace(0,1),
                    comma(1,2),
                    number(3,4),
                    closing_brace(4,5)
                ])
            ]
        }
    }

    /// `{m, n}`: two numbers separated by a comma.
    #[test]
    fn repeat_min_max() {
        parses_to! {
            parser: GrammarParser,
            input: "{1, 2}",
            rule: GrammarRule::repeat_min_max,
            tokens: [
                repeat_min_max(0, 6, [
                    opening_brace(0, 1),
                    number(1, 2),
                    comma(2, 3),
                    number(4, 5),
                    closing_brace(5, 6)
                ])
            ]
        };
    }

    /// `push ( ... )`: the keyword itself produces no token, only the parentheses and
    /// the wrapped expression do.
    #[test]
    fn push() {
        parses_to! {
            parser: GrammarParser,
            input: "push ( a )",
            rule: GrammarRule::push,
            tokens: [
                push(0, 10, [
                    opening_paren(5, 6),
                    expression(7, 8, [
                        term(7, 8, [
                            identifier(7, 8)
                        ])
                    ]),
                    closing_paren(9, 10)
                ])
            ]
        };
    }

    /// An identifier may start with an underscore and contain digits.
    #[test]
    fn identifier() {
        parses_to! {
            parser: GrammarParser,
            input: "_a8943",
            rule: GrammarRule::identifier,
            tokens: [
                identifier(0, 6)
            ]
        };
    }

    /// A string literal containing escape sequences; only the two quotes appear as
    /// inner tokens.
    #[test]
    fn string() {
        parses_to! {
            parser: GrammarParser,
            input: "\"aaaaa\\n\\r\\t\\\\\\0\\'\\\"\\x0F\\u{123abC}\\u{12}aaaaa\"",
            rule: GrammarRule::string,
            tokens: [
                string(0, 46, [
                    quote(0, 1),
                    quote(45, 46)
                ])
            ]
        };
    }

    /// `^` followed by a string literal, with the literal starting at offset 3.
    #[test]
    fn insensitive_string() {
        // NOTE(review): the input literal as written here is 8 bytes long, while the
        // expected spans end at offset 9 and place the string at offset 3 — confirm the
        // amount of whitespace between `^` and the opening quote.
        parses_to! {
            parser: GrammarParser,
            input: "^ \"\\\"hi\"",
            rule: GrammarRule::insensitive_string,
            tokens: [
                insensitive_string(0, 9, [
                    string(3, 9, [
                        quote(3, 4),
                        quote(8, 9)
                    ])
                ])
            ]
        };
    }

    /// A character range whose bounds are escape sequences, with spaces around `..`.
    #[test]
    fn range() {
        parses_to! {
            parser: GrammarParser,
            input: "'\\n' .. '\\x1a'",
            rule: GrammarRule::range,
            tokens: [
                range(0, 14, [
                    character(0, 4, [
                        single_quote(0, 1),
                        single_quote(3, 4)
                    ]),
                    range_operator(5, 7),
                    character(8, 14, [
                        single_quote(8, 9),
                        single_quote(13, 14)
                    ])
                ])
            ]
        };
    }

    /// A character literal holding a `\u{...}` escape.
    #[test]
    fn character() {
        parses_to! {
            parser: GrammarParser,
            input: "'\\u{123abC}'",
            rule: GrammarRule::character,
            tokens: [
                character(0, 12, [
                    single_quote(0, 1),
                    single_quote(11, 12)
                ])
            ]
        };
    }

    /// A number may have a leading zero.
    #[test]
    fn number() {
        parses_to! {
            parser: GrammarParser,
            input: "0123",
            rule: GrammarRule::number,
            tokens: [
                number(0, 4)
            ]
        };
    }

    /// A `//` comment between an operator and its right operand produces no token.
    #[test]
    fn comment() {
        // NOTE(review): the input literal as written here is 14 bytes long, while the
        // expected spans place the second identifier at offset 16 — confirm the
        // whitespace inside the literal.
        parses_to! {
            parser: GrammarParser,
            input: "a ~ // asda\n b",
            rule: GrammarRule::expression,
            tokens: [
                expression(0, 17, [
                    term(0, 1, [
                        identifier(0, 1)
                    ]),
                    sequence_operator(2, 3),
                    term(16, 17, [
                        identifier(16, 17)
                    ])
                ])
            ]
        };
    }

    /// A grammar starting with a digit fails at offset 0, expecting an identifier.
    #[test]
    fn wrong_identifier() {
        fails_with! {
            parser: GrammarParser,
            input: "0",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::identifier],
            negatives: vec![],
            pos: 0
        };
    }

    /// A rule name directly followed by a brace fails, expecting `=`.
    #[test]
    fn missing_assignment_operator() {
        fails_with! {
            parser: GrammarParser,
            input: "a {}",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::assignment_operator],
            negatives: vec![],
            pos: 2
        };
    }

    /// `*` is not a rule modifier: any of the four modifiers or `{` is expected instead.
    #[test]
    fn wrong_modifier() {
        fails_with! {
            parser: GrammarParser,
            input: "a = *{}",
            rule: GrammarRule::grammar_rules,
            positives: vec![
                GrammarRule::silent_modifier,
                GrammarRule::atomic_modifier,
                GrammarRule::compound_atomic_modifier,
                GrammarRule::non_atomic_modifier,
                GrammarRule::opening_brace
            ],
            negatives: vec![],
            pos: 4
        };
    }

    /// A modifier at the end of input fails, expecting `{`.
    #[test]
    fn missing_opening_brace() {
        fails_with! {
            parser: GrammarParser,
            input: "a = _",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::opening_brace],
            negatives: vec![],
            pos: 5
        };
    }

    /// An empty rule body fails, expecting an expression.
    #[test]
    fn empty_rule() {
        fails_with! {
            parser: GrammarParser,
            input: "a = {}",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::expression],
            negatives: vec![],
            pos: 5
        };
    }

    /// A sequence operator without a right-hand side fails where the term should start.
    #[test]
    fn missing_rhs() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { b ~ }",
            rule: GrammarRule::grammar_rules,
            positives: vec![
                GrammarRule::opening_paren,
                GrammarRule::positive_predicate_operator,
                GrammarRule::negative_predicate_operator,
                GrammarRule::push,
                GrammarRule::identifier,
                GrammarRule::quote,
                GrammarRule::insensitive_string,
                GrammarRule::single_quote
            ],
            negatives: vec![],
            pos: 10
        };
    }

    /// `%` is not an operator: the failure lists what may follow a term.
    #[test]
    fn wrong_op() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { b % }",
            rule: GrammarRule::grammar_rules,
            positives: vec![
                GrammarRule::opening_brace,
                GrammarRule::closing_brace,
                GrammarRule::sequence_operator,
                GrammarRule::choice_operator,
                GrammarRule::optional_operator,
                GrammarRule::repeat_operator,
                GrammarRule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 8
        };
    }

    /// An unclosed parenthesis fails at the closing brace, expecting `)` among others.
    #[test]
    fn missing_closing_paren() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { (b }",
            rule: GrammarRule::grammar_rules,
            positives: vec![
                GrammarRule::opening_brace,
                GrammarRule::closing_paren,
                GrammarRule::sequence_operator,
                GrammarRule::choice_operator,
                GrammarRule::optional_operator,
                GrammarRule::repeat_operator,
                GrammarRule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 9
        };
    }

    /// A prefix operator without an operand fails where the operand should start.
    #[test]
    fn missing_term() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { ! }",
            rule: GrammarRule::grammar_rules,
            positives: vec![
                GrammarRule::opening_paren,
                GrammarRule::positive_predicate_operator,
                GrammarRule::negative_predicate_operator,
                GrammarRule::push,
                GrammarRule::identifier,
                GrammarRule::quote,
                GrammarRule::insensitive_string,
                GrammarRule::single_quote
            ],
            negatives: vec![],
            pos: 8
        };
    }

    /// An unterminated string fails at the end of input, expecting a closing quote.
    #[test]
    fn string_missing_ending_quote() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { \" }",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::quote],
            negatives: vec![],
            pos: 9
        };
    }

    /// `^` without a string after it fails, expecting a string.
    #[test]
    fn insensitive_missing_string() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { ^ }",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::string],
            negatives: vec![],
            pos: 8
        };
    }

    /// An unterminated character literal fails, expecting a closing single quote.
    #[test]
    fn char_missing_ending_single_quote() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { \' }",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::single_quote],
            negatives: vec![],
            pos: 8
        };
    }

    /// A lone character literal fails, expecting the range operator after it.
    #[test]
    fn range_missing_range_operator() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { \'a\' }",
            rule: GrammarRule::grammar_rules,
            positives: vec![GrammarRule::range_operator],
            negatives: vec![],
            pos: 10
        };
    }

    /// `&` directly after an operand is not a postfix operator.
    #[test]
    fn wrong_postfix() {
        fails_with! {
            parser: GrammarParser,
            input: "a = { a& }",
            rule: GrammarRule::grammar_rules,
            positives: vec![
                GrammarRule::opening_brace,
                GrammarRule::closing_brace,
                GrammarRule::sequence_operator,
                GrammarRule::choice_operator,
                GrammarRule::optional_operator,
                GrammarRule::repeat_operator,
                GrammarRule::repeat_once_operator
            ],
            negatives: vec![],
            pos: 7
        };
    }

    /// End-to-end check: parses one rule and compares the converted AST, covering all
    /// four bounded repetitions, grouping of sequence inside choice, predicates, `push`,
    /// ranges and stacked postfix operators.
    #[test]
    fn ast() {
        let input =
            "rule = _{ a{1} ~ \"a\"{3,} ~ b{, 2} ~ \"b\"{1, 2} | !(^\"c\" | push('d'..'e'))?* }";

        let pairs = GrammarParser::parse(GrammarRule::grammar_rules, input).unwrap();
        let ast = consume_rules_with_spans(pairs);
        // Spans are dropped here so the result can be compared against plain `Rule` values.
        let ast: Vec<_> = ast.into_iter().map(|rule| convert_rule(rule)).collect();

        assert_eq!(
            ast,
            vec![
                Rule {
                    name: Ident::new("rule"),
                    ty: RuleType::Silent,
                    expr: Expr::Choice(
                        Box::new(Expr::Seq(
                            Box::new(Expr::Seq(
                                Box::new(Expr::Seq(
                                    Box::new(Expr::RepExact(
                                        Box::new(Expr::Ident(Ident::new("a"))),
                                        1
                                    )),
                                    Box::new(Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 3))
                                )),
                                Box::new(Expr::RepMax(Box::new(Expr::Ident(Ident::new("b"))), 2))
                            )),
                            Box::new(Expr::RepMinMax(Box::new(Expr::Str("b".to_owned())), 1, 2))
                        )),
                        Box::new(Expr::NegPred(Box::new(Expr::Rep(Box::new(Expr::Opt(
                            Box::new(Expr::Choice(
                                Box::new(Expr::Insens("c".to_owned())),
                                Box::new(Expr::Push(Box::new(Expr::Range(
                                    "'d'".to_owned(),
                                    "'e'".to_owned()
                                ))))
                            ))
                        ))))))
                    )
                },
            ]
        );
    }
}