]>
Commit | Line | Data |
---|---|---|
9c376795 FG |
1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 | |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT | |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. All files in the project carrying such notice may not be copied, | |
8 | // modified, or distributed except according to those terms. | |
9 | ||
10 | //! Constructs useful in prefix, postfix, and infix operator parsing with the | |
11 | //! Pratt parsing method. | |
12 | ||
13 | use core::iter::Peekable; | |
14 | use core::marker::PhantomData; | |
15 | use core::ops::BitOr; | |
16 | ||
17 | use alloc::boxed::Box; | |
18 | use alloc::collections::BTreeMap; | |
19 | ||
20 | use crate::iterators::Pair; | |
21 | use crate::RuleType; | |
22 | ||
23 | /// Associativity of an infix binary operator, used by [`Op::infix(Assoc)`]. | |
24 | /// | |
25 | /// [`Op::infix(Assoc)`]: struct.Op.html | |
26 | #[derive(Clone, Copy, Debug, Eq, PartialEq)] | |
27 | pub enum Assoc { | |
28 | /// Left operator associativity. Evaluate expressions from left-to-right. | |
29 | Left, | |
30 | /// Right operator associativity. Evaluate expressions from right-to-left. | |
31 | Right, | |
32 | } | |
33 | ||
34 | type Prec = u32; | |
35 | const PREC_STEP: Prec = 10; | |
36 | ||
37 | /// An operator that corresponds to a rule. | |
38 | pub struct Op<R: RuleType> { | |
39 | rule: R, | |
40 | affix: Affix, | |
41 | next: Option<Box<Op<R>>>, | |
42 | } | |
43 | ||
44 | enum Affix { | |
45 | Prefix, | |
46 | Postfix, | |
47 | Infix(Assoc), | |
48 | } | |
49 | ||
50 | impl<R: RuleType> Op<R> { | |
51 | /// Defines `rule` as a prefix unary operator. | |
52 | pub fn prefix(rule: R) -> Self { | |
53 | Self { | |
54 | rule, | |
55 | affix: Affix::Prefix, | |
56 | next: None, | |
57 | } | |
58 | } | |
59 | ||
60 | /// Defines `rule` as a postfix unary operator. | |
61 | pub fn postfix(rule: R) -> Self { | |
62 | Self { | |
63 | rule, | |
64 | affix: Affix::Postfix, | |
65 | next: None, | |
66 | } | |
67 | } | |
68 | ||
69 | /// Defines `rule` as an infix binary operator with associativity `assoc`. | |
70 | pub fn infix(rule: R, assoc: Assoc) -> Self { | |
71 | Self { | |
72 | rule, | |
73 | affix: Affix::Infix(assoc), | |
74 | next: None, | |
75 | } | |
76 | } | |
77 | } | |
78 | ||
79 | impl<R: RuleType> BitOr for Op<R> { | |
80 | type Output = Self; | |
81 | ||
82 | fn bitor(mut self, rhs: Self) -> Self { | |
83 | fn assign_next<R: RuleType>(op: &mut Op<R>, next: Op<R>) { | |
84 | if let Some(ref mut child) = op.next { | |
85 | assign_next(child, next); | |
86 | } else { | |
87 | op.next = Some(Box::new(next)); | |
88 | } | |
89 | } | |
90 | ||
91 | assign_next(&mut self, rhs); | |
92 | self | |
93 | } | |
94 | } | |
95 | ||
96 | /// Struct containing operators and precedences, which can perform [Pratt parsing][1] on | |
97 | /// primary, prefix, postfix and infix expressions over [`Pairs`]. The tokens in [`Pairs`] | |
98 | /// should alternate in the order: | |
99 | /// `prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix*)*` | |
100 | /// | |
101 | /// # Panics | |
102 | /// | |
103 | /// Panics will occur when: | |
104 | /// * `pairs` is empty | |
105 | /// * The tokens in `pairs` does not alternate in the expected order. | |
106 | /// * No `map_*` function is specified for a certain kind of operator encountered in `pairs`. | |
107 | /// | |
108 | /// # Example | |
109 | /// | |
110 | /// The following pest grammar defines a calculator which can be used for Pratt parsing. | |
111 | /// | |
112 | /// ```pest | |
113 | /// WHITESPACE = _{ " " | "\t" | NEWLINE } | |
114 | /// | |
115 | /// program = { SOI ~ expr ~ EOI } | |
116 | /// expr = { prefix* ~ primary ~ postfix* ~ (infix ~ prefix* ~ primary ~ postfix* )* } | |
117 | /// infix = _{ add | sub | mul | div | pow } | |
118 | /// add = { "+" } // Addition | |
119 | /// sub = { "-" } // Subtraction | |
120 | /// mul = { "*" } // Multiplication | |
121 | /// div = { "/" } // Division | |
122 | /// pow = { "^" } // Exponentiation | |
123 | /// prefix = _{ neg } | |
124 | /// neg = { "-" } // Negation | |
125 | /// postfix = _{ fac } | |
126 | /// fac = { "!" } // Factorial | |
127 | /// primary = _{ int | "(" ~ expr ~ ")" } | |
128 | /// int = @{ (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT+ | ASCII_DIGIT) } | |
129 | /// ``` | |
130 | /// | |
131 | /// Below is a [`PrattParser`] that is able to parse an `expr` in the above grammar. The order | |
132 | /// of precedence corresponds to the order in which [`op`] is called. Thus, `mul` will | |
133 | /// have higher precedence than `add`. Operators can also be chained with `|` to give them equal | |
134 | /// precedence. | |
135 | /// | |
136 | /// ``` | |
137 | /// # use pest::pratt_parser::{Assoc, Op, PrattParser}; | |
138 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] | |
139 | /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg } | |
140 | /// let pratt = | |
141 | /// PrattParser::new() | |
142 | /// .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left)) | |
143 | /// .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left)) | |
144 | /// .op(Op::infix(Rule::pow, Assoc::Right)) | |
145 | /// .op(Op::postfix(Rule::fac)) | |
146 | /// .op(Op::prefix(Rule::neg)); | |
147 | /// ``` | |
148 | /// | |
149 | /// To parse an expression, call the [`map_primary`], [`map_prefix`], [`map_postfix`], | |
150 | /// [`map_infix`] and [`parse`] methods as follows: | |
151 | /// | |
152 | /// ``` | |
153 | /// # use pest::{iterators::Pairs, pratt_parser::PrattParser}; | |
154 | /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] | |
155 | /// # enum Rule { program, expr, int, add, mul, sub, div, pow, fac, neg } | |
156 | /// fn parse_expr(pairs: Pairs<Rule>, pratt: &PrattParser<Rule>) -> i32 { | |
157 | /// pratt | |
158 | /// .map_primary(|primary| match primary.as_rule() { | |
159 | /// Rule::int => primary.as_str().parse().unwrap(), | |
160 | /// Rule::expr => parse_expr(primary.into_inner(), pratt), // from "(" ~ expr ~ ")" | |
161 | /// _ => unreachable!(), | |
162 | /// }) | |
163 | /// .map_prefix(|op, rhs| match op.as_rule() { | |
164 | /// Rule::neg => -rhs, | |
165 | /// _ => unreachable!(), | |
166 | /// }) | |
167 | /// .map_postfix(|lhs, op| match op.as_rule() { | |
168 | /// Rule::fac => (1..lhs+1).product(), | |
169 | /// _ => unreachable!(), | |
170 | /// }) | |
171 | /// .map_infix(|lhs, op, rhs| match op.as_rule() { | |
172 | /// Rule::add => lhs + rhs, | |
173 | /// Rule::sub => lhs - rhs, | |
174 | /// Rule::mul => lhs * rhs, | |
175 | /// Rule::div => lhs / rhs, | |
176 | /// Rule::pow => (1..rhs+1).map(|_| lhs).product(), | |
177 | /// _ => unreachable!(), | |
178 | /// }) | |
179 | /// .parse(pairs) | |
180 | /// } | |
181 | /// ``` | |
182 | /// | |
183 | /// Note that [`map_prefix`], [`map_postfix`] and [`map_infix`] only need to be specified if the | |
184 | /// grammar contains the corresponding operators. | |
185 | /// | |
186 | /// [1]: https://en.wikipedia.org/wiki/Pratt_parser | |
187 | /// [`Pairs`]: ../iterators/struct.Pairs.html | |
188 | /// [`PrattParser`]: struct.PrattParser.html | |
189 | /// [`map_primary`]: struct.PrattParser.html#method.map_primary | |
190 | /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix | |
191 | /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix | |
192 | /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix | |
193 | /// [`parse`]: struct.PrattParserMap.html#method.parse | |
194 | /// [`op`]: struct.PrattParserMap.html#method.op | |
195 | pub struct PrattParser<R: RuleType> { | |
196 | prec: Prec, | |
197 | ops: BTreeMap<R, (Affix, Prec)>, | |
198 | has_prefix: bool, | |
199 | has_postfix: bool, | |
200 | has_infix: bool, | |
201 | } | |
202 | ||
203 | impl<R: RuleType> Default for PrattParser<R> { | |
204 | fn default() -> Self { | |
205 | Self::new() | |
206 | } | |
207 | } | |
208 | ||
209 | impl<R: RuleType> PrattParser<R> { | |
210 | /// Instantiate a new `PrattParser`. | |
211 | pub fn new() -> Self { | |
212 | Self { | |
213 | prec: PREC_STEP, | |
214 | ops: BTreeMap::new(), | |
215 | has_prefix: false, | |
216 | has_postfix: false, | |
217 | has_infix: false, | |
218 | } | |
219 | } | |
220 | ||
221 | /// Add `op` to `PrattParser`. | |
222 | pub fn op(mut self, op: Op<R>) -> Self { | |
223 | self.prec += PREC_STEP; | |
224 | let mut iter = Some(op); | |
225 | while let Some(Op { rule, affix, next }) = iter.take() { | |
226 | match affix { | |
227 | Affix::Prefix => self.has_prefix = true, | |
228 | Affix::Postfix => self.has_postfix = true, | |
229 | Affix::Infix(_) => self.has_infix = true, | |
230 | } | |
231 | self.ops.insert(rule, (affix, self.prec)); | |
232 | iter = next.map(|op| *op); | |
233 | } | |
234 | self | |
235 | } | |
236 | ||
237 | /// Maps primary expressions with a closure `primary`. | |
49aad941 | 238 | pub fn map_primary<'pratt, 'a, 'i, X, T>( |
9c376795 FG |
239 | &'pratt self, |
240 | primary: X, | |
49aad941 | 241 | ) -> PrattParserMap<'pratt, 'a, 'i, R, X, T> |
9c376795 FG |
242 | where |
243 | X: FnMut(Pair<'i, R>) -> T, | |
244 | R: 'pratt, | |
245 | { | |
246 | PrattParserMap { | |
247 | pratt: self, | |
248 | primary, | |
249 | prefix: None, | |
250 | postfix: None, | |
251 | infix: None, | |
252 | phantom: PhantomData, | |
253 | } | |
254 | } | |
255 | } | |
256 | ||
49aad941 FG |
257 | type PrefixFn<'a, 'i, R, T> = Box<dyn FnMut(Pair<'i, R>, T) -> T + 'a>; |
258 | type PostfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>) -> T + 'a>; | |
259 | type InfixFn<'a, 'i, R, T> = Box<dyn FnMut(T, Pair<'i, R>, T) -> T + 'a>; | |
9c376795 FG |
260 | |
261 | /// Product of calling [`map_primary`] on [`PrattParser`], defines how expressions should | |
262 | /// be mapped. | |
263 | /// | |
264 | /// [`map_primary`]: struct.PrattParser.html#method.map_primary | |
265 | /// [`PrattParser`]: struct.PrattParser.html | |
49aad941 | 266 | pub struct PrattParserMap<'pratt, 'a, 'i, R, F, T> |
9c376795 FG |
267 | where |
268 | R: RuleType, | |
269 | F: FnMut(Pair<'i, R>) -> T, | |
270 | { | |
271 | pratt: &'pratt PrattParser<R>, | |
272 | primary: F, | |
49aad941 FG |
273 | prefix: Option<PrefixFn<'a, 'i, R, T>>, |
274 | postfix: Option<PostfixFn<'a, 'i, R, T>>, | |
275 | infix: Option<InfixFn<'a, 'i, R, T>>, | |
9c376795 FG |
276 | phantom: PhantomData<T>, |
277 | } | |
278 | ||
49aad941 | 279 | impl<'pratt, 'a, 'i, R, F, T> PrattParserMap<'pratt, 'a, 'i, R, F, T> |
9c376795 FG |
280 | where |
281 | R: RuleType + 'pratt, | |
282 | F: FnMut(Pair<'i, R>) -> T, | |
283 | { | |
284 | /// Maps prefix operators with closure `prefix`. | |
285 | pub fn map_prefix<X>(mut self, prefix: X) -> Self | |
286 | where | |
49aad941 | 287 | X: FnMut(Pair<'i, R>, T) -> T + 'a, |
9c376795 FG |
288 | { |
289 | self.prefix = Some(Box::new(prefix)); | |
290 | self | |
291 | } | |
292 | ||
293 | /// Maps postfix operators with closure `postfix`. | |
294 | pub fn map_postfix<X>(mut self, postfix: X) -> Self | |
295 | where | |
49aad941 | 296 | X: FnMut(T, Pair<'i, R>) -> T + 'a, |
9c376795 FG |
297 | { |
298 | self.postfix = Some(Box::new(postfix)); | |
299 | self | |
300 | } | |
301 | ||
302 | /// Maps infix operators with a closure `infix`. | |
303 | pub fn map_infix<X>(mut self, infix: X) -> Self | |
304 | where | |
49aad941 | 305 | X: FnMut(T, Pair<'i, R>, T) -> T + 'a, |
9c376795 FG |
306 | { |
307 | self.infix = Some(Box::new(infix)); | |
308 | self | |
309 | } | |
310 | ||
311 | /// The last method to call on the provided pairs to execute the Pratt | |
312 | /// parser (previously defined using [`map_primary`], [`map_prefix`], [`map_postfix`], | |
313 | /// and [`map_infix`] methods). | |
314 | /// | |
315 | /// [`map_primary`]: struct.PrattParser.html#method.map_primary | |
316 | /// [`map_prefix`]: struct.PrattParserMap.html#method.map_prefix | |
317 | /// [`map_postfix`]: struct.PrattParserMap.html#method.map_postfix | |
318 | /// [`map_infix`]: struct.PrattParserMap.html#method.map_infix | |
319 | pub fn parse<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: P) -> T { | |
320 | self.expr(&mut pairs.peekable(), 0) | |
321 | } | |
322 | ||
323 | fn expr<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, rbp: Prec) -> T { | |
324 | let mut lhs = self.nud(pairs); | |
325 | while rbp < self.lbp(pairs) { | |
326 | lhs = self.led(pairs, lhs); | |
327 | } | |
328 | lhs | |
329 | } | |
330 | ||
331 | /// Null-Denotation | |
332 | /// | |
333 | /// "the action that should happen when the symbol is encountered | |
334 | /// as start of an expression (most notably, prefix operators) | |
335 | fn nud<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> T { | |
336 | let pair = pairs.next().expect("Pratt parsing expects non-empty Pairs"); | |
337 | match self.pratt.ops.get(&pair.as_rule()) { | |
338 | Some((Affix::Prefix, prec)) => { | |
339 | let rhs = self.expr(pairs, *prec - 1); | |
340 | match self.prefix.as_mut() { | |
341 | Some(prefix) => prefix(pair, rhs), | |
342 | None => panic!("Could not map {}, no `.map_prefix(...)` specified", pair), | |
343 | } | |
344 | } | |
345 | None => (self.primary)(pair), | |
346 | _ => panic!("Expected prefix or primary expression, found {}", pair), | |
347 | } | |
348 | } | |
349 | ||
350 | /// Left-Denotation | |
351 | /// | |
352 | /// "the action that should happen when the symbol is encountered | |
353 | /// after the start of an expression (most notably, infix and postfix operators)" | |
354 | fn led<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>, lhs: T) -> T { | |
355 | let pair = pairs.next().unwrap(); | |
356 | match self.pratt.ops.get(&pair.as_rule()) { | |
357 | Some((Affix::Infix(assoc), prec)) => { | |
358 | let rhs = match *assoc { | |
359 | Assoc::Left => self.expr(pairs, *prec), | |
360 | Assoc::Right => self.expr(pairs, *prec - 1), | |
361 | }; | |
362 | match self.infix.as_mut() { | |
363 | Some(infix) => infix(lhs, pair, rhs), | |
364 | None => panic!("Could not map {}, no `.map_infix(...)` specified", pair), | |
365 | } | |
366 | } | |
367 | Some((Affix::Postfix, _)) => match self.postfix.as_mut() { | |
368 | Some(postfix) => postfix(lhs, pair), | |
369 | None => panic!("Could not map {}, no `.map_postfix(...)` specified", pair), | |
370 | }, | |
371 | _ => panic!("Expected postfix or infix expression, found {}", pair), | |
372 | } | |
373 | } | |
374 | ||
375 | /// Left-Binding-Power | |
376 | /// | |
377 | /// "describes the symbol's precedence in infix form (most notably, operator precedence)" | |
378 | fn lbp<P: Iterator<Item = Pair<'i, R>>>(&mut self, pairs: &mut Peekable<P>) -> Prec { | |
379 | match pairs.peek() { | |
380 | Some(pair) => match self.pratt.ops.get(&pair.as_rule()) { | |
381 | Some((_, prec)) => *prec, | |
382 | None => panic!("Expected operator, found {}", pair), | |
383 | }, | |
384 | None => 0, | |
385 | } | |
386 | } | |
387 | } |