]>
Commit | Line | Data |
---|---|---|
e74abb32 XL |
1 | //! Extensions to the parsing API with niche applicability. |
2 | ||
3 | use super::*; | |
4 | ||
5 | /// Extensions to the `ParseStream` API to support speculative parsing. | |
6 | pub trait Speculative { | |
7 | /// Advance this parse stream to the position of a forked parse stream. | |
8 | /// | |
9 | /// This is the opposite operation to [`ParseStream::fork`]. You can fork a | |
10 | /// parse stream, perform some speculative parsing, then join the original | |
11 | /// stream to the fork to "commit" the parsing from the fork to the main | |
12 | /// stream. | |
13 | /// | |
14 | /// If you can avoid doing this, you should, as it limits the ability to | |
15 | /// generate useful errors. That said, it is often the only way to parse | |
16 | /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem | |
17 | /// is that when the fork fails to parse an `A`, it's impossible to tell | |
18 | /// whether that was because of a syntax error and the user meant to provide | |
19 | /// an `A`, or that the `A`s are finished and it's time to start parsing | |
20 | /// `B`s. Use with care. | |
21 | /// | |
22 | /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by | |
23 | /// parsing `B*` and removing the leading members of `A` from the | |
24 | /// repetition, bypassing the need to involve the downsides associated with | |
25 | /// speculative parsing. | |
26 | /// | |
27 | /// [`ParseStream::fork`]: ParseBuffer::fork | |
28 | /// | |
29 | /// # Example | |
30 | /// | |
31 | /// There has been chatter about the possibility of making the colons in the | |
32 | /// turbofish syntax like `path::to::<T>` no longer required by accepting | |
33 | /// `path::to<T>` in expression position. Specifically, according to [RFC | |
34 | /// 2544], [`PathSegment`] parsing should always try to consume a following | |
35 | /// `<` token as the start of generic arguments, and reset to the `<` if | |
36 | /// that fails (e.g. the token is acting as a less-than operator). | |
37 | /// | |
38 | /// This is the exact kind of parsing behavior which requires the "fork, | |
39 | /// try, commit" behavior that [`ParseStream::fork`] discourages. With | |
40 | /// `advance_to`, we can avoid having to parse the speculatively parsed | |
41 | /// content a second time. | |
42 | /// | |
43 | /// This change in behavior can be implemented in syn by replacing just the | |
44 | /// `Parse` implementation for `PathSegment`: | |
45 | /// | |
46 | /// ``` | |
47 | /// # use syn::ext::IdentExt; | |
48 | /// use syn::parse::discouraged::Speculative; | |
49 | /// # use syn::parse::{Parse, ParseStream}; | |
50 | /// # use syn::{Ident, PathArguments, Result, Token}; | |
51 | /// | |
52 | /// pub struct PathSegment { | |
53 | /// pub ident: Ident, | |
54 | /// pub arguments: PathArguments, | |
55 | /// } | |
56 | /// # | |
57 | /// # impl<T> From<T> for PathSegment | |
58 | /// # where | |
59 | /// # T: Into<Ident>, | |
60 | /// # { | |
61 | /// # fn from(ident: T) -> Self { | |
62 | /// # PathSegment { | |
63 | /// # ident: ident.into(), | |
64 | /// # arguments: PathArguments::None, | |
65 | /// # } | |
66 | /// # } | |
67 | /// # } | |
68 | /// | |
69 | /// impl Parse for PathSegment { | |
70 | /// fn parse(input: ParseStream) -> Result<Self> { | |
71 | /// if input.peek(Token![super]) | |
72 | /// || input.peek(Token![self]) | |
73 | /// || input.peek(Token![Self]) | |
74 | /// || input.peek(Token![crate]) | |
75 | /// || input.peek(Token![extern]) | |
76 | /// { | |
77 | /// let ident = input.call(Ident::parse_any)?; | |
78 | /// return Ok(PathSegment::from(ident)); | |
79 | /// } | |
80 | /// | |
81 | /// let ident = input.parse()?; | |
82 | /// if input.peek(Token![::]) && input.peek3(Token![<]) { | |
83 | /// return Ok(PathSegment { | |
84 | /// ident, | |
85 | /// arguments: PathArguments::AngleBracketed(input.parse()?), | |
86 | /// }); | |
87 | /// } | |
88 | /// if input.peek(Token![<]) && !input.peek(Token![<=]) { | |
89 | /// let fork = input.fork(); | |
90 | /// if let Ok(arguments) = fork.parse() { | |
91 | /// input.advance_to(&fork); | |
92 | /// return Ok(PathSegment { | |
93 | /// ident, | |
94 | /// arguments: PathArguments::AngleBracketed(arguments), | |
95 | /// }); | |
96 | /// } | |
97 | /// } | |
98 | /// Ok(PathSegment::from(ident)) | |
99 | /// } | |
100 | /// } | |
101 | /// | |
102 | /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap(); | |
103 | /// ``` | |
104 | /// | |
105 | /// # Drawbacks | |
106 | /// | |
107 | /// The main drawback of this style of speculative parsing is in error | |
108 | /// presentation. Even if the lookahead is the "correct" parse, the error | |
109 | /// that is shown is that of the "fallback" parse. To use the same example | |
110 | /// as the turbofish above, take the following unfinished "turbofish": | |
111 | /// | |
112 | /// ```text | |
113 | /// let _ = f<&'a fn(), for<'a> serde::>(); | |
114 | /// ``` | |
115 | /// | |
116 | /// If this is parsed as generic arguments, we can provide the error message | |
117 | /// | |
118 | /// ```text | |
119 | /// error: expected identifier | |
120 | /// --> src.rs:L:C | |
121 | /// | | |
122 | /// L | let _ = f<&'a fn(), for<'a> serde::>(); | |
123 | /// | ^ | |
124 | /// ``` | |
125 | /// | |
126 | /// but if parsed using the above speculative parsing, it falls back to | |
127 | /// assuming that the `<` is a less-than when it fails to parse the generic | |
128 | /// arguments, and tries to interpret the `&'a` as the start of a labelled | |
129 | /// loop, resulting in the much less helpful error | |
130 | /// | |
131 | /// ```text | |
132 | /// error: expected `:` | |
133 | /// --> src.rs:L:C | |
134 | /// | | |
135 | /// L | let _ = f<&'a fn(), for<'a> serde::>(); | |
136 | /// | ^^ | |
137 | /// ``` | |
138 | /// | |
139 | /// This can be mitigated with various heuristics (two examples: show both | |
140 | /// forks' parse errors, or show the one that consumed more tokens), but | |
141 | /// when you can control the grammar, sticking to something that can be | |
142 | /// parsed LL(3) and without the LL(*) speculative parsing this makes | |
143 | /// possible, displaying reasonable errors becomes much simpler. | |
144 | /// | |
145 | /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544 | |
146 | /// [`PathSegment`]: crate::PathSegment | |
147 | /// | |
148 | /// # Performance | |
149 | /// | |
150 | /// This method performs a cheap fixed amount of work that does not depend | |
151 | /// on how far apart the two streams are positioned. | |
152 | /// | |
153 | /// # Panics | |
154 | /// | |
155 | /// The forked stream in the argument of `advance_to` must have been | |
156 | /// obtained by forking `self`. Attempting to advance to any other stream | |
157 | /// will cause a panic. | |
158 | fn advance_to(&self, fork: &Self); | |
159 | } | |
160 | ||
161 | impl<'a> Speculative for ParseBuffer<'a> { | |
162 | fn advance_to(&self, fork: &Self) { | |
163 | if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { | |
164 | panic!("Fork was not derived from the advancing parse stream"); | |
165 | } | |
166 | ||
60c5eb7d XL |
167 | let (self_unexp, self_sp) = inner_unexpected(self); |
168 | let (fork_unexp, fork_sp) = inner_unexpected(fork); | |
169 | if !Rc::ptr_eq(&self_unexp, &fork_unexp) { | |
170 | match (fork_sp, self_sp) { | |
171 | // An unexpected span is set on the fork but not on `self`: copy it over to `self`. | |
172 | (Some(span), None) => { | |
173 | self_unexp.set(Unexpected::Some(span)); | |
174 | } | |
175 | // Unset on both streams. Point the fork's cell at `self`'s via `Unexpected::Chain` to propagate errors from the fork. | |
176 | (None, None) => { | |
177 | fork_unexp.set(Unexpected::Chain(self_unexp)); | |
178 | ||
179 | // Ensure toplevel 'unexpected' tokens from the fork don't | |
180 | // bubble up the chain by replacing the root `unexpected` | |
181 | // pointer with a fresh, empty cell; only 'unexpected' tokens | |
182 | // from existing group parsers should bubble. | |
183 | fork.unexpected | |
184 | .set(Some(Rc::new(Cell::new(Unexpected::None)))); | |
185 | } | |
186 | // Unexpected has already been set on `self` (whatever the fork's state). No changes needed. | |
187 | (_, Some(_)) => {} | |
188 | } | |
189 | } | |
190 | ||
e74abb32 XL |
191 | // See comment on `cell` in the struct definition. The transmute only rewrites the cursor's lifetime parameter to `'static` before the fork's cursor is stored in `self.cell`. |
192 | self.cell | |
193 | .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }) | |
194 | } | |
195 | } |