]>
Commit | Line | Data |
---|---|---|
e74abb32 XL |
1 | //! A stably addressed token buffer supporting efficient traversal based on a |
2 | //! cheaply copyable cursor. | |
3 | //! | |
f035d41b | 4 | //! *This module is available only if Syn is built with the `"parsing"` feature.* |
e74abb32 XL |
5 | |
6 | // This module is heavily commented as it contains most of the unsafe code in | |
7 | // Syn, and caution should be used when editing it. The public-facing interface | |
8 | // is 100% safe but the implementation is fragile internally. | |
9 | ||
10 | #[cfg(all( | |
11 | not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), | |
12 | feature = "proc-macro" | |
13 | ))] | |
14 | use crate::proc_macro as pm; | |
29967ef6 | 15 | use crate::Lifetime; |
e74abb32 | 16 | use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; |
e74abb32 XL |
17 | use std::marker::PhantomData; |
18 | use std::ptr; | |
5099ac24 | 19 | use std::slice; |
e74abb32 | 20 | |
e74abb32 XL |
21 | /// Internal type which is used instead of `TokenTree` to represent a token tree |
22 | /// within a `TokenBuffer`. | |
23 | enum Entry { | |
24 | // Mimicking types from proc-macro. | |
25 | Group(Group, TokenBuffer), | |
26 | Ident(Ident), | |
27 | Punct(Punct), | |
28 | Literal(Literal), | |
29 | // End entries contain a raw pointer to the entry from the containing | |
30 | // token tree, or null if this is the outermost level. | |
31 | End(*const Entry), | |
32 | } | |
33 | ||
34 | /// A buffer that can be efficiently traversed multiple times, unlike | |
35 | /// `TokenStream` which requires a deep copy in order to traverse more than | |
36 | /// once. | |
37 | /// | |
f035d41b | 38 | /// *This type is available only if Syn is built with the `"parsing"` feature.* |
e74abb32 | 39 | pub struct TokenBuffer { |
5099ac24 FG |
40 | // NOTE: Do not implement clone on this - there are raw pointers inside |
41 | // these entries which will be messed up. Moving the `TokenBuffer` itself is | |
42 | // safe as the data pointed to won't be moved. | |
43 | ptr: *const Entry, | |
44 | len: usize, | |
45 | } | |
46 | ||
47 | impl Drop for TokenBuffer { | |
48 | fn drop(&mut self) { | |
49 | unsafe { | |
50 | let slice = slice::from_raw_parts_mut(self.ptr as *mut Entry, self.len); | |
51 | let _ = Box::from_raw(slice); | |
52 | } | |
53 | } | |
e74abb32 XL |
54 | } |
55 | ||
56 | impl TokenBuffer { | |
5099ac24 FG |
57 | // NOTE: Do not mutate the Vec returned from this function once it returns; |
58 | // the address of its backing memory must remain stable. | |
e74abb32 XL |
59 | fn inner_new(stream: TokenStream, up: *const Entry) -> TokenBuffer { |
60 | // Build up the entries list, recording the locations of any Groups | |
61 | // in the list to be processed later. | |
62 | let mut entries = Vec::new(); | |
5099ac24 | 63 | let mut groups = Vec::new(); |
e74abb32 XL |
64 | for tt in stream { |
65 | match tt { | |
66 | TokenTree::Ident(sym) => { | |
67 | entries.push(Entry::Ident(sym)); | |
68 | } | |
69 | TokenTree::Punct(op) => { | |
70 | entries.push(Entry::Punct(op)); | |
71 | } | |
72 | TokenTree::Literal(l) => { | |
73 | entries.push(Entry::Literal(l)); | |
74 | } | |
75 | TokenTree::Group(g) => { | |
76 | // Record the index of the interesting entry, and store an | |
5099ac24 FG |
77 | // `End(null)` there temporarily. |
78 | groups.push((entries.len(), g)); | |
e74abb32 XL |
79 | entries.push(Entry::End(ptr::null())); |
80 | } | |
81 | } | |
82 | } | |
83 | // Add an `End` entry to the end with a reference to the enclosing token | |
84 | // stream which was passed in. | |
85 | entries.push(Entry::End(up)); | |
86 | ||
87 | // NOTE: This is done to ensure that we don't accidentally modify the | |
88 | // length of the backing buffer. The backing buffer must remain at a | |
89 | // constant address after this point, as we are going to store a raw | |
90 | // pointer into it. | |
923072b8 FG |
91 | let entries = entries.into_boxed_slice(); |
92 | let len = entries.len(); | |
93 | // Convert boxed slice into a pointer to the first element early, to | |
94 | // avoid invalidating pointers into this slice when we move the Box. | |
95 | // See https://github.com/rust-lang/unsafe-code-guidelines/issues/326 | |
96 | let entries = Box::into_raw(entries) as *mut Entry; | |
5099ac24 | 97 | for (idx, group) in groups { |
e74abb32 XL |
98 | // We know that this index refers to one of the temporary |
99 | // `End(null)` entries, and we know that the last entry is | |
100 | // `End(up)`, so the next index is also valid. | |
923072b8 | 101 | let group_up = unsafe { entries.add(idx + 1) }; |
e74abb32 XL |
102 | |
103 | // The end entry stored at the end of this Entry::Group should | |
104 | // point to the Entry which follows the Group in the list. | |
5099ac24 | 105 | let inner = Self::inner_new(group.stream(), group_up); |
923072b8 | 106 | unsafe { *entries.add(idx) = Entry::Group(group, inner) }; |
e74abb32 XL |
107 | } |
108 | ||
923072b8 | 109 | TokenBuffer { ptr: entries, len } |
e74abb32 XL |
110 | } |
111 | ||
112 | /// Creates a `TokenBuffer` containing all the tokens from the input | |
5099ac24 | 113 | /// `proc_macro::TokenStream`. |
e74abb32 | 114 | /// |
f035d41b | 115 | /// *This method is available only if Syn is built with both the `"parsing"` and |
e74abb32 XL |
116 | /// `"proc-macro"` features.* |
117 | #[cfg(all( | |
118 | not(all(target_arch = "wasm32", any(target_os = "unknown", target_os = "wasi"))), | |
119 | feature = "proc-macro" | |
120 | ))] | |
5099ac24 | 121 | pub fn new(stream: pm::TokenStream) -> Self { |
e74abb32 XL |
122 | Self::new2(stream.into()) |
123 | } | |
124 | ||
125 | /// Creates a `TokenBuffer` containing all the tokens from the input | |
5099ac24 FG |
126 | /// `proc_macro2::TokenStream`. |
127 | pub fn new2(stream: TokenStream) -> Self { | |
e74abb32 XL |
128 | Self::inner_new(stream, ptr::null()) |
129 | } | |
130 | ||
131 | /// Creates a cursor referencing the first token in the buffer and able to | |
132 | /// traverse until the end of the buffer. | |
133 | pub fn begin(&self) -> Cursor { | |
5099ac24 | 134 | unsafe { Cursor::create(self.ptr, self.ptr.add(self.len - 1)) } |
e74abb32 XL |
135 | } |
136 | } | |
137 | ||
138 | /// A cheaply copyable cursor into a `TokenBuffer`. | |
139 | /// | |
140 | /// This cursor holds a shared reference into the immutable data which is used | |
141 | /// internally to represent a `TokenStream`, and can be efficiently manipulated | |
142 | /// and copied around. | |
143 | /// | |
144 | /// An empty `Cursor` can be created directly, or one may create a `TokenBuffer` | |
145 | /// object and get a cursor to its first token with `begin()`. | |
146 | /// | |
147 | /// Two cursors are equal if they have the same location in the same input | |
148 | /// stream, and have the same scope. | |
149 | /// | |
f035d41b | 150 | /// *This type is available only if Syn is built with the `"parsing"` feature.* |
e74abb32 XL |
151 | pub struct Cursor<'a> { |
152 | // The current entry which the `Cursor` is pointing at. | |
153 | ptr: *const Entry, | |
154 | // This is the only `Entry::End(..)` object which this cursor is allowed to | |
155 | // point at. All other `End` objects are skipped over in `Cursor::create`. | |
156 | scope: *const Entry, | |
157 | // Cursor is covariant in 'a. This field ensures that our pointers are still | |
158 | // valid. | |
159 | marker: PhantomData<&'a Entry>, | |
160 | } | |
161 | ||
162 | impl<'a> Cursor<'a> { | |
163 | /// Creates a cursor referencing a static empty TokenStream. | |
164 | pub fn empty() -> Self { | |
165 | // It's safe in this situation for us to put an `Entry` object in global | |
166 | // storage, despite it not actually being safe to send across threads | |
167 | // (`Ident` is a reference into a thread-local table). This is because | |
168 | // this entry never includes a `Ident` object. | |
169 | // | |
170 | // This wrapper struct allows us to break the rules and put a `Sync` | |
171 | // object in global storage. | |
172 | struct UnsafeSyncEntry(Entry); | |
173 | unsafe impl Sync for UnsafeSyncEntry {} | |
174 | static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0 as *const Entry)); | |
175 | ||
176 | Cursor { | |
177 | ptr: &EMPTY_ENTRY.0, | |
178 | scope: &EMPTY_ENTRY.0, | |
179 | marker: PhantomData, | |
180 | } | |
181 | } | |
182 | ||
183 | /// This create method intelligently exits non-explicitly-entered | |
184 | /// `None`-delimited scopes when the cursor reaches the end of them, | |
185 | /// allowing for them to be treated transparently. | |
186 | unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self { | |
187 | // NOTE: If we're looking at a `End(..)`, we want to advance the cursor | |
188 | // past it, unless `ptr == scope`, which means that we're at the edge of | |
189 | // our cursor's scope. We should only have `ptr != scope` at the exit | |
190 | // from None-delimited groups entered with `ignore_none`. | |
191 | while let Entry::End(exit) = *ptr { | |
192 | if ptr == scope { | |
193 | break; | |
194 | } | |
195 | ptr = exit; | |
196 | } | |
197 | ||
198 | Cursor { | |
199 | ptr, | |
200 | scope, | |
201 | marker: PhantomData, | |
202 | } | |
203 | } | |
204 | ||
205 | /// Get the current entry. | |
206 | fn entry(self) -> &'a Entry { | |
207 | unsafe { &*self.ptr } | |
208 | } | |
209 | ||
210 | /// Bump the cursor to point at the next token after the current one. This | |
211 | /// is undefined behavior if the cursor is currently looking at an | |
212 | /// `Entry::End`. | |
213 | unsafe fn bump(self) -> Cursor<'a> { | |
214 | Cursor::create(self.ptr.offset(1), self.scope) | |
215 | } | |
216 | ||
f035d41b XL |
217 | /// While the cursor is looking at a `None`-delimited group, move it to look |
218 | /// at the first token inside instead. If the group is empty, this will move | |
e74abb32 XL |
219 | /// the cursor past the `None`-delimited group. |
220 | /// | |
221 | /// WARNING: This mutates its argument. | |
222 | fn ignore_none(&mut self) { | |
f035d41b | 223 | while let Entry::Group(group, buf) = self.entry() { |
e74abb32 XL |
224 | if group.delimiter() == Delimiter::None { |
225 | // NOTE: We call `Cursor::create` here to make sure that | |
226 | // situations where we should immediately exit the span after | |
227 | // entering it are handled correctly. | |
228 | unsafe { | |
5099ac24 | 229 | *self = Cursor::create(buf.ptr, self.scope); |
e74abb32 | 230 | } |
f035d41b XL |
231 | } else { |
232 | break; | |
e74abb32 XL |
233 | } |
234 | } | |
235 | } | |
236 | ||
237 | /// Checks whether the cursor is currently pointing at the end of its valid | |
238 | /// scope. | |
e74abb32 XL |
239 | pub fn eof(self) -> bool { |
240 | // We're at eof if we're at the end of our scope. | |
241 | self.ptr == self.scope | |
242 | } | |
243 | ||
244 | /// If the cursor is pointing at a `Group` with the given delimiter, returns | |
245 | /// a cursor into that group and one pointing to the next `TokenTree`. | |
246 | pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, Span, Cursor<'a>)> { | |
247 | // If we're not trying to enter a none-delimited group, we want to | |
248 | // ignore them. We have to make sure to _not_ ignore them when we want | |
249 | // to enter them, of course. For obvious reasons. | |
250 | if delim != Delimiter::None { | |
251 | self.ignore_none(); | |
252 | } | |
253 | ||
254 | if let Entry::Group(group, buf) = self.entry() { | |
255 | if group.delimiter() == delim { | |
256 | return Some((buf.begin(), group.span(), unsafe { self.bump() })); | |
257 | } | |
258 | } | |
259 | ||
260 | None | |
261 | } | |
262 | ||
263 | /// If the cursor is pointing at a `Ident`, returns it along with a cursor | |
264 | /// pointing at the next `TokenTree`. | |
265 | pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> { | |
266 | self.ignore_none(); | |
267 | match self.entry() { | |
268 | Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump() })), | |
269 | _ => None, | |
270 | } | |
271 | } | |
272 | ||
5099ac24 | 273 | /// If the cursor is pointing at a `Punct`, returns it along with a cursor |
e74abb32 XL |
274 | /// pointing at the next `TokenTree`. |
275 | pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> { | |
276 | self.ignore_none(); | |
277 | match self.entry() { | |
278 | Entry::Punct(op) if op.as_char() != '\'' => Some((op.clone(), unsafe { self.bump() })), | |
279 | _ => None, | |
280 | } | |
281 | } | |
282 | ||
283 | /// If the cursor is pointing at a `Literal`, return it along with a cursor | |
284 | /// pointing at the next `TokenTree`. | |
285 | pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> { | |
286 | self.ignore_none(); | |
287 | match self.entry() { | |
288 | Entry::Literal(lit) => Some((lit.clone(), unsafe { self.bump() })), | |
289 | _ => None, | |
290 | } | |
291 | } | |
292 | ||
293 | /// If the cursor is pointing at a `Lifetime`, returns it along with a | |
294 | /// cursor pointing at the next `TokenTree`. | |
295 | pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> { | |
296 | self.ignore_none(); | |
297 | match self.entry() { | |
298 | Entry::Punct(op) if op.as_char() == '\'' && op.spacing() == Spacing::Joint => { | |
299 | let next = unsafe { self.bump() }; | |
300 | match next.ident() { | |
301 | Some((ident, rest)) => { | |
302 | let lifetime = Lifetime { | |
303 | apostrophe: op.span(), | |
304 | ident, | |
305 | }; | |
306 | Some((lifetime, rest)) | |
307 | } | |
308 | None => None, | |
309 | } | |
310 | } | |
311 | _ => None, | |
312 | } | |
313 | } | |
314 | ||
315 | /// Copies all remaining tokens visible from this cursor into a | |
316 | /// `TokenStream`. | |
317 | pub fn token_stream(self) -> TokenStream { | |
318 | let mut tts = Vec::new(); | |
319 | let mut cursor = self; | |
320 | while let Some((tt, rest)) = cursor.token_tree() { | |
321 | tts.push(tt); | |
322 | cursor = rest; | |
323 | } | |
324 | tts.into_iter().collect() | |
325 | } | |
326 | ||
327 | /// If the cursor is pointing at a `TokenTree`, returns it along with a | |
328 | /// cursor pointing at the next `TokenTree`. | |
329 | /// | |
330 | /// Returns `None` if the cursor has reached the end of its stream. | |
331 | /// | |
332 | /// This method does not treat `None`-delimited groups as transparent, and | |
333 | /// will return a `Group(None, ..)` if the cursor is looking at one. | |
334 | pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> { | |
335 | let tree = match self.entry() { | |
336 | Entry::Group(group, _) => group.clone().into(), | |
337 | Entry::Literal(lit) => lit.clone().into(), | |
338 | Entry::Ident(ident) => ident.clone().into(), | |
339 | Entry::Punct(op) => op.clone().into(), | |
5099ac24 | 340 | Entry::End(..) => return None, |
e74abb32 XL |
341 | }; |
342 | ||
343 | Some((tree, unsafe { self.bump() })) | |
344 | } | |
345 | ||
346 | /// Returns the `Span` of the current token, or `Span::call_site()` if this | |
347 | /// cursor points to eof. | |
348 | pub fn span(self) -> Span { | |
349 | match self.entry() { | |
350 | Entry::Group(group, _) => group.span(), | |
351 | Entry::Literal(l) => l.span(), | |
352 | Entry::Ident(t) => t.span(), | |
353 | Entry::Punct(o) => o.span(), | |
354 | Entry::End(..) => Span::call_site(), | |
355 | } | |
356 | } | |
f035d41b XL |
357 | |
358 | /// Skip over the next token without cloning it. Returns `None` if this | |
359 | /// cursor points to eof. | |
360 | /// | |
361 | /// This method treats `'lifetimes` as a single token. | |
362 | pub(crate) fn skip(self) -> Option<Cursor<'a>> { | |
363 | match self.entry() { | |
364 | Entry::End(..) => None, | |
365 | ||
366 | // Treat lifetimes as a single tt for the purposes of 'skip'. | |
367 | Entry::Punct(op) if op.as_char() == '\'' && op.spacing() == Spacing::Joint => { | |
368 | let next = unsafe { self.bump() }; | |
369 | match next.entry() { | |
370 | Entry::Ident(_) => Some(unsafe { next.bump() }), | |
371 | _ => Some(next), | |
372 | } | |
373 | } | |
374 | _ => Some(unsafe { self.bump() }), | |
375 | } | |
376 | } | |
e74abb32 XL |
377 | } |
378 | ||
1b1a35ee XL |
379 | impl<'a> Copy for Cursor<'a> {} |
380 | ||
381 | impl<'a> Clone for Cursor<'a> { | |
382 | fn clone(&self) -> Self { | |
383 | *self | |
384 | } | |
385 | } | |
386 | ||
387 | impl<'a> Eq for Cursor<'a> {} | |
388 | ||
389 | impl<'a> PartialEq for Cursor<'a> { | |
390 | fn eq(&self, other: &Self) -> bool { | |
391 | let Cursor { ptr, scope, marker } = self; | |
392 | let _ = marker; | |
393 | *ptr == other.ptr && *scope == other.scope | |
394 | } | |
395 | } | |
396 | ||
e74abb32 XL |
397 | pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool { |
398 | a.scope == b.scope | |
399 | } | |
400 | ||
401 | pub(crate) fn open_span_of_group(cursor: Cursor) -> Span { | |
402 | match cursor.entry() { | |
403 | Entry::Group(group, _) => group.span_open(), | |
404 | _ => cursor.span(), | |
405 | } | |
406 | } | |
407 | ||
408 | pub(crate) fn close_span_of_group(cursor: Cursor) -> Span { | |
409 | match cursor.entry() { | |
410 | Entry::Group(group, _) => group.span_close(), | |
411 | _ => cursor.span(), | |
412 | } | |
413 | } |