use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
use rustc_ast::token::{self, Delimiter, Token, TokenKind};
use rustc_ast::tokenstream::{AttrAnnotatedTokenStream, AttributesData, CreateTokenStream};
use rustc_ast::tokenstream::{AttrAnnotatedTokenTree, DelimSpan, LazyTokenStream, Spacing};
use rustc_ast::{self as ast};
use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens};
use rustc_errors::PResult;
use rustc_span::{sym, Span};

use std::convert::TryInto;
use std::ops::Range;

13 /// A wrapper type to ensure that the parser handles outer attributes correctly.
14 /// When we parse outer attributes, we need to ensure that we capture tokens
15 /// for the attribute target. This allows us to perform cfg-expansion on
16 /// a token stream before we invoke a derive proc-macro.
18 /// This wrapper prevents direct access to the underlying `Vec<ast::Attribute>`.
19 /// Parsing code can only get access to the underlying attributes
20 /// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens`.
21 /// This makes it difficult to accidentally construct an AST node
22 /// (which stores a `Vec<ast::Attribute>`) without first collecting tokens.
24 /// This struct has its own module, to ensure that the parser code
25 /// cannot directly access the `attrs` field
26 #[derive(Debug, Clone)]
27 pub struct AttrWrapper
{
29 // The start of the outer attributes in the token cursor.
30 // This allows us to create a `ReplaceRange` for the entire attribute
31 // target, including outer attributes.
35 // This struct is passed around very frequently,
36 // so make sure it doesn't accidentally get larger
37 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
38 rustc_data_structures
::static_assert_size
!(AttrWrapper
, 16);
41 pub(super) fn new(attrs
: AttrVec
, start_pos
: usize) -> AttrWrapper
{
42 AttrWrapper { attrs, start_pos }
44 pub fn empty() -> AttrWrapper
{
45 AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
47 // FIXME: Delay span bug here?
48 pub(crate) fn take_for_recovery(self) -> AttrVec
{
52 // FIXME: require passing an NT to prevent misuse of this method
53 pub(crate) fn prepend_to_nt_inner(self, attrs
: &mut Vec
<Attribute
>) {
54 let mut self_attrs
: Vec
<_
> = self.attrs
.into();
55 std
::mem
::swap(attrs
, &mut self_attrs
);
56 attrs
.extend(self_attrs
);
59 pub fn is_empty(&self) -> bool
{
63 pub fn maybe_needs_tokens(&self) -> bool
{
64 crate::parser
::attr
::maybe_needs_tokens(&self.attrs
)
68 /// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
69 fn has_cfg_or_cfg_attr(attrs
: &[Attribute
]) -> bool
{
70 // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
71 // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
72 // we don't need to do any eager expansion.
73 attrs
.iter().any(|attr
| {
74 attr
.ident().map_or(false, |ident
| ident
.name
== sym
::cfg
|| ident
.name
== sym
::cfg_attr
)
78 // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
79 // and `num_calls`, we can reconstruct the `TokenStream` seen
80 // by the callback. This allows us to avoid producing a `TokenStream`
81 // if it is never needed - for example, a captured `macro_rules!`
82 // argument that is never passed to a proc macro.
83 // In practice token stream creation happens rarely compared to
84 // calls to `collect_tokens` (see some statistics in #78736),
85 // so we are doing as little up-front work as possible.
87 // This also makes `Parser` very cheap to clone, since
88 // there is no intermediate collection buffer to clone.
90 struct LazyTokenStreamImpl
{
91 start_token
: (Token
, Spacing
),
92 cursor_snapshot
: TokenCursor
,
94 break_last_token
: bool
,
95 replace_ranges
: Box
<[ReplaceRange
]>,
98 #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
99 rustc_data_structures
::static_assert_size
!(LazyTokenStreamImpl
, 144);
101 impl CreateTokenStream
for LazyTokenStreamImpl
{
102 fn create_token_stream(&self) -> AttrAnnotatedTokenStream
{
103 // The token produced by the final call to `{,inlined_}next` was not
104 // actually consumed by the callback. The combination of chaining the
105 // initial token and using `take` produces the desired result - we
106 // produce an empty `TokenStream` if no calls were made, and omit the
107 // final token otherwise.
108 let mut cursor_snapshot
= self.cursor_snapshot
.clone();
110 std
::iter
::once((FlatToken
::Token(self.start_token
.0.clone()), self.start_token
.1))
111 .chain((0..self.num_calls
).map(|_
| {
112 let token
= cursor_snapshot
.next(cursor_snapshot
.desugar_doc_comments
);
113 (FlatToken
::Token(token
.0), token
.1)
115 .take(self.num_calls
);
117 if !self.replace_ranges
.is_empty() {
118 let mut tokens
: Vec
<_
> = tokens
.collect();
119 let mut replace_ranges
= self.replace_ranges
.clone();
120 replace_ranges
.sort_by_key(|(range
, _
)| range
.start
);
122 #[cfg(debug_assertions)]
124 for [(range
, tokens
), (next_range
, next_tokens
)] in replace_ranges
.array_windows() {
126 range
.end
<= next_range
.start
|| range
.end
>= next_range
.end
,
127 "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
136 // Process the replace ranges, starting from the highest start
137 // position and working our way back. If have tokens like:
139 // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
141 // Then we will generate replace ranges for both
142 // the `#[cfg(FALSE)] field: bool` and the entire
143 // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
145 // By starting processing from the replace range with the greatest
146 // start position, we ensure that any replace range which encloses
147 // another replace range will capture the *replaced* tokens for the inner
148 // range, not the original tokens.
149 for (range
, new_tokens
) in replace_ranges
.iter().rev() {
150 assert
!(!range
.is_empty(), "Cannot replace an empty range: {:?}", range
);
151 // Replace ranges are only allowed to decrease the number of tokens.
153 range
.len() >= new_tokens
.len(),
154 "Range {:?} has greater len than {:?}",
159 // Replace any removed tokens with `FlatToken::Empty`.
160 // This keeps the total length of `tokens` constant throughout the
161 // replacement process, allowing us to use all of the `ReplaceRanges` entries
162 // without adjusting indices.
163 let filler
= std
::iter
::repeat((FlatToken
::Empty
, Spacing
::Alone
))
164 .take(range
.len() - new_tokens
.len());
167 (range
.start
as usize)..(range
.end
as usize),
168 new_tokens
.clone().into_iter().chain(filler
),
171 make_token_stream(tokens
.into_iter(), self.break_last_token
)
173 make_token_stream(tokens
, self.break_last_token
)
178 impl<'a
> Parser
<'a
> {
179 /// Records all tokens consumed by the provided callback,
180 /// including the current token. These tokens are collected
181 /// into a `LazyTokenStream`, and returned along with the result
184 /// Note: If your callback consumes an opening delimiter
185 /// (including the case where you call `collect_tokens`
186 /// when the current token is an opening delimiter),
187 /// you must also consume the corresponding closing delimiter.
189 /// That is, you can consume
190 /// `something ([{ }])` or `([{}])`, but not `([{}]`
192 /// This restriction shouldn't be an issue in practice,
193 /// since this function is used to record the tokens for
194 /// a parsed AST item, which always has matching delimiters.
195 pub fn collect_tokens_trailing_token
<R
: HasAttrs
+ HasTokens
>(
198 force_collect
: ForceCollect
,
199 f
: impl FnOnce(&mut Self, Vec
<ast
::Attribute
>) -> PResult
<'a
, (R
, TrailingToken
)>,
200 ) -> PResult
<'a
, R
> {
201 // We only bail out when nothing could possibly observe the collected tokens:
202 // 1. We cannot be force collecting tokens (since force-collecting requires tokens
204 if matches
!(force_collect
, ForceCollect
::No
)
205 // None of our outer attributes can require tokens (e.g. a proc-macro)
206 && !attrs
.maybe_needs_tokens()
207 // If our target supports custom inner attributes, then we cannot bail
208 // out early, since we may need to capture tokens for a custom inner attribute
210 && !R
::SUPPORTS_CUSTOM_INNER_ATTRS
211 // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
212 // or `#[cfg_attr]` attributes.
215 return Ok(f(self, attrs
.attrs
.into())?
.0);
218 let start_token
= (self.token
.clone(), self.token_spacing
);
219 let cursor_snapshot
= self.token_cursor
.clone();
221 let has_outer_attrs
= !attrs
.attrs
.is_empty();
222 let prev_capturing
= std
::mem
::replace(&mut self.capture_state
.capturing
, Capturing
::Yes
);
223 let replace_ranges_start
= self.capture_state
.replace_ranges
.len();
225 let ret
= f(self, attrs
.attrs
.into());
227 self.capture_state
.capturing
= prev_capturing
;
229 let (mut ret
, trailing
) = ret?
;
231 // When we're not in `capture-cfg` mode, then bail out early if:
232 // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
233 // so there's nothing for us to do.
234 // 2. Our target already has tokens set (e.g. we've parsed something
235 // like `#[my_attr] $item`. The actual parsing code takes care of prepending
236 // any attributes to the nonterminal, so we don't need to modify the
237 // already captured tokens.
238 // Note that this check is independent of `force_collect`- if we already
239 // have tokens, or can't even store them, then there's never a need to
240 // force collection of new tokens.
241 if !self.capture_cfg
&& matches
!(ret
.tokens_mut(), None
| Some(Some(_
))) {
245 // This is very similar to the bail out check at the start of this function.
246 // Now that we've parsed an AST node, we have more information available.
247 if matches
!(force_collect
, ForceCollect
::No
)
248 // We now have inner attributes available, so this check is more precise
249 // than `attrs.maybe_needs_tokens()` at the start of the function.
250 // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
251 && !crate::parser
::attr
::maybe_needs_tokens(ret
.attrs())
252 // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
253 // This ensures that we consider inner attributes (e.g. `#![cfg]`),
254 // which require us to have tokens available
255 // We also call `has_cfg_or_cfg_attr` at the beginning of this function,
256 // but we only bail out if there's no possibility of inner attributes
257 // (!R::SUPPORTS_CUSTOM_INNER_ATTRS)
258 // We only capture about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
259 // mode - during normal parsing, we don't need any special capturing
260 // for those attributes, since they're builtin.
261 && !(self.capture_cfg
&& has_cfg_or_cfg_attr(ret
.attrs()))
266 let mut inner_attr_replace_ranges
= Vec
::new();
267 // Take the captured ranges for any inner attributes that we parsed.
268 for inner_attr
in ret
.attrs().iter().filter(|a
| a
.style
== ast
::AttrStyle
::Inner
) {
269 if let Some(attr_range
) = self.capture_state
.inner_attr_ranges
.remove(&inner_attr
.id
) {
270 inner_attr_replace_ranges
.push(attr_range
);
274 .delay_span_bug(inner_attr
.span
, "Missing token range for attribute");
278 let replace_ranges_end
= self.capture_state
.replace_ranges
.len();
280 let cursor_snapshot_next_calls
= cursor_snapshot
.num_next_calls
;
281 let mut end_pos
= self.token_cursor
.num_next_calls
;
283 // Capture a trailing token if requested by the callback 'f'
285 TrailingToken
::None
=> {}
286 TrailingToken
::Semi
=> {
287 assert_eq
!(self.token
.kind
, token
::Semi
);
290 TrailingToken
::MaybeComma
=> {
291 if self.token
.kind
== token
::Comma
{
297 // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
298 // then extend the range of captured tokens to include it, since the parser
299 // was not actually bumped past it. When the `LazyTokenStream` gets converted
300 // into an `AttrAnnotatedTokenStream`, we will create the proper token.
301 if self.token_cursor
.break_last_token
{
305 "Cannot set `break_last_token` and have trailing token"
310 let num_calls
= end_pos
- cursor_snapshot_next_calls
;
312 // If we have no attributes, then we will never need to
313 // use any replace ranges.
314 let replace_ranges
: Box
<[ReplaceRange
]> = if ret
.attrs().is_empty() && !self.capture_cfg
{
317 // Grab any replace ranges that occur *inside* the current AST node.
318 // We will perform the actual replacement when we convert the `LazyTokenStream`
319 // to an `AttrAnnotatedTokenStream`
320 let start_calls
: u32 = cursor_snapshot_next_calls
.try_into().unwrap();
321 self.capture_state
.replace_ranges
[replace_ranges_start
..replace_ranges_end
]
324 .chain(inner_attr_replace_ranges
.clone().into_iter())
325 .map(|(range
, tokens
)| {
326 ((range
.start
- start_calls
)..(range
.end
- start_calls
), tokens
)
331 let tokens
= LazyTokenStream
::new(LazyTokenStreamImpl
{
335 break_last_token
: self.token_cursor
.break_last_token
,
339 // If we support tokens at all
340 if let Some(target_tokens
) = ret
.tokens_mut() {
341 if target_tokens
.is_none() {
342 // Store se our newly captured tokens into the AST node
343 *target_tokens
= Some(tokens
.clone());
347 let final_attrs
= ret
.attrs();
349 // If `capture_cfg` is set and we're inside a recursive call to
350 // `collect_tokens_trailing_token`, then we need to register a replace range
351 // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
352 // on the captured token stream.
354 && matches
!(self.capture_state
.capturing
, Capturing
::Yes
)
355 && has_cfg_or_cfg_attr(&final_attrs
)
357 let attr_data
= AttributesData { attrs: final_attrs.to_vec().into(), tokens }
;
359 // Replace the entire AST node that we just parsed, including attributes,
360 // with a `FlatToken::AttrTarget`. If this AST node is inside an item
361 // that has `#[derive]`, then this will allow us to cfg-expand this
364 if has_outer_attrs { attrs.start_pos }
else { cursor_snapshot_next_calls }
;
365 let new_tokens
= vec
![(FlatToken
::AttrTarget(attr_data
), Spacing
::Alone
)];
368 !self.token_cursor
.break_last_token
,
369 "Should not have unglued last token with cfg attr"
371 let range
: Range
<u32> = (start_pos
.try_into().unwrap())..(end_pos
.try_into().unwrap());
372 self.capture_state
.replace_ranges
.push((range
, new_tokens
));
373 self.capture_state
.replace_ranges
.extend(inner_attr_replace_ranges
);
376 // Only clear our `replace_ranges` when we're finished capturing entirely.
377 if matches
!(self.capture_state
.capturing
, Capturing
::No
) {
378 self.capture_state
.replace_ranges
.clear();
379 // We don't clear `inner_attr_ranges`, as doing so repeatedly
380 // had a measurable performance impact. Most inner attributes that
381 // we insert will get removed - when we drop the parser, we'll free
382 // up the memory used by any attributes that we didn't remove from the map.
388 /// Converts a flattened iterator of tokens (including open and close delimiter tokens)
389 /// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
390 /// of open and close delims.
391 fn make_token_stream(
392 mut iter
: impl Iterator
<Item
= (FlatToken
, Spacing
)>,
393 break_last_token
: bool
,
394 ) -> AttrAnnotatedTokenStream
{
397 // This is `None` for the first frame, `Some` for all others.
398 open_delim_sp
: Option
<(Delimiter
, Span
)>,
399 inner
: Vec
<(AttrAnnotatedTokenTree
, Spacing
)>,
401 let mut stack
= vec
![FrameData { open_delim_sp: None, inner: vec![] }
];
402 let mut token_and_spacing
= iter
.next();
403 while let Some((token
, spacing
)) = token_and_spacing
{
405 FlatToken
::Token(Token { kind: TokenKind::OpenDelim(delim), span }
) => {
406 stack
.push(FrameData { open_delim_sp: Some((delim, span)), inner: vec![] }
);
408 FlatToken
::Token(Token { kind: TokenKind::CloseDelim(delim), span }
) => {
409 let frame_data
= stack
411 .unwrap_or_else(|| panic
!("Token stack was empty for token: {:?}", token
));
413 let (open_delim
, open_sp
) = frame_data
.open_delim_sp
.unwrap();
416 "Mismatched open/close delims: open={:?} close={:?}",
419 let dspan
= DelimSpan
::from_pair(open_sp
, span
);
420 let stream
= AttrAnnotatedTokenStream
::new(frame_data
.inner
);
421 let delimited
= AttrAnnotatedTokenTree
::Delimited(dspan
, delim
, stream
);
425 panic
!("Bottom token frame is missing for token: {:?}", token
)
428 .push((delimited
, Spacing
::Alone
));
430 FlatToken
::Token(token
) => stack
432 .expect("Bottom token frame is missing!")
434 .push((AttrAnnotatedTokenTree
::Token(token
), spacing
)),
435 FlatToken
::AttrTarget(data
) => stack
437 .expect("Bottom token frame is missing!")
439 .push((AttrAnnotatedTokenTree
::Attributes(data
), spacing
)),
440 FlatToken
::Empty
=> {}
442 token_and_spacing
= iter
.next();
444 let mut final_buf
= stack
.pop().expect("Missing final buf!");
445 if break_last_token
{
446 let (last_token
, spacing
) = final_buf
.inner
.pop().unwrap();
447 if let AttrAnnotatedTokenTree
::Token(last_token
) = last_token
{
448 let unglued_first
= last_token
.kind
.break_two_token_op().unwrap().0;
450 // An 'unglued' token is always two ASCII characters
451 let mut first_span
= last_token
.span
.shrink_to_lo();
452 first_span
= first_span
.with_hi(first_span
.lo() + rustc_span
::BytePos(1));
454 final_buf
.inner
.push((
455 AttrAnnotatedTokenTree
::Token(Token
::new(unglued_first
, first_span
)),
459 panic
!("Unexpected last token {:?}", last_token
)
462 assert
!(stack
.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf
, stack
);
463 AttrAnnotatedTokenStream
::new(final_buf
.inner
)