]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_parse/src/parser/attr_wrapper.rs
New upstream version 1.68.2+dfsg1
[rustc.git] / compiler / rustc_parse / src / parser / attr_wrapper.rs
CommitLineData
cdc7bbd5 1use super::{Capturing, FlatToken, ForceCollect, Parser, ReplaceRange, TokenCursor, TrailingToken};
04454e1e 2use rustc_ast::token::{self, Delimiter, Token, TokenKind};
f2b60f7d
FG
3use rustc_ast::tokenstream::{AttrTokenStream, AttributesData, ToAttrTokenStream};
4use rustc_ast::tokenstream::{AttrTokenTree, DelimSpan, LazyAttrTokenStream, Spacing};
6a06907d 5use rustc_ast::{self as ast};
04454e1e 6use rustc_ast::{AttrVec, Attribute, HasAttrs, HasTokens};
6a06907d 7use rustc_errors::PResult;
487cf647
FG
8use rustc_session::parse::ParseSess;
9use rustc_span::{sym, Span, DUMMY_SP};
cdc7bbd5 10
cdc7bbd5 11use std::ops::Range;
6a06907d
XL
12
13/// A wrapper type to ensure that the parser handles outer attributes correctly.
14/// When we parse outer attributes, we need to ensure that we capture tokens
15/// for the attribute target. This allows us to perform cfg-expansion on
16/// a token stream before we invoke a derive proc-macro.
17///
f2b60f7d 18/// This wrapper prevents direct access to the underlying `ast::AttrVec>`.
6a06907d
XL
19/// Parsing code can only get access to the underlying attributes
20/// by passing an `AttrWrapper` to `collect_tokens_trailing_tokens`.
21/// This makes it difficult to accidentally construct an AST node
f2b60f7d 22/// (which stores an `ast::AttrVec`) without first collecting tokens.
6a06907d
XL
23///
24/// This struct has its own module, to ensure that the parser code
25/// cannot directly access the `attrs` field
26#[derive(Debug, Clone)]
27pub struct AttrWrapper {
cdc7bbd5
XL
28 attrs: AttrVec,
29 // The start of the outer attributes in the token cursor.
30 // This allows us to create a `ReplaceRange` for the entire attribute
31 // target, including outer attributes.
32 start_pos: usize,
6a06907d
XL
33}
34
35impl AttrWrapper {
cdc7bbd5
XL
36 pub(super) fn new(attrs: AttrVec, start_pos: usize) -> AttrWrapper {
37 AttrWrapper { attrs, start_pos }
6a06907d 38 }
cdc7bbd5
XL
39 pub fn empty() -> AttrWrapper {
40 AttrWrapper { attrs: AttrVec::new(), start_pos: usize::MAX }
6a06907d 41 }
487cf647
FG
42
43 pub(crate) fn take_for_recovery(self, sess: &ParseSess) -> AttrVec {
44 sess.span_diagnostic.delay_span_bug(
45 self.attrs.get(0).map(|attr| attr.span).unwrap_or(DUMMY_SP),
46 "AttrVec is taken for recovery but no error is produced",
47 );
48
6a06907d
XL
49 self.attrs
50 }
cdc7bbd5 51
487cf647 52 /// Prepend `self.attrs` to `attrs`.
cdc7bbd5 53 // FIXME: require passing an NT to prevent misuse of this method
f2b60f7d
FG
54 pub(crate) fn prepend_to_nt_inner(self, attrs: &mut AttrVec) {
55 let mut self_attrs = self.attrs;
cdc7bbd5
XL
56 std::mem::swap(attrs, &mut self_attrs);
57 attrs.extend(self_attrs);
58 }
59
6a06907d
XL
60 pub fn is_empty(&self) -> bool {
61 self.attrs.is_empty()
62 }
cdc7bbd5
XL
63
64 pub fn maybe_needs_tokens(&self) -> bool {
65 crate::parser::attr::maybe_needs_tokens(&self.attrs)
66 }
67}
68
69/// Returns `true` if `attrs` contains a `cfg` or `cfg_attr` attribute
70fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
71 // NOTE: Builtin attributes like `cfg` and `cfg_attr` cannot be renamed via imports.
72 // Therefore, the absence of a literal `cfg` or `cfg_attr` guarantees that
73 // we don't need to do any eager expansion.
74 attrs.iter().any(|attr| {
75 attr.ident().map_or(false, |ident| ident.name == sym::cfg || ident.name == sym::cfg_attr)
76 })
77}
78
79// Produces a `TokenStream` on-demand. Using `cursor_snapshot`
80// and `num_calls`, we can reconstruct the `TokenStream` seen
81// by the callback. This allows us to avoid producing a `TokenStream`
82// if it is never needed - for example, a captured `macro_rules!`
83// argument that is never passed to a proc macro.
84// In practice token stream creation happens rarely compared to
85// calls to `collect_tokens` (see some statistics in #78736),
86// so we are doing as little up-front work as possible.
87//
88// This also makes `Parser` very cheap to clone, since
89// there is no intermediate collection buffer to clone.
90#[derive(Clone)]
f2b60f7d 91struct LazyAttrTokenStreamImpl {
cdc7bbd5
XL
92 start_token: (Token, Spacing),
93 cursor_snapshot: TokenCursor,
94 num_calls: usize,
95 break_last_token: bool,
96 replace_ranges: Box<[ReplaceRange]>,
97}
98
f2b60f7d
FG
99impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
100 fn to_attr_token_stream(&self) -> AttrTokenStream {
04454e1e
FG
101 // The token produced by the final call to `{,inlined_}next` was not
102 // actually consumed by the callback. The combination of chaining the
103 // initial token and using `take` produces the desired result - we
104 // produce an empty `TokenStream` if no calls were made, and omit the
105 // final token otherwise.
cdc7bbd5
XL
106 let mut cursor_snapshot = self.cursor_snapshot.clone();
107 let tokens =
108 std::iter::once((FlatToken::Token(self.start_token.0.clone()), self.start_token.1))
109 .chain((0..self.num_calls).map(|_| {
04454e1e 110 let token = cursor_snapshot.next(cursor_snapshot.desugar_doc_comments);
cdc7bbd5
XL
111 (FlatToken::Token(token.0), token.1)
112 }))
113 .take(self.num_calls);
114
115 if !self.replace_ranges.is_empty() {
116 let mut tokens: Vec<_> = tokens.collect();
f2b60f7d 117 let mut replace_ranges = self.replace_ranges.to_vec();
cdc7bbd5
XL
118 replace_ranges.sort_by_key(|(range, _)| range.start);
119
120 #[cfg(debug_assertions)]
121 {
122 for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
123 assert!(
124 range.end <= next_range.start || range.end >= next_range.end,
125 "Replace ranges should either be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
126 range,
127 tokens,
128 next_range,
129 next_tokens,
130 );
131 }
132 }
133
134 // Process the replace ranges, starting from the highest start
135 // position and working our way back. If have tokens like:
136 //
137 // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
138 //
139 // Then we will generate replace ranges for both
140 // the `#[cfg(FALSE)] field: bool` and the entire
141 // `#[cfg(FALSE)]` struct Foo { #[cfg(FALSE)] field: bool }`
142 //
143 // By starting processing from the replace range with the greatest
144 // start position, we ensure that any replace range which encloses
145 // another replace range will capture the *replaced* tokens for the inner
146 // range, not the original tokens.
f2b60f7d 147 for (range, new_tokens) in replace_ranges.into_iter().rev() {
cdc7bbd5
XL
148 assert!(!range.is_empty(), "Cannot replace an empty range: {:?}", range);
149 // Replace ranges are only allowed to decrease the number of tokens.
150 assert!(
151 range.len() >= new_tokens.len(),
152 "Range {:?} has greater len than {:?}",
153 range,
154 new_tokens
155 );
156
157 // Replace any removed tokens with `FlatToken::Empty`.
158 // This keeps the total length of `tokens` constant throughout the
159 // replacement process, allowing us to use all of the `ReplaceRanges` entries
160 // without adjusting indices.
161 let filler = std::iter::repeat((FlatToken::Empty, Spacing::Alone))
162 .take(range.len() - new_tokens.len());
163
164 tokens.splice(
165 (range.start as usize)..(range.end as usize),
f2b60f7d 166 new_tokens.into_iter().chain(filler),
cdc7bbd5
XL
167 );
168 }
169 make_token_stream(tokens.into_iter(), self.break_last_token)
170 } else {
171 make_token_stream(tokens, self.break_last_token)
172 }
173 }
6a06907d
XL
174}
175
176impl<'a> Parser<'a> {
177 /// Records all tokens consumed by the provided callback,
178 /// including the current token. These tokens are collected
f2b60f7d 179 /// into a `LazyAttrTokenStream`, and returned along with the result
6a06907d
XL
180 /// of the callback.
181 ///
182 /// Note: If your callback consumes an opening delimiter
183 /// (including the case where you call `collect_tokens`
5e7ed085 184 /// when the current token is an opening delimiter),
6a06907d
XL
185 /// you must also consume the corresponding closing delimiter.
186 ///
187 /// That is, you can consume
188 /// `something ([{ }])` or `([{}])`, but not `([{}]`
189 ///
190 /// This restriction shouldn't be an issue in practice,
191 /// since this function is used to record the tokens for
192 /// a parsed AST item, which always has matching delimiters.
04454e1e 193 pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
6a06907d
XL
194 &mut self,
195 attrs: AttrWrapper,
196 force_collect: ForceCollect,
f2b60f7d 197 f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, TrailingToken)>,
6a06907d 198 ) -> PResult<'a, R> {
cdc7bbd5
XL
199 // We only bail out when nothing could possibly observe the collected tokens:
200 // 1. We cannot be force collecting tokens (since force-collecting requires tokens
201 // by definition
202 if matches!(force_collect, ForceCollect::No)
203 // None of our outer attributes can require tokens (e.g. a proc-macro)
204 && !attrs.maybe_needs_tokens()
205 // If our target supports custom inner attributes, then we cannot bail
206 // out early, since we may need to capture tokens for a custom inner attribute
207 // invocation.
208 && !R::SUPPORTS_CUSTOM_INNER_ATTRS
209 // Never bail out early in `capture_cfg` mode, since there might be `#[cfg]`
210 // or `#[cfg_attr]` attributes.
211 && !self.capture_cfg
212 {
f2b60f7d 213 return Ok(f(self, attrs.attrs)?.0);
6a06907d 214 }
cdc7bbd5 215
6a06907d
XL
216 let start_token = (self.token.clone(), self.token_spacing);
217 let cursor_snapshot = self.token_cursor.clone();
218
cdc7bbd5
XL
219 let has_outer_attrs = !attrs.attrs.is_empty();
220 let prev_capturing = std::mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
221 let replace_ranges_start = self.capture_state.replace_ranges.len();
222
f2b60f7d 223 let ret = f(self, attrs.attrs);
cdc7bbd5
XL
224
225 self.capture_state.capturing = prev_capturing;
226
227 let (mut ret, trailing) = ret?;
6a06907d 228
cdc7bbd5
XL
229 // When we're not in `capture-cfg` mode, then bail out early if:
230 // 1. Our target doesn't support tokens at all (e.g we're parsing an `NtIdent`)
231 // so there's nothing for us to do.
232 // 2. Our target already has tokens set (e.g. we've parsed something
233 // like `#[my_attr] $item`. The actual parsing code takes care of prepending
234 // any attributes to the nonterminal, so we don't need to modify the
235 // already captured tokens.
236 // Note that this check is independent of `force_collect`- if we already
237 // have tokens, or can't even store them, then there's never a need to
238 // force collection of new tokens.
239 if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
240 return Ok(ret);
6a06907d 241 }
cdc7bbd5
XL
242
243 // This is very similar to the bail out check at the start of this function.
244 // Now that we've parsed an AST node, we have more information available.
245 if matches!(force_collect, ForceCollect::No)
246 // We now have inner attributes available, so this check is more precise
247 // than `attrs.maybe_needs_tokens()` at the start of the function.
248 // As a result, we don't need to check `R::SUPPORTS_CUSTOM_INNER_ATTRS`
249 && !crate::parser::attr::maybe_needs_tokens(ret.attrs())
250 // Subtle: We call `has_cfg_or_cfg_attr` with the attrs from `ret`.
251 // This ensures that we consider inner attributes (e.g. `#![cfg]`),
252 // which require us to have tokens available
253 // We also call `has_cfg_or_cfg_attr` at the beginning of this function,
254 // but we only bail out if there's no possibility of inner attributes
255 // (!R::SUPPORTS_CUSTOM_INNER_ATTRS)
5e7ed085 256 // We only capture about `#[cfg]` or `#[cfg_attr]` in `capture_cfg`
cdc7bbd5
XL
257 // mode - during normal parsing, we don't need any special capturing
258 // for those attributes, since they're builtin.
259 && !(self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()))
260 {
261 return Ok(ret);
262 }
263
264 let mut inner_attr_replace_ranges = Vec::new();
265 // Take the captured ranges for any inner attributes that we parsed.
266 for inner_attr in ret.attrs().iter().filter(|a| a.style == ast::AttrStyle::Inner) {
267 if let Some(attr_range) = self.capture_state.inner_attr_ranges.remove(&inner_attr.id) {
268 inner_attr_replace_ranges.push(attr_range);
269 } else {
270 self.sess
271 .span_diagnostic
272 .delay_span_bug(inner_attr.span, "Missing token range for attribute");
6a06907d
XL
273 }
274 }
275
cdc7bbd5
XL
276 let replace_ranges_end = self.capture_state.replace_ranges.len();
277
278 let cursor_snapshot_next_calls = cursor_snapshot.num_next_calls;
279 let mut end_pos = self.token_cursor.num_next_calls;
280
2b03887a
FG
281 let mut captured_trailing = false;
282
cdc7bbd5
XL
283 // Capture a trailing token if requested by the callback 'f'
284 match trailing {
6a06907d 285 TrailingToken::None => {}
2b03887a
FG
286 TrailingToken::Gt => {
287 assert_eq!(self.token.kind, token::Gt);
288 }
6a06907d
XL
289 TrailingToken::Semi => {
290 assert_eq!(self.token.kind, token::Semi);
cdc7bbd5 291 end_pos += 1;
2b03887a 292 captured_trailing = true;
6a06907d
XL
293 }
294 TrailingToken::MaybeComma => {
295 if self.token.kind == token::Comma {
cdc7bbd5 296 end_pos += 1;
2b03887a 297 captured_trailing = true;
6a06907d
XL
298 }
299 }
300 }
301
cdc7bbd5
XL
302 // If we 'broke' the last token (e.g. breaking a '>>' token to two '>' tokens),
303 // then extend the range of captured tokens to include it, since the parser
f2b60f7d
FG
304 // was not actually bumped past it. When the `LazyAttrTokenStream` gets converted
305 // into an `AttrTokenStream`, we will create the proper token.
cdc7bbd5 306 if self.token_cursor.break_last_token {
2b03887a 307 assert!(!captured_trailing, "Cannot set break_last_token and have trailing token");
cdc7bbd5
XL
308 end_pos += 1;
309 }
310
311 let num_calls = end_pos - cursor_snapshot_next_calls;
312
313 // If we have no attributes, then we will never need to
314 // use any replace ranges.
315 let replace_ranges: Box<[ReplaceRange]> = if ret.attrs().is_empty() && !self.capture_cfg {
316 Box::new([])
317 } else {
318 // Grab any replace ranges that occur *inside* the current AST node.
f2b60f7d
FG
319 // We will perform the actual replacement when we convert the `LazyAttrTokenStream`
320 // to an `AttrTokenStream`.
cdc7bbd5
XL
321 let start_calls: u32 = cursor_snapshot_next_calls.try_into().unwrap();
322 self.capture_state.replace_ranges[replace_ranges_start..replace_ranges_end]
323 .iter()
324 .cloned()
f2b60f7d 325 .chain(inner_attr_replace_ranges.iter().cloned())
cdc7bbd5
XL
326 .map(|(range, tokens)| {
327 ((range.start - start_calls)..(range.end - start_calls), tokens)
328 })
329 .collect()
330 };
331
f2b60f7d 332 let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
6a06907d
XL
333 start_token,
334 num_calls,
335 cursor_snapshot,
cdc7bbd5
XL
336 break_last_token: self.token_cursor.break_last_token,
337 replace_ranges,
338 });
339
340 // If we support tokens at all
341 if let Some(target_tokens) = ret.tokens_mut() {
17df50a5 342 if target_tokens.is_none() {
cdc7bbd5
XL
343 // Store se our newly captured tokens into the AST node
344 *target_tokens = Some(tokens.clone());
17df50a5 345 }
cdc7bbd5 346 }
6a06907d 347
cdc7bbd5
XL
348 let final_attrs = ret.attrs();
349
350 // If `capture_cfg` is set and we're inside a recursive call to
351 // `collect_tokens_trailing_token`, then we need to register a replace range
352 // if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
353 // on the captured token stream.
354 if self.capture_cfg
355 && matches!(self.capture_state.capturing, Capturing::Yes)
f2b60f7d 356 && has_cfg_or_cfg_attr(final_attrs)
cdc7bbd5 357 {
f2b60f7d 358 let attr_data = AttributesData { attrs: final_attrs.iter().cloned().collect(), tokens };
cdc7bbd5
XL
359
360 // Replace the entire AST node that we just parsed, including attributes,
361 // with a `FlatToken::AttrTarget`. If this AST node is inside an item
362 // that has `#[derive]`, then this will allow us to cfg-expand this
363 // AST node.
364 let start_pos =
365 if has_outer_attrs { attrs.start_pos } else { cursor_snapshot_next_calls };
366 let new_tokens = vec![(FlatToken::AttrTarget(attr_data), Spacing::Alone)];
367
368 assert!(
369 !self.token_cursor.break_last_token,
370 "Should not have unglued last token with cfg attr"
371 );
372 let range: Range<u32> = (start_pos.try_into().unwrap())..(end_pos.try_into().unwrap());
373 self.capture_state.replace_ranges.push((range, new_tokens));
374 self.capture_state.replace_ranges.extend(inner_attr_replace_ranges);
375 }
376
377 // Only clear our `replace_ranges` when we're finished capturing entirely.
378 if matches!(self.capture_state.capturing, Capturing::No) {
379 self.capture_state.replace_ranges.clear();
380 // We don't clear `inner_attr_ranges`, as doing so repeatedly
5e7ed085 381 // had a measurable performance impact. Most inner attributes that
cdc7bbd5
XL
382 // we insert will get removed - when we drop the parser, we'll free
383 // up the memory used by any attributes that we didn't remove from the map.
384 }
6a06907d
XL
385 Ok(ret)
386 }
387}
388
389/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
390/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
391/// of open and close delims.
392fn make_token_stream(
cdc7bbd5
XL
393 mut iter: impl Iterator<Item = (FlatToken, Spacing)>,
394 break_last_token: bool,
f2b60f7d 395) -> AttrTokenStream {
6a06907d
XL
396 #[derive(Debug)]
397 struct FrameData {
04454e1e
FG
398 // This is `None` for the first frame, `Some` for all others.
399 open_delim_sp: Option<(Delimiter, Span)>,
f2b60f7d 400 inner: Vec<AttrTokenTree>,
6a06907d 401 }
04454e1e 402 let mut stack = vec![FrameData { open_delim_sp: None, inner: vec![] }];
cdc7bbd5
XL
403 let mut token_and_spacing = iter.next();
404 while let Some((token, spacing)) = token_and_spacing {
6a06907d 405 match token {
cdc7bbd5 406 FlatToken::Token(Token { kind: TokenKind::OpenDelim(delim), span }) => {
04454e1e 407 stack.push(FrameData { open_delim_sp: Some((delim, span)), inner: vec![] });
6a06907d 408 }
cdc7bbd5 409 FlatToken::Token(Token { kind: TokenKind::CloseDelim(delim), span }) => {
cdc7bbd5
XL
410 let frame_data = stack
411 .pop()
412 .unwrap_or_else(|| panic!("Token stack was empty for token: {:?}", token));
413
04454e1e 414 let (open_delim, open_sp) = frame_data.open_delim_sp.unwrap();
cdc7bbd5 415 assert_eq!(
04454e1e 416 open_delim, delim,
cdc7bbd5 417 "Mismatched open/close delims: open={:?} close={:?}",
04454e1e 418 open_delim, span
cdc7bbd5 419 );
04454e1e 420 let dspan = DelimSpan::from_pair(open_sp, span);
f2b60f7d
FG
421 let stream = AttrTokenStream::new(frame_data.inner);
422 let delimited = AttrTokenTree::Delimited(dspan, delim, stream);
6a06907d
XL
423 stack
424 .last_mut()
cdc7bbd5
XL
425 .unwrap_or_else(|| {
426 panic!("Bottom token frame is missing for token: {:?}", token)
427 })
6a06907d 428 .inner
f2b60f7d 429 .push(delimited);
6a06907d 430 }
cdc7bbd5
XL
431 FlatToken::Token(token) => stack
432 .last_mut()
433 .expect("Bottom token frame is missing!")
434 .inner
f2b60f7d 435 .push(AttrTokenTree::Token(token, spacing)),
cdc7bbd5
XL
436 FlatToken::AttrTarget(data) => stack
437 .last_mut()
438 .expect("Bottom token frame is missing!")
439 .inner
f2b60f7d 440 .push(AttrTokenTree::Attributes(data)),
cdc7bbd5 441 FlatToken::Empty => {}
6a06907d 442 }
cdc7bbd5
XL
443 token_and_spacing = iter.next();
444 }
6a06907d 445 let mut final_buf = stack.pop().expect("Missing final buf!");
cdc7bbd5 446 if break_last_token {
f2b60f7d
FG
447 let last_token = final_buf.inner.pop().unwrap();
448 if let AttrTokenTree::Token(last_token, spacing) = last_token {
cdc7bbd5
XL
449 let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
450
94222f64 451 // An 'unglued' token is always two ASCII characters
cdc7bbd5
XL
452 let mut first_span = last_token.span.shrink_to_lo();
453 first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
454
f2b60f7d
FG
455 final_buf
456 .inner
457 .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
cdc7bbd5
XL
458 } else {
459 panic!("Unexpected last token {:?}", last_token)
460 }
461 }
f2b60f7d 462 AttrTokenStream::new(final_buf.inner)
6a06907d 463}
2b03887a
FG
464
465// Some types are used a lot. Make sure they don't unintentionally get bigger.
466#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
467mod size_asserts {
468 use super::*;
469 use rustc_data_structures::static_assert_size;
470 // tidy-alphabetical-start
471 static_assert_size!(AttrWrapper, 16);
472 static_assert_size!(LazyAttrTokenStreamImpl, 144);
473 // tidy-alphabetical-end
474}