1 //! Shortcuts that span lexer/parser abstraction.
3 //! The way Rust works, the parser doesn't necessarily parse text, and you might
4 //! tokenize text without parsing it further. So, it makes sense to keep
5 //! abstract token parsing, and string tokenization as completely separate
8 //! However, often you do parse text into syntax trees and the glue code for
9 //! that needs to live somewhere. Rather than putting it to lexer or parser, we
10 //! use a separate shortcuts module for that.
16 SyntaxKind
::{self, *}
,
// Steps produced when replaying parser output over the source text; the
// `sink` callbacks throughout this file consume these.
// NOTE(review): this chunk is garbled — the original file's line numbers
// ("20", "21", ...) are fused into the code and lines are split mid-token.
// An `Exit` variant (referenced below as `StrStep::Exit`) and the enum's
// closing brace are not visible in this view.
20 pub enum StrStep
<'a
> {
// A concrete token: its syntax kind plus the source text it covers.
21 Token { kind: SyntaxKind, text: &'a str }
,
// Start of a syntax node of the given kind.
22 Enter { kind: SyntaxKind }
,
// A parse error message anchored at a byte position in the source text.
24 Error { msg: &'a str, pos: usize }
,
27 impl<'a
> LexedStr
<'a
> {
// Convert the lexed token stream into the parser's `Input`, resolving
// contextual keywords for identifiers.
// NOTE(review): interior lines are missing from this view (the non-IDENT
// token path, whatever reads/updates `was_joint`, and the tail of the
// FLOAT_NUMBER branch), so the visible body is truncated.
28 pub fn to_input(&self) -> crate::Input
{
29 let mut res
= crate::Input
::default();
// Presumably tracks token jointness for the parser; its consumers are on
// lines not visible here — TODO confirm.
30 let mut was_joint
= false;
31 for i
in 0..self.len() {
32 let kind
= self.kind(i
);
// An IDENT may really be a contextual keyword; map it, falling back to
// plain IDENT when it is an ordinary identifier.
36 if kind
== SyntaxKind
::IDENT
{
37 let token_text
= self.text(i
);
38 let contextual_kw
= SyntaxKind
::from_contextual_keyword(token_text
)
39 .unwrap_or(SyntaxKind
::IDENT
);
40 res
.push_ident(contextual_kw
);
46 // Tag the token as joint if it is float with a fractional part
47 // we use this jointness to inform the parser about what token split
48 // event to emit when we encounter a float literal in a field access
49 if kind
== SyntaxKind
::FLOAT_NUMBER
{
// A float whose text does not end in '.' has a fractional part; the body
// of this branch is cut off in this view.
50 if !self.text(i
).ends_with('
.'
) {
// Replay the parser's `Output` events over the lexed text, interleaving
// trivia (whitespace/comments) between nodes, and report everything to
// `sink` as `StrStep`s.
// NOTE(review): the `&self` receiver, return-type line, the `match event`
// opener, and several arm/loop closers are missing from this view.
62 /// NB: only valid to call with Output from Reparser/TopLevelEntry.
63 pub fn intersperse_trivia(
65 output
: &crate::Output
,
66 sink
: &mut dyn FnMut(StrStep
<'_
>),
// Builder starts in PendingEnter so the first Enter is emitted without
// trying to attach trivia to a (nonexistent) previous node.
68 let mut builder
= Builder { lexed: self, pos: 0, state: State::PendingEnter, sink }
;
70 for event
in output
.iter() {
72 Step
::Token { kind, n_input_tokens: n_raw_tokens }
=> {
73 builder
.token(kind
, n_raw_tokens
)
75 Step
::FloatSplit { ends_in_dot: has_pseudo_dot }
=> {
76 builder
.float_split(has_pseudo_dot
)
78 Step
::Enter { kind }
=> builder
.enter(kind
),
79 Step
::Exit
=> builder
.exit(),
// Errors are positioned at the text offset of the next unconsumed token.
80 Step
::Error { msg }
=> {
81 let text_pos
= builder
.lexed
.text_start(builder
.pos
);
82 (builder
.sink
)(StrStep
::Error { msg, pos: text_pos }
);
// After the event loop the builder must be holding exactly one deferred
// exit: flush remaining trivia, then emit it. Any other state is a bug.
87 match mem
::replace(&mut builder
.state
, State
::Normal
) {
88 State
::PendingExit
=> {
89 builder
.eat_trivias();
90 (builder
.sink
)(StrStep
::Exit
);
92 State
::PendingEnter
| State
::Normal
=> unreachable
!(),
// Tail comparison: whether every lexed token was consumed — presumably the
// function's return value, but the surrounding lines are missing; confirm.
96 builder
.pos
== builder
.lexed
.len()
// Walks the lexed tokens while replaying parser events, forwarding
// `StrStep`s to `sink`.
// NOTE(review): the `pos` and `state` fields (used throughout the impl
// below) are on lines not visible in this view.
100 struct Builder
<'a
, 'b
> {
// The lexed source this builder draws token kinds/text from.
101 lexed
: &'a LexedStr
<'a
>,
// Callback receiving every emitted step.
104 sink
: &'b
mut dyn FnMut(StrStep
<'_
>),
113 impl Builder
<'_
, '_
> {
// Emit a (possibly multi-raw-token) token, first flushing any deferred
// node exit.
// NOTE(review): the `State::Normal` arm and the trivia-eating line that
// precede `do_token` in this routine are missing from this view.
114 fn token(&mut self, kind
: SyntaxKind
, n_tokens
: u8) {
115 match mem
::replace(&mut self.state
, State
::Normal
) {
// PendingEnter here would mean a token before any node was opened — a bug.
116 State
::PendingEnter
=> unreachable
!(),
// A deferred exit is flushed before the token is emitted.
117 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
121 self.do_token(kind
, n_tokens
as usize);
// Emit a float literal split around its dot (for field-access syntax),
// first flushing any deferred node exit — same state discipline as
// `token` above.
// NOTE(review): the `State::Normal` arm and any lines between the match
// and the `do_float_split` call are missing from this view.
124 fn float_split(&mut self, has_pseudo_dot
: bool
) {
125 match mem
::replace(&mut self.state
, State
::Normal
) {
126 State
::PendingEnter
=> unreachable
!(),
127 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
131 self.do_float_split(has_pseudo_dot
);
// Open a new node. Leading trivia between the previous node and this one
// is partitioned: trivia "attached" to the new node (per
// `n_attached_trivias`) is emitted inside it, the rest before it.
// NOTE(review): the binding that the `.count()` expression feeds (visibly
// `n_trivias`, used below) and several closers are missing from this view.
134 fn enter(&mut self, kind
: SyntaxKind
) {
135 match mem
::replace(&mut self.state
, State
::Normal
) {
// Very first Enter: emit directly, no previous node to attach trivia to.
136 State
::PendingEnter
=> {
137 (self.sink
)(StrStep
::Enter { kind }
);
138 // No need to attach trivias to previous node: there is no
// Flush a deferred exit before opening the new node.
142 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
// Count the run of trivia tokens starting at the current position.
147 (self.pos
..self.lexed
.len()).take_while(|&it
| self.lexed
.kind(it
).is_trivia()).count();
148 let leading_trivias
= self.pos
..self.pos
+ n_trivias
;
// Walk the trivia run backwards to decide how many belong to the new node.
149 let n_attached_trivias
= n_attached_trivias(
151 leading_trivias
.rev().map(|it
| (self.lexed
.kind(it
), self.lexed
.text(it
))),
// Unattached trivia goes before the node; attached trivia inside it.
153 self.eat_n_trivias(n_trivias
- n_attached_trivias
);
154 (self.sink
)(StrStep
::Enter { kind }
);
155 self.eat_n_trivias(n_attached_trivias
);
// NOTE(review): presumably the body of `exit()` (called from
// `intersperse_trivia` above) — its signature line is missing from this
// view. Exits are deferred (state set to PendingExit) so that following
// trivia can still be attached before the node actually closes; two
// consecutive exits flush the earlier one here.
159 match mem
::replace(&mut self.state
, State
::PendingExit
) {
// An exit with no node open would be a parser bug.
160 State
::PendingEnter
=> unreachable
!(),
// Previous deferred exit is flushed; this one is now the pending exit.
161 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
// Emit all consecutive trivia tokens from the current position.
// NOTE(review): the loop-break for the non-trivia case and the closing
// braces are missing from this view.
166 fn eat_trivias(&mut self) {
167 while self.pos
< self.lexed
.len() {
168 let kind
= self.lexed
.kind(self.pos
);
// Stop at the first non-trivia token (break is on a missing line).
169 if !kind
.is_trivia() {
172 self.do_token(kind
, 1);
// Emit exactly `n` trivia tokens, asserting each really is trivia.
// NOTE(review): the `for _ in 0..n`-style loop header wrapping this body
// is missing from this view.
176 fn eat_n_trivias(&mut self, n
: usize) {
178 let kind
= self.lexed
.kind(self.pos
);
// Caller must have counted correctly; a non-trivia token here is a bug.
179 assert
!(kind
.is_trivia());
180 self.do_token(kind
, 1);
// Emit one Token step covering `n_tokens` raw lexed tokens, advancing the
// position past them.
// NOTE(review): only the closing brace is missing from this view.
184 fn do_token(&mut self, kind
: SyntaxKind
, n_tokens
: usize) {
185 let text
= &self.lexed
.range_text(self.pos
..self.pos
+ n_tokens
);
186 self.pos
+= n_tokens
;
187 (self.sink
)(StrStep
::Token { kind, text }
);
// Split one FLOAT_NUMBER token of the form `int.int` (or `int.`) into
// NAME_REF-wrapped INT_NUMBER parts around a DOT, for field-access
// positions like `tuple.0.1`.
// NOTE(review): the match/arm structure is partially missing from this
// view (the branch selection on `has_pseudo_dot`, several closers), so
// the visible arms read as a flattened sequence.
190 fn do_float_split(&mut self, has_pseudo_dot
: bool
) {
// The float is always a single raw token.
191 let text
= &self.lexed
.range_text(self.pos
..self.pos
+ 1);
193 match text
.split_once('
.'
) {
194 Some((left
, right
)) => {
// The integer part before the dot must be non-empty.
195 assert
!(!left
.is_empty());
// Emit the left integer part wrapped in a NAME_REF node.
196 (self.sink
)(StrStep
::Enter { kind: SyntaxKind::NAME_REF }
);
197 (self.sink
)(StrStep
::Token { kind: SyntaxKind::INT_NUMBER, text: left }
);
198 (self.sink
)(StrStep
::Exit
);
200 // here we move the exit up, the original exit has been deleted in process
201 (self.sink
)(StrStep
::Exit
);
203 (self.sink
)(StrStep
::Token { kind: SyntaxKind::DOT, text: "." }
);
// Pseudo-dot case: nothing may follow the dot.
206 assert
!(right
.is_empty(), "{left}.{right}");
207 self.state
= State
::Normal
;
// Otherwise emit the right integer part as its own NAME_REF node.
209 (self.sink
)(StrStep
::Enter { kind: SyntaxKind::NAME_REF }
);
210 (self.sink
)(StrStep
::Token { kind: SyntaxKind::INT_NUMBER, text: right }
);
211 (self.sink
)(StrStep
::Exit
);
213 // the parser creates an unbalanced start node, we are required to close it here
214 self.state
= State
::PendingExit
;
// A FLOAT_NUMBER with no dot cannot reach a float-split step.
217 None
=> unreachable
!(),
// Given the trivia run preceding a node (iterated in reverse) and the
// node's kind, decide how many trailing trivia tokens should be emitted
// inside the node rather than before it.
// NOTE(review): heavily truncated in this view — the `kind` parameter
// line, the surrounding `match kind`, the counter updates, and the
// non-item fallback arm are missing.
222 fn n_attached_trivias
<'a
>(
224 trivias
: impl Iterator
<Item
= (SyntaxKind
, &'a
str)>,
// Only item-like nodes get trivia (typically doc comments) attached.
227 CONST
| ENUM
| FN
| IMPL
| MACRO_CALL
| MACRO_DEF
| MACRO_RULES
| MODULE
| RECORD_FIELD
228 | STATIC
| STRUCT
| TRAIT
| TUPLE_FIELD
| TYPE_ALIAS
| UNION
| USE
| VARIANT
=> {
230 let mut trivias
= trivias
.enumerate().peekable();
232 while let Some((i
, (kind
, text
))) = trivias
.next() {
// A blank line normally detaches the remaining trivia from the node...
234 WHITESPACE
if text
.contains("\n\n") => {
235 // we check whether the next token is a doc-comment
236 // and skip the whitespace in this case
// ...unless it is immediately preceded (in source order) by an outer doc
// comment, which stays attached.
237 if let Some((COMMENT
, peek_text
)) = trivias
.peek().map(|(_
, pair
)| pair
) {
238 if is_outer(peek_text
) {
// Whether `text` is an outer doc comment (`///` or `/**`), excluding the
// degenerate forms `////...` and `/***...` which rustdoc does not treat
// as doc comments.
// NOTE(review): the early-return body of the `if` (returning false) is
// on lines missing from this view.
259 fn is_outer(text
: &str) -> bool
{
260 if text
.starts_with("////") || text
.starts_with("/***") {
263 text
.starts_with("///") || text
.starts_with("/**")
// Whether `text` is an inner doc comment (`//!` or `/*!`).
// NOTE(review): the function's closing brace lies past the end of this
// view.
266 fn is_inner(text
: &str) -> bool
{
267 text
.starts_with("//!") || text
.starts_with("/*!")