]> git.proxmox.com Git - rustc.git/blame - src/tools/rust-analyzer/crates/parser/src/shortcuts.rs
New upstream version 1.68.2+dfsg1
[rustc.git] / src / tools / rust-analyzer / crates / parser / src / shortcuts.rs
CommitLineData
064997fb
FG
1//! Shortcuts that span lexer/parser abstraction.
2//!
3//! The way Rust works, parser doesn't necessary parse text, and you might
4//! tokenize text without parsing it further. So, it makes sense to keep
5//! abstract token parsing, and string tokenization as completely separate
6//! layers.
7//!
8//! However, often you do parse text into syntax trees and the glue code for
9//! that needs to live somewhere. Rather than putting it to lexer or parser, we
10//! use a separate shortcuts module for that.
11
12use std::mem;
13
14use crate::{
15 LexedStr, Step,
16 SyntaxKind::{self, *},
17};
18
/// A single event emitted while re-assembling source text into a syntax
/// tree: `Token`/`Enter`/`Exit` mirror tree construction, `Error` carries a
/// message anchored at a byte offset in the original text.
#[derive(Debug)]
pub enum StrStep<'a> {
    /// A leaf token of `kind` covering exactly `text`.
    Token { kind: SyntaxKind, text: &'a str },
    /// Start of a branch node of `kind`.
    Enter { kind: SyntaxKind },
    /// End of the most recently entered branch node.
    Exit,
    /// A parse error `msg` at byte position `pos` in the source text.
    Error { msg: &'a str, pos: usize },
}
26
27impl<'a> LexedStr<'a> {
28 pub fn to_input(&self) -> crate::Input {
29 let mut res = crate::Input::default();
30 let mut was_joint = false;
31 for i in 0..self.len() {
32 let kind = self.kind(i);
33 if kind.is_trivia() {
34 was_joint = false
35 } else {
36 if kind == SyntaxKind::IDENT {
37 let token_text = self.text(i);
38 let contextual_kw = SyntaxKind::from_contextual_keyword(token_text)
39 .unwrap_or(SyntaxKind::IDENT);
40 res.push_ident(contextual_kw);
41 } else {
42 if was_joint {
43 res.was_joint();
44 }
45 res.push(kind);
46 }
47 was_joint = true;
48 }
49 }
50 res
51 }
52
53 /// NB: only valid to call with Output from Reparser/TopLevelEntry.
54 pub fn intersperse_trivia(
55 &self,
56 output: &crate::Output,
57 sink: &mut dyn FnMut(StrStep<'_>),
58 ) -> bool {
59 let mut builder = Builder { lexed: self, pos: 0, state: State::PendingEnter, sink };
60
61 for event in output.iter() {
62 match event {
63 Step::Token { kind, n_input_tokens: n_raw_tokens } => {
64 builder.token(kind, n_raw_tokens)
65 }
66 Step::Enter { kind } => builder.enter(kind),
67 Step::Exit => builder.exit(),
68 Step::Error { msg } => {
69 let text_pos = builder.lexed.text_start(builder.pos);
70 (builder.sink)(StrStep::Error { msg, pos: text_pos });
71 }
72 }
73 }
74
75 match mem::replace(&mut builder.state, State::Normal) {
76 State::PendingExit => {
77 builder.eat_trivias();
78 (builder.sink)(StrStep::Exit);
79 }
80 State::PendingEnter | State::Normal => unreachable!(),
81 }
82
f25598a0
FG
83 // is_eof?
84 builder.pos == builder.lexed.len()
064997fb
FG
85 }
86}
87
/// Glue between parser `Output` events and the lexed token stream: walks the
/// lexed tokens via `pos`, interleaving trivia, and forwards `StrStep` events
/// to `sink`.
struct Builder<'a, 'b> {
    lexed: &'a LexedStr<'a>,
    // Index of the next not-yet-emitted lexed token.
    pos: usize,
    state: State,
    sink: &'b mut dyn FnMut(StrStep<'_>),
}
94
/// Tracks whether an `Enter`/`Exit` event is buffered, so that trivia can be
/// placed on the correct side of the node boundary before the event is
/// actually emitted.
enum State {
    PendingEnter,
    Normal,
    PendingExit,
}
100
101impl Builder<'_, '_> {
102 fn token(&mut self, kind: SyntaxKind, n_tokens: u8) {
103 match mem::replace(&mut self.state, State::Normal) {
104 State::PendingEnter => unreachable!(),
105 State::PendingExit => (self.sink)(StrStep::Exit),
106 State::Normal => (),
107 }
108 self.eat_trivias();
109 self.do_token(kind, n_tokens as usize);
110 }
111
112 fn enter(&mut self, kind: SyntaxKind) {
113 match mem::replace(&mut self.state, State::Normal) {
114 State::PendingEnter => {
115 (self.sink)(StrStep::Enter { kind });
116 // No need to attach trivias to previous node: there is no
117 // previous node.
118 return;
119 }
120 State::PendingExit => (self.sink)(StrStep::Exit),
121 State::Normal => (),
122 }
123
124 let n_trivias =
125 (self.pos..self.lexed.len()).take_while(|&it| self.lexed.kind(it).is_trivia()).count();
126 let leading_trivias = self.pos..self.pos + n_trivias;
127 let n_attached_trivias = n_attached_trivias(
128 kind,
129 leading_trivias.rev().map(|it| (self.lexed.kind(it), self.lexed.text(it))),
130 );
131 self.eat_n_trivias(n_trivias - n_attached_trivias);
132 (self.sink)(StrStep::Enter { kind });
133 self.eat_n_trivias(n_attached_trivias);
134 }
135
136 fn exit(&mut self) {
137 match mem::replace(&mut self.state, State::PendingExit) {
138 State::PendingEnter => unreachable!(),
139 State::PendingExit => (self.sink)(StrStep::Exit),
140 State::Normal => (),
141 }
142 }
143
144 fn eat_trivias(&mut self) {
145 while self.pos < self.lexed.len() {
146 let kind = self.lexed.kind(self.pos);
147 if !kind.is_trivia() {
148 break;
149 }
150 self.do_token(kind, 1);
151 }
152 }
153
154 fn eat_n_trivias(&mut self, n: usize) {
155 for _ in 0..n {
156 let kind = self.lexed.kind(self.pos);
157 assert!(kind.is_trivia());
158 self.do_token(kind, 1);
159 }
160 }
161
162 fn do_token(&mut self, kind: SyntaxKind, n_tokens: usize) {
163 let text = &self.lexed.range_text(self.pos..self.pos + n_tokens);
164 self.pos += n_tokens;
165 (self.sink)(StrStep::Token { kind, text });
166 }
167}
168
169fn n_attached_trivias<'a>(
170 kind: SyntaxKind,
171 trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
172) -> usize {
173 match kind {
174 CONST | ENUM | FN | IMPL | MACRO_CALL | MACRO_DEF | MACRO_RULES | MODULE | RECORD_FIELD
175 | STATIC | STRUCT | TRAIT | TUPLE_FIELD | TYPE_ALIAS | UNION | USE | VARIANT => {
176 let mut res = 0;
177 let mut trivias = trivias.enumerate().peekable();
178
179 while let Some((i, (kind, text))) = trivias.next() {
180 match kind {
181 WHITESPACE if text.contains("\n\n") => {
182 // we check whether the next token is a doc-comment
183 // and skip the whitespace in this case
184 if let Some((COMMENT, peek_text)) = trivias.peek().map(|(_, pair)| pair) {
185 if is_outer(peek_text) {
186 continue;
187 }
188 }
189 break;
190 }
191 COMMENT => {
192 if is_inner(text) {
193 break;
194 }
195 res = i + 1;
196 }
197 _ => (),
198 }
199 }
200 res
201 }
202 _ => 0,
203 }
204}
205
/// Returns `true` for outer doc comments (`///` or `/**`), excluding the
/// non-doc forms `////…` and `/***…`.
fn is_outer(text: &str) -> bool {
    let line_doc = text.starts_with("///") && !text.starts_with("////");
    let block_doc = text.starts_with("/**") && !text.starts_with("/***");
    line_doc || block_doc
}
212
/// Returns `true` for inner doc comments (`//!` or `/*!`).
fn is_inner(text: &str) -> bool {
    ["//!", "/*!"].iter().any(|prefix| text.starts_with(prefix))
}