1 //! Shortcuts that span lexer/parser abstraction.
3 //! The way Rust works, the parser doesn't necessarily parse text, and you might
4 //! tokenize text without parsing it further. So, it makes sense to keep
5 //! abstract token parsing, and string tokenization as completely separate
8 //! However, often you do parse text into syntax trees and the glue code for
9 //! that needs to live somewhere. Rather than putting it to lexer or parser, we
10 //! use a separate shortcuts module for that.
16 SyntaxKind
::{self, *}
,
// Steps produced when replaying parser output over the source text; the
// `sink` callbacks throughout this file consume these.
// NOTE(review): this chunk is garbled — the original file's line numbers
// ("20", "21", ...) are fused into the code and lines are split mid-token.
// An `Exit` variant (referenced below as `StrStep::Exit`) and the enum's
// closing brace are not visible in this view.
20 pub enum StrStep
<'a
> {
// A concrete token: its syntax kind plus the source text it covers.
21 Token { kind: SyntaxKind, text: &'a str }
,
// Start of a syntax node of the given kind.
22 Enter { kind: SyntaxKind }
,
// A parse error message anchored at a byte position in the source text.
24 Error { msg: &'a str, pos: usize }
,
27 impl<'a
> LexedStr
<'a
> {
// Convert the lexed token stream into the parser's `Input`, resolving
// contextual keywords for identifiers.
// NOTE(review): interior lines are missing from this view (the non-IDENT
// token path, whatever reads/updates `was_joint`, and the tail of the
// FLOAT_NUMBER branch), so the visible body is truncated.
28 pub fn to_input(&self) -> crate::Input
{
29 let mut res
= crate::Input
::default();
// Presumably tracks token jointness for the parser; its consumers are on
// lines not visible here — TODO confirm.
30 let mut was_joint
= false;
31 for i
in 0..self.len() {
32 let kind
= self.kind(i
);
// An IDENT may really be a contextual keyword; map it, falling back to
// plain IDENT when it is an ordinary identifier.
36 if kind
== SyntaxKind
::IDENT
{
37 let token_text
= self.text(i
);
38 let contextual_kw
= SyntaxKind
::from_contextual_keyword(token_text
)
39 .unwrap_or(SyntaxKind
::IDENT
);
40 res
.push_ident(contextual_kw
);
46 // Tag the token as joint if it is float with a fractional part
47 // we use this jointness to inform the parser about what token split
48 // event to emit when we encounter a float literal in a field access
49 if kind
== SyntaxKind
::FLOAT_NUMBER
{
// A float whose text does not end in '.' has a fractional part; the body
// of this branch is cut off in this view.
50 if !self.text(i
).ends_with('
.'
) {
// Replay the parser's `Output` events over the lexed text, interleaving
// trivia (whitespace/comments) between nodes, and report everything to
// `sink` as `StrStep`s.
// NOTE(review): the `&self` receiver, return-type line, the `match event`
// opener, and several arm/loop closers are missing from this view.
62 /// NB: only valid to call with Output from Reparser/TopLevelEntry.
63 pub fn intersperse_trivia(
65 output
: &crate::Output
,
66 sink
: &mut dyn FnMut(StrStep
<'_
>),
// Builder starts in PendingEnter so the first Enter is emitted without
// trying to attach trivia to a (nonexistent) previous node.
68 let mut builder
= Builder { lexed: self, pos: 0, state: State::PendingEnter, sink }
;
70 for event
in output
.iter() {
72 Step
::Token { kind, n_input_tokens: n_raw_tokens }
=> {
73 builder
.token(kind
, n_raw_tokens
)
75 Step
::FloatSplit { ends_in_dot: has_pseudo_dot }
=> {
76 builder
.float_split(has_pseudo_dot
)
78 Step
::Enter { kind }
=> builder
.enter(kind
),
79 Step
::Exit
=> builder
.exit(),
// Errors are positioned at the text offset of the next unconsumed token.
80 Step
::Error { msg }
=> {
81 let text_pos
= builder
.lexed
.text_start(builder
.pos
);
82 (builder
.sink
)(StrStep
::Error { msg, pos: text_pos }
);
// After the event loop the builder must be holding exactly one deferred
// exit: flush remaining trivia, then emit it. Any other state is a bug.
87 match mem
::replace(&mut builder
.state
, State
::Normal
) {
88 State
::PendingExit
=> {
89 builder
.eat_trivias();
90 (builder
.sink
)(StrStep
::Exit
);
92 State
::PendingEnter
| State
::Normal
=> unreachable
!(),
// Tail comparison: whether every lexed token was consumed — presumably the
// function's return value, but the surrounding lines are missing; confirm.
96 builder
.pos
== builder
.lexed
.len()
// Walks the lexed tokens while replaying parser events, forwarding
// `StrStep`s to `sink`.
// NOTE(review): the `pos` and `state` fields (used throughout the impl
// below) are on lines not visible in this view.
100 struct Builder
<'a
, 'b
> {
// The lexed source this builder draws token kinds/text from.
101 lexed
: &'a LexedStr
<'a
>,
// Callback receiving every emitted step.
104 sink
: &'b
mut dyn FnMut(StrStep
<'_
>),
113 impl Builder
<'_
, '_
> {
// Emit a (possibly multi-raw-token) token, first flushing any deferred
// node exit.
// NOTE(review): the `State::Normal` arm and the trivia-eating line that
// precede `do_token` in this routine are missing from this view.
114 fn token(&mut self, kind
: SyntaxKind
, n_tokens
: u8) {
115 match mem
::replace(&mut self.state
, State
::Normal
) {
// PendingEnter here would mean a token before any node was opened — a bug.
116 State
::PendingEnter
=> unreachable
!(),
// A deferred exit is flushed before the token is emitted.
117 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
121 self.do_token(kind
, n_tokens
as usize);
// Emit a float literal split around its dot (for field-access syntax),
// first flushing any deferred node exit — same state discipline as
// `token` above.
// NOTE(review): the `State::Normal` arm and any lines between the match
// and the `do_float_split` call are missing from this view.
124 fn float_split(&mut self, has_pseudo_dot
: bool
) {
125 match mem
::replace(&mut self.state
, State
::Normal
) {
126 State
::PendingEnter
=> unreachable
!(),
127 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
131 self.do_float_split(has_pseudo_dot
);
// Open a new node. Leading trivia between the previous node and this one
// is partitioned: trivia "attached" to the new node (per
// `n_attached_trivias`) is emitted inside it, the rest before it.
// NOTE(review): the binding that the `.count()` expression feeds (visibly
// `n_trivias`, used below) and several closers are missing from this view.
134 fn enter(&mut self, kind
: SyntaxKind
) {
135 match mem
::replace(&mut self.state
, State
::Normal
) {
// Very first Enter: emit directly, no previous node to attach trivia to.
136 State
::PendingEnter
=> {
137 (self.sink
)(StrStep
::Enter { kind }
);
138 // No need to attach trivias to previous node: there is no
// Flush a deferred exit before opening the new node.
142 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
// Count the run of trivia tokens starting at the current position.
147 (self.pos
..self.lexed
.len()).take_while(|&it
| self.lexed
.kind(it
).is_trivia()).count();
148 let leading_trivias
= self.pos
..self.pos
+ n_trivias
;
// Walk the trivia run backwards to decide how many belong to the new node.
149 let n_attached_trivias
= n_attached_trivias(
151 leading_trivias
.rev().map(|it
| (self.lexed
.kind(it
), self.lexed
.text(it
))),
// Unattached trivia goes before the node; attached trivia inside it.
153 self.eat_n_trivias(n_trivias
- n_attached_trivias
);
154 (self.sink
)(StrStep
::Enter { kind }
);
155 self.eat_n_trivias(n_attached_trivias
);
// NOTE(review): presumably the body of `exit()` (called from
// `intersperse_trivia` above) — its signature line is missing from this
// view. Exits are deferred (state set to PendingExit) so that following
// trivia can still be attached before the node actually closes; two
// consecutive exits flush the earlier one here.
159 match mem
::replace(&mut self.state
, State
::PendingExit
) {
// An exit with no node open would be a parser bug.
160 State
::PendingEnter
=> unreachable
!(),
// Previous deferred exit is flushed; this one is now the pending exit.
161 State
::PendingExit
=> (self.sink
)(StrStep
::Exit
),
// Emit all consecutive trivia tokens from the current position.
// NOTE(review): the loop-break for the non-trivia case and the closing
// braces are missing from this view.
166 fn eat_trivias(&mut self) {
167 while self.pos
< self.lexed
.len() {
168 let kind
= self.lexed
.kind(self.pos
);
// Stop at the first non-trivia token (break is on a missing line).
169 if !kind
.is_trivia() {
172 self.do_token(kind
, 1);
// Emit exactly `n` trivia tokens, asserting each really is trivia.
// NOTE(review): the `for _ in 0..n`-style loop header wrapping this body
// is missing from this view.
176 fn eat_n_trivias(&mut self, n
: usize) {
178 let kind
= self.lexed
.kind(self.pos
);
// Caller must have counted correctly; a non-trivia token here is a bug.
179 assert
!(kind
.is_trivia());
180 self.do_token(kind
, 1);
// Emit one Token step covering `n_tokens` raw lexed tokens, advancing the
// position past them.
// NOTE(review): only the closing brace is missing from this view.
184 fn do_token(&mut self, kind
: SyntaxKind
, n_tokens
: usize) {
185 let text
= &self.lexed
.range_text(self.pos
..self.pos
+ n_tokens
);
186 self.pos
+= n_tokens
;
187 (self.sink
)(StrStep
::Token { kind, text }
);
// Split one FLOAT_NUMBER token of the form `int.int` (or `int.`) into
// NAME_REF-wrapped INT_NUMBER parts around a DOT, for field-access
// positions like `tuple.0.1`.
// NOTE(review): the match/arm structure is partially missing from this
// view (the branch selection on `has_pseudo_dot`, several closers), so
// the visible arms read as a flattened sequence.
190 fn do_float_split(&mut self, has_pseudo_dot
: bool
) {
// The float is always a single raw token.
191 let text
= &self.lexed
.range_text(self.pos
..self.pos
+ 1);
193 match text
.split_once('
.'
) {
194 Some((left
, right
)) => {
// The integer part before the dot must be non-empty.
195 assert
!(!left
.is_empty());
// Emit the left integer part wrapped in a NAME_REF node.
196 (self.sink
)(StrStep
::Enter { kind: SyntaxKind::NAME_REF }
);
197 (self.sink
)(StrStep
::Token { kind: SyntaxKind::INT_NUMBER, text: left }
);
198 (self.sink
)(StrStep
::Exit
);
200 // here we move the exit up, the original exit has been deleted in process
201 (self.sink
)(StrStep
::Exit
);
203 (self.sink
)(StrStep
::Token { kind: SyntaxKind::DOT, text: "." }
);
// Pseudo-dot case: nothing may follow the dot.
206 assert
!(right
.is_empty(), "{left}.{right}");
207 self.state
= State
::Normal
;
// Otherwise emit the right integer part as its own NAME_REF node.
209 (self.sink
)(StrStep
::Enter { kind: SyntaxKind::NAME_REF }
);
210 (self.sink
)(StrStep
::Token { kind: SyntaxKind::INT_NUMBER, text: right }
);
211 (self.sink
)(StrStep
::Exit
);
213 // the parser creates an unbalanced start node, we are required to close it here
214 self.state
= State
::PendingExit
;
// A FLOAT_NUMBER with no dot cannot reach a float-split step.
217 None
=> unreachable
!(),
// Given the trivia run preceding a node (iterated in reverse) and the
// node's kind, decide how many trailing trivia tokens should be emitted
// inside the node rather than before it.
// NOTE(review): heavily truncated in this view — the `kind` parameter
// line, the surrounding `match kind`, the counter updates, and the
// non-item fallback arm are missing.
222 fn n_attached_trivias
<'a
>(
224 trivias
: impl Iterator
<Item
= (SyntaxKind
, &'a
str)>,
// Only item-like nodes get trivia (typically doc comments) attached.
227 CONST
| ENUM
| FN
| IMPL
| MACRO_CALL
| MACRO_DEF
| MACRO_RULES
| MODULE
| RECORD_FIELD
228 | STATIC
| STRUCT
| TRAIT
| TUPLE_FIELD
| TYPE_ALIAS
| UNION
| USE
| VARIANT
=> {
230 let mut trivias
= trivias
.enumerate().peekable();
232 while let Some((i
, (kind
, text
))) = trivias
.next() {
// A blank line normally detaches the remaining trivia from the node...
234 WHITESPACE
if text
.contains("\n\n") => {
235 // we check whether the next token is a doc-comment
236 // and skip the whitespace in this case
// ...unless it is immediately preceded (in source order) by an outer doc
// comment, which stays attached.
237 if let Some((COMMENT
, peek_text
)) = trivias
.peek().map(|(_
, pair
)| pair
) {
238 if is_outer(peek_text
) {
// Whether `text` is an outer doc comment (`///` or `/**`), excluding the
// degenerate forms `////...` and `/***...` which rustdoc does not treat
// as doc comments.
// NOTE(review): the early-return body of the `if` (returning false) is
// on lines missing from this view.
259 fn is_outer(text
: &str) -> bool
{
260 if text
.starts_with("////") || text
.starts_with("/***") {
263 text
.starts_with("///") || text
.starts_with("/**")
// Whether `text` is an inner doc comment (`//!` or `/*!`).
// NOTE(review): the function's closing brace lies past the end of this
// view.
266 fn is_inner(text
: &str) -> bool
{
267 text
.starts_with("//!") || text
.starts_with("/*!")