]>
Commit | Line | Data |
---|---|---|
a7813a04 | 1 | //! Basic syntax highlighting functionality. |
1a4d82fc | 2 | //! |
74b04a01 | 3 | //! This module uses the `rustc_lexer` crate to provide token-based highlighting for |
1a4d82fc | 4 | //! the HTML documentation generated by rustdoc. |
a7813a04 | 5 | //! |
b7449926 | 6 | //! Use `render_with_highlighting` to highlight some Rust code. |
1a4d82fc | 7 | |
9fa01778 | 8 | use crate::html::escape::Escape; |
1a4d82fc | 9 | |
1b1a35ee XL |
10 | use std::fmt::{Display, Write}; |
11 | use std::iter::Peekable; | |
a7813a04 | 12 | |
1b1a35ee XL |
13 | use rustc_lexer::{LiteralKind, TokenKind}; |
14 | use rustc_span::symbol::Ident; | |
15 | use rustc_span::with_default_session_globals; | |
1a4d82fc | 16 | |
54a0048b | 17 | /// Highlights `src`, returning the HTML output. |
a1dfa0c6 | 18 | pub fn render_with_highlighting( |
3dfed10e | 19 | src: String, |
a1dfa0c6 | 20 | class: Option<&str>, |
74b04a01 | 21 | playground_button: Option<&str>, |
a1dfa0c6 XL |
22 | tooltip: Option<(&str, &str)>, |
23 | ) -> String { | |
1a4d82fc | 24 | debug!("highlighting: ================\n{}\n==============", src); |
1b1a35ee | 25 | let mut out = String::with_capacity(src.len()); |
ea8adc8c | 26 | if let Some((tooltip, class)) = tooltip { |
dfeec247 XL |
27 | write!( |
28 | out, | |
29 | "<div class='information'><div class='tooltip {}'>ⓘ<span \ | |
1b1a35ee | 30 | class='tooltiptext'>{}</span></div></div>", |
dfeec247 XL |
31 | class, tooltip |
32 | ) | |
33 | .unwrap(); | |
ea8adc8c | 34 | } |
9fa01778 | 35 | |
1b1a35ee XL |
36 | write_header(&mut out, class); |
37 | write_code(&mut out, &src); | |
38 | write_footer(&mut out, playground_button); | |
3dfed10e | 39 | |
1b1a35ee XL |
40 | out |
41 | } | |
9fa01778 | 42 | |
1b1a35ee XL |
/// Appends the opening `<div>`/`<pre>` wrapper to `out`.
///
/// `class`, when present, is added after the fixed `rust` CSS class of the
/// `<pre>` element; when absent an empty string is used instead.
fn write_header(out: &mut String, class: Option<&str>) {
    let extra_class = class.unwrap_or_default();
    write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", extra_class).unwrap();
}
47 | ||
1b1a35ee | 48 | fn write_code(out: &mut String, src: &str) { |
2a314972 XL |
49 | // This replace allows to fix how the code source with DOS backline characters is displayed. |
50 | let src = src.replace("\r\n", "\n"); | |
51 | Classifier::new(&src).highlight(&mut |highlight| { | |
1b1a35ee XL |
52 | match highlight { |
53 | Highlight::Token { text, class } => string(out, Escape(text), class), | |
54 | Highlight::EnterSpan { class } => enter_span(out, class), | |
55 | Highlight::ExitSpan => exit_span(out), | |
56 | }; | |
57 | }); | |
58 | } | |
a7813a04 | 59 | |
1b1a35ee XL |
/// Appends the closing `</pre>`/`</div>` wrapper to `out`.
///
/// `playground_button`, when present, is inserted verbatim between the two
/// closing tags; when absent nothing is inserted there.
fn write_footer(out: &mut String, playground_button: Option<&str>) {
    let button_html = playground_button.unwrap_or_default();
    write!(out, "</pre>{}</div>\n", button_html).unwrap();
}
63 | ||
/// Highlighting category assigned to a piece of source text.
///
/// The variants mostly mirror token kinds.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
    /// Text that receives no highlighting.
    None,
    /// An ordinary (non-doc) comment.
    Comment,
    /// A documentation comment.
    DocComment,
    /// An attribute span.
    Attribute,
    /// A reserved word.
    KeyWord,
    /// Keywords (and sigils) that do pointer/reference stuff.
    RefKeyWord,
    /// `self` or `Self`.
    Self_,
    /// An operator.
    Op,
    /// A macro invocation: the name and its `!`.
    Macro,
    /// A `$name` non-terminal inside a macro.
    MacroNonTerminal,
    /// A string, byte-string, char or byte literal.
    String,
    /// An integer or float literal.
    Number,
    /// `true` or `false`.
    Bool,
    /// A plain identifier.
    Ident,
    /// A lifetime.
    Lifetime,
    /// A prelude type such as `Option` or `Result`.
    PreludeTy,
    /// A prelude value such as `Some`, `None`, `Ok` or `Err`.
    PreludeVal,
    /// The `?` operator.
    QuestionMark,
}
87 | ||
1b1a35ee XL |
88 | impl Class { |
89 | /// Returns the css class expected by rustdoc for each `Class`. | |
90 | fn as_html(self) -> &'static str { | |
91 | match self { | |
92 | Class::None => "", | |
93 | Class::Comment => "comment", | |
94 | Class::DocComment => "doccomment", | |
95 | Class::Attribute => "attribute", | |
96 | Class::KeyWord => "kw", | |
97 | Class::RefKeyWord => "kw-2", | |
98 | Class::Self_ => "self", | |
99 | Class::Op => "op", | |
100 | Class::Macro => "macro", | |
101 | Class::MacroNonTerminal => "macro-nonterminal", | |
102 | Class::String => "string", | |
103 | Class::Number => "number", | |
104 | Class::Bool => "bool-val", | |
105 | Class::Ident => "ident", | |
106 | Class::Lifetime => "lifetime", | |
107 | Class::PreludeTy => "prelude-ty", | |
108 | Class::PreludeVal => "prelude-val", | |
109 | Class::QuestionMark => "question-mark", | |
a7813a04 XL |
110 | } |
111 | } | |
a7813a04 XL |
112 | } |
113 | ||
1b1a35ee XL |
114 | enum Highlight<'a> { |
115 | Token { text: &'a str, class: Class }, | |
116 | EnterSpan { class: Class }, | |
117 | ExitSpan, | |
9fa01778 XL |
118 | } |
119 | ||
1b1a35ee XL |
/// Iterator state for walking a source string token by token.
struct TokenIter<'a> {
    /// The part of the source text that has not been tokenized yet.
    src: &'a str,
}
123 | ||
1b1a35ee XL |
124 | impl Iterator for TokenIter<'a> { |
125 | type Item = (TokenKind, &'a str); | |
126 | fn next(&mut self) -> Option<(TokenKind, &'a str)> { | |
127 | if self.src.is_empty() { | |
128 | return None; | |
a7813a04 | 129 | } |
1b1a35ee XL |
130 | let token = rustc_lexer::first_token(self.src); |
131 | let (text, rest) = self.src.split_at(token.len); | |
132 | self.src = rest; | |
133 | Some((token.kind, text)) | |
a7813a04 | 134 | } |
1b1a35ee | 135 | } |
a7813a04 | 136 | |
1b1a35ee XL |
137 | /// Processes program tokens, classifying strings of text by highlighting |
138 | /// category (`Class`). | |
139 | struct Classifier<'a> { | |
140 | tokens: Peekable<TokenIter<'a>>, | |
141 | in_attribute: bool, | |
142 | in_macro: bool, | |
143 | in_macro_nonterminal: bool, | |
144 | } | |
416331ca | 145 | |
1b1a35ee XL |
146 | impl<'a> Classifier<'a> { |
147 | fn new(src: &str) -> Classifier<'_> { | |
148 | let tokens = TokenIter { src }.peekable(); | |
149 | Classifier { tokens, in_attribute: false, in_macro: false, in_macro_nonterminal: false } | |
ea8adc8c XL |
150 | } |
151 | ||
1b1a35ee | 152 | /// Exhausts the `Classifier` writing the output into `sink`. |
a7813a04 XL |
153 | /// |
154 | /// The general structure for this method is to iterate over each token, | |
1b1a35ee XL |
155 | /// possibly giving it an HTML span with a class specifying what flavor of |
156 | /// token is used. | |
157 | fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) { | |
158 | with_default_session_globals(|| { | |
159 | while let Some((token, text)) = self.tokens.next() { | |
160 | self.advance(token, text, sink); | |
3dfed10e | 161 | } |
1b1a35ee | 162 | }) |
a7813a04 XL |
163 | } |
164 | ||
1b1a35ee XL |
165 | /// Single step of highlighting. This will classify `token`, but maybe also |
166 | /// a couple of following ones as well. | |
167 | fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) { | |
168 | let lookahead = self.peek(); | |
169 | let class = match token { | |
170 | TokenKind::Whitespace => Class::None, | |
171 | TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => { | |
172 | if doc_style.is_some() { | |
173 | Class::DocComment | |
174 | } else { | |
175 | Class::Comment | |
176 | } | |
dfeec247 | 177 | } |
a7813a04 XL |
178 | // Consider this as part of a macro invocation if there was a |
179 | // leading identifier. | |
1b1a35ee | 180 | TokenKind::Bang if self.in_macro => { |
a7813a04 XL |
181 | self.in_macro = false; |
182 | Class::Macro | |
183 | } | |
1a4d82fc | 184 | |
1b1a35ee XL |
185 | // Assume that '&' or '*' is the reference or dereference operator |
186 | // or a reference or pointer type. Unless, of course, it looks like | |
187 | // a logical and or a multiplication operator: `&&` or `* `. | |
188 | TokenKind::Star => match lookahead { | |
189 | Some(TokenKind::Whitespace) => Class::Op, | |
190 | _ => Class::RefKeyWord, | |
191 | }, | |
192 | TokenKind::And => match lookahead { | |
193 | Some(TokenKind::And) => { | |
194 | let _and = self.tokens.next(); | |
195 | sink(Highlight::Token { text: "&&", class: Class::Op }); | |
196 | return; | |
197 | } | |
198 | Some(TokenKind::Eq) => { | |
199 | let _eq = self.tokens.next(); | |
200 | sink(Highlight::Token { text: "&=", class: Class::Op }); | |
201 | return; | |
202 | } | |
203 | Some(TokenKind::Whitespace) => Class::Op, | |
204 | _ => Class::RefKeyWord, | |
205 | }, | |
206 | ||
a7813a04 | 207 | // Operators. |
1b1a35ee XL |
208 | TokenKind::Minus |
209 | | TokenKind::Plus | |
210 | | TokenKind::Or | |
211 | | TokenKind::Slash | |
212 | | TokenKind::Caret | |
213 | | TokenKind::Percent | |
214 | | TokenKind::Bang | |
215 | | TokenKind::Eq | |
216 | | TokenKind::Lt | |
217 | | TokenKind::Gt => Class::Op, | |
1a4d82fc | 218 | |
a7813a04 | 219 | // Miscellaneous, no highlighting. |
1b1a35ee XL |
220 | TokenKind::Dot |
221 | | TokenKind::Semi | |
222 | | TokenKind::Comma | |
223 | | TokenKind::OpenParen | |
224 | | TokenKind::CloseParen | |
225 | | TokenKind::OpenBrace | |
226 | | TokenKind::CloseBrace | |
227 | | TokenKind::OpenBracket | |
228 | | TokenKind::At | |
229 | | TokenKind::Tilde | |
230 | | TokenKind::Colon | |
231 | | TokenKind::Unknown => Class::None, | |
232 | ||
233 | TokenKind::Question => Class::QuestionMark, | |
234 | ||
235 | TokenKind::Dollar => match lookahead { | |
236 | Some(TokenKind::Ident) => { | |
a7813a04 XL |
237 | self.in_macro_nonterminal = true; |
238 | Class::MacroNonTerminal | |
1a4d82fc | 239 | } |
1b1a35ee XL |
240 | _ => Class::None, |
241 | }, | |
1a4d82fc | 242 | |
ea8adc8c | 243 | // This might be the start of an attribute. We're going to want to |
1a4d82fc JJ |
244 | // continue highlighting it as an attribute until the ending ']' is |
245 | // seen, so skip out early. Down below we terminate the attribute | |
246 | // span when we see the ']'. | |
1b1a35ee XL |
247 | TokenKind::Pound => { |
248 | match lookahead { | |
249 | // Case 1: #![inner_attribute] | |
250 | Some(TokenKind::Bang) => { | |
251 | let _not = self.tokens.next().unwrap(); | |
252 | if let Some(TokenKind::OpenBracket) = self.peek() { | |
253 | self.in_attribute = true; | |
254 | sink(Highlight::EnterSpan { class: Class::Attribute }); | |
255 | } | |
256 | sink(Highlight::Token { text: "#", class: Class::None }); | |
257 | sink(Highlight::Token { text: "!", class: Class::None }); | |
258 | return; | |
259 | } | |
260 | // Case 2: #[outer_attribute] | |
261 | Some(TokenKind::OpenBracket) => { | |
ea8adc8c | 262 | self.in_attribute = true; |
1b1a35ee | 263 | sink(Highlight::EnterSpan { class: Class::Attribute }); |
ea8adc8c | 264 | } |
1b1a35ee | 265 | _ => (), |
ea8adc8c | 266 | } |
1b1a35ee | 267 | Class::None |
1a4d82fc | 268 | } |
1b1a35ee | 269 | TokenKind::CloseBracket => { |
a7813a04 XL |
270 | if self.in_attribute { |
271 | self.in_attribute = false; | |
1b1a35ee XL |
272 | sink(Highlight::Token { text: "]", class: Class::None }); |
273 | sink(Highlight::ExitSpan); | |
274 | return; | |
1a4d82fc | 275 | } |
1b1a35ee | 276 | Class::None |
1a4d82fc | 277 | } |
1b1a35ee XL |
278 | TokenKind::Literal { kind, .. } => match kind { |
279 | // Text literals. | |
280 | LiteralKind::Byte { .. } | |
281 | | LiteralKind::Char { .. } | |
282 | | LiteralKind::Str { .. } | |
283 | | LiteralKind::ByteStr { .. } | |
284 | | LiteralKind::RawStr { .. } | |
285 | | LiteralKind::RawByteStr { .. } => Class::String, | |
286 | // Number literals. | |
287 | LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number, | |
288 | }, | |
289 | TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => { | |
290 | self.in_macro = true; | |
291 | Class::Macro | |
1a4d82fc | 292 | } |
1b1a35ee XL |
293 | TokenKind::Ident => match text { |
294 | "ref" | "mut" => Class::RefKeyWord, | |
295 | "self" | "Self" => Class::Self_, | |
296 | "false" | "true" => Class::Bool, | |
297 | "Option" | "Result" => Class::PreludeTy, | |
298 | "Some" | "None" | "Ok" | "Err" => Class::PreludeVal, | |
299 | // Keywords are also included in the identifier set. | |
300 | _ if Ident::from_str(text).is_reserved() => Class::KeyWord, | |
301 | _ if self.in_macro_nonterminal => { | |
302 | self.in_macro_nonterminal = false; | |
303 | Class::MacroNonTerminal | |
1a4d82fc | 304 | } |
1b1a35ee | 305 | _ => Class::Ident, |
dfeec247 | 306 | }, |
1b1a35ee XL |
307 | TokenKind::RawIdent => Class::Ident, |
308 | TokenKind::Lifetime { .. } => Class::Lifetime, | |
1a4d82fc | 309 | }; |
a7813a04 XL |
310 | // Anything that didn't return above is the simple case where we the |
311 | // class just spans a single token, so we can use the `string` method. | |
1b1a35ee | 312 | sink(Highlight::Token { text, class }); |
1a4d82fc JJ |
313 | } |
314 | ||
1b1a35ee XL |
315 | fn peek(&mut self) -> Option<TokenKind> { |
316 | self.tokens.peek().map(|(toke_kind, _text)| *toke_kind) | |
a7813a04 XL |
317 | } |
318 | } | |
319 | ||
1b1a35ee XL |
320 | /// Called when we start processing a span of text that should be highlighted. |
321 | /// The `Class` argument specifies how it should be highlighted. | |
322 | fn enter_span(out: &mut String, klass: Class) { | |
323 | write!(out, "<span class=\"{}\">", klass.as_html()).unwrap() | |
54a0048b SL |
324 | } |
325 | ||
1b1a35ee XL |
/// Closes a highlighted region by appending a closing `</span>` tag to `out`.
fn exit_span(out: &mut String) {
    out.push_str("</span>");
}
330 | ||
1b1a35ee XL |
331 | /// Called for a span of text. If the text should be highlighted differently |
332 | /// from the surrounding text, then the `Class` argument will be a value other | |
333 | /// than `None`. | |
334 | /// | |
335 | /// The following sequences of callbacks are equivalent: | |
336 | /// ```plain | |
337 | /// enter_span(Foo), string("text", None), exit_span() | |
338 | /// string("text", Foo) | |
339 | /// ``` | |
340 | /// The latter can be thought of as a shorthand for the former, which is more | |
341 | /// flexible. | |
342 | fn string<T: Display>(out: &mut String, text: T, klass: Class) { | |
343 | match klass { | |
344 | Class::None => write!(out, "{}", text).unwrap(), | |
345 | klass => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text).unwrap(), | |
346 | } | |
1a4d82fc | 347 | } |
3dfed10e XL |
348 | |
349 | #[cfg(test)] | |
350 | mod tests; |