]> git.proxmox.com Git - rustc.git/blob - src/librustdoc/html/highlight.rs
New upstream version 1.53.0+dfsg1
[rustc.git] / src / librustdoc / html / highlight.rs
//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting
//! for the HTML documentation generated by rustdoc.
//!
//! Use [`render_with_highlighting`] to highlight some Rust code.
7
8 use crate::html::escape::Escape;
9
10 use std::fmt::Display;
11 use std::iter::Peekable;
12
13 use rustc_lexer::{LiteralKind, TokenKind};
14 use rustc_span::edition::Edition;
15 use rustc_span::symbol::Symbol;
16 use rustc_span::with_default_session_globals;
17
18 use super::format::Buffer;
19
20 /// Highlights `src`, returning the HTML output.
21 crate fn render_with_highlighting(
22 src: &str,
23 out: &mut Buffer,
24 class: Option<&str>,
25 playground_button: Option<&str>,
26 tooltip: Option<(Option<Edition>, &str)>,
27 edition: Edition,
28 ) {
29 debug!("highlighting: ================\n{}\n==============", src);
30 if let Some((edition_info, class)) = tooltip {
31 write!(
32 out,
33 "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
34 class,
35 if let Some(edition_info) = edition_info {
36 format!(" data-edition=\"{}\"", edition_info)
37 } else {
38 String::new()
39 },
40 );
41 }
42
43 write_header(out, class);
44 write_code(out, &src, edition);
45 write_footer(out, playground_button);
46 }
47
48 fn write_header(out: &mut Buffer, class: Option<&str>) {
49 write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", class.unwrap_or_default());
50 }
51
52 fn write_code(out: &mut Buffer, src: &str, edition: Edition) {
53 // This replace allows to fix how the code source with DOS backline characters is displayed.
54 let src = src.replace("\r\n", "\n");
55 Classifier::new(&src, edition).highlight(&mut |highlight| {
56 match highlight {
57 Highlight::Token { text, class } => string(out, Escape(text), class),
58 Highlight::EnterSpan { class } => enter_span(out, class),
59 Highlight::ExitSpan => exit_span(out),
60 };
61 });
62 }
63
64 fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
65 write!(out, "</pre>{}</div>\n", playground_button.unwrap_or_default());
66 }
67
/// How a span of text is classified. Mostly corresponds to token kinds.
///
/// Each variant maps to one CSS class through `Class::as_html`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Class {
    /// A regular (non-doc) line or block comment.
    Comment,
    /// A documentation comment.
    DocComment,
    /// An attribute, from the leading `#` to the closing `]`.
    Attribute,
    /// A reserved word.
    KeyWord,
    /// Keywords that do pointer/reference stuff (`ref`, `mut`, `&`, `*`).
    RefKeyWord,
    /// `self` or `Self`.
    Self_,
    /// An operator.
    Op,
    /// A macro invocation (the name together with its `!`).
    Macro,
    /// A `$` introducing a macro metavariable, and the metavariable's name.
    MacroNonTerminal,
    /// A string, byte string, char or byte literal.
    String,
    /// An integer or float literal.
    Number,
    /// `true` or `false`.
    Bool,
    /// Any other identifier (or identifier path).
    Ident,
    /// A lifetime.
    Lifetime,
    /// `Option` or `Result`.
    PreludeTy,
    /// `Some`, `None`, `Ok` or `Err`.
    PreludeVal,
    /// The `?` operator.
    QuestionMark,
}
90
91 impl Class {
92 /// Returns the css class expected by rustdoc for each `Class`.
93 fn as_html(self) -> &'static str {
94 match self {
95 Class::Comment => "comment",
96 Class::DocComment => "doccomment",
97 Class::Attribute => "attribute",
98 Class::KeyWord => "kw",
99 Class::RefKeyWord => "kw-2",
100 Class::Self_ => "self",
101 Class::Op => "op",
102 Class::Macro => "macro",
103 Class::MacroNonTerminal => "macro-nonterminal",
104 Class::String => "string",
105 Class::Number => "number",
106 Class::Bool => "bool-val",
107 Class::Ident => "ident",
108 Class::Lifetime => "lifetime",
109 Class::PreludeTy => "prelude-ty",
110 Class::PreludeVal => "prelude-val",
111 Class::QuestionMark => "question-mark",
112 }
113 }
114 }
115
116 enum Highlight<'a> {
117 Token { text: &'a str, class: Option<Class> },
118 EnterSpan { class: Class },
119 ExitSpan,
120 }
121
/// Iterator over the lexer tokens of a source string.
struct TokenIter<'a> {
    /// The part of the source that has not been tokenized yet.
    src: &'a str,
}
125
126 impl Iterator for TokenIter<'a> {
127 type Item = (TokenKind, &'a str);
128 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
129 if self.src.is_empty() {
130 return None;
131 }
132 let token = rustc_lexer::first_token(self.src);
133 let (text, rest) = self.src.split_at(token.len);
134 self.src = rest;
135 Some((token.kind, text))
136 }
137 }
138
139 fn get_real_ident_class(text: &str, edition: Edition) -> Class {
140 match text {
141 "ref" | "mut" => Class::RefKeyWord,
142 "self" | "Self" => Class::Self_,
143 "false" | "true" => Class::Bool,
144 _ if Symbol::intern(text).is_reserved(|| edition) => Class::KeyWord,
145 _ => Class::Ident,
146 }
147 }
148
149 /// Processes program tokens, classifying strings of text by highlighting
150 /// category (`Class`).
151 struct Classifier<'a> {
152 tokens: Peekable<TokenIter<'a>>,
153 in_attribute: bool,
154 in_macro: bool,
155 in_macro_nonterminal: bool,
156 edition: Edition,
157 byte_pos: u32,
158 src: &'a str,
159 }
160
161 impl<'a> Classifier<'a> {
162 fn new(src: &str, edition: Edition) -> Classifier<'_> {
163 let tokens = TokenIter { src }.peekable();
164 Classifier {
165 tokens,
166 in_attribute: false,
167 in_macro: false,
168 in_macro_nonterminal: false,
169 edition,
170 byte_pos: 0,
171 src,
172 }
173 }
174
175 /// Concatenate colons and idents as one when possible.
176 fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
177 let start = self.byte_pos as usize;
178 let mut pos = start;
179 let mut has_ident = false;
180 let edition = self.edition;
181
182 loop {
183 let mut nb = 0;
184 while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
185 self.tokens.next();
186 nb += 1;
187 }
188 // Ident path can start with "::" but if we already have content in the ident path,
189 // the "::" is mandatory.
190 if has_ident && nb == 0 {
191 return vec![(TokenKind::Ident, start, pos)];
192 } else if nb != 0 && nb != 2 {
193 if has_ident {
194 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
195 } else {
196 return vec![(TokenKind::Colon, pos, pos + nb)];
197 }
198 }
199
200 if let Some((Class::Ident, text)) = self.tokens.peek().map(|(token, text)| {
201 if *token == TokenKind::Ident {
202 let class = get_real_ident_class(text, edition);
203 (class, text)
204 } else {
205 // Doesn't matter which Class we put in here...
206 (Class::Comment, text)
207 }
208 }) {
209 // We only "add" the colon if there is an ident behind.
210 pos += text.len() + nb;
211 has_ident = true;
212 self.tokens.next();
213 } else if nb > 0 && has_ident {
214 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
215 } else if nb > 0 {
216 return vec![(TokenKind::Colon, pos, pos + nb)];
217 } else if has_ident {
218 return vec![(TokenKind::Ident, start, pos)];
219 } else {
220 return Vec::new();
221 }
222 }
223 }
224
225 /// Wraps the tokens iteration to ensure that the byte_pos is always correct.
226 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
227 if let Some((kind, text)) = self.tokens.next() {
228 self.byte_pos += text.len() as u32;
229 Some((kind, text))
230 } else {
231 None
232 }
233 }
234
235 /// Exhausts the `Classifier` writing the output into `sink`.
236 ///
237 /// The general structure for this method is to iterate over each token,
238 /// possibly giving it an HTML span with a class specifying what flavor of
239 /// token is used.
240 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
241 with_default_session_globals(|| {
242 loop {
243 if self
244 .tokens
245 .peek()
246 .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
247 .unwrap_or(false)
248 {
249 let tokens = self.get_full_ident_path();
250 for (token, start, end) in tokens {
251 let text = &self.src[start..end];
252 self.advance(token, text, sink);
253 self.byte_pos += text.len() as u32;
254 }
255 }
256 if let Some((token, text)) = self.next() {
257 self.advance(token, text, sink);
258 } else {
259 break;
260 }
261 }
262 })
263 }
264
265 /// Single step of highlighting. This will classify `token`, but maybe also
266 /// a couple of following ones as well.
267 fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) {
268 let lookahead = self.peek();
269 let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
270 let class = match token {
271 TokenKind::Whitespace => return no_highlight(sink),
272 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
273 if doc_style.is_some() {
274 Class::DocComment
275 } else {
276 Class::Comment
277 }
278 }
279 // Consider this as part of a macro invocation if there was a
280 // leading identifier.
281 TokenKind::Bang if self.in_macro => {
282 self.in_macro = false;
283 sink(Highlight::Token { text, class: None });
284 sink(Highlight::ExitSpan);
285 return;
286 }
287
288 // Assume that '&' or '*' is the reference or dereference operator
289 // or a reference or pointer type. Unless, of course, it looks like
290 // a logical and or a multiplication operator: `&&` or `* `.
291 TokenKind::Star => match lookahead {
292 Some(TokenKind::Whitespace) => Class::Op,
293 _ => Class::RefKeyWord,
294 },
295 TokenKind::And => match lookahead {
296 Some(TokenKind::And) => {
297 self.next();
298 sink(Highlight::Token { text: "&&", class: Some(Class::Op) });
299 return;
300 }
301 Some(TokenKind::Eq) => {
302 self.next();
303 sink(Highlight::Token { text: "&=", class: Some(Class::Op) });
304 return;
305 }
306 Some(TokenKind::Whitespace) => Class::Op,
307 _ => Class::RefKeyWord,
308 },
309
310 // Operators.
311 TokenKind::Minus
312 | TokenKind::Plus
313 | TokenKind::Or
314 | TokenKind::Slash
315 | TokenKind::Caret
316 | TokenKind::Percent
317 | TokenKind::Bang
318 | TokenKind::Eq
319 | TokenKind::Lt
320 | TokenKind::Gt => Class::Op,
321
322 // Miscellaneous, no highlighting.
323 TokenKind::Dot
324 | TokenKind::Semi
325 | TokenKind::Comma
326 | TokenKind::OpenParen
327 | TokenKind::CloseParen
328 | TokenKind::OpenBrace
329 | TokenKind::CloseBrace
330 | TokenKind::OpenBracket
331 | TokenKind::At
332 | TokenKind::Tilde
333 | TokenKind::Colon
334 | TokenKind::Unknown => return no_highlight(sink),
335
336 TokenKind::Question => Class::QuestionMark,
337
338 TokenKind::Dollar => match lookahead {
339 Some(TokenKind::Ident) => {
340 self.in_macro_nonterminal = true;
341 Class::MacroNonTerminal
342 }
343 _ => return no_highlight(sink),
344 },
345
346 // This might be the start of an attribute. We're going to want to
347 // continue highlighting it as an attribute until the ending ']' is
348 // seen, so skip out early. Down below we terminate the attribute
349 // span when we see the ']'.
350 TokenKind::Pound => {
351 match lookahead {
352 // Case 1: #![inner_attribute]
353 Some(TokenKind::Bang) => {
354 self.next();
355 if let Some(TokenKind::OpenBracket) = self.peek() {
356 self.in_attribute = true;
357 sink(Highlight::EnterSpan { class: Class::Attribute });
358 }
359 sink(Highlight::Token { text: "#", class: None });
360 sink(Highlight::Token { text: "!", class: None });
361 return;
362 }
363 // Case 2: #[outer_attribute]
364 Some(TokenKind::OpenBracket) => {
365 self.in_attribute = true;
366 sink(Highlight::EnterSpan { class: Class::Attribute });
367 }
368 _ => (),
369 }
370 return no_highlight(sink);
371 }
372 TokenKind::CloseBracket => {
373 if self.in_attribute {
374 self.in_attribute = false;
375 sink(Highlight::Token { text: "]", class: None });
376 sink(Highlight::ExitSpan);
377 return;
378 }
379 return no_highlight(sink);
380 }
381 TokenKind::Literal { kind, .. } => match kind {
382 // Text literals.
383 LiteralKind::Byte { .. }
384 | LiteralKind::Char { .. }
385 | LiteralKind::Str { .. }
386 | LiteralKind::ByteStr { .. }
387 | LiteralKind::RawStr { .. }
388 | LiteralKind::RawByteStr { .. } => Class::String,
389 // Number literals.
390 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
391 },
392 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
393 self.in_macro = true;
394 sink(Highlight::EnterSpan { class: Class::Macro });
395 sink(Highlight::Token { text, class: None });
396 return;
397 }
398 TokenKind::Ident => match get_real_ident_class(text, self.edition) {
399 Class::Ident => match text {
400 "Option" | "Result" => Class::PreludeTy,
401 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
402 _ if self.in_macro_nonterminal => {
403 self.in_macro_nonterminal = false;
404 Class::MacroNonTerminal
405 }
406 _ => Class::Ident,
407 },
408 c => c,
409 },
410 TokenKind::RawIdent => Class::Ident,
411 TokenKind::Lifetime { .. } => Class::Lifetime,
412 };
413 // Anything that didn't return above is the simple case where we the
414 // class just spans a single token, so we can use the `string` method.
415 sink(Highlight::Token { text, class: Some(class) });
416 }
417
418 fn peek(&mut self) -> Option<TokenKind> {
419 self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
420 }
421 }
422
423 /// Called when we start processing a span of text that should be highlighted.
424 /// The `Class` argument specifies how it should be highlighted.
425 fn enter_span(out: &mut Buffer, klass: Class) {
426 write!(out, "<span class=\"{}\">", klass.as_html());
427 }
428
429 /// Called at the end of a span of highlighted text.
430 fn exit_span(out: &mut Buffer) {
431 out.write_str("</span>");
432 }
433
434 /// Called for a span of text. If the text should be highlighted differently
435 /// from the surrounding text, then the `Class` argument will be a value other
436 /// than `None`.
437 ///
438 /// The following sequences of callbacks are equivalent:
439 /// ```plain
440 /// enter_span(Foo), string("text", None), exit_span()
441 /// string("text", Foo)
442 /// ```
443 /// The latter can be thought of as a shorthand for the former, which is more
444 /// flexible.
445 fn string<T: Display>(out: &mut Buffer, text: T, klass: Option<Class>) {
446 match klass {
447 None => write!(out, "{}", text),
448 Some(klass) => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text),
449 }
450 }
451
452 #[cfg(test)]
453 mod tests;