]> git.proxmox.com Git - rustc.git/blame - src/librustdoc/html/highlight.rs
New upstream version 1.48.0+dfsg1
[rustc.git] / src / librustdoc / html / highlight.rs
CommitLineData
a7813a04 1//! Basic syntax highlighting functionality.
1a4d82fc 2//!
74b04a01 3//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
1a4d82fc 4//! the HTML documentation generated by rustdoc.
a7813a04 5//!
b7449926 6//! Use `render_with_highlighting` to highlight some Rust code.
1a4d82fc 7
9fa01778 8use crate::html::escape::Escape;
1a4d82fc 9
1b1a35ee
XL
10use std::fmt::{Display, Write};
11use std::iter::Peekable;
a7813a04 12
1b1a35ee
XL
13use rustc_lexer::{LiteralKind, TokenKind};
14use rustc_span::symbol::Ident;
15use rustc_span::with_default_session_globals;
1a4d82fc 16
54a0048b 17/// Highlights `src`, returning the HTML output.
a1dfa0c6 18pub fn render_with_highlighting(
3dfed10e 19 src: String,
a1dfa0c6 20 class: Option<&str>,
74b04a01 21 playground_button: Option<&str>,
a1dfa0c6
XL
22 tooltip: Option<(&str, &str)>,
23) -> String {
1a4d82fc 24 debug!("highlighting: ================\n{}\n==============", src);
1b1a35ee 25 let mut out = String::with_capacity(src.len());
ea8adc8c 26 if let Some((tooltip, class)) = tooltip {
dfeec247
XL
27 write!(
28 out,
29 "<div class='information'><div class='tooltip {}'>ⓘ<span \
1b1a35ee 30 class='tooltiptext'>{}</span></div></div>",
dfeec247
XL
31 class, tooltip
32 )
33 .unwrap();
ea8adc8c 34 }
9fa01778 35
1b1a35ee
XL
36 write_header(&mut out, class);
37 write_code(&mut out, &src);
38 write_footer(&mut out, playground_button);
3dfed10e 39
1b1a35ee
XL
40 out
41}
9fa01778 42
1b1a35ee
XL
/// Appends the opening `example-wrap` `<div>` and `<pre>` tags to `out`.
///
/// The `<pre>` element always carries the `rust` class, followed by a space and
/// the extra `class` when one is given (or nothing when it is `None`).
fn write_header(out: &mut String, class: Option<&str>) {
    let extra_class = class.unwrap_or("");
    out.push_str("<div class=\"example-wrap\"><pre class=\"rust ");
    out.push_str(extra_class);
    out.push_str("\">\n");
}
47
1b1a35ee 48fn write_code(out: &mut String, src: &str) {
2a314972
XL
49 // This replace allows to fix how the code source with DOS backline characters is displayed.
50 let src = src.replace("\r\n", "\n");
51 Classifier::new(&src).highlight(&mut |highlight| {
1b1a35ee
XL
52 match highlight {
53 Highlight::Token { text, class } => string(out, Escape(text), class),
54 Highlight::EnterSpan { class } => enter_span(out, class),
55 Highlight::ExitSpan => exit_span(out),
56 };
57 });
58}
a7813a04 59
1b1a35ee
XL
/// Appends the closing `</pre>` tag, the optional playground button markup and
/// the closing `</div>` (followed by a newline) to `out`.
fn write_footer(out: &mut String, playground_button: Option<&str>) {
    out.push_str("</pre>");
    if let Some(button) = playground_button {
        out.push_str(button);
    }
    out.push_str("</div>\n");
}
63
/// Highlighting category assigned to a span of text. For the most part the
/// categories mirror token kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Class {
    /// No highlighting; the text is emitted without a wrapping span.
    None,
    /// A regular (non-doc) comment.
    Comment,
    /// A documentation comment.
    DocComment,
    /// An attribute.
    Attribute,
    /// A reserved identifier (keyword).
    KeyWord,
    // Keywords that do pointer/reference stuff.
    RefKeyWord,
    /// `self` or `Self`.
    Self_,
    /// An operator.
    Op,
    /// A macro name and the `!` that follows it.
    Macro,
    /// A `$`-prefixed macro nonterminal.
    MacroNonTerminal,
    /// A text literal (string, char, byte and their raw variants).
    String,
    /// A numeric literal.
    Number,
    /// `true` or `false`.
    Bool,
    /// Any other identifier.
    Ident,
    /// A lifetime.
    Lifetime,
    /// `Option` or `Result`.
    PreludeTy,
    /// `Some`, `None`, `Ok` or `Err`.
    PreludeVal,
    /// The `?` token.
    QuestionMark,
}
87
1b1a35ee
XL
88impl Class {
89 /// Returns the css class expected by rustdoc for each `Class`.
90 fn as_html(self) -> &'static str {
91 match self {
92 Class::None => "",
93 Class::Comment => "comment",
94 Class::DocComment => "doccomment",
95 Class::Attribute => "attribute",
96 Class::KeyWord => "kw",
97 Class::RefKeyWord => "kw-2",
98 Class::Self_ => "self",
99 Class::Op => "op",
100 Class::Macro => "macro",
101 Class::MacroNonTerminal => "macro-nonterminal",
102 Class::String => "string",
103 Class::Number => "number",
104 Class::Bool => "bool-val",
105 Class::Ident => "ident",
106 Class::Lifetime => "lifetime",
107 Class::PreludeTy => "prelude-ty",
108 Class::PreludeVal => "prelude-val",
109 Class::QuestionMark => "question-mark",
a7813a04
XL
110 }
111 }
a7813a04
XL
112}
113
1b1a35ee
XL
114enum Highlight<'a> {
115 Token { text: &'a str, class: Class },
116 EnterSpan { class: Class },
117 ExitSpan,
9fa01778
XL
118}
119
1b1a35ee
XL
/// Iterator state for lexing a source string one token at a time.
struct TokenIter<'a> {
    /// The part of the source text that has not been tokenized yet.
    src: &'a str,
}
123
1b1a35ee
XL
124impl Iterator for TokenIter<'a> {
125 type Item = (TokenKind, &'a str);
126 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
127 if self.src.is_empty() {
128 return None;
a7813a04 129 }
1b1a35ee
XL
130 let token = rustc_lexer::first_token(self.src);
131 let (text, rest) = self.src.split_at(token.len);
132 self.src = rest;
133 Some((token.kind, text))
a7813a04 134 }
1b1a35ee 135}
a7813a04 136
1b1a35ee
XL
137/// Processes program tokens, classifying strings of text by highlighting
138/// category (`Class`).
139struct Classifier<'a> {
140 tokens: Peekable<TokenIter<'a>>,
141 in_attribute: bool,
142 in_macro: bool,
143 in_macro_nonterminal: bool,
144}
416331ca 145
1b1a35ee
XL
146impl<'a> Classifier<'a> {
147 fn new(src: &str) -> Classifier<'_> {
148 let tokens = TokenIter { src }.peekable();
149 Classifier { tokens, in_attribute: false, in_macro: false, in_macro_nonterminal: false }
ea8adc8c
XL
150 }
151
1b1a35ee 152 /// Exhausts the `Classifier` writing the output into `sink`.
a7813a04
XL
153 ///
154 /// The general structure for this method is to iterate over each token,
1b1a35ee
XL
155 /// possibly giving it an HTML span with a class specifying what flavor of
156 /// token is used.
157 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'a>)) {
158 with_default_session_globals(|| {
159 while let Some((token, text)) = self.tokens.next() {
160 self.advance(token, text, sink);
3dfed10e 161 }
1b1a35ee 162 })
a7813a04
XL
163 }
164
1b1a35ee
XL
165 /// Single step of highlighting. This will classify `token`, but maybe also
166 /// a couple of following ones as well.
167 fn advance(&mut self, token: TokenKind, text: &'a str, sink: &mut dyn FnMut(Highlight<'a>)) {
168 let lookahead = self.peek();
169 let class = match token {
170 TokenKind::Whitespace => Class::None,
171 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
172 if doc_style.is_some() {
173 Class::DocComment
174 } else {
175 Class::Comment
176 }
dfeec247 177 }
a7813a04
XL
178 // Consider this as part of a macro invocation if there was a
179 // leading identifier.
1b1a35ee 180 TokenKind::Bang if self.in_macro => {
a7813a04
XL
181 self.in_macro = false;
182 Class::Macro
183 }
1a4d82fc 184
1b1a35ee
XL
185 // Assume that '&' or '*' is the reference or dereference operator
186 // or a reference or pointer type. Unless, of course, it looks like
187 // a logical and or a multiplication operator: `&&` or `* `.
188 TokenKind::Star => match lookahead {
189 Some(TokenKind::Whitespace) => Class::Op,
190 _ => Class::RefKeyWord,
191 },
192 TokenKind::And => match lookahead {
193 Some(TokenKind::And) => {
194 let _and = self.tokens.next();
195 sink(Highlight::Token { text: "&&", class: Class::Op });
196 return;
197 }
198 Some(TokenKind::Eq) => {
199 let _eq = self.tokens.next();
200 sink(Highlight::Token { text: "&=", class: Class::Op });
201 return;
202 }
203 Some(TokenKind::Whitespace) => Class::Op,
204 _ => Class::RefKeyWord,
205 },
206
a7813a04 207 // Operators.
1b1a35ee
XL
208 TokenKind::Minus
209 | TokenKind::Plus
210 | TokenKind::Or
211 | TokenKind::Slash
212 | TokenKind::Caret
213 | TokenKind::Percent
214 | TokenKind::Bang
215 | TokenKind::Eq
216 | TokenKind::Lt
217 | TokenKind::Gt => Class::Op,
1a4d82fc 218
a7813a04 219 // Miscellaneous, no highlighting.
1b1a35ee
XL
220 TokenKind::Dot
221 | TokenKind::Semi
222 | TokenKind::Comma
223 | TokenKind::OpenParen
224 | TokenKind::CloseParen
225 | TokenKind::OpenBrace
226 | TokenKind::CloseBrace
227 | TokenKind::OpenBracket
228 | TokenKind::At
229 | TokenKind::Tilde
230 | TokenKind::Colon
231 | TokenKind::Unknown => Class::None,
232
233 TokenKind::Question => Class::QuestionMark,
234
235 TokenKind::Dollar => match lookahead {
236 Some(TokenKind::Ident) => {
a7813a04
XL
237 self.in_macro_nonterminal = true;
238 Class::MacroNonTerminal
1a4d82fc 239 }
1b1a35ee
XL
240 _ => Class::None,
241 },
1a4d82fc 242
ea8adc8c 243 // This might be the start of an attribute. We're going to want to
1a4d82fc
JJ
244 // continue highlighting it as an attribute until the ending ']' is
245 // seen, so skip out early. Down below we terminate the attribute
246 // span when we see the ']'.
1b1a35ee
XL
247 TokenKind::Pound => {
248 match lookahead {
249 // Case 1: #![inner_attribute]
250 Some(TokenKind::Bang) => {
251 let _not = self.tokens.next().unwrap();
252 if let Some(TokenKind::OpenBracket) = self.peek() {
253 self.in_attribute = true;
254 sink(Highlight::EnterSpan { class: Class::Attribute });
255 }
256 sink(Highlight::Token { text: "#", class: Class::None });
257 sink(Highlight::Token { text: "!", class: Class::None });
258 return;
259 }
260 // Case 2: #[outer_attribute]
261 Some(TokenKind::OpenBracket) => {
ea8adc8c 262 self.in_attribute = true;
1b1a35ee 263 sink(Highlight::EnterSpan { class: Class::Attribute });
ea8adc8c 264 }
1b1a35ee 265 _ => (),
ea8adc8c 266 }
1b1a35ee 267 Class::None
1a4d82fc 268 }
1b1a35ee 269 TokenKind::CloseBracket => {
a7813a04
XL
270 if self.in_attribute {
271 self.in_attribute = false;
1b1a35ee
XL
272 sink(Highlight::Token { text: "]", class: Class::None });
273 sink(Highlight::ExitSpan);
274 return;
1a4d82fc 275 }
1b1a35ee 276 Class::None
1a4d82fc 277 }
1b1a35ee
XL
278 TokenKind::Literal { kind, .. } => match kind {
279 // Text literals.
280 LiteralKind::Byte { .. }
281 | LiteralKind::Char { .. }
282 | LiteralKind::Str { .. }
283 | LiteralKind::ByteStr { .. }
284 | LiteralKind::RawStr { .. }
285 | LiteralKind::RawByteStr { .. } => Class::String,
286 // Number literals.
287 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
288 },
289 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
290 self.in_macro = true;
291 Class::Macro
1a4d82fc 292 }
1b1a35ee
XL
293 TokenKind::Ident => match text {
294 "ref" | "mut" => Class::RefKeyWord,
295 "self" | "Self" => Class::Self_,
296 "false" | "true" => Class::Bool,
297 "Option" | "Result" => Class::PreludeTy,
298 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
299 // Keywords are also included in the identifier set.
300 _ if Ident::from_str(text).is_reserved() => Class::KeyWord,
301 _ if self.in_macro_nonterminal => {
302 self.in_macro_nonterminal = false;
303 Class::MacroNonTerminal
1a4d82fc 304 }
1b1a35ee 305 _ => Class::Ident,
dfeec247 306 },
1b1a35ee
XL
307 TokenKind::RawIdent => Class::Ident,
308 TokenKind::Lifetime { .. } => Class::Lifetime,
1a4d82fc 309 };
a7813a04
XL
310 // Anything that didn't return above is the simple case where we the
311 // class just spans a single token, so we can use the `string` method.
1b1a35ee 312 sink(Highlight::Token { text, class });
1a4d82fc
JJ
313 }
314
1b1a35ee
XL
315 fn peek(&mut self) -> Option<TokenKind> {
316 self.tokens.peek().map(|(toke_kind, _text)| *toke_kind)
a7813a04
XL
317 }
318}
319
1b1a35ee
XL
320/// Called when we start processing a span of text that should be highlighted.
321/// The `Class` argument specifies how it should be highlighted.
322fn enter_span(out: &mut String, klass: Class) {
323 write!(out, "<span class=\"{}\">", klass.as_html()).unwrap()
54a0048b
SL
324}
325
1b1a35ee
XL
/// Closes a highlighted region by appending `</span>` to `out`.
fn exit_span(out: &mut String) {
    out.push_str("</span>");
}
330
1b1a35ee
XL
331/// Called for a span of text. If the text should be highlighted differently
332/// from the surrounding text, then the `Class` argument will be a value other
333/// than `None`.
334///
335/// The following sequences of callbacks are equivalent:
336/// ```plain
337/// enter_span(Foo), string("text", None), exit_span()
338/// string("text", Foo)
339/// ```
340/// The latter can be thought of as a shorthand for the former, which is more
341/// flexible.
342fn string<T: Display>(out: &mut String, text: T, klass: Class) {
343 match klass {
344 Class::None => write!(out, "{}", text).unwrap(),
345 klass => write!(out, "<span class=\"{}\">{}</span>", klass.as_html(), text).unwrap(),
346 }
1a4d82fc 347}
3dfed10e
XL
348
349#[cfg(test)]
350mod tests;