]> git.proxmox.com Git - rustc.git/blame - src/librustdoc/html/highlight.rs
New upstream version 1.46.0+dfsg1
[rustc.git] / src / librustdoc / html / highlight.rs
CommitLineData
a7813a04 1//! Basic syntax highlighting functionality.
1a4d82fc 2//!
74b04a01 3//! This module uses librustc_ast's lexer to provide token-based highlighting for
1a4d82fc 4//! the HTML documentation generated by rustdoc.
a7813a04 5//!
b7449926 6//! Use `render_with_highlighting` to highlight some Rust code.
1a4d82fc 7
9fa01778 8use crate::html::escape::Escape;
1a4d82fc 9
a7813a04 10use std::fmt::Display;
c34b1796
AL
11use std::io;
12use std::io::prelude::*;
a7813a04 13
74b04a01 14use rustc_ast::token::{self, Token};
60c5eb7d 15use rustc_parse::lexer;
74b04a01 16use rustc_session::parse::ParseSess;
dfeec247
XL
17use rustc_span::source_map::SourceMap;
18use rustc_span::symbol::{kw, sym};
19use rustc_span::{FileName, Span};
1a4d82fc 20
54a0048b 21/// Highlights `src`, returning the HTML output.
a1dfa0c6
XL
22pub fn render_with_highlighting(
23 src: &str,
24 class: Option<&str>,
74b04a01 25 playground_button: Option<&str>,
a1dfa0c6
XL
26 tooltip: Option<(&str, &str)>,
27) -> String {
1a4d82fc 28 debug!("highlighting: ================\n{}\n==============", src);
1a4d82fc 29 let mut out = Vec::new();
ea8adc8c 30 if let Some((tooltip, class)) = tooltip {
dfeec247
XL
31 write!(
32 out,
33 "<div class='information'><div class='tooltip {}'>ⓘ<span \
ea8adc8c 34 class='tooltiptext'>{}</span></div></div>",
dfeec247
XL
35 class, tooltip
36 )
37 .unwrap();
ea8adc8c 38 }
9fa01778 39
60c5eb7d 40 let sess = ParseSess::with_silent_emitter();
74b04a01 41 let sf = sess
dfeec247
XL
42 .source_map()
43 .new_source_file(FileName::Custom(String::from("rustdoc-highlighting")), src.to_owned());
44 let highlight_result = rustc_driver::catch_fatal_errors(|| {
74b04a01 45 let lexer = lexer::StringReader::new(&sess, sf, None);
416331ca
XL
46 let mut classifier = Classifier::new(lexer, sess.source_map());
47
48 let mut highlighted_source = vec![];
49 if classifier.write_source(&mut highlighted_source).is_err() {
50 Err(())
51 } else {
52 Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
53 }
dfeec247
XL
54 })
55 .unwrap_or(Err(()));
9fa01778
XL
56
57 match highlight_result {
58 Ok(highlighted_source) => {
59 write_header(class, &mut out).unwrap();
60 write!(out, "{}", highlighted_source).unwrap();
74b04a01 61 write_footer(&mut out, playground_button).unwrap();
0bf4aa26 62 }
416331ca
XL
63 Err(()) => {
64 // If errors are encountered while trying to highlight, just emit
65 // the unhighlighted source.
dfeec247 66 write!(out, "<pre><code>{}</code></pre>", Escape(src)).unwrap();
9fa01778 67 }
9e0c209e 68 }
9fa01778 69
54a0048b
SL
70 String::from_utf8_lossy(&out[..]).into_owned()
71}
72
a7813a04
XL
73/// Processes a program (nested in the internal `lexer`), classifying strings of
74/// text by highlighting category (`Class`). Calls out to a `Writer` to write
75/// each span of text in sequence.
b7449926 76struct Classifier<'a> {
a7813a04 77 lexer: lexer::StringReader<'a>,
416331ca 78 peek_token: Option<Token>,
b7449926 79 source_map: &'a SourceMap,
a7813a04
XL
80
81 // State of the classifier.
82 in_attribute: bool,
83 in_macro: bool,
84 in_macro_nonterminal: bool,
85}
86
/// How a span of text is classified. Mostly corresponds to token kinds.
///
/// Each variant maps to a CSS class name through `Class::rustdoc_class`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Class {
    /// No highlighting; the text is written without a wrapping `<span>`.
    None,
    Comment,
    DocComment,
    Attribute,
    KeyWord,
    // Keywords that do pointer/reference stuff.
    RefKeyWord,
    Self_,
    Op,
    Macro,
    MacroNonTerminal,
    String,
    Number,
    Bool,
    Ident,
    Lifetime,
    PreludeTy,
    PreludeVal,
    QuestionMark,
}
110
a7813a04 111/// Trait that controls writing the output of syntax highlighting. Users should
3b2f2976 112/// implement this trait to customize writing output.
1a4d82fc 113///
a7813a04
XL
114/// The classifier will call into the `Writer` implementation as it finds spans
115/// of text to highlight. Exactly how that text should be highlighted is up to
5bcae85e 116/// the implementation.
b7449926 117trait Writer {
a7813a04
XL
118 /// Called when we start processing a span of text that should be highlighted.
119 /// The `Class` argument specifies how it should be highlighted.
7cac9316 120 fn enter_span(&mut self, _: Class) -> io::Result<()>;
1a4d82fc 121
a7813a04
XL
122 /// Called at the end of a span of highlighted text.
123 fn exit_span(&mut self) -> io::Result<()>;
1a4d82fc 124
9fa01778 125 /// Called for a span of text. If the text should be highlighted differently from the
b7449926
XL
126 /// surrounding text, then the `Class` argument will be a value other than `None`.
127 ///
a7813a04
XL
128 /// The following sequences of callbacks are equivalent:
129 /// ```plain
130 /// enter_span(Foo), string("text", None), exit_span()
131 /// string("text", Foo)
132 /// ```
133 /// The latter can be thought of as a shorthand for the former, which is
134 /// more flexible.
dfeec247 135 fn string<T: Display>(&mut self, text: T, klass: Class) -> io::Result<()>;
a7813a04 136}
1a4d82fc 137
74b04a01 138// Implement `Writer` for anything that can be written to, this just implements
a7813a04
XL
139// the default rustdoc behaviour.
140impl<U: Write> Writer for U {
dfeec247 141 fn string<T: Display>(&mut self, text: T, klass: Class) -> io::Result<()> {
a7813a04
XL
142 match klass {
143 Class::None => write!(self, "{}", text),
8bb4bdeb 144 klass => write!(self, "<span class=\"{}\">{}</span>", klass.rustdoc_class(), text),
a7813a04
XL
145 }
146 }
147
148 fn enter_span(&mut self, klass: Class) -> io::Result<()> {
8bb4bdeb 149 write!(self, "<span class=\"{}\">", klass.rustdoc_class())
a7813a04
XL
150 }
151
152 fn exit_span(&mut self) -> io::Result<()> {
153 write!(self, "</span>")
154 }
155}
156
9fa01778
XL
/// Reasons highlighting can fail.
#[derive(Debug)]
enum HighlightError {
    /// The lexer produced a token it could not recognise.
    LexError,
    /// Writing the highlighted output failed.
    IoError(io::Error),
}

/// Lets `?` convert I/O failures from the `Writer` into `HighlightError`.
impl From<io::Error> for HighlightError {
    fn from(err: io::Error) -> Self {
        HighlightError::IoError(err)
    }
}
167
a7813a04 168impl<'a> Classifier<'a> {
b7449926 169 fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> {
a7813a04 170 Classifier {
3b2f2976 171 lexer,
416331ca 172 peek_token: None,
b7449926 173 source_map,
a7813a04
XL
174 in_attribute: false,
175 in_macro: false,
176 in_macro_nonterminal: false,
177 }
178 }
179
9fa01778 180 /// Gets the next token out of the lexer.
dc9dc135 181 fn try_next_token(&mut self) -> Result<Token, HighlightError> {
416331ca
XL
182 if let Some(token) = self.peek_token.take() {
183 return Ok(token);
184 }
185 let token = self.lexer.next_token();
186 if let token::Unknown(..) = &token.kind {
187 return Err(HighlightError::LexError);
188 }
189 Ok(token)
190 }
191
192 fn peek(&mut self) -> Result<&Token, HighlightError> {
193 if self.peek_token.is_none() {
194 let token = self.lexer.next_token();
195 if let token::Unknown(..) = &token.kind {
196 return Err(HighlightError::LexError);
197 }
198 self.peek_token = Some(token);
ea8adc8c 199 }
416331ca 200 Ok(self.peek_token.as_ref().unwrap())
ea8adc8c
XL
201 }
202
a7813a04
XL
203 /// Exhausts the `lexer` writing the output into `out`.
204 ///
205 /// The general structure for this method is to iterate over each token,
206 /// possibly giving it an HTML span with a class specifying what flavor of token
207 /// is used. All source code emission is done as slices from the source map,
208 /// not from the tokens themselves, in order to stay true to the original
209 /// source.
dfeec247 210 fn write_source<W: Writer>(&mut self, out: &mut W) -> Result<(), HighlightError> {
a7813a04 211 loop {
ea8adc8c 212 let next = self.try_next_token()?;
dc9dc135 213 if next == token::Eof {
a7813a04
XL
214 break;
215 }
216
217 self.write_token(out, next)?;
218 }
219
220 Ok(())
221 }
222
223 // Handles an individual token from the lexer.
dfeec247 224 fn write_token<W: Writer>(&mut self, out: &mut W, token: Token) -> Result<(), HighlightError> {
dc9dc135 225 let klass = match token.kind {
1a4d82fc 226 token::Shebang(s) => {
b7449926 227 out.string(Escape(&s.as_str()), Class::None)?;
a7813a04 228 return Ok(());
dfeec247 229 }
a7813a04 230
416331ca 231 token::Whitespace | token::Unknown(..) => Class::None,
a7813a04
XL
232 token::Comment => Class::Comment,
233 token::DocComment(..) => Class::DocComment,
234
32a655c1
SL
235 // If this '&' or '*' token is followed by a non-whitespace token, assume that it's the
236 // reference or dereference operator or a reference or pointer type, instead of the
237 // bit-and or multiplication operator.
ba9703b0 238 token::BinOp(token::And | token::Star) if self.peek()? != &token::Whitespace => {
dfeec247
XL
239 Class::RefKeyWord
240 }
1a4d82fc 241
a7813a04
XL
242 // Consider this as part of a macro invocation if there was a
243 // leading identifier.
244 token::Not if self.in_macro => {
245 self.in_macro = false;
246 Class::Macro
247 }
1a4d82fc 248
a7813a04 249 // Operators.
dfeec247
XL
250 token::Eq
251 | token::Lt
252 | token::Le
253 | token::EqEq
254 | token::Ne
255 | token::Ge
256 | token::Gt
257 | token::AndAnd
258 | token::OrOr
259 | token::Not
260 | token::BinOp(..)
261 | token::RArrow
262 | token::BinOpEq(..)
263 | token::FatArrow => Class::Op,
1a4d82fc 264
a7813a04 265 // Miscellaneous, no highlighting.
dfeec247
XL
266 token::Dot
267 | token::DotDot
268 | token::DotDotDot
269 | token::DotDotEq
270 | token::Comma
271 | token::Semi
272 | token::Colon
273 | token::ModSep
274 | token::LArrow
275 | token::OpenDelim(_)
ba9703b0 276 | token::CloseDelim(token::Brace | token::Paren | token::NoDelim) => Class::None,
c30ab7b3
SL
277
278 token::Question => Class::QuestionMark,
279
1a4d82fc 280 token::Dollar => {
416331ca 281 if self.peek()?.is_ident() {
a7813a04
XL
282 self.in_macro_nonterminal = true;
283 Class::MacroNonTerminal
1a4d82fc 284 } else {
a7813a04 285 Class::None
1a4d82fc
JJ
286 }
287 }
288
ea8adc8c 289 // This might be the start of an attribute. We're going to want to
1a4d82fc
JJ
290 // continue highlighting it as an attribute until the ending ']' is
291 // seen, so skip out early. Down below we terminate the attribute
292 // span when we see the ']'.
293 token::Pound => {
ea8adc8c
XL
294 // We can't be sure that our # begins an attribute (it could
295 // just be appearing in a macro) until we read either `#![` or
296 // `#[` from the input stream.
297 //
298 // We don't want to start highlighting as an attribute until
299 // we're confident there is going to be a ] coming up, as
300 // otherwise # tokens in macros highlight the rest of the input
301 // as an attribute.
302
303 // Case 1: #![inner_attribute]
416331ca 304 if self.peek()? == &token::Not {
ea8adc8c 305 self.try_next_token()?; // NOTE: consumes `!` token!
416331ca 306 if self.peek()? == &token::OpenDelim(token::Bracket) {
ea8adc8c
XL
307 self.in_attribute = true;
308 out.enter_span(Class::Attribute)?;
309 }
b7449926
XL
310 out.string("#", Class::None)?;
311 out.string("!", Class::None)?;
ea8adc8c
XL
312 return Ok(());
313 }
314
315 // Case 2: #[outer_attribute]
416331ca 316 if self.peek()? == &token::OpenDelim(token::Bracket) {
ea8adc8c
XL
317 self.in_attribute = true;
318 out.enter_span(Class::Attribute)?;
319 }
b7449926 320 out.string("#", Class::None)?;
a7813a04 321 return Ok(());
1a4d82fc
JJ
322 }
323 token::CloseDelim(token::Bracket) => {
a7813a04
XL
324 if self.in_attribute {
325 self.in_attribute = false;
b7449926 326 out.string("]", Class::None)?;
a7813a04
XL
327 out.exit_span()?;
328 return Ok(());
1a4d82fc 329 } else {
a7813a04 330 Class::None
1a4d82fc
JJ
331 }
332 }
333
dc9dc135
XL
334 token::Literal(lit) => {
335 match lit.kind {
a7813a04 336 // Text literals.
dfeec247
XL
337 token::Byte
338 | token::Char
339 | token::Err
340 | token::ByteStr
341 | token::ByteStrRaw(..)
342 | token::Str
343 | token::StrRaw(..) => Class::String,
1a4d82fc 344
a7813a04 345 // Number literals.
dc9dc135 346 token::Integer | token::Float => Class::Number,
48663c56 347
dc9dc135 348 token::Bool => panic!("literal token contains `Lit::Bool`"),
1a4d82fc
JJ
349 }
350 }
351
a7813a04 352 // Keywords are also included in the identifier set.
dfeec247
XL
353 token::Ident(name, is_raw) => match name {
354 kw::Ref | kw::Mut if !is_raw => Class::RefKeyWord,
355
356 kw::SelfLower | kw::SelfUpper => Class::Self_,
357 kw::False | kw::True if !is_raw => Class::Bool,
358
359 sym::Option | sym::Result => Class::PreludeTy,
360 sym::Some | sym::None | sym::Ok | sym::Err => Class::PreludeVal,
361
362 _ if token.is_reserved_ident() => Class::KeyWord,
363
364 _ => {
365 if self.in_macro_nonterminal {
366 self.in_macro_nonterminal = false;
367 Class::MacroNonTerminal
368 } else if self.peek()? == &token::Not {
369 self.in_macro = true;
370 Class::Macro
371 } else {
372 Class::Ident
1a4d82fc
JJ
373 }
374 }
dfeec247 375 },
1a4d82fc 376
a7813a04 377 token::Lifetime(..) => Class::Lifetime,
1a4d82fc 378
dfeec247
XL
379 token::Eof
380 | token::Interpolated(..)
381 | token::Tilde
382 | token::At
383 | token::SingleQuote => Class::None,
1a4d82fc
JJ
384 };
385
a7813a04
XL
386 // Anything that didn't return above is the simple case where we the
387 // class just spans a single token, so we can use the `string` method.
dc9dc135 388 out.string(Escape(&self.snip(token.span)), klass)?;
9fa01778
XL
389
390 Ok(())
1a4d82fc
JJ
391 }
392
b7449926 393 // Helper function to get a snippet from the source_map.
a7813a04 394 fn snip(&self, sp: Span) -> String {
b7449926 395 self.source_map.span_to_snippet(sp).unwrap()
a7813a04
XL
396 }
397}
398
399impl Class {
400 /// Returns the css class expected by rustdoc for each `Class`.
b7449926 401 fn rustdoc_class(self) -> &'static str {
a7813a04
XL
402 match self {
403 Class::None => "",
404 Class::Comment => "comment",
405 Class::DocComment => "doccomment",
406 Class::Attribute => "attribute",
407 Class::KeyWord => "kw",
408 Class::RefKeyWord => "kw-2",
409 Class::Self_ => "self",
410 Class::Op => "op",
411 Class::Macro => "macro",
412 Class::MacroNonTerminal => "macro-nonterminal",
413 Class::String => "string",
414 Class::Number => "number",
3157f602 415 Class::Bool => "bool-val",
a7813a04
XL
416 Class::Ident => "ident",
417 Class::Lifetime => "lifetime",
418 Class::PreludeTy => "prelude-ty",
419 Class::PreludeVal => "prelude-val",
dfeec247 420 Class::QuestionMark => "question-mark",
a7813a04
XL
421 }
422 }
54a0048b
SL
423}
424
/// Writes the opening wrapper for a highlighted code block: an
/// `example-wrap` `<div>` containing a `<pre class="rust ...">`, followed by a
/// newline.
///
/// `class` is appended after `rust ` in the `<pre>` class attribute without
/// escaping; `None` appends nothing (the attribute is then `"rust "`).
fn write_header(class: Option<&str>, out: &mut dyn Write) -> io::Result<()> {
    writeln!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">", class.unwrap_or(""))
}
428
74b04a01
XL
/// Writes the closing markup for a highlighted code block: `</pre>`, then
/// the optional `playground_button` HTML verbatim, then `</div>` and a
/// newline.
fn write_footer(out: &mut dyn Write, playground_button: Option<&str>) -> io::Result<()> {
    writeln!(out, "</pre>{}</div>", playground_button.unwrap_or(""))
}