]>
Commit | Line | Data |
---|---|---|
a7813a04 | 1 | //! Basic syntax highlighting functionality. |
1a4d82fc | 2 | //! |
74b04a01 | 3 | //! This module uses librustc_ast's lexer to provide token-based highlighting for |
1a4d82fc | 4 | //! the HTML documentation generated by rustdoc. |
a7813a04 | 5 | //! |
b7449926 | 6 | //! Use the `render_with_highlighting` function to highlight some Rust code. |
1a4d82fc | 7 | |
9fa01778 | 8 | use crate::html::escape::Escape; |
1a4d82fc | 9 | |
a7813a04 | 10 | use std::fmt::Display; |
c34b1796 AL |
11 | use std::io; |
12 | use std::io::prelude::*; | |
a7813a04 | 13 | |
74b04a01 | 14 | use rustc_ast::token::{self, Token}; |
60c5eb7d | 15 | use rustc_parse::lexer; |
74b04a01 | 16 | use rustc_session::parse::ParseSess; |
dfeec247 XL |
17 | use rustc_span::source_map::SourceMap; |
18 | use rustc_span::symbol::{kw, sym}; | |
19 | use rustc_span::{FileName, Span}; | |
1a4d82fc | 20 | |
54a0048b | 21 | /// Highlights `src`, returning the HTML output. |
a1dfa0c6 XL |
22 | pub fn render_with_highlighting( |
23 | src: &str, | |
24 | class: Option<&str>, | |
74b04a01 | 25 | playground_button: Option<&str>, |
a1dfa0c6 XL |
26 | tooltip: Option<(&str, &str)>, |
27 | ) -> String { | |
1a4d82fc | 28 | debug!("highlighting: ================\n{}\n==============", src); |
1a4d82fc | 29 | let mut out = Vec::new(); |
ea8adc8c | 30 | if let Some((tooltip, class)) = tooltip { |
dfeec247 XL |
31 | write!( |
32 | out, | |
33 | "<div class='information'><div class='tooltip {}'>ⓘ<span \ | |
ea8adc8c | 34 | class='tooltiptext'>{}</span></div></div>", |
dfeec247 XL |
35 | class, tooltip |
36 | ) | |
37 | .unwrap(); | |
ea8adc8c | 38 | } |
9fa01778 | 39 | |
60c5eb7d | 40 | let sess = ParseSess::with_silent_emitter(); |
74b04a01 | 41 | let sf = sess |
dfeec247 XL |
42 | .source_map() |
43 | .new_source_file(FileName::Custom(String::from("rustdoc-highlighting")), src.to_owned()); | |
44 | let highlight_result = rustc_driver::catch_fatal_errors(|| { | |
74b04a01 | 45 | let lexer = lexer::StringReader::new(&sess, sf, None); |
416331ca XL |
46 | let mut classifier = Classifier::new(lexer, sess.source_map()); |
47 | ||
48 | let mut highlighted_source = vec![]; | |
49 | if classifier.write_source(&mut highlighted_source).is_err() { | |
50 | Err(()) | |
51 | } else { | |
52 | Ok(String::from_utf8_lossy(&highlighted_source).into_owned()) | |
53 | } | |
dfeec247 XL |
54 | }) |
55 | .unwrap_or(Err(())); | |
9fa01778 XL |
56 | |
57 | match highlight_result { | |
58 | Ok(highlighted_source) => { | |
59 | write_header(class, &mut out).unwrap(); | |
60 | write!(out, "{}", highlighted_source).unwrap(); | |
74b04a01 | 61 | write_footer(&mut out, playground_button).unwrap(); |
0bf4aa26 | 62 | } |
416331ca XL |
63 | Err(()) => { |
64 | // If errors are encountered while trying to highlight, just emit | |
65 | // the unhighlighted source. | |
dfeec247 | 66 | write!(out, "<pre><code>{}</code></pre>", Escape(src)).unwrap(); |
9fa01778 | 67 | } |
9e0c209e | 68 | } |
9fa01778 | 69 | |
54a0048b SL |
70 | String::from_utf8_lossy(&out[..]).into_owned() |
71 | } | |
72 | ||
a7813a04 XL |
73 | /// Processes a program (nested in the internal `lexer`), classifying strings of |
74 | /// text by highlighting category (`Class`). Calls out to a `Writer` to write | |
75 | /// each span of text in sequence. | |
b7449926 | 76 | struct Classifier<'a> { |
a7813a04 | 77 | lexer: lexer::StringReader<'a>, |
416331ca | 78 | peek_token: Option<Token>, |
b7449926 | 79 | source_map: &'a SourceMap, |
a7813a04 XL |
80 | |
81 | // State of the classifier. | |
82 | in_attribute: bool, | |
83 | in_macro: bool, | |
84 | in_macro_nonterminal: bool, | |
85 | } | |
86 | ||
/// Highlighting category assigned to a span of text. For the most part these
/// mirror token kinds.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Class {
    /// Text that receives no highlighting.
    None,
    /// Ordinary (non-doc) comments.
    Comment,
    /// Documentation comments.
    DocComment,
    /// An attribute, from its leading `#` through the closing `]`.
    Attribute,
    /// Reserved identifiers not covered by a more specific category.
    KeyWord,
    // Keywords that do pointer/reference stuff (`ref`, `mut`), plus `&` / `*`
    // when directly followed by a non-whitespace token.
    RefKeyWord,
    /// `self` and `Self`.
    Self_,
    /// Operators such as `=`, `==`, `&&`, `->` and `=>`.
    Op,
    /// The name of a macro invocation and its trailing `!`.
    Macro,
    /// A `$name` macro non-terminal.
    MacroNonTerminal,
    /// Character, byte, string and byte-string literals.
    String,
    /// Integer and float literals.
    Number,
    /// `true` and `false`.
    Bool,
    /// Any other identifier.
    Ident,
    /// Lifetimes.
    Lifetime,
    /// `Option` and `Result`.
    PreludeTy,
    /// `Some`, `None`, `Ok` and `Err`.
    PreludeVal,
    /// The `?` token.
    QuestionMark,
}
110 | ||
a7813a04 | 111 | /// Trait that controls writing the output of syntax highlighting. Users should |
3b2f2976 | 112 | /// implement this trait to customize writing output. |
1a4d82fc | 113 | /// |
a7813a04 XL |
114 | /// The classifier will call into the `Writer` implementation as it finds spans |
115 | /// of text to highlight. Exactly how that text should be highlighted is up to | |
5bcae85e | 116 | /// the implementation. |
b7449926 | 117 | trait Writer { |
a7813a04 XL |
118 | /// Called when we start processing a span of text that should be highlighted. |
119 | /// The `Class` argument specifies how it should be highlighted. | |
7cac9316 | 120 | fn enter_span(&mut self, _: Class) -> io::Result<()>; |
1a4d82fc | 121 | |
a7813a04 XL |
122 | /// Called at the end of a span of highlighted text. |
123 | fn exit_span(&mut self) -> io::Result<()>; | |
1a4d82fc | 124 | |
9fa01778 | 125 | /// Called for a span of text. If the text should be highlighted differently from the |
b7449926 XL |
126 | /// surrounding text, then the `Class` argument will be a value other than `None`. |
127 | /// | |
a7813a04 XL |
128 | /// The following sequences of callbacks are equivalent: |
129 | /// ```plain | |
130 | /// enter_span(Foo), string("text", None), exit_span() | |
131 | /// string("text", Foo) | |
132 | /// ``` | |
133 | /// The latter can be thought of as a shorthand for the former, which is | |
134 | /// more flexible. | |
dfeec247 | 135 | fn string<T: Display>(&mut self, text: T, klass: Class) -> io::Result<()>; |
a7813a04 | 136 | } |
1a4d82fc | 137 | |
74b04a01 | 138 | // Implement `Writer` for anything that can be written to, this just implements |
a7813a04 XL |
139 | // the default rustdoc behaviour. |
140 | impl<U: Write> Writer for U { | |
dfeec247 | 141 | fn string<T: Display>(&mut self, text: T, klass: Class) -> io::Result<()> { |
a7813a04 XL |
142 | match klass { |
143 | Class::None => write!(self, "{}", text), | |
8bb4bdeb | 144 | klass => write!(self, "<span class=\"{}\">{}</span>", klass.rustdoc_class(), text), |
a7813a04 XL |
145 | } |
146 | } | |
147 | ||
148 | fn enter_span(&mut self, klass: Class) -> io::Result<()> { | |
8bb4bdeb | 149 | write!(self, "<span class=\"{}\">", klass.rustdoc_class()) |
a7813a04 XL |
150 | } |
151 | ||
152 | fn exit_span(&mut self) -> io::Result<()> { | |
153 | write!(self, "</span>") | |
154 | } | |
155 | } | |
156 | ||
9fa01778 XL |
/// Reasons highlighting can fail.
///
/// `Debug` is derived so the error can be logged and so that
/// `Result<_, HighlightError>` supports `unwrap`/`expect`.
#[derive(Debug)]
enum HighlightError {
    /// The lexer produced a `token::Unknown` token.
    LexError,
    /// An I/O error, converted from `io::Error`.
    IoError(io::Error),
}
161 | ||
162 | impl From<io::Error> for HighlightError { | |
163 | fn from(err: io::Error) -> Self { | |
164 | HighlightError::IoError(err) | |
165 | } | |
166 | } | |
167 | ||
a7813a04 | 168 | impl<'a> Classifier<'a> { |
b7449926 | 169 | fn new(lexer: lexer::StringReader<'a>, source_map: &'a SourceMap) -> Classifier<'a> { |
a7813a04 | 170 | Classifier { |
3b2f2976 | 171 | lexer, |
416331ca | 172 | peek_token: None, |
b7449926 | 173 | source_map, |
a7813a04 XL |
174 | in_attribute: false, |
175 | in_macro: false, | |
176 | in_macro_nonterminal: false, | |
177 | } | |
178 | } | |
179 | ||
9fa01778 | 180 | /// Gets the next token out of the lexer. |
dc9dc135 | 181 | fn try_next_token(&mut self) -> Result<Token, HighlightError> { |
416331ca XL |
182 | if let Some(token) = self.peek_token.take() { |
183 | return Ok(token); | |
184 | } | |
185 | let token = self.lexer.next_token(); | |
186 | if let token::Unknown(..) = &token.kind { | |
187 | return Err(HighlightError::LexError); | |
188 | } | |
189 | Ok(token) | |
190 | } | |
191 | ||
192 | fn peek(&mut self) -> Result<&Token, HighlightError> { | |
193 | if self.peek_token.is_none() { | |
194 | let token = self.lexer.next_token(); | |
195 | if let token::Unknown(..) = &token.kind { | |
196 | return Err(HighlightError::LexError); | |
197 | } | |
198 | self.peek_token = Some(token); | |
ea8adc8c | 199 | } |
416331ca | 200 | Ok(self.peek_token.as_ref().unwrap()) |
ea8adc8c XL |
201 | } |
202 | ||
a7813a04 XL |
203 | /// Exhausts the `lexer` writing the output into `out`. |
204 | /// | |
205 | /// The general structure for this method is to iterate over each token, | |
206 | /// possibly giving it an HTML span with a class specifying what flavor of token | |
207 | /// is used. All source code emission is done as slices from the source map, | |
208 | /// not from the tokens themselves, in order to stay true to the original | |
209 | /// source. | |
dfeec247 | 210 | fn write_source<W: Writer>(&mut self, out: &mut W) -> Result<(), HighlightError> { |
a7813a04 | 211 | loop { |
ea8adc8c | 212 | let next = self.try_next_token()?; |
dc9dc135 | 213 | if next == token::Eof { |
a7813a04 XL |
214 | break; |
215 | } | |
216 | ||
217 | self.write_token(out, next)?; | |
218 | } | |
219 | ||
220 | Ok(()) | |
221 | } | |
222 | ||
223 | // Handles an individual token from the lexer. | |
dfeec247 | 224 | fn write_token<W: Writer>(&mut self, out: &mut W, token: Token) -> Result<(), HighlightError> { |
dc9dc135 | 225 | let klass = match token.kind { |
1a4d82fc | 226 | token::Shebang(s) => { |
b7449926 | 227 | out.string(Escape(&s.as_str()), Class::None)?; |
a7813a04 | 228 | return Ok(()); |
dfeec247 | 229 | } |
a7813a04 | 230 | |
416331ca | 231 | token::Whitespace | token::Unknown(..) => Class::None, |
a7813a04 XL |
232 | token::Comment => Class::Comment, |
233 | token::DocComment(..) => Class::DocComment, | |
234 | ||
32a655c1 SL |
235 | // If this '&' or '*' token is followed by a non-whitespace token, assume that it's the |
236 | // reference or dereference operator or a reference or pointer type, instead of the | |
237 | // bit-and or multiplication operator. | |
ba9703b0 | 238 | token::BinOp(token::And | token::Star) if self.peek()? != &token::Whitespace => { |
dfeec247 XL |
239 | Class::RefKeyWord |
240 | } | |
1a4d82fc | 241 | |
a7813a04 XL |
242 | // Consider this as part of a macro invocation if there was a |
243 | // leading identifier. | |
244 | token::Not if self.in_macro => { | |
245 | self.in_macro = false; | |
246 | Class::Macro | |
247 | } | |
1a4d82fc | 248 | |
a7813a04 | 249 | // Operators. |
dfeec247 XL |
250 | token::Eq |
251 | | token::Lt | |
252 | | token::Le | |
253 | | token::EqEq | |
254 | | token::Ne | |
255 | | token::Ge | |
256 | | token::Gt | |
257 | | token::AndAnd | |
258 | | token::OrOr | |
259 | | token::Not | |
260 | | token::BinOp(..) | |
261 | | token::RArrow | |
262 | | token::BinOpEq(..) | |
263 | | token::FatArrow => Class::Op, | |
1a4d82fc | 264 | |
a7813a04 | 265 | // Miscellaneous, no highlighting. |
dfeec247 XL |
266 | token::Dot |
267 | | token::DotDot | |
268 | | token::DotDotDot | |
269 | | token::DotDotEq | |
270 | | token::Comma | |
271 | | token::Semi | |
272 | | token::Colon | |
273 | | token::ModSep | |
274 | | token::LArrow | |
275 | | token::OpenDelim(_) | |
ba9703b0 | 276 | | token::CloseDelim(token::Brace | token::Paren | token::NoDelim) => Class::None, |
c30ab7b3 SL |
277 | |
278 | token::Question => Class::QuestionMark, | |
279 | ||
1a4d82fc | 280 | token::Dollar => { |
416331ca | 281 | if self.peek()?.is_ident() { |
a7813a04 XL |
282 | self.in_macro_nonterminal = true; |
283 | Class::MacroNonTerminal | |
1a4d82fc | 284 | } else { |
a7813a04 | 285 | Class::None |
1a4d82fc JJ |
286 | } |
287 | } | |
288 | ||
ea8adc8c | 289 | // This might be the start of an attribute. We're going to want to |
1a4d82fc JJ |
290 | // continue highlighting it as an attribute until the ending ']' is |
291 | // seen, so skip out early. Down below we terminate the attribute | |
292 | // span when we see the ']'. | |
293 | token::Pound => { | |
ea8adc8c XL |
294 | // We can't be sure that our # begins an attribute (it could |
295 | // just be appearing in a macro) until we read either `#![` or | |
296 | // `#[` from the input stream. | |
297 | // | |
298 | // We don't want to start highlighting as an attribute until | |
299 | // we're confident there is going to be a ] coming up, as | |
300 | // otherwise # tokens in macros highlight the rest of the input | |
301 | // as an attribute. | |
302 | ||
303 | // Case 1: #![inner_attribute] | |
416331ca | 304 | if self.peek()? == &token::Not { |
ea8adc8c | 305 | self.try_next_token()?; // NOTE: consumes `!` token! |
416331ca | 306 | if self.peek()? == &token::OpenDelim(token::Bracket) { |
ea8adc8c XL |
307 | self.in_attribute = true; |
308 | out.enter_span(Class::Attribute)?; | |
309 | } | |
b7449926 XL |
310 | out.string("#", Class::None)?; |
311 | out.string("!", Class::None)?; | |
ea8adc8c XL |
312 | return Ok(()); |
313 | } | |
314 | ||
315 | // Case 2: #[outer_attribute] | |
416331ca | 316 | if self.peek()? == &token::OpenDelim(token::Bracket) { |
ea8adc8c XL |
317 | self.in_attribute = true; |
318 | out.enter_span(Class::Attribute)?; | |
319 | } | |
b7449926 | 320 | out.string("#", Class::None)?; |
a7813a04 | 321 | return Ok(()); |
1a4d82fc JJ |
322 | } |
323 | token::CloseDelim(token::Bracket) => { | |
a7813a04 XL |
324 | if self.in_attribute { |
325 | self.in_attribute = false; | |
b7449926 | 326 | out.string("]", Class::None)?; |
a7813a04 XL |
327 | out.exit_span()?; |
328 | return Ok(()); | |
1a4d82fc | 329 | } else { |
a7813a04 | 330 | Class::None |
1a4d82fc JJ |
331 | } |
332 | } | |
333 | ||
dc9dc135 XL |
334 | token::Literal(lit) => { |
335 | match lit.kind { | |
a7813a04 | 336 | // Text literals. |
dfeec247 XL |
337 | token::Byte |
338 | | token::Char | |
339 | | token::Err | |
340 | | token::ByteStr | |
341 | | token::ByteStrRaw(..) | |
342 | | token::Str | |
343 | | token::StrRaw(..) => Class::String, | |
1a4d82fc | 344 | |
a7813a04 | 345 | // Number literals. |
dc9dc135 | 346 | token::Integer | token::Float => Class::Number, |
48663c56 | 347 | |
dc9dc135 | 348 | token::Bool => panic!("literal token contains `Lit::Bool`"), |
1a4d82fc JJ |
349 | } |
350 | } | |
351 | ||
a7813a04 | 352 | // Keywords are also included in the identifier set. |
dfeec247 XL |
353 | token::Ident(name, is_raw) => match name { |
354 | kw::Ref | kw::Mut if !is_raw => Class::RefKeyWord, | |
355 | ||
356 | kw::SelfLower | kw::SelfUpper => Class::Self_, | |
357 | kw::False | kw::True if !is_raw => Class::Bool, | |
358 | ||
359 | sym::Option | sym::Result => Class::PreludeTy, | |
360 | sym::Some | sym::None | sym::Ok | sym::Err => Class::PreludeVal, | |
361 | ||
362 | _ if token.is_reserved_ident() => Class::KeyWord, | |
363 | ||
364 | _ => { | |
365 | if self.in_macro_nonterminal { | |
366 | self.in_macro_nonterminal = false; | |
367 | Class::MacroNonTerminal | |
368 | } else if self.peek()? == &token::Not { | |
369 | self.in_macro = true; | |
370 | Class::Macro | |
371 | } else { | |
372 | Class::Ident | |
1a4d82fc JJ |
373 | } |
374 | } | |
dfeec247 | 375 | }, |
1a4d82fc | 376 | |
a7813a04 | 377 | token::Lifetime(..) => Class::Lifetime, |
1a4d82fc | 378 | |
dfeec247 XL |
379 | token::Eof |
380 | | token::Interpolated(..) | |
381 | | token::Tilde | |
382 | | token::At | |
383 | | token::SingleQuote => Class::None, | |
1a4d82fc JJ |
384 | }; |
385 | ||
a7813a04 XL |
386 | // Anything that didn't return above is the simple case where we the |
387 | // class just spans a single token, so we can use the `string` method. | |
dc9dc135 | 388 | out.string(Escape(&self.snip(token.span)), klass)?; |
9fa01778 XL |
389 | |
390 | Ok(()) | |
1a4d82fc JJ |
391 | } |
392 | ||
b7449926 | 393 | // Helper function to get a snippet from the source_map. |
a7813a04 | 394 | fn snip(&self, sp: Span) -> String { |
b7449926 | 395 | self.source_map.span_to_snippet(sp).unwrap() |
a7813a04 XL |
396 | } |
397 | } | |
398 | ||
399 | impl Class { | |
400 | /// Returns the css class expected by rustdoc for each `Class`. | |
b7449926 | 401 | fn rustdoc_class(self) -> &'static str { |
a7813a04 XL |
402 | match self { |
403 | Class::None => "", | |
404 | Class::Comment => "comment", | |
405 | Class::DocComment => "doccomment", | |
406 | Class::Attribute => "attribute", | |
407 | Class::KeyWord => "kw", | |
408 | Class::RefKeyWord => "kw-2", | |
409 | Class::Self_ => "self", | |
410 | Class::Op => "op", | |
411 | Class::Macro => "macro", | |
412 | Class::MacroNonTerminal => "macro-nonterminal", | |
413 | Class::String => "string", | |
414 | Class::Number => "number", | |
3157f602 | 415 | Class::Bool => "bool-val", |
a7813a04 XL |
416 | Class::Ident => "ident", |
417 | Class::Lifetime => "lifetime", | |
418 | Class::PreludeTy => "prelude-ty", | |
419 | Class::PreludeVal => "prelude-val", | |
dfeec247 | 420 | Class::QuestionMark => "question-mark", |
a7813a04 XL |
421 | } |
422 | } | |
54a0048b SL |
423 | } |
424 | ||
/// Writes the opening `<div class="example-wrap"><pre class="rust ...">` markup
/// followed by a newline. `class`, when given, is appended after `rust ` in
/// the `<pre>` element's `class` attribute; `None` appends nothing.
fn write_header(class: Option<&str>, out: &mut dyn Write) -> io::Result<()> {
    let extra_class = class.unwrap_or_default();
    write!(out, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", extra_class)
}
428 | ||
74b04a01 XL |
/// Writes the closing `</pre>` and `</div>` tags followed by a newline, which
/// close the elements opened by `write_header`. `playground_button`, when
/// given, is written verbatim between the two closing tags; `None` writes
/// nothing there.
fn write_footer(out: &mut dyn Write, playground_button: Option<&str>) -> io::Result<()> {
    write!(out, "</pre>{}</div>\n", playground_button.unwrap_or(""))
}