]>
Commit | Line | Data |
---|---|---|
a7813a04 | 1 | // Copyright 2014-2016 The Rust Project Developers. See the COPYRIGHT |
1a4d82fc JJ |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
a7813a04 | 11 | //! Basic syntax highlighting functionality. |
1a4d82fc JJ |
12 | //! |
13 | //! This module uses libsyntax's lexer to provide token-based highlighting for | |
14 | //! the HTML documentation generated by rustdoc. | |
a7813a04 XL |
15 | //! |
16 | //! If you just want syntax highlighting for a Rust program, then you can use | |
17 | //! the `render_inner_with_highlighting` or `render_with_highlighting` | |
18 | //! functions. For more advanced use cases (if you want to supply your own css | |
19 | //! classes or control how the HTML is generated, or even generate something | |
9e0c209e | 20 | //! other than HTML), then you should implement the `Writer` trait and use a |
a7813a04 | 21 | //! `Classifier`. |
1a4d82fc JJ |
22 | |
23 | use html::escape::Escape; | |
24 | ||
a7813a04 | 25 | use std::fmt::Display; |
c34b1796 AL |
26 | use std::io; |
27 | use std::io::prelude::*; | |
a7813a04 | 28 | |
7cac9316 | 29 | use syntax::codemap::{CodeMap, FilePathMapping}; |
32a655c1 | 30 | use syntax::parse::lexer::{self, TokenAndSpan}; |
1a4d82fc JJ |
31 | use syntax::parse::token; |
32 | use syntax::parse; | |
3157f602 | 33 | use syntax_pos::Span; |
1a4d82fc | 34 | |
54a0048b | 35 | /// Highlights `src`, returning the HTML output. |
9e0c209e | 36 | pub fn render_with_highlighting(src: &str, class: Option<&str>, id: Option<&str>, |
ea8adc8c XL |
37 | extension: Option<&str>, |
38 | tooltip: Option<(&str, &str)>) -> String { | |
1a4d82fc | 39 | debug!("highlighting: ================\n{}\n==============", src); |
7cac9316 XL |
40 | let sess = parse::ParseSess::new(FilePathMapping::empty()); |
41 | let fm = sess.codemap().new_filemap("<stdin>".to_string(), src.to_string()); | |
1a4d82fc JJ |
42 | |
43 | let mut out = Vec::new(); | |
ea8adc8c | 44 | if let Some((tooltip, class)) = tooltip { |
abe05a73 | 45 | write!(out, "<div class='information'><div class='tooltip {}'>ⓘ<span \ |
ea8adc8c XL |
46 | class='tooltiptext'>{}</span></div></div>", |
47 | class, tooltip).unwrap(); | |
48 | } | |
54a0048b | 49 | write_header(class, id, &mut out).unwrap(); |
a7813a04 | 50 | |
32a655c1 | 51 | let mut classifier = Classifier::new(lexer::StringReader::new(&sess, fm), sess.codemap()); |
a7813a04 XL |
52 | if let Err(_) = classifier.write_source(&mut out) { |
53 | return format!("<pre>{}</pre>", src); | |
54 | } | |
55 | ||
9e0c209e SL |
56 | if let Some(extension) = extension { |
57 | write!(out, "{}", extension).unwrap(); | |
58 | } | |
54a0048b SL |
59 | write_footer(&mut out).unwrap(); |
60 | String::from_utf8_lossy(&out[..]).into_owned() | |
61 | } | |
62 | ||
63 | /// Highlights `src`, returning the HTML output. Returns only the inner html to | |
64 | /// be inserted into an element. C.f., `render_with_highlighting` which includes | |
65 | /// an enclosing `<pre>` block. | |
a7813a04 | 66 | pub fn render_inner_with_highlighting(src: &str) -> io::Result<String> { |
7cac9316 XL |
67 | let sess = parse::ParseSess::new(FilePathMapping::empty()); |
68 | let fm = sess.codemap().new_filemap("<stdin>".to_string(), src.to_string()); | |
54a0048b SL |
69 | |
70 | let mut out = Vec::new(); | |
32a655c1 | 71 | let mut classifier = Classifier::new(lexer::StringReader::new(&sess, fm), sess.codemap()); |
a7813a04 XL |
72 | classifier.write_source(&mut out)?; |
73 | ||
74 | Ok(String::from_utf8_lossy(&out).into_owned()) | |
75 | } | |
76 | ||
/// Processes a program (nested in the internal `lexer`), classifying strings of
/// text by highlighting category (`Class`). Calls out to a `Writer` to write
/// each span of text in sequence.
pub struct Classifier<'a> {
    // Source of the tokens that are classified.
    lexer: lexer::StringReader<'a>,
    // Used to look up the original source text covered by a token's span.
    codemap: &'a CodeMap,

    // State of the classifier.
    // True while inside an attribute span that was opened at `#[` / `#![`
    // and has not yet been closed by its `]`.
    in_attribute: bool,
    // True after an identifier that is directly followed by `!`, so that the
    // `!` itself is highlighted as part of the macro invocation.
    in_macro: bool,
    // Set when a `$` is directly followed by an identifier, so that the
    // identifier can be highlighted as a macro nonterminal.
    in_macro_nonterminal: bool,
}
89 | ||
/// How a span of text is classified. Mostly corresponds to token kinds.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Class {
    /// No highlighting (whitespace, punctuation, delimiters, ...).
    None,
    /// A regular comment.
    Comment,
    /// A documentation comment.
    DocComment,
    /// An attribute, from its `#` up to and including the closing `]`.
    Attribute,
    /// A reserved identifier (or `$crate`).
    KeyWord,
    // Keywords that do pointer/reference stuff.
    /// `ref`, `mut`, and `&` / `*` when not followed by whitespace.
    RefKeyWord,
    /// `self` or `Self`.
    Self_,
    /// An operator.
    Op,
    /// The name of an invoked macro and its `!`.
    Macro,
    /// A `$` and the identifier that directly follows it.
    MacroNonTerminal,
    /// A byte, char, string or byte-string literal.
    String,
    /// An integer or float literal.
    Number,
    /// `true` or `false`.
    Bool,
    /// Any other identifier.
    Ident,
    /// A lifetime.
    Lifetime,
    /// `Option` or `Result`.
    PreludeTy,
    /// `Some`, `None`, `Ok` or `Err`.
    PreludeVal,
    /// The `?` token.
    QuestionMark,
}
113 | ||
/// Trait that controls writing the output of syntax highlighting. Users should
/// implement this trait to customize writing output.
///
/// The classifier will call into the `Writer` implementation as it finds spans
/// of text to highlight. Exactly how that text should be highlighted is up to
/// the implementation.
///
/// Every method returns an `io::Result`; the `Classifier` in this module
/// propagates an error from any of them to its caller.
pub trait Writer {
    /// Called when we start processing a span of text that should be highlighted.
    /// The `Class` argument specifies how it should be highlighted.
    ///
    /// The `Classifier` in this module only opens spans for attributes
    /// (`Class::Attribute`); each one is later closed with `exit_span`.
    fn enter_span(&mut self, _: Class) -> io::Result<()>;

    /// Called at the end of a span of highlighted text.
    fn exit_span(&mut self) -> io::Result<()>;

    /// Called for a span of text, usually, but not always, a single token. If
    /// the string of text (`T`) does correspond to a token, then the token will
    /// also be passed. If the text should be highlighted differently from the
    /// surrounding text, then the `Class` argument will be a value other than
    /// `None`.
    /// The following sequences of callbacks are equivalent:
    /// ```plain
    ///     enter_span(Foo), string("text", None), exit_span()
    ///     string("text", Foo)
    /// ```
    /// The latter can be thought of as a shorthand for the former, which is
    /// more flexible.
    ///
    /// `tok` is `None` for the pieces of attribute syntax (`#`, `!`, `]`) that
    /// the `Classifier` writes as plain string literals.
    fn string<T: Display>(&mut self,
                          text: T,
                          klass: Class,
                          tok: Option<&TokenAndSpan>)
                          -> io::Result<()>;
}
1a4d82fc | 146 | |
a7813a04 XL |
147 | // Implement `Writer` for anthing that can be written to, this just implements |
148 | // the default rustdoc behaviour. | |
149 | impl<U: Write> Writer for U { | |
150 | fn string<T: Display>(&mut self, | |
151 | text: T, | |
152 | klass: Class, | |
153 | _tas: Option<&TokenAndSpan>) | |
154 | -> io::Result<()> { | |
155 | match klass { | |
156 | Class::None => write!(self, "{}", text), | |
8bb4bdeb | 157 | klass => write!(self, "<span class=\"{}\">{}</span>", klass.rustdoc_class(), text), |
a7813a04 XL |
158 | } |
159 | } | |
160 | ||
161 | fn enter_span(&mut self, klass: Class) -> io::Result<()> { | |
8bb4bdeb | 162 | write!(self, "<span class=\"{}\">", klass.rustdoc_class()) |
a7813a04 XL |
163 | } |
164 | ||
165 | fn exit_span(&mut self) -> io::Result<()> { | |
166 | write!(self, "</span>") | |
167 | } | |
168 | } | |
169 | ||
170 | impl<'a> Classifier<'a> { | |
171 | pub fn new(lexer: lexer::StringReader<'a>, codemap: &'a CodeMap) -> Classifier<'a> { | |
172 | Classifier { | |
3b2f2976 XL |
173 | lexer, |
174 | codemap, | |
a7813a04 XL |
175 | in_attribute: false, |
176 | in_macro: false, | |
177 | in_macro_nonterminal: false, | |
178 | } | |
179 | } | |
180 | ||
ea8adc8c XL |
181 | /// Gets the next token out of the lexer, emitting fatal errors if lexing fails. |
182 | fn try_next_token(&mut self) -> io::Result<TokenAndSpan> { | |
183 | match self.lexer.try_next_token() { | |
184 | Ok(tas) => Ok(tas), | |
185 | Err(_) => { | |
186 | self.lexer.emit_fatal_errors(); | |
187 | self.lexer.sess.span_diagnostic | |
188 | .struct_warn("Backing out of syntax highlighting") | |
189 | .note("You probably did not intend to render this as a rust code-block") | |
190 | .emit(); | |
191 | Err(io::Error::new(io::ErrorKind::Other, "")) | |
192 | } | |
193 | } | |
194 | } | |
195 | ||
a7813a04 XL |
196 | /// Exhausts the `lexer` writing the output into `out`. |
197 | /// | |
198 | /// The general structure for this method is to iterate over each token, | |
199 | /// possibly giving it an HTML span with a class specifying what flavor of token | |
200 | /// is used. All source code emission is done as slices from the source map, | |
201 | /// not from the tokens themselves, in order to stay true to the original | |
202 | /// source. | |
203 | pub fn write_source<W: Writer>(&mut self, | |
204 | out: &mut W) | |
205 | -> io::Result<()> { | |
206 | loop { | |
ea8adc8c | 207 | let next = self.try_next_token()?; |
a7813a04 XL |
208 | if next.tok == token::Eof { |
209 | break; | |
210 | } | |
211 | ||
212 | self.write_token(out, next)?; | |
213 | } | |
214 | ||
215 | Ok(()) | |
216 | } | |
217 | ||
218 | // Handles an individual token from the lexer. | |
219 | fn write_token<W: Writer>(&mut self, | |
220 | out: &mut W, | |
221 | tas: TokenAndSpan) | |
222 | -> io::Result<()> { | |
223 | let klass = match tas.tok { | |
1a4d82fc | 224 | token::Shebang(s) => { |
a7813a04 XL |
225 | out.string(Escape(&s.as_str()), Class::None, Some(&tas))?; |
226 | return Ok(()); | |
1a4d82fc | 227 | }, |
a7813a04 XL |
228 | |
229 | token::Whitespace => Class::None, | |
230 | token::Comment => Class::Comment, | |
231 | token::DocComment(..) => Class::DocComment, | |
232 | ||
32a655c1 SL |
233 | // If this '&' or '*' token is followed by a non-whitespace token, assume that it's the |
234 | // reference or dereference operator or a reference or pointer type, instead of the | |
235 | // bit-and or multiplication operator. | |
236 | token::BinOp(token::And) | token::BinOp(token::Star) | |
237 | if self.lexer.peek().tok != token::Whitespace => Class::RefKeyWord, | |
1a4d82fc | 238 | |
a7813a04 XL |
239 | // Consider this as part of a macro invocation if there was a |
240 | // leading identifier. | |
241 | token::Not if self.in_macro => { | |
242 | self.in_macro = false; | |
243 | Class::Macro | |
244 | } | |
1a4d82fc | 245 | |
a7813a04 | 246 | // Operators. |
1a4d82fc JJ |
247 | token::Eq | token::Lt | token::Le | token::EqEq | token::Ne | token::Ge | token::Gt | |
248 | token::AndAnd | token::OrOr | token::Not | token::BinOp(..) | token::RArrow | | |
a7813a04 | 249 | token::BinOpEq(..) | token::FatArrow => Class::Op, |
1a4d82fc | 250 | |
a7813a04 | 251 | // Miscellaneous, no highlighting. |
ea8adc8c XL |
252 | token::Dot | token::DotDot | token::DotDotDot | token::DotDotEq | token::Comma | |
253 | token::Semi | token::Colon | token::ModSep | token::LArrow | token::OpenDelim(_) | | |
1a4d82fc | 254 | token::CloseDelim(token::Brace) | token::CloseDelim(token::Paren) | |
c30ab7b3 SL |
255 | token::CloseDelim(token::NoDelim) => Class::None, |
256 | ||
257 | token::Question => Class::QuestionMark, | |
258 | ||
1a4d82fc | 259 | token::Dollar => { |
a7813a04 XL |
260 | if self.lexer.peek().tok.is_ident() { |
261 | self.in_macro_nonterminal = true; | |
262 | Class::MacroNonTerminal | |
1a4d82fc | 263 | } else { |
a7813a04 | 264 | Class::None |
1a4d82fc JJ |
265 | } |
266 | } | |
267 | ||
ea8adc8c | 268 | // This might be the start of an attribute. We're going to want to |
1a4d82fc JJ |
269 | // continue highlighting it as an attribute until the ending ']' is |
270 | // seen, so skip out early. Down below we terminate the attribute | |
271 | // span when we see the ']'. | |
272 | token::Pound => { | |
ea8adc8c XL |
273 | // We can't be sure that our # begins an attribute (it could |
274 | // just be appearing in a macro) until we read either `#![` or | |
275 | // `#[` from the input stream. | |
276 | // | |
277 | // We don't want to start highlighting as an attribute until | |
278 | // we're confident there is going to be a ] coming up, as | |
279 | // otherwise # tokens in macros highlight the rest of the input | |
280 | // as an attribute. | |
281 | ||
282 | // Case 1: #![inner_attribute] | |
283 | if self.lexer.peek().tok == token::Not { | |
284 | self.try_next_token()?; // NOTE: consumes `!` token! | |
285 | if self.lexer.peek().tok == token::OpenDelim(token::Bracket) { | |
286 | self.in_attribute = true; | |
287 | out.enter_span(Class::Attribute)?; | |
288 | } | |
289 | out.string("#", Class::None, None)?; | |
290 | out.string("!", Class::None, None)?; | |
291 | return Ok(()); | |
292 | } | |
293 | ||
294 | // Case 2: #[outer_attribute] | |
295 | if self.lexer.peek().tok == token::OpenDelim(token::Bracket) { | |
296 | self.in_attribute = true; | |
297 | out.enter_span(Class::Attribute)?; | |
298 | } | |
a7813a04 XL |
299 | out.string("#", Class::None, None)?; |
300 | return Ok(()); | |
1a4d82fc JJ |
301 | } |
302 | token::CloseDelim(token::Bracket) => { | |
a7813a04 XL |
303 | if self.in_attribute { |
304 | self.in_attribute = false; | |
305 | out.string("]", Class::None, None)?; | |
306 | out.exit_span()?; | |
307 | return Ok(()); | |
1a4d82fc | 308 | } else { |
a7813a04 | 309 | Class::None |
1a4d82fc JJ |
310 | } |
311 | } | |
312 | ||
313 | token::Literal(lit, _suf) => { | |
314 | match lit { | |
a7813a04 | 315 | // Text literals. |
1a4d82fc | 316 | token::Byte(..) | token::Char(..) | |
e9174d1e | 317 | token::ByteStr(..) | token::ByteStrRaw(..) | |
a7813a04 | 318 | token::Str_(..) | token::StrRaw(..) => Class::String, |
1a4d82fc | 319 | |
a7813a04 XL |
320 | // Number literals. |
321 | token::Integer(..) | token::Float(..) => Class::Number, | |
1a4d82fc JJ |
322 | } |
323 | } | |
324 | ||
a7813a04 XL |
325 | // Keywords are also included in the identifier set. |
326 | token::Ident(ident) => { | |
c1a9b12d | 327 | match &*ident.name.as_str() { |
a7813a04 | 328 | "ref" | "mut" => Class::RefKeyWord, |
1a4d82fc | 329 | |
a7813a04 XL |
330 | "self" |"Self" => Class::Self_, |
331 | "false" | "true" => Class::Bool, | |
1a4d82fc | 332 | |
a7813a04 XL |
333 | "Option" | "Result" => Class::PreludeTy, |
334 | "Some" | "None" | "Ok" | "Err" => Class::PreludeVal, | |
1a4d82fc | 335 | |
c30ab7b3 | 336 | "$crate" => Class::KeyWord, |
041b39d2 | 337 | _ if tas.tok.is_reserved_ident() => Class::KeyWord, |
c30ab7b3 | 338 | |
1a4d82fc | 339 | _ => { |
a7813a04 XL |
340 | if self.in_macro_nonterminal { |
341 | self.in_macro_nonterminal = false; | |
342 | Class::MacroNonTerminal | |
343 | } else if self.lexer.peek().tok == token::Not { | |
344 | self.in_macro = true; | |
345 | Class::Macro | |
1a4d82fc | 346 | } else { |
a7813a04 | 347 | Class::Ident |
1a4d82fc JJ |
348 | } |
349 | } | |
350 | } | |
351 | } | |
352 | ||
a7813a04 | 353 | token::Lifetime(..) => Class::Lifetime, |
1a4d82fc | 354 | |
1a4d82fc | 355 | token::Underscore | token::Eof | token::Interpolated(..) | |
ea8adc8c | 356 | token::Tilde | token::At | token::DotEq => Class::None, |
1a4d82fc JJ |
357 | }; |
358 | ||
a7813a04 XL |
359 | // Anything that didn't return above is the simple case where we the |
360 | // class just spans a single token, so we can use the `string` method. | |
361 | out.string(Escape(&self.snip(tas.sp)), klass, Some(&tas)) | |
1a4d82fc JJ |
362 | } |
363 | ||
a7813a04 XL |
364 | // Helper function to get a snippet from the codemap. |
365 | fn snip(&self, sp: Span) -> String { | |
366 | self.codemap.span_to_snippet(sp).unwrap() | |
367 | } | |
368 | } | |
369 | ||
370 | impl Class { | |
371 | /// Returns the css class expected by rustdoc for each `Class`. | |
372 | pub fn rustdoc_class(self) -> &'static str { | |
373 | match self { | |
374 | Class::None => "", | |
375 | Class::Comment => "comment", | |
376 | Class::DocComment => "doccomment", | |
377 | Class::Attribute => "attribute", | |
378 | Class::KeyWord => "kw", | |
379 | Class::RefKeyWord => "kw-2", | |
380 | Class::Self_ => "self", | |
381 | Class::Op => "op", | |
382 | Class::Macro => "macro", | |
383 | Class::MacroNonTerminal => "macro-nonterminal", | |
384 | Class::String => "string", | |
385 | Class::Number => "number", | |
3157f602 | 386 | Class::Bool => "bool-val", |
a7813a04 XL |
387 | Class::Ident => "ident", |
388 | Class::Lifetime => "lifetime", | |
389 | Class::PreludeTy => "prelude-ty", | |
390 | Class::PreludeVal => "prelude-val", | |
c30ab7b3 | 391 | Class::QuestionMark => "question-mark" |
a7813a04 XL |
392 | } |
393 | } | |
54a0048b SL |
394 | } |
395 | ||
396 | fn write_header(class: Option<&str>, | |
397 | id: Option<&str>, | |
398 | out: &mut Write) | |
399 | -> io::Result<()> { | |
400 | write!(out, "<pre ")?; | |
3157f602 XL |
401 | if let Some(id) = id { |
402 | write!(out, "id='{}' ", id)?; | |
54a0048b | 403 | } |
8bb4bdeb | 404 | write!(out, "class=\"rust {}\">\n", class.unwrap_or("")) |
54a0048b SL |
405 | } |
406 | ||
407 | fn write_footer(out: &mut Write) -> io::Result<()> { | |
1a4d82fc JJ |
408 | write!(out, "</pre>\n") |
409 | } |