// src/librustdoc/html/highlight.rs — Rust 1.3.0 (via the git.proxmox.com rustc.git mirror)
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Basic html highlighting functionality
//!
//! This module uses libsyntax's lexer to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
16 use html::escape::Escape;
17
18 use std::io;
19 use std::io::prelude::*;
20 use syntax::parse::lexer;
21 use syntax::parse::token;
22 use syntax::parse;
23
24 /// Highlights some source code, returning the HTML output.
25 pub fn highlight(src: &str, class: Option<&str>, id: Option<&str>) -> String {
26 debug!("highlighting: ================\n{}\n==============", src);
27 let sess = parse::ParseSess::new();
28 let fm = sess.codemap().new_filemap("<stdin>".to_string(), src.to_string());
29
30 let mut out = Vec::new();
31 doit(&sess,
32 lexer::StringReader::new(&sess.span_diagnostic, fm),
33 class,
34 id,
35 &mut out).unwrap();
36 String::from_utf8_lossy(&out[..]).into_owned()
37 }
38
39 /// Exhausts the `lexer` writing the output into `out`.
40 ///
41 /// The general structure for this method is to iterate over each token,
42 /// possibly giving it an HTML span with a class specifying what flavor of token
43 /// it's used. All source code emission is done as slices from the source map,
44 /// not from the tokens themselves, in order to stay true to the original
45 /// source.
46 fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
47 class: Option<&str>, id: Option<&str>,
48 out: &mut Write) -> io::Result<()> {
49 use syntax::parse::lexer::Reader;
50
51 try!(write!(out, "<pre "));
52 match id {
53 Some(id) => try!(write!(out, "id='{}' ", id)),
54 None => {}
55 }
56 try!(write!(out, "class='rust {}'>\n", class.unwrap_or("")));
57 let mut is_attribute = false;
58 let mut is_macro = false;
59 let mut is_macro_nonterminal = false;
60 loop {
61 let next = lexer.next_token();
62
63 let snip = |sp| sess.codemap().span_to_snippet(sp).unwrap();
64
65 if next.tok == token::Eof { break }
66
67 let klass = match next.tok {
68 token::Whitespace => {
69 try!(write!(out, "{}", Escape(&snip(next.sp))));
70 continue
71 },
72 token::Comment => {
73 try!(write!(out, "<span class='comment'>{}</span>",
74 Escape(&snip(next.sp))));
75 continue
76 },
77 token::Shebang(s) => {
78 try!(write!(out, "{}", Escape(&s.as_str())));
79 continue
80 },
81 // If this '&' token is directly adjacent to another token, assume
82 // that it's the address-of operator instead of the and-operator.
83 // This allows us to give all pointers their own class (`Box` and
84 // `@` are below).
85 token::BinOp(token::And) if lexer.peek().sp.lo == next.sp.hi => "kw-2",
86 token::At | token::Tilde => "kw-2",
87
88 // consider this as part of a macro invocation if there was a
89 // leading identifier
90 token::Not if is_macro => { is_macro = false; "macro" }
91
92 // operators
93 token::Eq | token::Lt | token::Le | token::EqEq | token::Ne | token::Ge | token::Gt |
94 token::AndAnd | token::OrOr | token::Not | token::BinOp(..) | token::RArrow |
95 token::BinOpEq(..) | token::FatArrow => "op",
96
97 // miscellaneous, no highlighting
98 token::Dot | token::DotDot | token::DotDotDot | token::Comma | token::Semi |
99 token::Colon | token::ModSep | token::LArrow | token::OpenDelim(_) |
100 token::CloseDelim(token::Brace) | token::CloseDelim(token::Paren) |
101 token::Question => "",
102 token::Dollar => {
103 if lexer.peek().tok.is_ident() {
104 is_macro_nonterminal = true;
105 "macro-nonterminal"
106 } else {
107 ""
108 }
109 }
110
111 // This is the start of an attribute. We're going to want to
112 // continue highlighting it as an attribute until the ending ']' is
113 // seen, so skip out early. Down below we terminate the attribute
114 // span when we see the ']'.
115 token::Pound => {
116 is_attribute = true;
117 try!(write!(out, r"<span class='attribute'>#"));
118 continue
119 }
120 token::CloseDelim(token::Bracket) => {
121 if is_attribute {
122 is_attribute = false;
123 try!(write!(out, "]</span>"));
124 continue
125 } else {
126 ""
127 }
128 }
129
130 token::Literal(lit, _suf) => {
131 match lit {
132 // text literals
133 token::Byte(..) | token::Char(..) |
134 token::Binary(..) | token::BinaryRaw(..) |
135 token::Str_(..) | token::StrRaw(..) => "string",
136
137 // number literals
138 token::Integer(..) | token::Float(..) => "number",
139 }
140 }
141
142 // keywords are also included in the identifier set
143 token::Ident(ident, _is_mod_sep) => {
144 match &*ident.name.as_str() {
145 "ref" | "mut" => "kw-2",
146
147 "self" => "self",
148 "false" | "true" => "boolval",
149
150 "Option" | "Result" => "prelude-ty",
151 "Some" | "None" | "Ok" | "Err" => "prelude-val",
152
153 _ if next.tok.is_any_keyword() => "kw",
154 _ => {
155 if is_macro_nonterminal {
156 is_macro_nonterminal = false;
157 "macro-nonterminal"
158 } else if lexer.peek().tok == token::Not {
159 is_macro = true;
160 "macro"
161 } else {
162 "ident"
163 }
164 }
165 }
166 }
167
168 // Special macro vars are like keywords
169 token::SpecialVarNt(_) => "kw-2",
170
171 token::Lifetime(..) => "lifetime",
172 token::DocComment(..) => "doccomment",
173 token::Underscore | token::Eof | token::Interpolated(..) |
174 token::MatchNt(..) | token::SubstNt(..) => "",
175 };
176
177 // as mentioned above, use the original source code instead of
178 // stringifying this token
179 let snip = sess.codemap().span_to_snippet(next.sp).unwrap();
180 if klass == "" {
181 try!(write!(out, "{}", Escape(&snip)));
182 } else {
183 try!(write!(out, "<span class='{}'>{}</span>", klass,
184 Escape(&snip)));
185 }
186 }
187
188 write!(out, "</pre>\n")
189 }