]> git.proxmox.com Git - rustc.git/blob - src/librustdoc/html/highlight.rs
Imported Upstream version 1.0.0~0alpha
[rustc.git] / src / librustdoc / html / highlight.rs
1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! Basic html highlighting functionality
12 //!
13 //! This module uses libsyntax's lexer to provide token-based highlighting for
14 //! the HTML documentation generated by rustdoc.
15
16 use html::escape::Escape;
17
18 use std::io;
19 use syntax::parse::lexer;
20 use syntax::parse::token;
21 use syntax::parse;
22
23 /// Highlights some source code, returning the HTML output.
///
/// * `src`   - raw Rust source text to highlight.
/// * `class` - optional CSS class appended to the generated `<pre>` element
///             (emitted as `class='rust {class}'`).
/// * `id`    - optional HTML `id` attribute for the `<pre>` element.
///
/// The source is lexed via libsyntax over an in-memory filemap named
/// `<stdin>`. Output bytes are collected in a `Vec` and converted with
/// `from_utf8_lossy`, so any invalid UTF-8 produced is replaced rather
/// than causing an error. NOTE(review): `doit(...).unwrap()` means an
/// I/O error while writing (here, into a Vec — effectively infallible)
/// would panic.
24 pub fn highlight(src: &str, class: Option<&str>, id: Option<&str>) -> String {
25 debug!("highlighting: ================\n{}\n==============", src);
26 let sess = parse::new_parse_sess();
27 let fm = parse::string_to_filemap(&sess,
28 src.to_string(),
29 "<stdin>".to_string());
30
// Render into an in-memory buffer, then stringify lossily.
31 let mut out = Vec::new();
32 doit(&sess,
33 lexer::StringReader::new(&sess.span_diagnostic, fm),
34 class,
35 id,
36 &mut out).unwrap();
37 String::from_utf8_lossy(&out[]).into_owned()
38 }
39
40 /// Exhausts the `lexer` writing the output into `out`.
41 ///
42 /// The general structure for this method is to iterate over each token,
43 /// possibly giving it an HTML span with a class specifying what flavor of token
44 /// it is. All source code emission is done as slices from the source map,
45 /// not from the tokens themselves, in order to stay true to the original
46 /// source.
///
/// * `sess`  - parse session; its codemap is used to recover the original
///             source text for each token's span.
/// * `class` - optional extra CSS class on the `<pre>` element.
/// * `id`    - optional HTML id on the `<pre>` element.
/// * `out`   - writer receiving the generated HTML.
47 fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
48 class: Option<&str>, id: Option<&str>,
49 out: &mut Writer) -> io::IoResult<()> {
50 use syntax::parse::lexer::Reader;
51
// Opening tag: `<pre [id='…'] class='rust [class]'>`.
52 try!(write!(out, "<pre "));
53 match id {
54 Some(id) => try!(write!(out, "id='{}' ", id)),
55 None => {}
56 }
57 try!(write!(out, "class='rust {}'>\n", class.unwrap_or("")));
// Cross-token state:
// - is_attribute: inside a `#[...]` span; closed when `]` is seen.
// - is_macro: previous token was an identifier followed by `!`.
// - is_macro_nonterminal: previous token was `$` followed by an ident.
58 let mut is_attribute = false;
59 let mut is_macro = false;
60 let mut is_macro_nonterminal = false;
61 loop {
62 let next = lexer.next_token();
63
// Recover the literal source text for a span (panics if the span
// cannot be re-sliced from the codemap — not expected here since
// the filemap was built from the same source string).
64 let snip = |&: sp| sess.span_diagnostic.cm.span_to_snippet(sp).unwrap();
65
66 if next.tok == token::Eof { break }
67
// Map the token to a CSS class; arms that `continue` have already
// written their own output (escaped) and need no span wrapper.
68 let klass = match next.tok {
69 token::Whitespace => {
70 try!(write!(out, "{}", Escape(snip(next.sp).as_slice())));
71 continue
72 },
73 token::Comment => {
74 try!(write!(out, "<span class='comment'>{}</span>",
75 Escape(snip(next.sp).as_slice())));
76 continue
77 },
78 token::Shebang(s) => {
79 try!(write!(out, "{}", Escape(s.as_str())));
80 continue
81 },
82 // If this '&' token is directly adjacent to another token, assume
83 // that it's the address-of operator instead of the and-operator.
84 // This allows us to give all pointers their own class (`Box` and
85 // `@` are below).
86 token::BinOp(token::And) if lexer.peek().sp.lo == next.sp.hi => "kw-2",
87 token::At | token::Tilde => "kw-2",
88
89 // consider this as part of a macro invocation if there was a
90 // leading identifier
91 token::Not if is_macro => { is_macro = false; "macro" }
92
93 // operators
94 token::Eq | token::Lt | token::Le | token::EqEq | token::Ne | token::Ge | token::Gt |
95 token::AndAnd | token::OrOr | token::Not | token::BinOp(..) | token::RArrow |
96 token::BinOpEq(..) | token::FatArrow => "op",
97
98 // miscellaneous, no highlighting
99 token::Dot | token::DotDot | token::DotDotDot | token::Comma | token::Semi |
100 token::Colon | token::ModSep | token::LArrow | token::OpenDelim(_) |
101 token::CloseDelim(token::Brace) | token::CloseDelim(token::Paren) |
102 token::Question => "",
// `$ident` inside a macro definition: highlight the following
// identifier as a macro nonterminal (flag consumed in the Ident arm).
103 token::Dollar => {
104 if lexer.peek().tok.is_ident() {
105 is_macro_nonterminal = true;
106 "macro-nonterminal"
107 } else {
108 ""
109 }
110 }
111
112 // This is the start of an attribute. We're going to want to
113 // continue highlighting it as an attribute until the ending ']' is
114 // seen, so skip out early. Down below we terminate the attribute
115 // span when we see the ']'.
116 token::Pound => {
117 is_attribute = true;
118 try!(write!(out, r"<span class='attribute'>#"));
119 continue
120 }
121 token::CloseDelim(token::Bracket) => {
122 if is_attribute {
123 is_attribute = false;
124 try!(write!(out, "]</span>"));
125 continue
126 } else {
127 ""
128 }
129 }
130
131 token::Literal(lit, _suf) => {
132 match lit {
133 // text literals
134 token::Byte(..) | token::Char(..) |
135 token::Binary(..) | token::BinaryRaw(..) |
136 token::Str_(..) | token::StrRaw(..) => "string",
137
138 // number literals
139 token::Integer(..) | token::Float(..) => "number",
140 }
141 }
142
143 // keywords are also included in the identifier set
144 token::Ident(ident, _is_mod_sep) => {
145 match token::get_ident(ident).get() {
146 "ref" | "mut" => "kw-2",
147
148 "self" => "self",
149 "false" | "true" => "boolval",
150
151 "Option" | "Result" => "prelude-ty",
152 "Some" | "None" | "Ok" | "Err" => "prelude-val",
153
154 _ if next.tok.is_any_keyword() => "kw",
155 _ => {
// Plain identifier: macro nonterminal if `$` preceded it,
// a macro name if `!` follows, otherwise just an ident.
156 if is_macro_nonterminal {
157 is_macro_nonterminal = false;
158 "macro-nonterminal"
159 } else if lexer.peek().tok == token::Not {
160 is_macro = true;
161 "macro"
162 } else {
163 "ident"
164 }
165 }
166 }
167 }
168
169 // Special macro vars are like keywords
170 token::SpecialVarNt(_) => "kw-2",
171
172 token::Lifetime(..) => "lifetime",
173 token::DocComment(..) => "doccomment",
// Eof is unreachable here (loop breaks above) but keeps the
// match exhaustive without a wildcard arm.
174 token::Underscore | token::Eof | token::Interpolated(..) |
175 token::MatchNt(..) | token::SubstNt(..) => "",
176 };
177
178 // as mentioned above, use the original source code instead of
179 // stringifying this token
180 let snip = sess.span_diagnostic.cm.span_to_snippet(next.sp).unwrap();
181 if klass == "" {
182 try!(write!(out, "{}", Escape(snip.as_slice())));
183 } else {
184 try!(write!(out, "<span class='{}'>{}</span>", klass,
185 Escape(snip.as_slice())));
186 }
187 }
188
// Close the `<pre>` opened at the top; the final write's result is the
// function's return value.
189 write!(out, "</pre>\n")
190 }