//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_with_highlighting` to highlight some Rust code.
8 use crate::html
::escape
::Escape
;
10 use std
::fmt
::Display
;
11 use std
::iter
::Peekable
;
13 use rustc_lexer
::{LiteralKind, TokenKind}
;
14 use rustc_span
::edition
::Edition
;
15 use rustc_span
::symbol
::Symbol
;
16 use rustc_span
::with_default_session_globals
;
18 use super::format
::Buffer
;
20 /// Highlights `src`, returning the HTML output.
21 crate fn render_with_highlighting(
25 playground_button
: Option
<&str>,
26 tooltip
: Option
<(Option
<Edition
>, &str)>,
29 debug
!("highlighting: ================\n{}\n==============", src
);
30 if let Some((edition_info
, class
)) = tooltip
{
33 "<div class='information'><div class='tooltip {}'{}>ⓘ</div></div>",
35 if let Some(edition_info
) = edition_info
{
36 format
!(" data-edition=\"{}\"", edition_info
)
43 write_header(out
, class
);
44 write_code(out
, &src
, edition
);
45 write_footer(out
, playground_button
);
48 fn write_header(out
: &mut Buffer
, class
: Option
<&str>) {
49 write
!(out
, "<div class=\"example-wrap\"><pre class=\"rust {}\">\n", class
.unwrap_or_default());
52 fn write_code(out
: &mut Buffer
, src
: &str, edition
: Edition
) {
53 // This replace allows to fix how the code source with DOS backline characters is displayed.
54 let src
= src
.replace("\r\n", "\n");
55 Classifier
::new(&src
, edition
).highlight(&mut |highlight
| {
57 Highlight
::Token { text, class }
=> string(out
, Escape(text
), class
),
58 Highlight
::EnterSpan { class }
=> enter_span(out
, class
),
59 Highlight
::ExitSpan
=> exit_span(out
),
64 fn write_footer(out
: &mut Buffer
, playground_button
: Option
<&str>) {
65 write
!(out
, "</pre>{}</div>\n", playground_button
.unwrap_or_default());
68 /// How a span of text is classified. Mostly corresponds to token kinds.
69 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
75 // Keywords that do pointer/reference stuff.
92 /// Returns the css class expected by rustdoc for each `Class`.
93 fn as_html(self) -> &'
static str {
95 Class
::Comment
=> "comment",
96 Class
::DocComment
=> "doccomment",
97 Class
::Attribute
=> "attribute",
98 Class
::KeyWord
=> "kw",
99 Class
::RefKeyWord
=> "kw-2",
100 Class
::Self_
=> "self",
102 Class
::Macro
=> "macro",
103 Class
::MacroNonTerminal
=> "macro-nonterminal",
104 Class
::String
=> "string",
105 Class
::Number
=> "number",
106 Class
::Bool
=> "bool-val",
107 Class
::Ident
=> "ident",
108 Class
::Lifetime
=> "lifetime",
109 Class
::PreludeTy
=> "prelude-ty",
110 Class
::PreludeVal
=> "prelude-val",
111 Class
::QuestionMark
=> "question-mark",
117 Token { text: &'a str, class: Option<Class> }
,
118 EnterSpan { class: Class }
,
122 struct TokenIter
<'a
> {
126 impl Iterator
for TokenIter
<'a
> {
127 type Item
= (TokenKind
, &'a
str);
128 fn next(&mut self) -> Option
<(TokenKind
, &'a
str)> {
129 if self.src
.is_empty() {
132 let token
= rustc_lexer
::first_token(self.src
);
133 let (text
, rest
) = self.src
.split_at(token
.len
);
135 Some((token
.kind
, text
))
139 fn get_real_ident_class(text
: &str, edition
: Edition
) -> Class
{
141 "ref" | "mut" => Class
::RefKeyWord
,
142 "self" | "Self" => Class
::Self_
,
143 "false" | "true" => Class
::Bool
,
144 _
if Symbol
::intern(text
).is_reserved(|| edition
) => Class
::KeyWord
,
149 /// Processes program tokens, classifying strings of text by highlighting
150 /// category (`Class`).
151 struct Classifier
<'a
> {
152 tokens
: Peekable
<TokenIter
<'a
>>,
155 in_macro_nonterminal
: bool
,
161 impl<'a
> Classifier
<'a
> {
162 fn new(src
: &str, edition
: Edition
) -> Classifier
<'_
> {
163 let tokens
= TokenIter { src }
.peekable();
168 in_macro_nonterminal
: false,
175 /// Concatenate colons and idents as one when possible.
176 fn get_full_ident_path(&mut self) -> Vec
<(TokenKind
, usize, usize)> {
177 let start
= self.byte_pos
as usize;
179 let mut has_ident
= false;
180 let edition
= self.edition
;
184 while let Some((TokenKind
::Colon
, _
)) = self.tokens
.peek() {
188 // Ident path can start with "::" but if we already have content in the ident path,
189 // the "::" is mandatory.
190 if has_ident
&& nb
== 0 {
191 return vec
![(TokenKind
::Ident
, start
, pos
)];
192 } else if nb
!= 0 && nb
!= 2 {
194 return vec
![(TokenKind
::Ident
, start
, pos
), (TokenKind
::Colon
, pos
, pos
+ nb
)];
196 return vec
![(TokenKind
::Colon
, pos
, pos
+ nb
)];
200 if let Some((Class
::Ident
, text
)) = self.tokens
.peek().map(|(token
, text
)| {
201 if *token
== TokenKind
::Ident
{
202 let class
= get_real_ident_class(text
, edition
);
205 // Doesn't matter which Class we put in here...
206 (Class
::Comment
, text
)
209 // We only "add" the colon if there is an ident behind.
210 pos
+= text
.len() + nb
;
213 } else if nb
> 0 && has_ident
{
214 return vec
![(TokenKind
::Ident
, start
, pos
), (TokenKind
::Colon
, pos
, pos
+ nb
)];
216 return vec
![(TokenKind
::Colon
, pos
, pos
+ nb
)];
217 } else if has_ident
{
218 return vec
![(TokenKind
::Ident
, start
, pos
)];
225 /// Wraps the tokens iteration to ensure that the byte_pos is always correct.
226 fn next(&mut self) -> Option
<(TokenKind
, &'a
str)> {
227 if let Some((kind
, text
)) = self.tokens
.next() {
228 self.byte_pos
+= text
.len() as u32;
235 /// Exhausts the `Classifier` writing the output into `sink`.
237 /// The general structure for this method is to iterate over each token,
238 /// possibly giving it an HTML span with a class specifying what flavor of
240 fn highlight(mut self, sink
: &mut dyn FnMut(Highlight
<'a
>)) {
241 with_default_session_globals(|| {
246 .map(|t
| matches
!(t
.0, TokenKind
::Colon
| TokenKind
::Ident
))
249 let tokens
= self.get_full_ident_path();
250 for (token
, start
, end
) in tokens
{
251 let text
= &self.src
[start
..end
];
252 self.advance(token
, text
, sink
);
253 self.byte_pos
+= text
.len() as u32;
256 if let Some((token
, text
)) = self.next() {
257 self.advance(token
, text
, sink
);
265 /// Single step of highlighting. This will classify `token`, but maybe also
266 /// a couple of following ones as well.
267 fn advance(&mut self, token
: TokenKind
, text
: &'a
str, sink
: &mut dyn FnMut(Highlight
<'a
>)) {
268 let lookahead
= self.peek();
269 let no_highlight
= |sink
: &mut dyn FnMut(_
)| sink(Highlight
::Token { text, class: None }
);
270 let class
= match token
{
271 TokenKind
::Whitespace
=> return no_highlight(sink
),
272 TokenKind
::LineComment { doc_style }
| TokenKind
::BlockComment { doc_style, .. }
=> {
273 if doc_style
.is_some() {
279 // Consider this as part of a macro invocation if there was a
280 // leading identifier.
281 TokenKind
::Bang
if self.in_macro
=> {
282 self.in_macro
= false;
283 sink(Highlight
::Token { text, class: None }
);
284 sink(Highlight
::ExitSpan
);
288 // Assume that '&' or '*' is the reference or dereference operator
289 // or a reference or pointer type. Unless, of course, it looks like
290 // a logical and or a multiplication operator: `&&` or `* `.
291 TokenKind
::Star
=> match lookahead
{
292 Some(TokenKind
::Whitespace
) => Class
::Op
,
293 _
=> Class
::RefKeyWord
,
295 TokenKind
::And
=> match lookahead
{
296 Some(TokenKind
::And
) => {
298 sink(Highlight
::Token { text: "&&", class: Some(Class::Op) }
);
301 Some(TokenKind
::Eq
) => {
303 sink(Highlight
::Token { text: "&=", class: Some(Class::Op) }
);
306 Some(TokenKind
::Whitespace
) => Class
::Op
,
307 _
=> Class
::RefKeyWord
,
320 | TokenKind
::Gt
=> Class
::Op
,
322 // Miscellaneous, no highlighting.
326 | TokenKind
::OpenParen
327 | TokenKind
::CloseParen
328 | TokenKind
::OpenBrace
329 | TokenKind
::CloseBrace
330 | TokenKind
::OpenBracket
334 | TokenKind
::Unknown
=> return no_highlight(sink
),
336 TokenKind
::Question
=> Class
::QuestionMark
,
338 TokenKind
::Dollar
=> match lookahead
{
339 Some(TokenKind
::Ident
) => {
340 self.in_macro_nonterminal
= true;
341 Class
::MacroNonTerminal
343 _
=> return no_highlight(sink
),
346 // This might be the start of an attribute. We're going to want to
347 // continue highlighting it as an attribute until the ending ']' is
348 // seen, so skip out early. Down below we terminate the attribute
349 // span when we see the ']'.
350 TokenKind
::Pound
=> {
352 // Case 1: #![inner_attribute]
353 Some(TokenKind
::Bang
) => {
355 if let Some(TokenKind
::OpenBracket
) = self.peek() {
356 self.in_attribute
= true;
357 sink(Highlight
::EnterSpan { class: Class::Attribute }
);
359 sink(Highlight
::Token { text: "#", class: None }
);
360 sink(Highlight
::Token { text: "!", class: None }
);
363 // Case 2: #[outer_attribute]
364 Some(TokenKind
::OpenBracket
) => {
365 self.in_attribute
= true;
366 sink(Highlight
::EnterSpan { class: Class::Attribute }
);
370 return no_highlight(sink
);
372 TokenKind
::CloseBracket
=> {
373 if self.in_attribute
{
374 self.in_attribute
= false;
375 sink(Highlight
::Token { text: "]", class: None }
);
376 sink(Highlight
::ExitSpan
);
379 return no_highlight(sink
);
381 TokenKind
::Literal { kind, .. }
=> match kind
{
383 LiteralKind
::Byte { .. }
384 | LiteralKind
::Char { .. }
385 | LiteralKind
::Str { .. }
386 | LiteralKind
::ByteStr { .. }
387 | LiteralKind
::RawStr { .. }
388 | LiteralKind
::RawByteStr { .. }
=> Class
::String
,
390 LiteralKind
::Float { .. }
| LiteralKind
::Int { .. }
=> Class
::Number
,
392 TokenKind
::Ident
| TokenKind
::RawIdent
if lookahead
== Some(TokenKind
::Bang
) => {
393 self.in_macro
= true;
394 sink(Highlight
::EnterSpan { class: Class::Macro }
);
395 sink(Highlight
::Token { text, class: None }
);
398 TokenKind
::Ident
=> match get_real_ident_class(text
, self.edition
) {
399 Class
::Ident
=> match text
{
400 "Option" | "Result" => Class
::PreludeTy
,
401 "Some" | "None" | "Ok" | "Err" => Class
::PreludeVal
,
402 _
if self.in_macro_nonterminal
=> {
403 self.in_macro_nonterminal
= false;
404 Class
::MacroNonTerminal
410 TokenKind
::RawIdent
=> Class
::Ident
,
411 TokenKind
::Lifetime { .. }
=> Class
::Lifetime
,
413 // Anything that didn't return above is the simple case where we the
414 // class just spans a single token, so we can use the `string` method.
415 sink(Highlight
::Token { text, class: Some(class) }
);
418 fn peek(&mut self) -> Option
<TokenKind
> {
419 self.tokens
.peek().map(|(toke_kind
, _text
)| *toke_kind
)
423 /// Called when we start processing a span of text that should be highlighted.
424 /// The `Class` argument specifies how it should be highlighted.
425 fn enter_span(out
: &mut Buffer
, klass
: Class
) {
426 write
!(out
, "<span class=\"{}\">", klass
.as_html());
429 /// Called at the end of a span of highlighted text.
430 fn exit_span(out
: &mut Buffer
) {
431 out
.write_str("</span>");
434 /// Called for a span of text. If the text should be highlighted differently
435 /// from the surrounding text, then the `Class` argument will be a value other
438 /// The following sequences of callbacks are equivalent:
440 /// enter_span(Foo), string("text", None), exit_span()
441 /// string("text", Foo)
443 /// The latter can be thought of as a shorthand for the former, which is more
445 fn string
<T
: Display
>(out
: &mut Buffer
, text
: T
, klass
: Option
<Class
>) {
447 None
=> write
!(out
, "{}", text
),
448 Some(klass
) => write
!(out
, "<span class=\"{}\">{}</span>", klass
.as_html(), text
),