]> git.proxmox.com Git - rustc.git/blame - src/librustdoc/html/highlight.rs
New upstream version 1.69.0+dfsg1
[rustc.git] / src / librustdoc / html / highlight.rs
CommitLineData
//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` crate to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_example_with_highlighting`, `render_item_decl_with_highlighting` or
//! `render_source_with_highlighting` to highlight some Rust code.
1a4d82fc 7
c295e0f8 8use crate::clean::PrimitiveType;
9fa01778 9use crate::html::escape::Escape;
2b03887a 10use crate::html::render::{Context, LinkFromSrc};
1a4d82fc 11
c295e0f8 12use std::collections::VecDeque;
94222f64 13use std::fmt::{Display, Write};
a7813a04 14
3c0e092e 15use rustc_data_structures::fx::FxHashMap;
2b03887a 16use rustc_lexer::{Cursor, LiteralKind, TokenKind};
fc512014
XL
17use rustc_span::edition::Edition;
18use rustc_span::symbol::Symbol;
94222f64
XL
19use rustc_span::{BytePos, Span, DUMMY_SP};
20
21use super::format::{self, Buffer};
94222f64
XL
22
23/// This type is needed in case we want to render links on items to allow to go to their definition.
9c376795
FG
24pub(crate) struct HrefContext<'a, 'tcx> {
25 pub(crate) context: &'a Context<'tcx>,
94222f64 26 /// This span contains the current file we're going through.
923072b8 27 pub(crate) file_span: Span,
94222f64
XL
28 /// This field is used to know "how far" from the top of the directory we are to link to either
29 /// documentation pages or other source pages.
9c376795 30 pub(crate) root_path: &'a str,
f2b60f7d 31 /// This field is used to calculate precise local URLs.
9c376795 32 pub(crate) current_href: String,
94222f64 33}
5869c6ff 34
3c0e092e
XL
35/// Decorations are represented as a map from CSS class to vector of character ranges.
36/// Each range will be wrapped in a span with that class.
f2b60f7d 37#[derive(Default)]
923072b8 38pub(crate) struct DecorationInfo(pub(crate) FxHashMap<&'static str, Vec<(u32, u32)>>);
3c0e092e 39
f2b60f7d
FG
40#[derive(Eq, PartialEq, Clone, Copy)]
41pub(crate) enum Tooltip {
42 Ignore,
43 CompileFail,
44 ShouldPanic,
45 Edition(Edition),
46 None,
47}
48
49/// Highlights `src` as an inline example, returning the HTML output.
50pub(crate) fn render_example_with_highlighting(
5869c6ff
XL
51 src: &str,
52 out: &mut Buffer,
f2b60f7d 53 tooltip: Tooltip,
74b04a01 54 playground_button: Option<&str>,
5869c6ff 55) {
f2b60f7d
FG
56 write_header(out, "rust-example-rendered", None, tooltip);
57 write_code(out, src, None, None);
58 write_footer(out, playground_button);
59}
60
9ffffee4
FG
61/// Highlights `src` as an item-decl, returning the HTML output.
62pub(crate) fn render_item_decl_with_highlighting(src: &str, out: &mut Buffer) {
63 write!(out, "<pre class=\"rust item-decl\">");
f2b60f7d 64 write_code(out, src, None, None);
9ffffee4 65 write!(out, "</pre>");
f2b60f7d
FG
66}
67
68/// Highlights `src` as a source code page, returning the HTML output.
69pub(crate) fn render_source_with_highlighting(
70 src: &str,
71 out: &mut Buffer,
72 line_numbers: Buffer,
9c376795 73 href_context: HrefContext<'_, '_>,
f2b60f7d 74 decoration_info: DecorationInfo,
487cf647 75 extra: Option<&str>,
f2b60f7d
FG
76) {
77 write_header(out, "", Some(line_numbers), Tooltip::None);
487cf647
FG
78 if let Some(extra) = extra {
79 out.push_str(extra);
80 }
f2b60f7d
FG
81 write_code(out, src, Some(href_context), Some(decoration_info));
82 write_footer(out, None);
83}
84
85fn write_header(out: &mut Buffer, class: &str, extra_content: Option<Buffer>, tooltip: Tooltip) {
86 write!(
87 out,
88 "<div class=\"example-wrap{}\">",
89 match tooltip {
90 Tooltip::Ignore => " ignore",
91 Tooltip::CompileFail => " compile_fail",
92 Tooltip::ShouldPanic => " should_panic",
93 Tooltip::Edition(_) => " edition",
94 Tooltip::None => "",
95 },
96 );
97
98 if tooltip != Tooltip::None {
9ffffee4 99 let edition_code;
dfeec247
XL
100 write!(
101 out,
9ffffee4
FG
102 "<a href=\"#\" class=\"tooltip\" title=\"{}\">ⓘ</a>",
103 match tooltip {
104 Tooltip::Ignore => "This example is not tested",
105 Tooltip::CompileFail => "This example deliberately fails to compile",
106 Tooltip::ShouldPanic => "This example panics",
107 Tooltip::Edition(edition) => {
108 edition_code = format!("This example runs with edition {edition}");
109 &edition_code
110 }
111 Tooltip::None => unreachable!(),
fc512014 112 },
5869c6ff 113 );
ea8adc8c 114 }
9fa01778 115
17df50a5
XL
116 if let Some(extra) = extra_content {
117 out.push_buffer(extra);
118 }
f2b60f7d 119 if class.is_empty() {
94222f64 120 write!(out, "<pre class=\"rust\">");
f2b60f7d
FG
121 } else {
122 write!(out, "<pre class=\"rust {class}\">");
17df50a5 123 }
94222f64 124 write!(out, "<code>");
54a0048b
SL
125}
126
f2b60f7d
FG
127/// Check if two `Class` can be merged together. In the following rules, "unclassified" means `None`
128/// basically (since it's `Option<Class>`). The following rules apply:
129///
130/// * If two `Class` have the same variant, then they can be merged.
131/// * If the other `Class` is unclassified and only contains white characters (backline,
132/// whitespace, etc), it can be merged.
133/// * `Class::Ident` is considered the same as unclassified (because it doesn't have an associated
134/// CSS class).
135fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool {
136 match (class1, class2) {
137 (Some(c1), Some(c2)) => c1.is_equal_to(c2),
138 (Some(Class::Ident(_)), None) | (None, Some(Class::Ident(_))) => true,
139 (Some(_), None) | (None, Some(_)) => text.trim().is_empty(),
140 (None, None) => true,
141 }
142}
143
144/// This type is used as a conveniency to prevent having to pass all its fields as arguments into
145/// the various functions (which became its methods).
9c376795 146struct TokenHandler<'a, 'tcx> {
f2b60f7d
FG
147 out: &'a mut Buffer,
148 /// It contains the closing tag and the associated `Class`.
149 closing_tags: Vec<(&'static str, Class)>,
150 /// This is used because we don't automatically generate the closing tag on `ExitSpan` in
151 /// case an `EnterSpan` event with the same class follows.
152 pending_exit_span: Option<Class>,
153 /// `current_class` and `pending_elems` are used to group HTML elements with same `class`
154 /// attributes to reduce the DOM size.
155 current_class: Option<Class>,
156 /// We need to keep the `Class` for each element because it could contain a `Span` which is
157 /// used to generate links.
9c376795
FG
158 pending_elems: Vec<(&'a str, Option<Class>)>,
159 href_context: Option<HrefContext<'a, 'tcx>>,
f2b60f7d
FG
160}
161
9c376795 162impl<'a, 'tcx> TokenHandler<'a, 'tcx> {
f2b60f7d
FG
163 fn handle_exit_span(&mut self) {
164 // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is
165 // being used in `write_pending_elems`.
166 let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1;
167 // We flush everything just in case...
168 self.write_pending_elems(Some(class));
169
170 exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0);
171 self.pending_exit_span = None;
172 }
173
174 /// Write all the pending elements sharing a same (or at mergeable) `Class`.
175 ///
176 /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged
177 /// with the elements' class, then we simply write the elements since the `ExitSpan` event will
178 /// close the tag.
179 ///
180 /// Otherwise, if there is only one pending element, we let the `string` function handle both
181 /// opening and closing the tag, otherwise we do it into this function.
182 ///
183 /// It returns `true` if `current_class` must be set to `None` afterwards.
184 fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool {
185 if self.pending_elems.is_empty() {
186 return false;
187 }
188 if let Some((_, parent_class)) = self.closing_tags.last() &&
189 can_merge(current_class, Some(*parent_class), "")
190 {
191 for (text, class) in self.pending_elems.iter() {
192 string(self.out, Escape(text), *class, &self.href_context, false);
193 }
194 } else {
195 // We only want to "open" the tag ourselves if we have more than one pending and if the
196 // current parent tag is not the same as our pending content.
197 let close_tag = if self.pending_elems.len() > 1 && current_class.is_some() {
198 Some(enter_span(self.out, current_class.unwrap(), &self.href_context))
199 } else {
200 None
201 };
202 for (text, class) in self.pending_elems.iter() {
203 string(self.out, Escape(text), *class, &self.href_context, close_tag.is_none());
204 }
205 if let Some(close_tag) = close_tag {
206 exit_span(self.out, close_tag);
207 }
208 }
209 self.pending_elems.clear();
210 true
211 }
212}
213
9c376795 214impl<'a, 'tcx> Drop for TokenHandler<'a, 'tcx> {
f2b60f7d
FG
215 /// When leaving, we need to flush all pending data to not have missing content.
216 fn drop(&mut self) {
217 if self.pending_exit_span.is_some() {
218 self.handle_exit_span();
219 } else {
220 self.write_pending_elems(self.current_class);
221 }
222 }
223}
224
94222f64
XL
225/// Convert the given `src` source code into HTML by adding classes for highlighting.
226///
227/// This code is used to render code blocks (in the documentation) as well as the source code pages.
228///
229/// Some explanations on the last arguments:
230///
923072b8
FG
231/// In case we are rendering a code block and not a source code file, `href_context` will be `None`.
232/// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an
94222f64
XL
233/// item definition.
234///
235/// More explanations about spans and how we use them here are provided in the
236fn write_code(
237 out: &mut Buffer,
238 src: &str,
9c376795 239 href_context: Option<HrefContext<'_, '_>>,
3c0e092e 240 decoration_info: Option<DecorationInfo>,
94222f64 241) {
2a314972
XL
242 // This replace allows to fix how the code source with DOS backline characters is displayed.
243 let src = src.replace("\r\n", "\n");
f2b60f7d
FG
244 let mut token_handler = TokenHandler {
245 out,
246 closing_tags: Vec::new(),
247 pending_exit_span: None,
248 current_class: None,
249 pending_elems: Vec::new(),
250 href_context,
251 };
252
3c0e092e
XL
253 Classifier::new(
254 &src,
f2b60f7d 255 token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP),
3c0e092e
XL
256 decoration_info,
257 )
258 .highlight(&mut |highlight| {
259 match highlight {
f2b60f7d
FG
260 Highlight::Token { text, class } => {
261 // If we received a `ExitSpan` event and then have a non-compatible `Class`, we
262 // need to close the `<span>`.
263 let need_current_class_update = if let Some(pending) = token_handler.pending_exit_span &&
264 !can_merge(Some(pending), class, text) {
265 token_handler.handle_exit_span();
266 true
267 // If the two `Class` are different, time to flush the current content and start
268 // a new one.
269 } else if !can_merge(token_handler.current_class, class, text) {
270 token_handler.write_pending_elems(token_handler.current_class);
271 true
272 } else {
273 token_handler.current_class.is_none()
274 };
275
276 if need_current_class_update {
277 token_handler.current_class = class.map(Class::dummy);
278 }
279 token_handler.pending_elems.push((text, class));
280 }
923072b8 281 Highlight::EnterSpan { class } => {
f2b60f7d
FG
282 let mut should_add = true;
283 if let Some(pending_exit_span) = token_handler.pending_exit_span {
284 if class.is_equal_to(pending_exit_span) {
285 should_add = false;
286 } else {
287 token_handler.handle_exit_span();
288 }
289 } else {
290 // We flush everything just in case...
291 if token_handler.write_pending_elems(token_handler.current_class) {
292 token_handler.current_class = None;
293 }
294 }
295 if should_add {
296 let closing_tag = enter_span(token_handler.out, class, &token_handler.href_context);
297 token_handler.closing_tags.push((closing_tag, class));
298 }
299
300 token_handler.current_class = None;
301 token_handler.pending_exit_span = None;
923072b8
FG
302 }
303 Highlight::ExitSpan => {
f2b60f7d
FG
304 token_handler.current_class = None;
305 token_handler.pending_exit_span =
306 Some(token_handler.closing_tags.last().as_ref().expect("ExitSpan without EnterSpan").1);
923072b8 307 }
3c0e092e
XL
308 };
309 });
1b1a35ee 310}
a7813a04 311
5869c6ff 312fn write_footer(out: &mut Buffer, playground_button: Option<&str>) {
94222f64 313 writeln!(out, "</code></pre>{}</div>", playground_button.unwrap_or_default());
a7813a04
XL
314}
315
316/// How a span of text is classified. Mostly corresponds to token kinds.
317#[derive(Clone, Copy, Debug, Eq, PartialEq)]
b7449926 318enum Class {
a7813a04
XL
319 Comment,
320 DocComment,
321 Attribute,
322 KeyWord,
f2b60f7d 323 /// Keywords that do pointer/reference stuff.
a7813a04 324 RefKeyWord,
94222f64 325 Self_(Span),
923072b8 326 Macro(Span),
a7813a04
XL
327 MacroNonTerminal,
328 String,
329 Number,
330 Bool,
f2b60f7d 331 /// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains.
94222f64 332 Ident(Span),
a7813a04
XL
333 Lifetime,
334 PreludeTy,
335 PreludeVal,
c30ab7b3 336 QuestionMark,
3c0e092e 337 Decoration(&'static str),
1a4d82fc
JJ
338}
339
1b1a35ee 340impl Class {
f2b60f7d
FG
341 /// It is only looking at the variant, not the variant content.
342 ///
343 /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of
344 /// multiple ones.
345 fn is_equal_to(self, other: Self) -> bool {
346 match (self, other) {
347 (Self::Self_(_), Self::Self_(_))
348 | (Self::Macro(_), Self::Macro(_))
349 | (Self::Ident(_), Self::Ident(_)) => true,
350 (Self::Decoration(c1), Self::Decoration(c2)) => c1 == c2,
351 (x, y) => x == y,
352 }
353 }
354
355 /// If `self` contains a `Span`, it'll be replaced with `DUMMY_SP` to prevent creating links
356 /// on "empty content" (because of the attributes merge).
357 fn dummy(self) -> Self {
358 match self {
359 Self::Self_(_) => Self::Self_(DUMMY_SP),
360 Self::Macro(_) => Self::Macro(DUMMY_SP),
361 Self::Ident(_) => Self::Ident(DUMMY_SP),
362 s => s,
363 }
364 }
365
1b1a35ee
XL
366 /// Returns the css class expected by rustdoc for each `Class`.
367 fn as_html(self) -> &'static str {
368 match self {
1b1a35ee
XL
369 Class::Comment => "comment",
370 Class::DocComment => "doccomment",
487cf647 371 Class::Attribute => "attr",
1b1a35ee
XL
372 Class::KeyWord => "kw",
373 Class::RefKeyWord => "kw-2",
94222f64 374 Class::Self_(_) => "self",
923072b8 375 Class::Macro(_) => "macro",
1b1a35ee
XL
376 Class::MacroNonTerminal => "macro-nonterminal",
377 Class::String => "string",
378 Class::Number => "number",
379 Class::Bool => "bool-val",
f2b60f7d 380 Class::Ident(_) => "",
1b1a35ee
XL
381 Class::Lifetime => "lifetime",
382 Class::PreludeTy => "prelude-ty",
383 Class::PreludeVal => "prelude-val",
384 Class::QuestionMark => "question-mark",
3c0e092e 385 Class::Decoration(kind) => kind,
a7813a04
XL
386 }
387 }
94222f64
XL
388
389 /// In case this is an item which can be converted into a link to a definition, it'll contain
390 /// a "span" (a tuple representing `(lo, hi)` equivalent of `Span`).
391 fn get_span(self) -> Option<Span> {
392 match self {
923072b8
FG
393 Self::Ident(sp) | Self::Self_(sp) | Self::Macro(sp) => Some(sp),
394 Self::Comment
395 | Self::DocComment
396 | Self::Attribute
397 | Self::KeyWord
398 | Self::RefKeyWord
923072b8
FG
399 | Self::MacroNonTerminal
400 | Self::String
401 | Self::Number
402 | Self::Bool
403 | Self::Lifetime
404 | Self::PreludeTy
405 | Self::PreludeVal
406 | Self::QuestionMark
407 | Self::Decoration(_) => None,
94222f64
XL
408 }
409 }
a7813a04
XL
410}
411
1b1a35ee 412enum Highlight<'a> {
fc512014 413 Token { text: &'a str, class: Option<Class> },
1b1a35ee
XL
414 EnterSpan { class: Class },
415 ExitSpan,
9fa01778
XL
416}
417
1b1a35ee
XL
418struct TokenIter<'a> {
419 src: &'a str,
2b03887a 420 cursor: Cursor<'a>,
9fa01778
XL
421}
422
a2a8927a 423impl<'a> Iterator for TokenIter<'a> {
1b1a35ee
XL
424 type Item = (TokenKind, &'a str);
425 fn next(&mut self) -> Option<(TokenKind, &'a str)> {
2b03887a
FG
426 let token = self.cursor.advance_token();
427 if token.kind == TokenKind::Eof {
1b1a35ee 428 return None;
a7813a04 429 }
064997fb 430 let (text, rest) = self.src.split_at(token.len as usize);
1b1a35ee
XL
431 self.src = rest;
432 Some((token.kind, text))
a7813a04 433 }
1b1a35ee 434}
a7813a04 435
94222f64 436/// Classifies into identifier class; returns `None` if this is a non-keyword identifier.
f2b60f7d 437fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> {
94222f64
XL
438 let ignore: &[&str] =
439 if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] };
440 if ignore.iter().any(|k| *k == text) {
441 return None;
442 }
443 Some(match text {
cdc7bbd5 444 "ref" | "mut" => Class::RefKeyWord,
cdc7bbd5 445 "false" | "true" => Class::Bool,
f2b60f7d 446 _ if Symbol::intern(text).is_reserved(|| Edition::Edition2021) => Class::KeyWord,
94222f64
XL
447 _ => return None,
448 })
cdc7bbd5
XL
449}
450
c295e0f8
XL
451/// This iterator comes from the same idea than "Peekable" except that it allows to "peek" more than
452/// just the next item by using `peek_next`. The `peek` method always returns the next item after
453/// the current one whereas `peek_next` will return the next item after the last one peeked.
454///
455/// You can use both `peek` and `peek_next` at the same time without problem.
456struct PeekIter<'a> {
457 stored: VecDeque<(TokenKind, &'a str)>,
458 /// This position is reinitialized when using `next`. It is used in `peek_next`.
459 peek_pos: usize,
460 iter: TokenIter<'a>,
461}
462
a2a8927a 463impl<'a> PeekIter<'a> {
c295e0f8
XL
464 fn new(iter: TokenIter<'a>) -> Self {
465 Self { stored: VecDeque::new(), peek_pos: 0, iter }
466 }
f2b60f7d 467 /// Returns the next item after the current one. It doesn't interfere with `peek_next` output.
c295e0f8 468 fn peek(&mut self) -> Option<&(TokenKind, &'a str)> {
9ffffee4
FG
469 if self.stored.is_empty() && let Some(next) = self.iter.next() {
470 self.stored.push_back(next);
c295e0f8
XL
471 }
472 self.stored.front()
473 }
f2b60f7d 474 /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output.
c295e0f8
XL
475 fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> {
476 self.peek_pos += 1;
477 if self.peek_pos - 1 < self.stored.len() {
478 self.stored.get(self.peek_pos - 1)
479 } else if let Some(next) = self.iter.next() {
480 self.stored.push_back(next);
481 self.stored.back()
482 } else {
483 None
484 }
485 }
486}
487
a2a8927a 488impl<'a> Iterator for PeekIter<'a> {
c295e0f8
XL
489 type Item = (TokenKind, &'a str);
490 fn next(&mut self) -> Option<Self::Item> {
491 self.peek_pos = 0;
492 if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() }
493 }
494}
495
3c0e092e
XL
/// Custom spans inserted into the source. Eg `--scrape-examples` uses this to highlight
/// function calls.
struct Decorations {
    /// Start position of each decoration alongside its CSS class.
    starts: Vec<(u32, &'static str)>,
    /// End position of each decoration.
    ends: Vec<u32>,
}
501
502impl Decorations {
503 fn new(info: DecorationInfo) -> Self {
504 // Extract tuples (start, end, kind) into separate sequences of (start, kind) and (end).
505 let (mut starts, mut ends): (Vec<_>, Vec<_>) = info
506 .0
507 .into_iter()
5099ac24 508 .flat_map(|(kind, ranges)| ranges.into_iter().map(move |(lo, hi)| ((lo, kind), hi)))
3c0e092e
XL
509 .unzip();
510
511 // Sort the sequences in document order.
512 starts.sort_by_key(|(lo, _)| *lo);
513 ends.sort();
514
515 Decorations { starts, ends }
516 }
517}
518
1b1a35ee
XL
519/// Processes program tokens, classifying strings of text by highlighting
520/// category (`Class`).
9c376795
FG
521struct Classifier<'src> {
522 tokens: PeekIter<'src>,
1b1a35ee
XL
523 in_attribute: bool,
524 in_macro: bool,
525 in_macro_nonterminal: bool,
cdc7bbd5 526 byte_pos: u32,
94222f64 527 file_span: Span,
9c376795 528 src: &'src str,
3c0e092e 529 decorations: Option<Decorations>,
1b1a35ee 530}
416331ca 531
9c376795 532impl<'src> Classifier<'src> {
94222f64
XL
533 /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
534 /// file span which will be used later on by the `span_correspondance_map`.
f2b60f7d 535 fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
2b03887a 536 let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
3c0e092e 537 let decorations = decoration_info.map(Decorations::new);
fc512014
XL
538 Classifier {
539 tokens,
540 in_attribute: false,
541 in_macro: false,
542 in_macro_nonterminal: false,
cdc7bbd5 543 byte_pos: 0,
94222f64 544 file_span,
cdc7bbd5 545 src,
3c0e092e 546 decorations,
cdc7bbd5
XL
547 }
548 }
549
94222f64
XL
550 /// Convenient wrapper to create a [`Span`] from a position in the file.
551 fn new_span(&self, lo: u32, text: &str) -> Span {
552 let hi = lo + text.len() as u32;
553 let file_lo = self.file_span.lo();
554 self.file_span.with_lo(file_lo + BytePos(lo)).with_hi(file_lo + BytePos(hi))
555 }
556
cdc7bbd5
XL
557 /// Concatenate colons and idents as one when possible.
558 fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> {
559 let start = self.byte_pos as usize;
560 let mut pos = start;
561 let mut has_ident = false;
cdc7bbd5
XL
562
563 loop {
564 let mut nb = 0;
565 while let Some((TokenKind::Colon, _)) = self.tokens.peek() {
566 self.tokens.next();
567 nb += 1;
568 }
569 // Ident path can start with "::" but if we already have content in the ident path,
570 // the "::" is mandatory.
571 if has_ident && nb == 0 {
572 return vec![(TokenKind::Ident, start, pos)];
573 } else if nb != 0 && nb != 2 {
574 if has_ident {
575 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
576 } else {
94222f64 577 return vec![(TokenKind::Colon, start, pos + nb)];
cdc7bbd5
XL
578 }
579 }
580
94222f64 581 if let Some((None, text)) = self.tokens.peek().map(|(token, text)| {
cdc7bbd5 582 if *token == TokenKind::Ident {
f2b60f7d 583 let class = get_real_ident_class(text, true);
cdc7bbd5
XL
584 (class, text)
585 } else {
586 // Doesn't matter which Class we put in here...
94222f64 587 (Some(Class::Comment), text)
cdc7bbd5
XL
588 }
589 }) {
590 // We only "add" the colon if there is an ident behind.
591 pos += text.len() + nb;
592 has_ident = true;
593 self.tokens.next();
594 } else if nb > 0 && has_ident {
595 return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)];
596 } else if nb > 0 {
94222f64 597 return vec![(TokenKind::Colon, start, start + nb)];
cdc7bbd5
XL
598 } else if has_ident {
599 return vec![(TokenKind::Ident, start, pos)];
600 } else {
601 return Vec::new();
602 }
603 }
604 }
605
94222f64
XL
606 /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct.
607 ///
608 /// It returns the token's kind, the token as a string and its byte position in the source
609 /// string.
9c376795 610 fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> {
cdc7bbd5 611 if let Some((kind, text)) = self.tokens.next() {
94222f64 612 let before = self.byte_pos;
cdc7bbd5 613 self.byte_pos += text.len() as u32;
94222f64 614 Some((kind, text, before))
cdc7bbd5
XL
615 } else {
616 None
fc512014 617 }
ea8adc8c
XL
618 }
619
1b1a35ee 620 /// Exhausts the `Classifier` writing the output into `sink`.
a7813a04
XL
621 ///
622 /// The general structure for this method is to iterate over each token,
1b1a35ee
XL
623 /// possibly giving it an HTML span with a class specifying what flavor of
624 /// token is used.
9c376795 625 fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>)) {
17df50a5 626 loop {
3c0e092e
XL
627 if let Some(decs) = self.decorations.as_mut() {
628 let byte_pos = self.byte_pos;
629 let n_starts = decs.starts.iter().filter(|(i, _)| byte_pos >= *i).count();
630 for (_, kind) in decs.starts.drain(0..n_starts) {
631 sink(Highlight::EnterSpan { class: Class::Decoration(kind) });
632 }
633
634 let n_ends = decs.ends.iter().filter(|i| byte_pos >= **i).count();
635 for _ in decs.ends.drain(0..n_ends) {
636 sink(Highlight::ExitSpan);
637 }
638 }
639
17df50a5
XL
640 if self
641 .tokens
642 .peek()
643 .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident))
644 .unwrap_or(false)
645 {
646 let tokens = self.get_full_ident_path();
94222f64
XL
647 for (token, start, end) in &tokens {
648 let text = &self.src[*start..*end];
649 self.advance(*token, text, sink, *start as u32);
17df50a5 650 self.byte_pos += text.len() as u32;
cdc7bbd5 651 }
94222f64
XL
652 if !tokens.is_empty() {
653 continue;
654 }
3dfed10e 655 }
94222f64
XL
656 if let Some((token, text, before)) = self.next() {
657 self.advance(token, text, sink, before);
17df50a5
XL
658 } else {
659 break;
660 }
661 }
a7813a04
XL
662 }
663
94222f64
XL
664 /// Single step of highlighting. This will classify `token`, but maybe also a couple of
665 /// following ones as well.
666 ///
667 /// `before` is the position of the given token in the `source` string and is used as "lo" byte
668 /// in case we want to try to generate a link for this token using the
669 /// `span_correspondance_map`.
670 fn advance(
671 &mut self,
672 token: TokenKind,
9c376795
FG
673 text: &'src str,
674 sink: &mut dyn FnMut(Highlight<'src>),
94222f64
XL
675 before: u32,
676 ) {
1b1a35ee 677 let lookahead = self.peek();
fc512014 678 let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None });
1b1a35ee 679 let class = match token {
fc512014 680 TokenKind::Whitespace => return no_highlight(sink),
1b1a35ee
XL
681 TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => {
682 if doc_style.is_some() {
683 Class::DocComment
684 } else {
685 Class::Comment
686 }
dfeec247 687 }
a7813a04
XL
688 // Consider this as part of a macro invocation if there was a
689 // leading identifier.
1b1a35ee 690 TokenKind::Bang if self.in_macro => {
a7813a04 691 self.in_macro = false;
cdc7bbd5
XL
692 sink(Highlight::Token { text, class: None });
693 sink(Highlight::ExitSpan);
694 return;
a7813a04 695 }
1a4d82fc 696
1b1a35ee
XL
697 // Assume that '&' or '*' is the reference or dereference operator
698 // or a reference or pointer type. Unless, of course, it looks like
699 // a logical and or a multiplication operator: `&&` or `* `.
3c0e092e 700 TokenKind::Star => match self.tokens.peek() {
f2b60f7d 701 Some((TokenKind::Whitespace, _)) => return no_highlight(sink),
3c0e092e
XL
702 Some((TokenKind::Ident, "mut")) => {
703 self.next();
704 sink(Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) });
705 return;
706 }
707 Some((TokenKind::Ident, "const")) => {
708 self.next();
709 sink(Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) });
710 return;
711 }
1b1a35ee
XL
712 _ => Class::RefKeyWord,
713 },
3c0e092e
XL
714 TokenKind::And => match self.tokens.peek() {
715 Some((TokenKind::And, _)) => {
cdc7bbd5 716 self.next();
f2b60f7d 717 sink(Highlight::Token { text: "&&", class: None });
1b1a35ee
XL
718 return;
719 }
3c0e092e 720 Some((TokenKind::Eq, _)) => {
cdc7bbd5 721 self.next();
f2b60f7d 722 sink(Highlight::Token { text: "&=", class: None });
1b1a35ee
XL
723 return;
724 }
f2b60f7d 725 Some((TokenKind::Whitespace, _)) => return no_highlight(sink),
3c0e092e
XL
726 Some((TokenKind::Ident, "mut")) => {
727 self.next();
728 sink(Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) });
729 return;
730 }
1b1a35ee
XL
731 _ => Class::RefKeyWord,
732 },
733
c295e0f8
XL
734 // These can either be operators, or arrows.
735 TokenKind::Eq => match lookahead {
736 Some(TokenKind::Eq) => {
737 self.next();
f2b60f7d 738 sink(Highlight::Token { text: "==", class: None });
c295e0f8
XL
739 return;
740 }
741 Some(TokenKind::Gt) => {
742 self.next();
743 sink(Highlight::Token { text: "=>", class: None });
744 return;
745 }
f2b60f7d 746 _ => return no_highlight(sink),
c295e0f8
XL
747 },
748 TokenKind::Minus if lookahead == Some(TokenKind::Gt) => {
749 self.next();
750 sink(Highlight::Token { text: "->", class: None });
751 return;
752 }
753
754 // Other operators.
1b1a35ee
XL
755 TokenKind::Minus
756 | TokenKind::Plus
757 | TokenKind::Or
758 | TokenKind::Slash
759 | TokenKind::Caret
760 | TokenKind::Percent
761 | TokenKind::Bang
1b1a35ee 762 | TokenKind::Lt
f2b60f7d 763 | TokenKind::Gt => return no_highlight(sink),
1a4d82fc 764
a7813a04 765 // Miscellaneous, no highlighting.
1b1a35ee
XL
766 TokenKind::Dot
767 | TokenKind::Semi
768 | TokenKind::Comma
769 | TokenKind::OpenParen
770 | TokenKind::CloseParen
771 | TokenKind::OpenBrace
772 | TokenKind::CloseBrace
773 | TokenKind::OpenBracket
774 | TokenKind::At
775 | TokenKind::Tilde
776 | TokenKind::Colon
fc512014 777 | TokenKind::Unknown => return no_highlight(sink),
1b1a35ee
XL
778
779 TokenKind::Question => Class::QuestionMark,
780
781 TokenKind::Dollar => match lookahead {
782 Some(TokenKind::Ident) => {
a7813a04
XL
783 self.in_macro_nonterminal = true;
784 Class::MacroNonTerminal
1a4d82fc 785 }
fc512014 786 _ => return no_highlight(sink),
1b1a35ee 787 },
1a4d82fc 788
ea8adc8c 789 // This might be the start of an attribute. We're going to want to
1a4d82fc
JJ
790 // continue highlighting it as an attribute until the ending ']' is
791 // seen, so skip out early. Down below we terminate the attribute
792 // span when we see the ']'.
1b1a35ee
XL
793 TokenKind::Pound => {
794 match lookahead {
795 // Case 1: #![inner_attribute]
796 Some(TokenKind::Bang) => {
cdc7bbd5 797 self.next();
1b1a35ee
XL
798 if let Some(TokenKind::OpenBracket) = self.peek() {
799 self.in_attribute = true;
800 sink(Highlight::EnterSpan { class: Class::Attribute });
801 }
fc512014
XL
802 sink(Highlight::Token { text: "#", class: None });
803 sink(Highlight::Token { text: "!", class: None });
1b1a35ee
XL
804 return;
805 }
806 // Case 2: #[outer_attribute]
807 Some(TokenKind::OpenBracket) => {
ea8adc8c 808 self.in_attribute = true;
1b1a35ee 809 sink(Highlight::EnterSpan { class: Class::Attribute });
ea8adc8c 810 }
1b1a35ee 811 _ => (),
ea8adc8c 812 }
fc512014 813 return no_highlight(sink);
1a4d82fc 814 }
1b1a35ee 815 TokenKind::CloseBracket => {
a7813a04
XL
816 if self.in_attribute {
817 self.in_attribute = false;
fc512014 818 sink(Highlight::Token { text: "]", class: None });
1b1a35ee
XL
819 sink(Highlight::ExitSpan);
820 return;
1a4d82fc 821 }
fc512014 822 return no_highlight(sink);
1a4d82fc 823 }
1b1a35ee
XL
824 TokenKind::Literal { kind, .. } => match kind {
825 // Text literals.
826 LiteralKind::Byte { .. }
827 | LiteralKind::Char { .. }
828 | LiteralKind::Str { .. }
829 | LiteralKind::ByteStr { .. }
830 | LiteralKind::RawStr { .. }
831 | LiteralKind::RawByteStr { .. } => Class::String,
832 // Number literals.
833 LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
834 },
835 TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => {
836 self.in_macro = true;
923072b8 837 sink(Highlight::EnterSpan { class: Class::Macro(self.new_span(before, text)) });
cdc7bbd5
XL
838 sink(Highlight::Token { text, class: None });
839 return;
1a4d82fc 840 }
f2b60f7d 841 TokenKind::Ident => match get_real_ident_class(text, false) {
94222f64 842 None => match text {
cdc7bbd5
XL
843 "Option" | "Result" => Class::PreludeTy,
844 "Some" | "None" | "Ok" | "Err" => Class::PreludeVal,
c295e0f8
XL
845 // "union" is a weak keyword and is only considered as a keyword when declaring
846 // a union type.
847 "union" if self.check_if_is_union_keyword() => Class::KeyWord,
cdc7bbd5
XL
848 _ if self.in_macro_nonterminal => {
849 self.in_macro_nonterminal = false;
850 Class::MacroNonTerminal
851 }
94222f64
XL
852 "self" | "Self" => Class::Self_(self.new_span(before, text)),
853 _ => Class::Ident(self.new_span(before, text)),
cdc7bbd5 854 },
94222f64 855 Some(c) => c,
dfeec247 856 },
3c0e092e 857 TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
94222f64
XL
858 Class::Ident(self.new_span(before, text))
859 }
1b1a35ee 860 TokenKind::Lifetime { .. } => Class::Lifetime,
2b03887a 861 TokenKind::Eof => panic!("Eof in advance"),
1a4d82fc 862 };
a7813a04
XL
863 // Anything that didn't return above is the simple case where the
864 // class just spans a single token, so we can use the `string` method.
fc512014 865 sink(Highlight::Token { text, class: Some(class) });
1a4d82fc
JJ
866 }
867
1b1a35ee 868 fn peek(&mut self) -> Option<TokenKind> {
c295e0f8
XL
869 self.tokens.peek().map(|(token_kind, _text)| *token_kind)
870 }
871
872 fn check_if_is_union_keyword(&mut self) -> bool {
873 while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) {
874 if *kind == TokenKind::Whitespace {
875 continue;
876 }
877 return *kind == TokenKind::Ident;
878 }
879 false
a7813a04
XL
880 }
881}
882
1b1a35ee
XL
883/// Called when we start processing a span of text that should be highlighted.
884/// The `Class` argument specifies how it should be highlighted.
923072b8
FG
885fn enter_span(
886 out: &mut Buffer,
887 klass: Class,
9c376795 888 href_context: &Option<HrefContext<'_, '_>>,
923072b8 889) -> &'static str {
f2b60f7d 890 string_without_closing_tag(out, "", Some(klass), href_context, true).expect(
923072b8
FG
891 "internal error: enter_span was called with Some(klass) but did not return a \
892 closing HTML tag",
893 )
54a0048b
SL
894}
895
1b1a35ee 896/// Called at the end of a span of highlighted text.
923072b8
FG
897fn exit_span(out: &mut Buffer, closing_tag: &str) {
898 out.write_str(closing_tag);
54a0048b
SL
899}
900
1b1a35ee
XL
901/// Called for a span of text. If the text should be highlighted differently
902/// from the surrounding text, then the `Class` argument will be a value other
903/// than `None`.
904///
905/// The following sequences of callbacks are equivalent:
906/// ```plain
907/// enter_span(Foo), string("text", None), exit_span()
908/// string("text", Foo)
909/// ```
94222f64 910///
1b1a35ee
XL
911/// The latter can be thought of as a shorthand for the former, which is more
912/// flexible.
94222f64
XL
913///
914/// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function
915/// will then try to find this `span` in the `span_correspondance_map`. If found, it'll then
916/// generate a link for this element (which corresponds to where its definition is located).
917fn string<T: Display>(
918 out: &mut Buffer,
919 text: T,
920 klass: Option<Class>,
9c376795 921 href_context: &Option<HrefContext<'_, '_>>,
f2b60f7d 922 open_tag: bool,
94222f64 923) {
f2b60f7d
FG
924 if let Some(closing_tag) = string_without_closing_tag(out, text, klass, href_context, open_tag)
925 {
923072b8
FG
926 out.write_str(closing_tag);
927 }
928}
929
930/// This function writes `text` into `out` with some modifications depending on `klass`:
931///
932/// * If `klass` is `None`, `text` is written into `out` with no modification.
933/// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a
934/// `<span>` with the provided `klass`.
935/// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>`
936/// element) by retrieving the link information from the `span_correspondance_map` that was filled
937/// in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's
938/// the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]).
939fn string_without_closing_tag<T: Display>(
940 out: &mut Buffer,
941 text: T,
942 klass: Option<Class>,
9c376795 943 href_context: &Option<HrefContext<'_, '_>>,
f2b60f7d 944 open_tag: bool,
923072b8 945) -> Option<&'static str> {
5e7ed085 946 let Some(klass) = klass
923072b8
FG
947 else {
948 write!(out, "{}", text);
949 return None;
950 };
5e7ed085
FG
951 let Some(def_span) = klass.get_span()
952 else {
f2b60f7d
FG
953 if !open_tag {
954 write!(out, "{}", text);
955 return None;
956 }
923072b8
FG
957 write!(out, "<span class=\"{}\">{}", klass.as_html(), text);
958 return Some("</span>");
94222f64 959 };
923072b8 960
94222f64
XL
961 let mut text_s = text.to_string();
962 if text_s.contains("::") {
963 text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| {
964 match t {
965 "self" | "Self" => write!(
966 &mut path,
967 "<span class=\"{}\">{}</span>",
968 Class::Self_(DUMMY_SP).as_html(),
969 t
970 ),
971 "crate" | "super" => {
972 write!(&mut path, "<span class=\"{}\">{}</span>", Class::KeyWord.as_html(), t)
973 }
974 t => write!(&mut path, "{}", t),
975 }
976 .expect("Failed to build source HTML path");
977 path
978 });
979 }
f2b60f7d 980
923072b8 981 if let Some(href_context) = href_context {
94222f64 982 if let Some(href) =
923072b8
FG
983 href_context.context.shared.span_correspondance_map.get(&def_span).and_then(|href| {
984 let context = href_context.context;
94222f64
XL
985 // FIXME: later on, it'd be nice to provide two links (if possible) for all items:
986 // one to the documentation page and one to the source definition.
987 // FIXME: currently, external items only generate a link to their documentation,
988 // a link to their definition can be generated using this:
989 // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338
990 match href {
f2b60f7d 991 LinkFromSrc::Local(span) => {
9c376795 992 context.href_from_span_relative(*span, &href_context.current_href)
f2b60f7d 993 }
94222f64 994 LinkFromSrc::External(def_id) => {
923072b8 995 format::href_with_root_path(*def_id, context, Some(href_context.root_path))
94222f64
XL
996 .ok()
997 .map(|(url, _, _)| url)
998 }
c295e0f8 999 LinkFromSrc::Primitive(prim) => format::href_with_root_path(
3c0e092e 1000 PrimitiveType::primitive_locations(context.tcx())[prim],
c295e0f8 1001 context,
923072b8 1002 Some(href_context.root_path),
c295e0f8
XL
1003 )
1004 .ok()
1005 .map(|(url, _, _)| url),
94222f64
XL
1006 }
1007 })
1008 {
f2b60f7d
FG
1009 if !open_tag {
1010 // We're already inside an element which has the same klass, no need to give it
1011 // again.
1012 write!(out, "<a href=\"{}\">{}", href, text_s);
1013 } else {
1014 let klass_s = klass.as_html();
1015 if klass_s.is_empty() {
1016 write!(out, "<a href=\"{}\">{}", href, text_s);
1017 } else {
1018 write!(out, "<a class=\"{}\" href=\"{}\">{}", klass_s, href, text_s);
1019 }
1020 }
923072b8 1021 return Some("</a>");
94222f64 1022 }
1b1a35ee 1023 }
f2b60f7d
FG
1024 if !open_tag {
1025 write!(out, "{}", text_s);
1026 return None;
1027 }
1028 let klass_s = klass.as_html();
1029 if klass_s.is_empty() {
1030 write!(out, "{}", text_s);
1031 Some("")
1032 } else {
1033 write!(out, "<span class=\"{}\">{}", klass_s, text_s);
1034 Some("</span>")
1035 }
1a4d82fc 1036}
3dfed10e
XL
1037
1038#[cfg(test)]
1039mod tests;