compiler/rustc_resolve/src/rustdoc.rs

   1 use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
   2 use rustc_ast as ast;
   3 use rustc_ast::util::comments::beautify_doc_string;
   4 use rustc_data_structures::fx::FxHashMap;
   5 use rustc_middle::ty::TyCtxt;
   6 use rustc_span::def_id::DefId;
   7 use rustc_span::symbol::{kw, sym, Symbol};
   8 use rustc_span::{InnerSpan, Span, DUMMY_SP};
   9 use std::ops::Range;
  10 use std::{cmp, mem};
  11
  12 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
  13 pub enum DocFragmentKind {
  14     /// A doc fragment created from a `///` or `//!` doc comment.
  15     SugaredDoc,
  16     /// A doc fragment created from a "raw" `#[doc=""]` attribute.
  17     RawDoc,
  18 }
  19
  20 /// A portion of documentation, extracted from a `#[doc]` attribute.
  21 ///
  22 /// Each variant contains the line number within the complete doc-comment where the fragment
  23 /// starts, as well as the Span where the corresponding doc comment or attribute is located.
  24 ///
  25 /// Included files are kept separate from inline doc comments so that proper line-number
  26 /// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
  27 /// kept separate because of issue #42760.
  28 #[derive(Clone, PartialEq, Eq, Debug)]
  29 pub struct DocFragment {
  30     pub span: Span,
  31     /// The item this doc-comment came from.
  32     /// Used to determine the scope in which doc links in this fragment are resolved.
  33     /// Typically filled for reexport docs when they are merged into the docs of the
  34     /// original reexported item.
  35     /// If the id is not filled, which happens for the original reexported item, then
  36     /// it has to be taken from somewhere else during doc link resolution.
  37     pub item_id: Option<DefId>,
  38     pub doc: Symbol,
  39     pub kind: DocFragmentKind,
  40     pub indent: usize,
  41 }
  42
  43 #[derive(Clone, Copy, Debug)]
  44 pub enum MalformedGenerics {
  45     /// This link has unbalanced angle brackets.
  46     ///
  47     /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
  48     UnbalancedAngleBrackets,
  49     /// The generics are not attached to a type.
  50     ///
  51     /// For example, `<T>` should trigger this.
  52     ///
  53     /// This is detected by checking if the path is empty after the generics are stripped.
  54     MissingType,
  55     /// The link uses fully-qualified syntax, which is currently unsupported.
  56     ///
  57     /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
  58     ///
  59     /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
  60     /// angle brackets.
  61     HasFullyQualifiedSyntax,
  62     /// The link has an invalid path separator.
  63     ///
  64     /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
  65     /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
  66     /// called.
  67     ///
  68     /// Note that this will also **not** be triggered if the invalid path separator is inside angle
  69     /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
  70     /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
  71     ///
  72     /// This is detected by checking if there is a colon followed by a non-colon in the link.
  73     InvalidPathSeparator,
  74     /// The link has too many angle brackets.
  75     ///
  76     /// For example, `Vec<<T>>` should trigger this.
  77     TooManyAngleBrackets,
  78     /// The link has empty angle brackets.
  79     ///
  80     /// For example, `Vec<>` should trigger this.
  81     EmptyAngleBrackets,
  82 }
  83
  84 /// Removes excess indentation on comments in order for the Markdown
  85 /// to be parsed correctly. This is necessary because the convention for
  86 /// writing documentation is to provide a space between the /// or //! marker
  87 /// and the doc text, but Markdown is whitespace-sensitive. For example,
  88 /// a block of text with four-space indentation is parsed as a code block,
  89 /// so if we didn't unindent comments, these list items
  90 ///
  91 /// /// A list:
  92 /// ///
  93 /// ///    - Foo
  94 /// ///    - Bar
  95 ///
  96 /// would be parsed as if they were in a code block, which is likely not what the user intended.
  97 pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
  98     // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
  99     // fragments kind's lines are never starting with a whitespace unless they are using some
 100     // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
 101     // we need to take into account the fact that the minimum indent minus one (to take this
 102     // whitespace into account).
 103     //
 104     // For example:
 105     //
 106     // /// hello!
 107     // #[doc = "another"]
 108     //
 109     // In this case, you want "hello! another" and not "hello!  another".
 110     let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
 111         && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
 112     {
 113         // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
 114         // "decide" how much the minimum indent will be.
 115         1
 116     } else {
 117         0
 118     };
 119
 120     // `min_indent` is used to know how much whitespaces from the start of each lines must be
 121     // removed. Example:
 122     //
 123     // ///     hello!
 124     // #[doc = "another"]
 125     //
 126     // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
 127     // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
 128     // (5 - 1) whitespaces.
 129     let Some(min_indent) = docs
 130         .iter()
 131         .map(|fragment| {
 132             fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
 133                 if line.chars().all(|c| c.is_whitespace()) {
 134                     min_indent
 135                 } else {
 136                     // Compare against either space or tab, ignoring whether they are
 137                     // mixed or not.
 138                     let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
 139                     cmp::min(min_indent, whitespace)
 140                         + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
 141                 }
 142             })
 143         })
 144         .min()
 145     else {
 146         return;
 147     };
 148
 149     for fragment in docs {
 150         if fragment.doc == kw::Empty {
 151             continue;
 152         }
 153
 154         let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
 155             min_indent - add
 156         } else {
 157             min_indent
 158         };
 159
 160         fragment.indent = min_indent;
 161     }
 162 }
 163
 164 /// The goal of this function is to apply the `DocFragment` transformation that is required when
 165 /// transforming into the final Markdown, which is applying the computed indent to each line in
 166 /// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
 167 ///
 168 /// Note: remove the trailing newline where appropriate
 169 pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
 170     let s = frag.doc.as_str();
 171     let mut iter = s.lines();
 172     if s.is_empty() {
 173         out.push('\n');
 174         return;
 175     }
 176     while let Some(line) = iter.next() {
 177         if line.chars().any(|c| !c.is_whitespace()) {
 178             assert!(line.len() >= frag.indent);
 179             out.push_str(&line[frag.indent..]);
 180         } else {
 181             out.push_str(line);
 182         }
 183         out.push('\n');
 184     }
 185 }
 186
 187 pub fn attrs_to_doc_fragments<'a>(
 188     attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
 189     doc_only: bool,
 190 ) -> (Vec<DocFragment>, ast::AttrVec) {
 191     let mut doc_fragments = Vec::new();
 192     let mut other_attrs = ast::AttrVec::new();
 193     for (attr, item_id) in attrs {
 194         if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
 195             let doc = beautify_doc_string(doc_str, comment_kind);
 196             let kind = if attr.is_doc_comment() {
 197                 DocFragmentKind::SugaredDoc
 198             } else {
 199                 DocFragmentKind::RawDoc
 200             };
 201             let fragment = DocFragment { span: attr.span, doc, kind, item_id, indent: 0 };
 202             doc_fragments.push(fragment);
 203         } else if !doc_only {
 204             other_attrs.push(attr.clone());
 205         }
 206     }
 207
 208     unindent_doc_fragments(&mut doc_fragments);
 209
 210     (doc_fragments, other_attrs)
 211 }
 212
 213 /// Return the doc-comments on this item, grouped by the module they came from.
 214 /// The module can be different if this is a re-export with added documentation.
 215 ///
 216 /// The last newline is not trimmed so the produced strings are reusable between
 217 /// early and late doc link resolution regardless of their position.
 218 pub fn prepare_to_doc_link_resolution(
 219     doc_fragments: &[DocFragment],
 220 ) -> FxHashMap<Option<DefId>, String> {
 221     let mut res = FxHashMap::default();
 222     for fragment in doc_fragments {
 223         let out_str = res.entry(fragment.item_id).or_default();
 224         add_doc_fragment(out_str, fragment);
 225     }
 226     res
 227 }
 228
 229 /// Options for rendering Markdown in the main body of documentation.
 230 pub fn main_body_opts() -> Options {
 231     Options::ENABLE_TABLES
 232         | Options::ENABLE_FOOTNOTES
 233         | Options::ENABLE_STRIKETHROUGH
 234         | Options::ENABLE_TASKLISTS
 235         | Options::ENABLE_SMART_PUNCTUATION
 236 }
 237
 238 fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
 239     let mut stripped_segment = String::new();
 240     let mut param_depth = 0;
 241
 242     let mut latest_generics_chunk = String::new();
 243
 244     for c in segment {
 245         if c == '<' {
 246             param_depth += 1;
 247             latest_generics_chunk.clear();
 248         } else if c == '>' {
 249             param_depth -= 1;
 250             if latest_generics_chunk.contains(" as ") {
 251                 // The segment tries to use fully-qualified syntax, which is currently unsupported.
 252                 // Give a helpful error message instead of completely ignoring the angle brackets.
 253                 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
 254             }
 255         } else {
 256             if param_depth == 0 {
 257                 stripped_segment.push(c);
 258             } else {
 259                 latest_generics_chunk.push(c);
 260             }
 261         }
 262     }
 263
 264     if param_depth == 0 {
 265         Ok(stripped_segment)
 266     } else {
 267         // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
 268         Err(MalformedGenerics::UnbalancedAngleBrackets)
 269     }
 270 }
 271
 272 pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
 273     if !path_str.contains(['<', '>']) {
 274         return Ok(path_str.into());
 275     }
 276     let mut stripped_segments = vec![];
 277     let mut path = path_str.chars().peekable();
 278     let mut segment = Vec::new();
 279
 280     while let Some(chr) = path.next() {
 281         match chr {
 282             ':' => {
 283                 if path.next_if_eq(&':').is_some() {
 284                     let stripped_segment =
 285                         strip_generics_from_path_segment(mem::take(&mut segment))?;
 286                     if !stripped_segment.is_empty() {
 287                         stripped_segments.push(stripped_segment);
 288                     }
 289                 } else {
 290                     return Err(MalformedGenerics::InvalidPathSeparator);
 291                 }
 292             }
 293             '<' => {
 294                 segment.push(chr);
 295
 296                 match path.next() {
 297                     Some('<') => {
 298                         return Err(MalformedGenerics::TooManyAngleBrackets);
 299                     }
 300                     Some('>') => {
 301                         return Err(MalformedGenerics::EmptyAngleBrackets);
 302                     }
 303                     Some(chr) => {
 304                         segment.push(chr);
 305
 306                         while let Some(chr) = path.next_if(|c| *c != '>') {
 307                             segment.push(chr);
 308                         }
 309                     }
 310                     None => break,
 311                 }
 312             }
 313             _ => segment.push(chr),
 314         }
 315         trace!("raw segment: {:?}", segment);
 316     }
 317
 318     if !segment.is_empty() {
 319         let stripped_segment = strip_generics_from_path_segment(segment)?;
 320         if !stripped_segment.is_empty() {
 321             stripped_segments.push(stripped_segment);
 322         }
 323     }
 324
 325     debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
 326
 327     let stripped_path = stripped_segments.join("::");
 328
 329     if !stripped_path.is_empty() {
 330         Ok(stripped_path.into())
 331     } else {
 332         Err(MalformedGenerics::MissingType)
 333     }
 334 }
 335
 336 /// Returns whether the first doc-comment is an inner attribute.
 337 ///
 338 //// If there are no doc-comments, return true.
 339 /// FIXME(#78591): Support both inner and outer attributes on the same item.
 340 pub fn inner_docs(attrs: &[ast::Attribute]) -> bool {
 341     attrs.iter().find(|a| a.doc_str().is_some()).map_or(true, |a| a.style == ast::AttrStyle::Inner)
 342 }
 343
 344 /// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
 345 pub fn has_primitive_or_keyword_docs(attrs: &[ast::Attribute]) -> bool {
 346     for attr in attrs {
 347         if attr.has_name(sym::rustc_doc_primitive) {
 348             return true;
 349         } else if attr.has_name(sym::doc) && let Some(items) = attr.meta_item_list() {
 350             for item in items {
 351                 if item.has_name(sym::keyword) {
 352                     return true;
 353                 }
 354             }
 355         }
 356     }
 357     false
 358 }
 359
 360 /// Simplified version of the corresponding function in rustdoc.
 361 /// If the rustdoc version returns a successful result, this function must return the same result.
 362 /// Otherwise this function may return anything.
 363 fn preprocess_link(link: &str) -> Box<str> {
 364     let link = link.replace('`', "");
 365     let link = link.split('#').next().unwrap();
 366     let link = link.trim();
 367     let link = link.rsplit('@').next().unwrap();
 368     let link = link.strip_suffix("()").unwrap_or(link);
 369     let link = link.strip_suffix("{}").unwrap_or(link);
 370     let link = link.strip_suffix("[]").unwrap_or(link);
 371     let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
 372     let link = link.trim();
 373     strip_generics_from_path(link).unwrap_or_else(|_| link.into())
 374 }
 375
 376 /// Keep inline and reference links `[]`,
 377 /// but skip autolinks `<>` which we never consider to be intra-doc links.
 378 pub fn may_be_doc_link(link_type: LinkType) -> bool {
 379     match link_type {
 380         LinkType::Inline
 381         | LinkType::Reference
 382         | LinkType::ReferenceUnknown
 383         | LinkType::Collapsed
 384         | LinkType::CollapsedUnknown
 385         | LinkType::Shortcut
 386         | LinkType::ShortcutUnknown => true,
 387         LinkType::Autolink | LinkType::Email => false,
 388     }
 389 }
 390
 391 /// Simplified version of `preprocessed_markdown_links` from rustdoc.
 392 /// Must return at least the same links as it, but may add some more links on top of that.
 393 pub(crate) fn attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>> {
 394     let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
 395     let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
 396
 397     parse_links(&doc)
 398 }
 399
 400 /// Similiar version of `markdown_links` from rustdoc.
 401 /// This will collect destination links and display text if exists.
 402 fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
 403     let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
 404     let mut event_iter = Parser::new_with_broken_link_callback(
 405         &doc,
 406         main_body_opts(),
 407         Some(&mut broken_link_callback),
 408     )
 409     .into_iter();
 410     let mut links = Vec::new();
 411
 412     while let Some(event) = event_iter.next() {
 413         match event {
 414             Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
 415                 if matches!(
 416                     link_type,
 417                     LinkType::Inline
 418                         | LinkType::ReferenceUnknown
 419                         | LinkType::Reference
 420                         | LinkType::Shortcut
 421                         | LinkType::ShortcutUnknown
 422                 ) {
 423                     if let Some(display_text) = collect_link_data(&mut event_iter) {
 424                         links.push(display_text);
 425                     }
 426                 }
 427
 428                 links.push(preprocess_link(&dest));
 429             }
 430             _ => {}
 431         }
 432     }
 433
 434     links
 435 }
 436
 437 /// Collects additional data of link.
 438 fn collect_link_data<'input, 'callback>(
 439     event_iter: &mut Parser<'input, 'callback>,
 440 ) -> Option<Box<str>> {
 441     let mut display_text: Option<String> = None;
 442     let mut append_text = |text: CowStr<'_>| {
 443         if let Some(display_text) = &mut display_text {
 444             display_text.push_str(&text);
 445         } else {
 446             display_text = Some(text.to_string());
 447         }
 448     };
 449
 450     while let Some(event) = event_iter.next() {
 451         match event {
 452             Event::Text(text) => {
 453                 append_text(text);
 454             }
 455             Event::Code(code) => {
 456                 append_text(code);
 457             }
 458             Event::End(_) => {
 459                 break;
 460             }
 461             _ => {}
 462         }
 463     }
 464
 465     display_text.map(String::into_boxed_str)
 466 }
 467
 468 /// Returns a span encompassing all the document fragments.
 469 pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
 470     if fragments.is_empty() {
 471         return None;
 472     }
 473     let start = fragments[0].span;
 474     if start == DUMMY_SP {
 475         return None;
 476     }
 477     let end = fragments.last().expect("no doc strings provided").span;
 478     Some(start.to(end))
 479 }
 480
 481 /// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
 482 ///
 483 /// This method will return `None` if we cannot construct a span from the source map or if the
 484 /// fragments are not all sugared doc comments. It's difficult to calculate the correct span in
 485 /// that case due to escaping and other source features.
 486 pub fn source_span_for_markdown_range(
 487     tcx: TyCtxt<'_>,
 488     markdown: &str,
 489     md_range: &Range<usize>,
 490     fragments: &[DocFragment],
 491 ) -> Option<Span> {
 492     let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
 493
 494     if !is_all_sugared_doc {
 495         return None;
 496     }
 497
 498     let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;
 499
 500     let starting_line = markdown[..md_range.start].matches('\n').count();
 501     let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
 502
 503     // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
 504     // CRLF and LF line endings the same way.
 505     let mut src_lines = snippet.split_terminator('\n');
 506     let md_lines = markdown.split_terminator('\n');
 507
 508     // The number of bytes from the source span to the markdown span that are not part
 509     // of the markdown, like comment markers.
 510     let mut start_bytes = 0;
 511     let mut end_bytes = 0;
 512
 513     'outer: for (line_no, md_line) in md_lines.enumerate() {
 514         loop {
 515             let source_line = src_lines.next()?;
 516             match source_line.find(md_line) {
 517                 Some(offset) => {
 518                     if line_no == starting_line {
 519                         start_bytes += offset;
 520
 521                         if starting_line == ending_line {
 522                             break 'outer;
 523                         }
 524                     } else if line_no == ending_line {
 525                         end_bytes += offset;
 526                         break 'outer;
 527                     } else if line_no < starting_line {
 528                         start_bytes += source_line.len() - md_line.len();
 529                     } else {
 530                         end_bytes += source_line.len() - md_line.len();
 531                     }
 532                     break;
 533                 }
 534                 None => {
 535                     // Since this is a source line that doesn't include a markdown line,
 536                     // we have to count the newline that we split from earlier.
 537                     if line_no <= starting_line {
 538                         start_bytes += source_line.len() + 1;
 539                     } else {
 540                         end_bytes += source_line.len() + 1;
 541                     }
 542                 }
 543             }
 544         }
 545     }
 546
 547     Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
 548         md_range.start + start_bytes,
 549         md_range.end + start_bytes + end_bytes,
 550     )))
 551 }