1 use pulldown_cmark
::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag}
;
3 use rustc_ast
::util
::comments
::beautify_doc_string
;
4 use rustc_data_structures
::fx
::FxHashMap
;
5 use rustc_middle
::ty
::TyCtxt
;
6 use rustc_span
::def_id
::DefId
;
7 use rustc_span
::symbol
::{kw, sym, Symbol}
;
8 use rustc_span
::{InnerSpan, Span, DUMMY_SP}
;
/// How a [`DocFragment`] was written in the source.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum DocFragmentKind {
    /// A doc fragment created from a `///` or `//!` doc comment.
    SugaredDoc,
    /// A doc fragment created from a "raw" `#[doc=""]` attribute.
    RawDoc,
}
20 /// A portion of documentation, extracted from a `#[doc]` attribute.
22 /// Each variant contains the line number within the complete doc-comment where the fragment
23 /// starts, as well as the Span where the corresponding doc comment or attribute is located.
25 /// Included files are kept separate from inline doc comments so that proper line-number
26 /// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
27 /// kept separate because of issue #42760.
28 #[derive(Clone, PartialEq, Eq, Debug)]
29 pub struct DocFragment
{
31 /// The item this doc-comment came from.
32 /// Used to determine the scope in which doc links in this fragment are resolved.
33 /// Typically filled for reexport docs when they are merged into the docs of the
34 /// original reexported item.
35 /// If the id is not filled, which happens for the original reexported item, then
36 /// it has to be taken from somewhere else during doc link resolution.
37 pub item_id
: Option
<DefId
>,
39 pub kind
: DocFragmentKind
,
/// The reasons why stripping generics from a doc-link path can fail.
#[derive(Clone, Copy, Debug)]
pub enum MalformedGenerics {
    /// This link has unbalanced angle brackets.
    ///
    /// For example, `Vec<T` should trigger this, as should `Vec<T>>`.
    UnbalancedAngleBrackets,
    /// The generics are not attached to a type.
    ///
    /// For example, `<T>` should trigger this.
    ///
    /// This is detected by checking if the path is empty after the generics are stripped.
    MissingType,
    /// The link uses fully-qualified syntax, which is currently unsupported.
    ///
    /// For example, `<Vec as IntoIterator>::into_iter` should trigger this.
    ///
    /// This is detected by checking if ` as ` (the keyword `as` with spaces around it) is inside
    /// angle brackets.
    HasFullyQualifiedSyntax,
    /// The link has an invalid path separator.
    ///
    /// For example, `Vec:<T>:new()` should trigger this. Note that `Vec:new()` will **not**
    /// trigger this because it has no generics and thus [`strip_generics_from_path`] will not be
    /// called.
    ///
    /// Note that this will also **not** be triggered if the invalid path separator is inside angle
    /// brackets because rustdoc mostly ignores what's inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// This is detected by checking if there is a colon followed by a non-colon in the link.
    InvalidPathSeparator,
    /// The link has too many angle brackets.
    ///
    /// For example, `Vec<<T>>` should trigger this.
    TooManyAngleBrackets,
    /// The link has empty angle brackets.
    ///
    /// For example, `Vec<>` should trigger this.
    EmptyAngleBrackets,
}
84 /// Removes excess indentation on comments in order for the Markdown
85 /// to be parsed correctly. This is necessary because the convention for
86 /// writing documentation is to provide a space between the /// or //! marker
87 /// and the doc text, but Markdown is whitespace-sensitive. For example,
88 /// a block of text with four-space indentation is parsed as a code block,
89 /// so if we didn't unindent comments, these list items
96 /// would be parsed as if they were in a code block, which is likely not what the user intended.
97 pub fn unindent_doc_fragments(docs
: &mut [DocFragment
]) {
98 // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
99 // fragments kind's lines are never starting with a whitespace unless they are using some
100 // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
101 // we need to take into account the fact that the minimum indent minus one (to take this
102 // whitespace into account).
107 // #[doc = "another"]
109 // In this case, you want "hello! another" and not "hello! another".
110 let add
= if docs
.windows(2).any(|arr
| arr
[0].kind
!= arr
[1].kind
)
111 && docs
.iter().any(|d
| d
.kind
== DocFragmentKind
::SugaredDoc
)
113 // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
114 // "decide" how much the minimum indent will be.
120 // `min_indent` is used to know how much whitespaces from the start of each lines must be
124 // #[doc = "another"]
126 // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
127 // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
128 // (5 - 1) whitespaces.
129 let Some(min_indent
) = docs
132 fragment
.doc
.as_str().lines().fold(usize::MAX
, |min_indent
, line
| {
133 if line
.chars().all(|c
| c
.is_whitespace()) {
136 // Compare against either space or tab, ignoring whether they are
138 let whitespace
= line
.chars().take_while(|c
| *c
== ' '
|| *c
== '
\t'
).count();
139 cmp
::min(min_indent
, whitespace
)
140 + if fragment
.kind
== DocFragmentKind
::SugaredDoc { 0 }
else { add }
149 for fragment
in docs
{
150 if fragment
.doc
== kw
::Empty
{
154 let min_indent
= if fragment
.kind
!= DocFragmentKind
::SugaredDoc
&& min_indent
> 0 {
160 fragment
.indent
= min_indent
;
164 /// The goal of this function is to apply the `DocFragment` transformation that is required when
165 /// transforming into the final Markdown, which is applying the computed indent to each line in
166 /// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
168 /// Note: remove the trailing newline where appropriate
169 pub fn add_doc_fragment(out
: &mut String
, frag
: &DocFragment
) {
170 let s
= frag
.doc
.as_str();
171 let mut iter
= s
.lines();
176 while let Some(line
) = iter
.next() {
177 if line
.chars().any(|c
| !c
.is_whitespace()) {
178 assert
!(line
.len() >= frag
.indent
);
179 out
.push_str(&line
[frag
.indent
..]);
187 pub fn attrs_to_doc_fragments
<'a
>(
188 attrs
: impl Iterator
<Item
= (&'a ast
::Attribute
, Option
<DefId
>)>,
190 ) -> (Vec
<DocFragment
>, ast
::AttrVec
) {
191 let mut doc_fragments
= Vec
::new();
192 let mut other_attrs
= ast
::AttrVec
::new();
193 for (attr
, item_id
) in attrs
{
194 if let Some((doc_str
, comment_kind
)) = attr
.doc_str_and_comment_kind() {
195 let doc
= beautify_doc_string(doc_str
, comment_kind
);
196 let kind
= if attr
.is_doc_comment() {
197 DocFragmentKind
::SugaredDoc
199 DocFragmentKind
::RawDoc
201 let fragment
= DocFragment { span: attr.span, doc, kind, item_id, indent: 0 }
;
202 doc_fragments
.push(fragment
);
203 } else if !doc_only
{
204 other_attrs
.push(attr
.clone());
208 unindent_doc_fragments(&mut doc_fragments
);
210 (doc_fragments
, other_attrs
)
213 /// Return the doc-comments on this item, grouped by the module they came from.
214 /// The module can be different if this is a re-export with added documentation.
216 /// The last newline is not trimmed so the produced strings are reusable between
217 /// early and late doc link resolution regardless of their position.
218 pub fn prepare_to_doc_link_resolution(
219 doc_fragments
: &[DocFragment
],
220 ) -> FxHashMap
<Option
<DefId
>, String
> {
221 let mut res
= FxHashMap
::default();
222 for fragment
in doc_fragments
{
223 let out_str
= res
.entry(fragment
.item_id
).or_default();
224 add_doc_fragment(out_str
, fragment
);
229 /// Options for rendering Markdown in the main body of documentation.
230 pub fn main_body_opts() -> Options
{
231 Options
::ENABLE_TABLES
232 | Options
::ENABLE_FOOTNOTES
233 | Options
::ENABLE_STRIKETHROUGH
234 | Options
::ENABLE_TASKLISTS
235 | Options
::ENABLE_SMART_PUNCTUATION
238 fn strip_generics_from_path_segment(segment
: Vec
<char>) -> Result
<String
, MalformedGenerics
> {
239 let mut stripped_segment
= String
::new();
240 let mut param_depth
= 0;
242 let mut latest_generics_chunk
= String
::new();
247 latest_generics_chunk
.clear();
250 if latest_generics_chunk
.contains(" as ") {
251 // The segment tries to use fully-qualified syntax, which is currently unsupported.
252 // Give a helpful error message instead of completely ignoring the angle brackets.
253 return Err(MalformedGenerics
::HasFullyQualifiedSyntax
);
256 if param_depth
== 0 {
257 stripped_segment
.push(c
);
259 latest_generics_chunk
.push(c
);
264 if param_depth
== 0 {
267 // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
268 Err(MalformedGenerics
::UnbalancedAngleBrackets
)
272 pub fn strip_generics_from_path(path_str
: &str) -> Result
<Box
<str>, MalformedGenerics
> {
273 if !path_str
.contains(['
<'
, '
>'
]) {
274 return Ok(path_str
.into());
276 let mut stripped_segments
= vec
![];
277 let mut path
= path_str
.chars().peekable();
278 let mut segment
= Vec
::new();
280 while let Some(chr
) = path
.next() {
283 if path
.next_if_eq(&'
:'
).is_some() {
284 let stripped_segment
=
285 strip_generics_from_path_segment(mem
::take(&mut segment
))?
;
286 if !stripped_segment
.is_empty() {
287 stripped_segments
.push(stripped_segment
);
290 return Err(MalformedGenerics
::InvalidPathSeparator
);
298 return Err(MalformedGenerics
::TooManyAngleBrackets
);
301 return Err(MalformedGenerics
::EmptyAngleBrackets
);
306 while let Some(chr
) = path
.next_if(|c
| *c
!= '
>'
) {
313 _
=> segment
.push(chr
),
315 trace
!("raw segment: {:?}", segment
);
318 if !segment
.is_empty() {
319 let stripped_segment
= strip_generics_from_path_segment(segment
)?
;
320 if !stripped_segment
.is_empty() {
321 stripped_segments
.push(stripped_segment
);
325 debug
!("path_str: {:?}\nstripped segments: {:?}", path_str
, &stripped_segments
);
327 let stripped_path
= stripped_segments
.join("::");
329 if !stripped_path
.is_empty() {
330 Ok(stripped_path
.into())
332 Err(MalformedGenerics
::MissingType
)
336 /// Returns whether the first doc-comment is an inner attribute.
338 //// If there are no doc-comments, return true.
339 /// FIXME(#78591): Support both inner and outer attributes on the same item.
340 pub fn inner_docs(attrs
: &[ast
::Attribute
]) -> bool
{
341 attrs
.iter().find(|a
| a
.doc_str().is_some()).map_or(true, |a
| a
.style
== ast
::AttrStyle
::Inner
)
344 /// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
345 pub fn has_primitive_or_keyword_docs(attrs
: &[ast
::Attribute
]) -> bool
{
347 if attr
.has_name(sym
::rustc_doc_primitive
) {
349 } else if attr
.has_name(sym
::doc
) && let Some(items
) = attr
.meta_item_list() {
351 if item
.has_name(sym
::keyword
) {
360 /// Simplified version of the corresponding function in rustdoc.
361 /// If the rustdoc version returns a successful result, this function must return the same result.
362 /// Otherwise this function may return anything.
363 fn preprocess_link(link
: &str) -> Box
<str> {
364 let link
= link
.replace('`'
, "");
365 let link
= link
.split('
#').next().unwrap();
366 let link
= link
.trim();
367 let link
= link
.rsplit('@'
).next().unwrap();
368 let link
= link
.strip_suffix("()").unwrap_or(link
);
369 let link
= link
.strip_suffix("{}").unwrap_or(link
);
370 let link
= link
.strip_suffix("[]").unwrap_or(link
);
371 let link
= if link
!= "!" { link.strip_suffix('!').unwrap_or(link) }
else { link }
;
372 let link
= link
.trim();
373 strip_generics_from_path(link
).unwrap_or_else(|_
| link
.into())
376 /// Keep inline and reference links `[]`,
377 /// but skip autolinks `<>` which we never consider to be intra-doc links.
378 pub fn may_be_doc_link(link_type
: LinkType
) -> bool
{
381 | LinkType
::Reference
382 | LinkType
::ReferenceUnknown
383 | LinkType
::Collapsed
384 | LinkType
::CollapsedUnknown
386 | LinkType
::ShortcutUnknown
=> true,
387 LinkType
::Autolink
| LinkType
::Email
=> false,
391 /// Simplified version of `preprocessed_markdown_links` from rustdoc.
392 /// Must return at least the same links as it, but may add some more links on top of that.
393 pub(crate) fn attrs_to_preprocessed_links(attrs
: &[ast
::Attribute
]) -> Vec
<Box
<str>> {
394 let (doc_fragments
, _
) = attrs_to_doc_fragments(attrs
.iter().map(|attr
| (attr
, None
)), true);
395 let doc
= prepare_to_doc_link_resolution(&doc_fragments
).into_values().next().unwrap();
400 /// Similiar version of `markdown_links` from rustdoc.
401 /// This will collect destination links and display text if exists.
402 fn parse_links
<'md
>(doc
: &'md
str) -> Vec
<Box
<str>> {
403 let mut broken_link_callback
= |link
: BrokenLink
<'md
>| Some((link
.reference
, "".into()));
404 let mut event_iter
= Parser
::new_with_broken_link_callback(
407 Some(&mut broken_link_callback
),
410 let mut links
= Vec
::new();
412 while let Some(event
) = event_iter
.next() {
414 Event
::Start(Tag
::Link(link_type
, dest
, _
)) if may_be_doc_link(link_type
) => {
418 | LinkType
::ReferenceUnknown
419 | LinkType
::Reference
421 | LinkType
::ShortcutUnknown
423 if let Some(display_text
) = collect_link_data(&mut event_iter
) {
424 links
.push(display_text
);
428 links
.push(preprocess_link(&dest
));
437 /// Collects additional data of link.
438 fn collect_link_data
<'input
, 'callback
>(
439 event_iter
: &mut Parser
<'input
, 'callback
>,
440 ) -> Option
<Box
<str>> {
441 let mut display_text
: Option
<String
> = None
;
442 let mut append_text
= |text
: CowStr
<'_
>| {
443 if let Some(display_text
) = &mut display_text
{
444 display_text
.push_str(&text
);
446 display_text
= Some(text
.to_string());
450 while let Some(event
) = event_iter
.next() {
452 Event
::Text(text
) => {
455 Event
::Code(code
) => {
465 display_text
.map(String
::into_boxed_str
)
468 /// Returns a span encompassing all the document fragments.
469 pub fn span_of_fragments(fragments
: &[DocFragment
]) -> Option
<Span
> {
470 if fragments
.is_empty() {
473 let start
= fragments
[0].span
;
474 if start
== DUMMY_SP
{
477 let end
= fragments
.last().expect("no doc strings provided").span
;
481 /// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
483 /// This method will return `None` if we cannot construct a span from the source map or if the
484 /// fragments are not all sugared doc comments. It's difficult to calculate the correct span in
485 /// that case due to escaping and other source features.
486 pub fn source_span_for_markdown_range(
489 md_range
: &Range
<usize>,
490 fragments
: &[DocFragment
],
492 let is_all_sugared_doc
= fragments
.iter().all(|frag
| frag
.kind
== DocFragmentKind
::SugaredDoc
);
494 if !is_all_sugared_doc
{
498 let snippet
= tcx
.sess
.source_map().span_to_snippet(span_of_fragments(fragments
)?
).ok()?
;
500 let starting_line
= markdown
[..md_range
.start
].matches('
\n'
).count();
501 let ending_line
= starting_line
+ markdown
[md_range
.start
..md_range
.end
].matches('
\n'
).count();
503 // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
504 // CRLF and LF line endings the same way.
505 let mut src_lines
= snippet
.split_terminator('
\n'
);
506 let md_lines
= markdown
.split_terminator('
\n'
);
508 // The number of bytes from the source span to the markdown span that are not part
509 // of the markdown, like comment markers.
510 let mut start_bytes
= 0;
511 let mut end_bytes
= 0;
513 'outer
: for (line_no
, md_line
) in md_lines
.enumerate() {
515 let source_line
= src_lines
.next()?
;
516 match source_line
.find(md_line
) {
518 if line_no
== starting_line
{
519 start_bytes
+= offset
;
521 if starting_line
== ending_line
{
524 } else if line_no
== ending_line
{
527 } else if line_no
< starting_line
{
528 start_bytes
+= source_line
.len() - md_line
.len();
530 end_bytes
+= source_line
.len() - md_line
.len();
535 // Since this is a source line that doesn't include a markdown line,
536 // we have to count the newline that we split from earlier.
537 if line_no
<= starting_line
{
538 start_bytes
+= source_line
.len() + 1;
540 end_bytes
+= source_line
.len() + 1;
547 Some(span_of_fragments(fragments
)?
.from_inner(InnerSpan
::new(
548 md_range
.start
+ start_bytes
,
549 md_range
.end
+ start_bytes
+ end_bytes
,