git.proxmox.com Git - rustc.git/blob - compiler/rustc_resolve/src/rustdoc.rs
New upstream version 1.74.1+dfsg1
[rustc.git] / compiler / rustc_resolve / src / rustdoc.rs
1 use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
2 use rustc_ast as ast;
3 use rustc_ast::util::comments::beautify_doc_string;
4 use rustc_data_structures::fx::FxHashMap;
5 use rustc_middle::ty::TyCtxt;
6 use rustc_span::def_id::DefId;
7 use rustc_span::symbol::{kw, sym, Symbol};
8 use rustc_span::{InnerSpan, Span, DUMMY_SP};
9 use std::ops::Range;
10 use std::{cmp, mem};
11
/// Records which source syntax a piece of documentation was written with.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DocFragmentKind {
    /// The fragment comes from a `///` or `//!` doc comment.
    SugaredDoc,
    /// The fragment comes from an explicit ("raw") `#[doc = "..."]` attribute.
    RawDoc,
}
19
20 /// A portion of documentation, extracted from a `#[doc]` attribute.
21 ///
22 /// Each variant contains the line number within the complete doc-comment where the fragment
23 /// starts, as well as the Span where the corresponding doc comment or attribute is located.
24 ///
25 /// Included files are kept separate from inline doc comments so that proper line-number
26 /// information can be given when a doctest fails. Sugared doc comments and "raw" doc comments are
27 /// kept separate because of issue #42760.
28 #[derive(Clone, PartialEq, Eq, Debug)]
29 pub struct DocFragment {
30 pub span: Span,
31 /// The item this doc-comment came from.
32 /// Used to determine the scope in which doc links in this fragment are resolved.
33 /// Typically filled for reexport docs when they are merged into the docs of the
34 /// original reexported item.
35 /// If the id is not filled, which happens for the original reexported item, then
36 /// it has to be taken from somewhere else during doc link resolution.
37 pub item_id: Option<DefId>,
38 pub doc: Symbol,
39 pub kind: DocFragmentKind,
40 pub indent: usize,
41 }
42
/// The ways in which the generics of a doc link path can be rejected by
/// [`strip_generics_from_path`].
#[derive(Debug, Clone, Copy)]
pub enum MalformedGenerics {
    /// The angle brackets of the link do not pair up.
    ///
    /// Both `Vec<T` and `Vec<T>>` are examples of this.
    UnbalancedAngleBrackets,
    /// There are generics, but no type they belong to.
    ///
    /// `<T>` is an example of this.
    ///
    /// Detection: nothing is left of the path once the generics have been stripped.
    MissingType,
    /// Fully-qualified syntax is used, which is currently unsupported.
    ///
    /// `<Vec as IntoIterator>::into_iter` is an example of this.
    ///
    /// Detection: the text ` as ` (the keyword `as` with a space on either side) occurs inside
    /// angle brackets.
    HasFullyQualifiedSyntax,
    /// A path separator of the link is invalid.
    ///
    /// `Vec:<T>:new()` is an example of this. `Vec:new()` is **not**: it has no generics, so
    /// [`strip_generics_from_path`] is never called for it.
    ///
    /// An invalid separator inside angle brackets does **not** trigger this either, because
    /// rustdoc mostly ignores what is inside angle brackets (except for
    /// [`HasFullyQualifiedSyntax`](MalformedGenerics::HasFullyQualifiedSyntax)).
    ///
    /// Detection: a colon is followed by something other than a colon.
    InvalidPathSeparator,
    /// An angle bracket is opened directly after another one.
    ///
    /// `Vec<<T>>` is an example of this.
    TooManyAngleBrackets,
    /// A pair of angle brackets has nothing in between.
    ///
    /// `Vec<>` is an example of this.
    EmptyAngleBrackets,
}
83
84 /// Removes excess indentation on comments in order for the Markdown
85 /// to be parsed correctly. This is necessary because the convention for
86 /// writing documentation is to provide a space between the /// or //! marker
87 /// and the doc text, but Markdown is whitespace-sensitive. For example,
88 /// a block of text with four-space indentation is parsed as a code block,
89 /// so if we didn't unindent comments, these list items
90 ///
91 /// /// A list:
92 /// ///
93 /// /// - Foo
94 /// /// - Bar
95 ///
96 /// would be parsed as if they were in a code block, which is likely not what the user intended.
97 pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
98 // `add` is used in case the most common sugared doc syntax is used ("/// "). The other
99 // fragments kind's lines are never starting with a whitespace unless they are using some
100 // markdown formatting requiring it. Therefore, if the doc block have a mix between the two,
101 // we need to take into account the fact that the minimum indent minus one (to take this
102 // whitespace into account).
103 //
104 // For example:
105 //
106 // /// hello!
107 // #[doc = "another"]
108 //
109 // In this case, you want "hello! another" and not "hello! another".
110 let add = if docs.windows(2).any(|arr| arr[0].kind != arr[1].kind)
111 && docs.iter().any(|d| d.kind == DocFragmentKind::SugaredDoc)
112 {
113 // In case we have a mix of sugared doc comments and "raw" ones, we want the sugared one to
114 // "decide" how much the minimum indent will be.
115 1
116 } else {
117 0
118 };
119
120 // `min_indent` is used to know how much whitespaces from the start of each lines must be
121 // removed. Example:
122 //
123 // /// hello!
124 // #[doc = "another"]
125 //
126 // In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
127 // 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
128 // (5 - 1) whitespaces.
129 let Some(min_indent) = docs
130 .iter()
131 .map(|fragment| {
132 fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
133 if line.chars().all(|c| c.is_whitespace()) {
134 min_indent
135 } else {
136 // Compare against either space or tab, ignoring whether they are
137 // mixed or not.
138 let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
139 cmp::min(min_indent, whitespace)
140 + if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add }
141 }
142 })
143 })
144 .min()
145 else {
146 return;
147 };
148
149 for fragment in docs {
150 if fragment.doc == kw::Empty {
151 continue;
152 }
153
154 let min_indent = if fragment.kind != DocFragmentKind::SugaredDoc && min_indent > 0 {
155 min_indent - add
156 } else {
157 min_indent
158 };
159
160 fragment.indent = min_indent;
161 }
162 }
163
164 /// The goal of this function is to apply the `DocFragment` transformation that is required when
165 /// transforming into the final Markdown, which is applying the computed indent to each line in
166 /// each doc fragment (a `DocFragment` can contain multiple lines in case of `#[doc = ""]`).
167 ///
168 /// Note: remove the trailing newline where appropriate
169 pub fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
170 let s = frag.doc.as_str();
171 let mut iter = s.lines();
172 if s.is_empty() {
173 out.push('\n');
174 return;
175 }
176 while let Some(line) = iter.next() {
177 if line.chars().any(|c| !c.is_whitespace()) {
178 assert!(line.len() >= frag.indent);
179 out.push_str(&line[frag.indent..]);
180 } else {
181 out.push_str(line);
182 }
183 out.push('\n');
184 }
185 }
186
187 pub fn attrs_to_doc_fragments<'a>(
188 attrs: impl Iterator<Item = (&'a ast::Attribute, Option<DefId>)>,
189 doc_only: bool,
190 ) -> (Vec<DocFragment>, ast::AttrVec) {
191 let mut doc_fragments = Vec::new();
192 let mut other_attrs = ast::AttrVec::new();
193 for (attr, item_id) in attrs {
194 if let Some((doc_str, comment_kind)) = attr.doc_str_and_comment_kind() {
195 let doc = beautify_doc_string(doc_str, comment_kind);
196 let kind = if attr.is_doc_comment() {
197 DocFragmentKind::SugaredDoc
198 } else {
199 DocFragmentKind::RawDoc
200 };
201 let fragment = DocFragment { span: attr.span, doc, kind, item_id, indent: 0 };
202 doc_fragments.push(fragment);
203 } else if !doc_only {
204 other_attrs.push(attr.clone());
205 }
206 }
207
208 unindent_doc_fragments(&mut doc_fragments);
209
210 (doc_fragments, other_attrs)
211 }
212
213 /// Return the doc-comments on this item, grouped by the module they came from.
214 /// The module can be different if this is a re-export with added documentation.
215 ///
216 /// The last newline is not trimmed so the produced strings are reusable between
217 /// early and late doc link resolution regardless of their position.
218 pub fn prepare_to_doc_link_resolution(
219 doc_fragments: &[DocFragment],
220 ) -> FxHashMap<Option<DefId>, String> {
221 let mut res = FxHashMap::default();
222 for fragment in doc_fragments {
223 let out_str = res.entry(fragment.item_id).or_default();
224 add_doc_fragment(out_str, fragment);
225 }
226 res
227 }
228
229 /// Options for rendering Markdown in the main body of documentation.
230 pub fn main_body_opts() -> Options {
231 Options::ENABLE_TABLES
232 | Options::ENABLE_FOOTNOTES
233 | Options::ENABLE_STRIKETHROUGH
234 | Options::ENABLE_TASKLISTS
235 | Options::ENABLE_SMART_PUNCTUATION
236 }
237
238 fn strip_generics_from_path_segment(segment: Vec<char>) -> Result<String, MalformedGenerics> {
239 let mut stripped_segment = String::new();
240 let mut param_depth = 0;
241
242 let mut latest_generics_chunk = String::new();
243
244 for c in segment {
245 if c == '<' {
246 param_depth += 1;
247 latest_generics_chunk.clear();
248 } else if c == '>' {
249 param_depth -= 1;
250 if latest_generics_chunk.contains(" as ") {
251 // The segment tries to use fully-qualified syntax, which is currently unsupported.
252 // Give a helpful error message instead of completely ignoring the angle brackets.
253 return Err(MalformedGenerics::HasFullyQualifiedSyntax);
254 }
255 } else {
256 if param_depth == 0 {
257 stripped_segment.push(c);
258 } else {
259 latest_generics_chunk.push(c);
260 }
261 }
262 }
263
264 if param_depth == 0 {
265 Ok(stripped_segment)
266 } else {
267 // The segment has unbalanced angle brackets, e.g. `Vec<T` or `Vec<T>>`
268 Err(MalformedGenerics::UnbalancedAngleBrackets)
269 }
270 }
271
272 pub fn strip_generics_from_path(path_str: &str) -> Result<Box<str>, MalformedGenerics> {
273 if !path_str.contains(['<', '>']) {
274 return Ok(path_str.into());
275 }
276 let mut stripped_segments = vec![];
277 let mut path = path_str.chars().peekable();
278 let mut segment = Vec::new();
279
280 while let Some(chr) = path.next() {
281 match chr {
282 ':' => {
283 if path.next_if_eq(&':').is_some() {
284 let stripped_segment =
285 strip_generics_from_path_segment(mem::take(&mut segment))?;
286 if !stripped_segment.is_empty() {
287 stripped_segments.push(stripped_segment);
288 }
289 } else {
290 return Err(MalformedGenerics::InvalidPathSeparator);
291 }
292 }
293 '<' => {
294 segment.push(chr);
295
296 match path.next() {
297 Some('<') => {
298 return Err(MalformedGenerics::TooManyAngleBrackets);
299 }
300 Some('>') => {
301 return Err(MalformedGenerics::EmptyAngleBrackets);
302 }
303 Some(chr) => {
304 segment.push(chr);
305
306 while let Some(chr) = path.next_if(|c| *c != '>') {
307 segment.push(chr);
308 }
309 }
310 None => break,
311 }
312 }
313 _ => segment.push(chr),
314 }
315 trace!("raw segment: {:?}", segment);
316 }
317
318 if !segment.is_empty() {
319 let stripped_segment = strip_generics_from_path_segment(segment)?;
320 if !stripped_segment.is_empty() {
321 stripped_segments.push(stripped_segment);
322 }
323 }
324
325 debug!("path_str: {:?}\nstripped segments: {:?}", path_str, &stripped_segments);
326
327 let stripped_path = stripped_segments.join("::");
328
329 if !stripped_path.is_empty() {
330 Ok(stripped_path.into())
331 } else {
332 Err(MalformedGenerics::MissingType)
333 }
334 }
335
336 /// Returns whether the first doc-comment is an inner attribute.
337 ///
338 //// If there are no doc-comments, return true.
339 /// FIXME(#78591): Support both inner and outer attributes on the same item.
340 pub fn inner_docs(attrs: &[ast::Attribute]) -> bool {
341 attrs.iter().find(|a| a.doc_str().is_some()).map_or(true, |a| a.style == ast::AttrStyle::Inner)
342 }
343
344 /// Has `#[rustc_doc_primitive]` or `#[doc(keyword)]`.
345 pub fn has_primitive_or_keyword_docs(attrs: &[ast::Attribute]) -> bool {
346 for attr in attrs {
347 if attr.has_name(sym::rustc_doc_primitive) {
348 return true;
349 } else if attr.has_name(sym::doc) && let Some(items) = attr.meta_item_list() {
350 for item in items {
351 if item.has_name(sym::keyword) {
352 return true;
353 }
354 }
355 }
356 }
357 false
358 }
359
360 /// Simplified version of the corresponding function in rustdoc.
361 /// If the rustdoc version returns a successful result, this function must return the same result.
362 /// Otherwise this function may return anything.
363 fn preprocess_link(link: &str) -> Box<str> {
364 let link = link.replace('`', "");
365 let link = link.split('#').next().unwrap();
366 let link = link.trim();
367 let link = link.rsplit('@').next().unwrap();
368 let link = link.strip_suffix("()").unwrap_or(link);
369 let link = link.strip_suffix("{}").unwrap_or(link);
370 let link = link.strip_suffix("[]").unwrap_or(link);
371 let link = if link != "!" { link.strip_suffix('!').unwrap_or(link) } else { link };
372 let link = link.trim();
373 strip_generics_from_path(link).unwrap_or_else(|_| link.into())
374 }
375
376 /// Keep inline and reference links `[]`,
377 /// but skip autolinks `<>` which we never consider to be intra-doc links.
378 pub fn may_be_doc_link(link_type: LinkType) -> bool {
379 match link_type {
380 LinkType::Inline
381 | LinkType::Reference
382 | LinkType::ReferenceUnknown
383 | LinkType::Collapsed
384 | LinkType::CollapsedUnknown
385 | LinkType::Shortcut
386 | LinkType::ShortcutUnknown => true,
387 LinkType::Autolink | LinkType::Email => false,
388 }
389 }
390
391 /// Simplified version of `preprocessed_markdown_links` from rustdoc.
392 /// Must return at least the same links as it, but may add some more links on top of that.
393 pub(crate) fn attrs_to_preprocessed_links(attrs: &[ast::Attribute]) -> Vec<Box<str>> {
394 let (doc_fragments, _) = attrs_to_doc_fragments(attrs.iter().map(|attr| (attr, None)), true);
395 let doc = prepare_to_doc_link_resolution(&doc_fragments).into_values().next().unwrap();
396
397 parse_links(&doc)
398 }
399
400 /// Similiar version of `markdown_links` from rustdoc.
401 /// This will collect destination links and display text if exists.
402 fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
403 let mut broken_link_callback = |link: BrokenLink<'md>| Some((link.reference, "".into()));
404 let mut event_iter = Parser::new_with_broken_link_callback(
405 &doc,
406 main_body_opts(),
407 Some(&mut broken_link_callback),
408 )
409 .into_iter();
410 let mut links = Vec::new();
411
412 while let Some(event) = event_iter.next() {
413 match event {
414 Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
415 if matches!(
416 link_type,
417 LinkType::Inline
418 | LinkType::ReferenceUnknown
419 | LinkType::Reference
420 | LinkType::Shortcut
421 | LinkType::ShortcutUnknown
422 ) {
423 if let Some(display_text) = collect_link_data(&mut event_iter) {
424 links.push(display_text);
425 }
426 }
427
428 links.push(preprocess_link(&dest));
429 }
430 _ => {}
431 }
432 }
433
434 links
435 }
436
437 /// Collects additional data of link.
438 fn collect_link_data<'input, 'callback>(
439 event_iter: &mut Parser<'input, 'callback>,
440 ) -> Option<Box<str>> {
441 let mut display_text: Option<String> = None;
442 let mut append_text = |text: CowStr<'_>| {
443 if let Some(display_text) = &mut display_text {
444 display_text.push_str(&text);
445 } else {
446 display_text = Some(text.to_string());
447 }
448 };
449
450 while let Some(event) = event_iter.next() {
451 match event {
452 Event::Text(text) => {
453 append_text(text);
454 }
455 Event::Code(code) => {
456 append_text(code);
457 }
458 Event::End(_) => {
459 break;
460 }
461 _ => {}
462 }
463 }
464
465 display_text.map(String::into_boxed_str)
466 }
467
468 /// Returns a span encompassing all the document fragments.
469 pub fn span_of_fragments(fragments: &[DocFragment]) -> Option<Span> {
470 if fragments.is_empty() {
471 return None;
472 }
473 let start = fragments[0].span;
474 if start == DUMMY_SP {
475 return None;
476 }
477 let end = fragments.last().expect("no doc strings provided").span;
478 Some(start.to(end))
479 }
480
481 /// Attempts to match a range of bytes from parsed markdown to a `Span` in the source code.
482 ///
483 /// This method will return `None` if we cannot construct a span from the source map or if the
484 /// fragments are not all sugared doc comments. It's difficult to calculate the correct span in
485 /// that case due to escaping and other source features.
486 pub fn source_span_for_markdown_range(
487 tcx: TyCtxt<'_>,
488 markdown: &str,
489 md_range: &Range<usize>,
490 fragments: &[DocFragment],
491 ) -> Option<Span> {
492 let is_all_sugared_doc = fragments.iter().all(|frag| frag.kind == DocFragmentKind::SugaredDoc);
493
494 if !is_all_sugared_doc {
495 return None;
496 }
497
498 let snippet = tcx.sess.source_map().span_to_snippet(span_of_fragments(fragments)?).ok()?;
499
500 let starting_line = markdown[..md_range.start].matches('\n').count();
501 let ending_line = starting_line + markdown[md_range.start..md_range.end].matches('\n').count();
502
503 // We use `split_terminator('\n')` instead of `lines()` when counting bytes so that we treat
504 // CRLF and LF line endings the same way.
505 let mut src_lines = snippet.split_terminator('\n');
506 let md_lines = markdown.split_terminator('\n');
507
508 // The number of bytes from the source span to the markdown span that are not part
509 // of the markdown, like comment markers.
510 let mut start_bytes = 0;
511 let mut end_bytes = 0;
512
513 'outer: for (line_no, md_line) in md_lines.enumerate() {
514 loop {
515 let source_line = src_lines.next()?;
516 match source_line.find(md_line) {
517 Some(offset) => {
518 if line_no == starting_line {
519 start_bytes += offset;
520
521 if starting_line == ending_line {
522 break 'outer;
523 }
524 } else if line_no == ending_line {
525 end_bytes += offset;
526 break 'outer;
527 } else if line_no < starting_line {
528 start_bytes += source_line.len() - md_line.len();
529 } else {
530 end_bytes += source_line.len() - md_line.len();
531 }
532 break;
533 }
534 None => {
535 // Since this is a source line that doesn't include a markdown line,
536 // we have to count the newline that we split from earlier.
537 if line_no <= starting_line {
538 start_bytes += source_line.len() + 1;
539 } else {
540 end_bytes += source_line.len() + 1;
541 }
542 }
543 }
544 }
545 }
546
547 Some(span_of_fragments(fragments)?.from_inner(InnerSpan::new(
548 md_range.start + start_bytes,
549 md_range.end + start_bytes + end_bytes,
550 )))
551 }