1 //! Detects invalid HTML (like an unclosed `<span>`) in doc comments.
4 use crate::core
::DocContext
;
5 use crate::html
::markdown
::main_body_opts
;
6 use crate::visit
::DocVisitor
;
8 use pulldown_cmark
::{BrokenLink, Event, LinkType, Parser, Tag}
;
10 use std
::iter
::Peekable
;
12 use std
::str::CharIndices
;
/// Pass descriptor for the invalid-HTML-tags check: it is named `check-invalid-html-tags`
/// and its `run` entry point is `check_invalid_html_tags`.
// NOTE(review): the closing of this initializer is not visible in this listing.
14 pub(crate) const CHECK_INVALID_HTML_TAGS
: Pass
= Pass
{
15 name
: "check-invalid-html-tags",
16 run
: check_invalid_html_tags
,
17 description
: "detects invalid HTML tags in doc comments",
/// Visitor used by the pass: it implements `DocVisitor` and wraps the mutable `DocContext`
/// whose `tcx` is used to emit the lint.
20 struct InvalidHtmlTagsLinter
<'a
, 'tcx
> {
// Mutable borrow of the documentation context, supplied by `check_invalid_html_tags`.
21 cx
: &'a
mut DocContext
<'tcx
>,
/// Entry point of the pass: on nightly builds, walks `krate` with an `InvalidHtmlTagsLinter`.
///
/// The walk is skipped entirely when `cx.tcx.sess.is_nightly_build()` is false.
// NOTE(review): the tail of this function is not visible in this listing; presumably it
// returns `krate` unchanged (the visitor only borrows it) — confirm.
24 pub(crate) fn check_invalid_html_tags(krate
: Crate
, cx
: &mut DocContext
<'_
>) -> Crate
{
// The lint only runs on nightly toolchains.
25 if cx
.tcx
.sess
.is_nightly_build() {
26 let mut coll
= InvalidHtmlTagsLinter { cx }
;
27 coll
.visit_crate(&krate
);
/// Lowercase tag names that are exempt from the "unclosed HTML tag" report (HTML void
/// elements such as `<br>` or `<img>`). Callers lowercase a tag name before looking it up here.
32 const ALLOWED_UNCLOSED
: &[&str] = &[
33 "area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
34 "source", "track", "wbr",
38 tags
: &mut Vec
<(String
, Range
<usize>)>,
41 f
: &impl Fn(&str, &Range
<usize>, bool
),
43 let tag_name_low
= tag_name
.to_lowercase();
44 if let Some(pos
) = tags
.iter().rposition(|(t
, _
)| t
.to_lowercase() == tag_name_low
) {
45 // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should be emitted.
47 let should_not_warn
= tags
.iter().take(pos
+ 1).any(|(at
, _
)| {
48 let at
= at
.to_lowercase();
49 at
== "script" || at
== "style"
51 for (last_tag_name
, last_tag_span
) in tags
.drain(pos
+ 1..) {
55 let last_tag_name_low
= last_tag_name
.to_lowercase();
56 if ALLOWED_UNCLOSED
.contains(&last_tag_name_low
.as_str()) {
59 // `tags` is used as a stack, meaning that everything after `pos` is included inside it.
60 // So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still
61 // have `h3`, meaning the tag wasn't closed as it should have.
62 f(&format
!("unclosed HTML tag `{}`", last_tag_name
), &last_tag_span
, true);
64 // Remove the `tag_name` that was originally closed
67 // It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required
68 // but it helps for the visualization).
69 f(&format
!("unopened HTML tag `{}`", tag_name
), &range
, false);
/// Scans `text` backwards from byte offset `end_pos` over path-like text (identifier
/// characters and `::` separators) and returns the byte offset the scan ended at.
///
/// Returns `None` when the scan position never moved away from `end_pos`.
// NOTE(review): several statements of this function (including whatever loop drives the
// scan) are not visible in this listing, so the comments below cover only the visible code.
73 fn extract_path_backwards(text
: &str, end_pos
: usize) -> Option
<usize> {
74 use rustc_lexer
::{is_id_continue, is_id_start}
;
75 let mut current_pos
= end_pos
;
// Checks for a `::` separator sitting immediately before the cursor.
77 if current_pos
>= 2 && text
[..current_pos
].ends_with("::") {
// Takes the run of identifier characters, keeps the last item yielded, and accepts its
// offset only if that character can start an identifier.
80 let new_pos
= text
[..current_pos
]
83 .take_while(|(_
, c
)| is_id_start(*c
) || is_id_continue(*c
))
84 .reduce(|_accum
, item
| item
)
85 .and_then(|(new_pos
, c
)| is_id_start(c
).then_some(new_pos
));
// Only move the cursor when the scan actually made progress.
86 if let Some(new_pos
) = new_pos
{
87 if current_pos
!= new_pos
{
88 current_pos
= new_pos
;
// An unchanged cursor means nothing path-like was found before `end_pos`.
94 if current_pos
== end_pos { None }
else { Some(current_pos) }
/// Scans `text` forwards from byte offset `start_pos` over path-like text (identifier
/// characters and `::` separators) and returns the byte offset the scan ended at.
///
/// Returns `None` when the scan position never moved away from `start_pos`.
// NOTE(review): several statements of this function (including whatever loop drives the
// scan and the test applied to the first character) are not visible in this listing, so
// the comments below cover only the visible code.
97 fn extract_path_forward(text
: &str, start_pos
: usize) -> Option
<usize> {
98 use rustc_lexer
::{is_id_continue, is_id_start}
;
99 let mut current_pos
= start_pos
;
// Checks for a `::` separator starting exactly at the cursor.
101 if current_pos
< text
.len() && text
[current_pos
..].starts_with("::") {
106 let mut chars
= text
[current_pos
..].chars();
// The cursor is a byte offset, so it advances by each character's UTF-8 length.
107 if let Some(c
) = chars
.next() {
109 current_pos
+= c
.len_utf8();
// Following characters are consumed while they can continue an identifier.
114 while let Some(c
) = chars
.next() {
115 if is_id_continue(c
) {
116 current_pos
+= c
.len_utf8();
// An unchanged cursor means nothing path-like was found after `start_pos`.
122 if current_pos
== start_pos { None }
else { Some(current_pos) }
/// Returns whether `c` may be the next character of an HTML tag name.
///
/// Follows the CommonMark raw-HTML rule (<https://spec.commonmark.org/0.30/#raw-html>):
///
/// > A tag name consists of an ASCII letter followed by zero or more ASCII letters, digits, or
/// > hyphens (-).
///
/// `is_empty` must be `true` when no character of the tag name has been accepted yet, i.e.
/// when `c` would be the first character of the name.
fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
    // ASCII letters are valid at any position; `-` and ASCII digits only after the first
    // character. The parentheses spell out the `&&`-before-`||` grouping.
    c.is_ascii_alphabetic() || (!is_empty && (c == '-' || c.is_ascii_digit()))
}
134 tags
: &mut Vec
<(String
, Range
<usize>)>,
136 range
: &Range
<usize>,
138 iter
: &mut Peekable
<CharIndices
<'_
>>,
139 f
: &impl Fn(&str, &Range
<usize>, bool
),
141 let mut tag_name
= String
::new();
142 let mut is_closing
= false;
143 let mut prev_pos
= start_pos
;
146 let (pos
, c
) = match iter
.peek() {
147 Some((pos
, c
)) => (*pos
, *c
),
148 // In case we reached the end of the doc comment, we want to check that it's an
149 // unclosed HTML tag. For example "/// <h3".
150 None
=> (prev_pos
, '
\0'
),
153 // Checking if this is a closing tag (like `</a>` for `<a>`).
154 if c
== '
/'
&& tag_name
.is_empty() {
156 } else if is_valid_for_html_tag_name(c
, tag_name
.is_empty()) {
159 if !tag_name
.is_empty() {
160 let mut r
= Range { start: range.start + start_pos, end: range.start + pos }
;
162 // In case we have a tag without attribute, we can consider the span to
163 // refer to it fully.
167 // In case we have "</div >" or even "</div         >".
169 if !c
.is_whitespace() {
170 // It seems like it's not a valid HTML tag.
173 let mut found
= false;
174 for (new_pos
, c
) in text
[pos
..].char_indices() {
175 if !c
.is_whitespace() {
177 r
.end
= range
.start
+ new_pos
+ 1;
187 drop_tag(tags
, tag_name
, r
, f
);
189 tags
.push((tag_name
, r
));
199 tags
: &mut Vec
<(String
, Range
<usize>)>,
202 is_in_comment
: &mut Option
<Range
<usize>>,
203 f
: &impl Fn(&str, &Range
<usize>, bool
),
205 let mut iter
= text
.char_indices().peekable();
207 while let Some((start_pos
, c
)) = iter
.next() {
208 if is_in_comment
.is_some() {
209 if text
[start_pos
..].starts_with("-->") {
210 *is_in_comment
= None
;
213 if text
[start_pos
..].starts_with("<!--") {
214 // We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!)
218 *is_in_comment
= Some(Range
{
219 start
: range
.start
+ start_pos
,
220 end
: range
.start
+ start_pos
+ 3,
223 extract_html_tag(tags
, text
, &range
, start_pos
, &mut iter
, f
);
229 impl<'a
, 'tcx
> DocVisitor
for InvalidHtmlTagsLinter
<'a
, 'tcx
> {
230 fn visit_item(&mut self, item
: &Item
) {
231 let tcx
= self.cx
.tcx
;
232 let Some(hir_id
) = DocContext
::as_local_hir_id(tcx
, item
.item_id
)
233 // If non-local, no need to check anything.
235 let dox
= item
.attrs
.collapsed_doc_value().unwrap_or_default();
237 let report_diag
= |msg
: &str, range
: &Range
<usize>, is_open_tag
: bool
| {
238 let sp
= match super::source_span_for_markdown_range(tcx
, &dox
, range
, &item
.attrs
)
241 None
=> item
.attr_span(tcx
),
243 tcx
.struct_span_lint_hir(crate::lint
::INVALID_HTML_TAGS
, hir_id
, sp
, |lint
| {
244 use rustc_lint_defs
::Applicability
;
245 let mut diag
= lint
.build(msg
);
246 // If a tag looks like `<this>`, it might actually be a generic.
247 // We don't try to detect stuff `<like, this>` because that's not valid HTML,
248 // and we don't try to detect stuff `<like this>` because that's not valid Rust.
249 let mut generics_end
= range
.end
;
250 if let Some(Some(mut generics_start
)) = (is_open_tag
251 && dox
[..generics_end
].ends_with('
>'
))
252 .then(|| extract_path_backwards(&dox
, range
.start
))
254 while generics_start
!= 0
255 && generics_end
< dox
.len()
256 && dox
.as_bytes()[generics_start
- 1] == b'
<'
257 && dox
.as_bytes()[generics_end
] == b'
>'
261 if let Some(new_start
) = extract_path_backwards(&dox
, generics_start
) {
262 generics_start
= new_start
;
264 if let Some(new_end
) = extract_path_forward(&dox
, generics_end
) {
265 generics_end
= new_end
;
268 if let Some(new_end
) = extract_path_forward(&dox
, generics_end
) {
269 generics_end
= new_end
;
271 let generics_sp
= match super::source_span_for_markdown_range(
274 &(generics_start
..generics_end
),
278 None
=> item
.attr_span(tcx
),
280 // Sometimes, we only extract part of a path. For example, consider this:
282 // <[u32] as IntoIter<u32>>::Item
283 // ^^^^^ unclosed HTML tag `u32`
285 // We don't have any code for parsing fully-qualified trait paths.
286 // In theory, we could add it, but doing it correctly would require
287 // parsing the entire path grammar, which is problematic because of
288 // overlap between the path grammar and Markdown.
290 // The example above shows that ambiguity. Is `[u32]` intended to be an
291 // intra-doc link to the u32 primitive, or is it intended to be a slice?
293 // If the below conditional were removed, we would suggest this, which is
294 // not what the user probably wants.
296 // <[u32] as `IntoIter<u32>`>::Item
298 // We know that the user actually wants to wrap the whole thing in a code
299 // block, but the only reason we know that is because `u32` does not, in
300 // fact, implement IntoIter. If the example looks like this:
302 // <[Vec<i32>] as IntoIter<i32>>::Item
304 // The ideal fix would be significantly different.
305 if (generics_start
> 0 && dox
.as_bytes()[generics_start
- 1] == b'
<'
)
306 || (generics_end
< dox
.len() && dox
.as_bytes()[generics_end
] == b'
>'
)
311 // multipart form is chosen here because ``Vec<i32>`` would be confusing.
312 diag
.multipart_suggestion(
313 "try marking as source code",
315 (generics_sp
.shrink_to_lo(), String
::from("`")),
316 (generics_sp
.shrink_to_hi(), String
::from("`")),
318 Applicability
::MaybeIncorrect
,
325 let mut tags
= Vec
::new();
326 let mut is_in_comment
= None
;
327 let mut in_code_block
= false;
329 let link_names
= item
.link_names(&self.cx
.cache
);
331 let mut replacer
= |broken_link
: BrokenLink
<'_
>| {
333 link_names
.iter().find(|link
| *link
.original_text
== *broken_link
.reference
)
335 Some((link
.href
.as_str().into(), link
.new_text
.as_str().into()))
337 &broken_link
.link_type
,
338 LinkType
::Reference
| LinkType
::ReferenceUnknown
340 // If the link is shaped [like][this], suppress any broken HTML in the [this] part.
341 // The `broken_intra_doc_links` will report typos in there anyway.
343 broken_link
.reference
.to_string().into(),
344 broken_link
.reference
.to_string().into(),
352 Parser
::new_with_broken_link_callback(&dox
, main_body_opts(), Some(&mut replacer
))
355 for (event
, range
) in p
{
357 Event
::Start(Tag
::CodeBlock(_
)) => in_code_block
= true,
358 Event
::Html(text
) if !in_code_block
=> {
359 extract_tags(&mut tags
, &text
, range
, &mut is_in_comment
, &report_diag
)
361 Event
::End(Tag
::CodeBlock(_
)) => in_code_block
= false,
366 for (tag
, range
) in tags
.iter().filter(|(t
, _
)| {
367 let t
= t
.to_lowercase();
368 !ALLOWED_UNCLOSED
.contains(&t
.as_str())
370 report_diag(&format
!("unclosed HTML tag `{}`", tag
), range
, true);
373 if let Some(range
) = is_in_comment
{
374 report_diag("Unclosed HTML comment", &range
, false);
378 self.visit_item_recur(item
)