]>
Commit | Line | Data |
---|---|---|
a2a8927a | 1 | //! Detects invalid HTML (like an unclosed `<span>`) in doc comments. |
cdc7bbd5 | 2 | use super::Pass; |
29967ef6 XL |
3 | use crate::clean::*; |
4 | use crate::core::DocContext; | |
c295e0f8 | 5 | use crate::html::markdown::main_body_opts; |
3c0e092e XL |
6 | use crate::visit::DocVisitor; |
7 | ||
5e7ed085 | 8 | use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; |
3c0e092e | 9 | |
29967ef6 | 10 | use std::iter::Peekable; |
3c0e092e | 11 | use std::ops::Range; |
29967ef6 XL |
12 | use std::str::CharIndices; |
13 | ||
923072b8 | 14 | pub(crate) const CHECK_INVALID_HTML_TAGS: Pass = Pass { |
29967ef6 XL |
15 | name: "check-invalid-html-tags", |
16 | run: check_invalid_html_tags, | |
17 | description: "detects invalid HTML tags in doc comments", | |
18 | }; | |
19 | ||
20 | struct InvalidHtmlTagsLinter<'a, 'tcx> { | |
6a06907d | 21 | cx: &'a mut DocContext<'tcx>, |
29967ef6 XL |
22 | } |
23 | ||
923072b8 | 24 | pub(crate) fn check_invalid_html_tags(krate: Crate, cx: &mut DocContext<'_>) -> Crate { |
3c0e092e | 25 | if cx.tcx.sess.is_nightly_build() { |
6a06907d | 26 | let mut coll = InvalidHtmlTagsLinter { cx }; |
3c0e092e | 27 | coll.visit_crate(&krate); |
29967ef6 | 28 | } |
3c0e092e | 29 | krate |
29967ef6 XL |
30 | } |
31 | ||
/// Lowercase names of the tags that are never reported as "unclosed".
const ALLOWED_UNCLOSED: &[&str] = &[
    "area",
    "base",
    "br",
    "col",
    "embed",
    "hr",
    "img",
    "input",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr",
];
36 | ||
37 | fn drop_tag( | |
38 | tags: &mut Vec<(String, Range<usize>)>, | |
39 | tag_name: String, | |
40 | range: Range<usize>, | |
5099ac24 | 41 | f: &impl Fn(&str, &Range<usize>, bool), |
29967ef6 XL |
42 | ) { |
43 | let tag_name_low = tag_name.to_lowercase(); | |
44 | if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) { | |
45 | // If the tag is nested inside a "<script>" or a "<style>" tag, no warning should | |
46 | // be emitted. | |
47 | let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| { | |
48 | let at = at.to_lowercase(); | |
49 | at == "script" || at == "style" | |
50 | }); | |
51 | for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) { | |
52 | if should_not_warn { | |
53 | continue; | |
54 | } | |
55 | let last_tag_name_low = last_tag_name.to_lowercase(); | |
c295e0f8 | 56 | if ALLOWED_UNCLOSED.contains(&last_tag_name_low.as_str()) { |
29967ef6 XL |
57 | continue; |
58 | } | |
59 | // `tags` is used as a queue, meaning that everything after `pos` is included inside it. | |
60 | // So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still | |
61 | // have `h3`, meaning the tag wasn't closed as it should have. | |
5099ac24 | 62 | f(&format!("unclosed HTML tag `{}`", last_tag_name), &last_tag_span, true); |
29967ef6 XL |
63 | } |
64 | // Remove the `tag_name` that was originally closed | |
65 | tags.pop(); | |
66 | } else { | |
67 | // It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required | |
68 | // but it helps for the visualization). | |
5099ac24 FG |
69 | f(&format!("unopened HTML tag `{}`", tag_name), &range, false); |
70 | } | |
71 | } | |
72 | ||
73 | fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> { | |
74 | use rustc_lexer::{is_id_continue, is_id_start}; | |
75 | let mut current_pos = end_pos; | |
76 | loop { | |
77 | if current_pos >= 2 && text[..current_pos].ends_with("::") { | |
78 | current_pos -= 2; | |
79 | } | |
80 | let new_pos = text[..current_pos] | |
81 | .char_indices() | |
82 | .rev() | |
83 | .take_while(|(_, c)| is_id_start(*c) || is_id_continue(*c)) | |
84 | .reduce(|_accum, item| item) | |
85 | .and_then(|(new_pos, c)| is_id_start(c).then_some(new_pos)); | |
86 | if let Some(new_pos) = new_pos { | |
87 | if current_pos != new_pos { | |
88 | current_pos = new_pos; | |
89 | continue; | |
90 | } | |
91 | } | |
92 | break; | |
93 | } | |
923072b8 | 94 | if current_pos == end_pos { None } else { Some(current_pos) } |
29967ef6 XL |
95 | } |
96 | ||
97 | fn extract_html_tag( | |
98 | tags: &mut Vec<(String, Range<usize>)>, | |
99 | text: &str, | |
100 | range: &Range<usize>, | |
101 | start_pos: usize, | |
102 | iter: &mut Peekable<CharIndices<'_>>, | |
5099ac24 | 103 | f: &impl Fn(&str, &Range<usize>, bool), |
29967ef6 XL |
104 | ) { |
105 | let mut tag_name = String::new(); | |
106 | let mut is_closing = false; | |
107 | let mut prev_pos = start_pos; | |
108 | ||
109 | loop { | |
110 | let (pos, c) = match iter.peek() { | |
111 | Some((pos, c)) => (*pos, *c), | |
112 | // In case we reached the of the doc comment, we want to check that it's an | |
113 | // unclosed HTML tag. For example "/// <h3". | |
114 | None => (prev_pos, '\0'), | |
115 | }; | |
116 | prev_pos = pos; | |
117 | // Checking if this is a closing tag (like `</a>` for `<a>`). | |
118 | if c == '/' && tag_name.is_empty() { | |
119 | is_closing = true; | |
120 | } else if c.is_ascii_alphanumeric() { | |
121 | tag_name.push(c); | |
122 | } else { | |
123 | if !tag_name.is_empty() { | |
124 | let mut r = Range { start: range.start + start_pos, end: range.start + pos }; | |
125 | if c == '>' { | |
126 | // In case we have a tag without attribute, we can consider the span to | |
127 | // refer to it fully. | |
128 | r.end += 1; | |
129 | } | |
130 | if is_closing { | |
131 | // In case we have "</div >" or even "</div >". | |
132 | if c != '>' { | |
133 | if !c.is_whitespace() { | |
134 | // It seems like it's not a valid HTML tag. | |
135 | break; | |
136 | } | |
137 | let mut found = false; | |
138 | for (new_pos, c) in text[pos..].char_indices() { | |
139 | if !c.is_whitespace() { | |
140 | if c == '>' { | |
141 | r.end = range.start + new_pos + 1; | |
142 | found = true; | |
143 | } | |
144 | break; | |
145 | } | |
146 | } | |
147 | if !found { | |
148 | break; | |
149 | } | |
150 | } | |
151 | drop_tag(tags, tag_name, r, f); | |
152 | } else { | |
153 | tags.push((tag_name, r)); | |
154 | } | |
155 | } | |
156 | break; | |
157 | } | |
158 | iter.next(); | |
159 | } | |
160 | } | |
161 | ||
162 | fn extract_tags( | |
163 | tags: &mut Vec<(String, Range<usize>)>, | |
164 | text: &str, | |
165 | range: Range<usize>, | |
166 | is_in_comment: &mut Option<Range<usize>>, | |
5099ac24 | 167 | f: &impl Fn(&str, &Range<usize>, bool), |
29967ef6 XL |
168 | ) { |
169 | let mut iter = text.char_indices().peekable(); | |
170 | ||
171 | while let Some((start_pos, c)) = iter.next() { | |
172 | if is_in_comment.is_some() { | |
173 | if text[start_pos..].starts_with("-->") { | |
174 | *is_in_comment = None; | |
175 | } | |
176 | } else if c == '<' { | |
177 | if text[start_pos..].starts_with("<!--") { | |
178 | // We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!) | |
179 | iter.next(); | |
180 | iter.next(); | |
181 | iter.next(); | |
182 | *is_in_comment = Some(Range { | |
183 | start: range.start + start_pos, | |
184 | end: range.start + start_pos + 3, | |
185 | }); | |
186 | } else { | |
187 | extract_html_tag(tags, text, &range, start_pos, &mut iter, f); | |
188 | } | |
189 | } | |
190 | } | |
191 | } | |
192 | ||
3c0e092e XL |
193 | impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> { |
194 | fn visit_item(&mut self, item: &Item) { | |
6a06907d | 195 | let tcx = self.cx.tcx; |
04454e1e | 196 | let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) |
5e7ed085 FG |
197 | // If non-local, no need to check anything. |
198 | else { return }; | |
29967ef6 XL |
199 | let dox = item.attrs.collapsed_doc_value().unwrap_or_default(); |
200 | if !dox.is_empty() { | |
5099ac24 | 201 | let report_diag = |msg: &str, range: &Range<usize>, is_open_tag: bool| { |
6a06907d XL |
202 | let sp = match super::source_span_for_markdown_range(tcx, &dox, range, &item.attrs) |
203 | { | |
29967ef6 | 204 | Some(sp) => sp, |
cdc7bbd5 | 205 | None => item.attr_span(tcx), |
29967ef6 | 206 | }; |
6a06907d | 207 | tcx.struct_span_lint_hir(crate::lint::INVALID_HTML_TAGS, hir_id, sp, |lint| { |
5099ac24 FG |
208 | use rustc_lint_defs::Applicability; |
209 | let mut diag = lint.build(msg); | |
210 | // If a tag looks like `<this>`, it might actually be a generic. | |
211 | // We don't try to detect stuff `<like, this>` because that's not valid HTML, | |
212 | // and we don't try to detect stuff `<like this>` because that's not valid Rust. | |
213 | if let Some(Some(generics_start)) = (is_open_tag | |
04454e1e | 214 | && dox[..range.end].ends_with('>')) |
5099ac24 FG |
215 | .then(|| extract_path_backwards(&dox, range.start)) |
216 | { | |
217 | let generics_sp = match super::source_span_for_markdown_range( | |
218 | tcx, | |
219 | &dox, | |
220 | &(generics_start..range.end), | |
221 | &item.attrs, | |
222 | ) { | |
223 | Some(sp) => sp, | |
224 | None => item.attr_span(tcx), | |
225 | }; | |
226 | // multipart form is chosen here because ``Vec<i32>`` would be confusing. | |
227 | diag.multipart_suggestion( | |
228 | "try marking as source code", | |
229 | vec![ | |
230 | (generics_sp.shrink_to_lo(), String::from("`")), | |
231 | (generics_sp.shrink_to_hi(), String::from("`")), | |
232 | ], | |
233 | Applicability::MaybeIncorrect, | |
234 | ); | |
235 | } | |
236 | diag.emit() | |
29967ef6 XL |
237 | }); |
238 | }; | |
239 | ||
240 | let mut tags = Vec::new(); | |
241 | let mut is_in_comment = None; | |
fc512014 | 242 | let mut in_code_block = false; |
29967ef6 | 243 | |
5e7ed085 FG |
244 | let link_names = item.link_names(&self.cx.cache); |
245 | ||
246 | let mut replacer = |broken_link: BrokenLink<'_>| { | |
247 | if let Some(link) = | |
248 | link_names.iter().find(|link| *link.original_text == *broken_link.reference) | |
249 | { | |
250 | Some((link.href.as_str().into(), link.new_text.as_str().into())) | |
251 | } else if matches!( | |
252 | &broken_link.link_type, | |
253 | LinkType::Reference | LinkType::ReferenceUnknown | |
254 | ) { | |
255 | // If the link is shaped [like][this], suppress any broken HTML in the [this] part. | |
256 | // The `broken_intra_doc_links` will report typos in there anyway. | |
257 | Some(( | |
258 | broken_link.reference.to_string().into(), | |
259 | broken_link.reference.to_string().into(), | |
260 | )) | |
261 | } else { | |
262 | None | |
263 | } | |
264 | }; | |
265 | ||
266 | let p = | |
267 | Parser::new_with_broken_link_callback(&dox, main_body_opts(), Some(&mut replacer)) | |
268 | .into_offset_iter(); | |
29967ef6 XL |
269 | |
270 | for (event, range) in p { | |
271 | match event { | |
fc512014 XL |
272 | Event::Start(Tag::CodeBlock(_)) => in_code_block = true, |
273 | Event::Html(text) | Event::Text(text) if !in_code_block => { | |
29967ef6 XL |
274 | extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) |
275 | } | |
fc512014 | 276 | Event::End(Tag::CodeBlock(_)) => in_code_block = false, |
29967ef6 XL |
277 | _ => {} |
278 | } | |
279 | } | |
280 | ||
281 | for (tag, range) in tags.iter().filter(|(t, _)| { | |
282 | let t = t.to_lowercase(); | |
c295e0f8 | 283 | !ALLOWED_UNCLOSED.contains(&t.as_str()) |
29967ef6 | 284 | }) { |
5099ac24 | 285 | report_diag(&format!("unclosed HTML tag `{}`", tag), range, true); |
29967ef6 XL |
286 | } |
287 | ||
288 | if let Some(range) = is_in_comment { | |
5099ac24 | 289 | report_diag("Unclosed HTML comment", &range, false); |
29967ef6 XL |
290 | } |
291 | } | |
292 | ||
3c0e092e | 293 | self.visit_item_recur(item) |
29967ef6 XL |
294 | } |
295 | } |