]>
Commit | Line | Data |
---|---|---|
5099ac24 | 1 | use crate::token::CommentKind; |
dfeec247 | 2 | use rustc_span::source_map::SourceMap; |
3dfed10e | 3 | use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol}; |
60c5eb7d | 4 | |
416331ca XL |
5 | #[cfg(test)] |
6 | mod tests; | |
7 | ||
/// Describes how a comment is laid out relative to the code around it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// The comment stands alone: no code on either side of any of its lines.
    Isolated,
    /// The comment follows code on the same line (code to its left).
    Trailing,
    /// The comment is embedded in code: code before `/* foo */` and after it.
    Mixed,
    /// Not a real comment: just a manual blank line ("\n\n"), kept for layout.
    BlankLine,
}
19 | ||
/// One entry of the list built by `gather_comments`: a non-doc comment, the
/// shebang line, or a blank-line marker.
#[derive(Clone)]
pub struct Comment {
    /// How the entry sits relative to the surrounding code.
    pub style: CommentStyle,
    /// The text of the entry, one element per line. `gather_comments` leaves
    /// this empty for `CommentStyle::BlankLine` entries.
    pub lines: Vec<String>,
    /// Position of the entry; `gather_comments` computes it as the source
    /// file's start position plus the byte offset inside the file.
    pub pos: BytePos,
}
26 | ||
/// Cheap, conservative check for whether `s` could contain documentation links.
///
/// A link needs a pair of square brackets `[]`, but only the opening bracket is
/// looked for: in practice brackets come in pairs, so that is enough to rule a
/// string out. A `true` result does not guarantee that a link is present.
#[inline]
pub fn may_have_doc_links(s: &str) -> bool {
    // `[` is ASCII, so scanning the raw UTF-8 bytes cannot produce a false match.
    s.bytes().any(|byte| byte == b'[')
}
34 | ||
3dfed10e XL |
35 | /// Makes a doc string more presentable to users. |
36 | /// Used by rustdoc and perhaps other tools, but not by rustc. | |
5099ac24 | 37 | pub fn beautify_doc_string(data: Symbol, kind: CommentKind) -> Symbol { |
fc512014 | 38 | fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> { |
85aaf69f | 39 | let mut i = 0; |
1a4d82fc JJ |
40 | let mut j = lines.len(); |
41 | // first line of all-stars should be omitted | |
9cc50fc6 | 42 | if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { |
1a4d82fc JJ |
43 | i += 1; |
44 | } | |
b7449926 | 45 | |
1a4d82fc | 46 | // like the first, a last line of all stars should be omitted |
3c0e092e | 47 | if j > i && !lines[j - 1].is_empty() && lines[j - 1].chars().all(|c| c == '*') { |
1a4d82fc JJ |
48 | j -= 1; |
49 | } | |
b7449926 | 50 | |
fc512014 | 51 | if i != 0 || j != lines.len() { Some((i, j)) } else { None } |
1a4d82fc JJ |
52 | } |
53 | ||
5099ac24 | 54 | fn get_horizontal_trim<'a>(lines: &'a [&str], kind: CommentKind) -> Option<String> { |
85aaf69f | 55 | let mut i = usize::MAX; |
1a4d82fc | 56 | let mut first = true; |
b7449926 | 57 | |
5099ac24 FG |
58 | // In case we have doc comments like `/**` or `/*!`, we want to remove stars if they are |
59 | // present. However, we first need to strip the empty lines so they don't get in the middle | |
60 | // when we try to compute the "horizontal trim". | |
61 | let lines = if kind == CommentKind::Block { | |
62 | // Whatever happens, we skip the first line. | |
04454e1e FG |
63 | let mut i = lines |
64 | .get(0) | |
65 | .map(|l| if l.trim_start().starts_with('*') { 0 } else { 1 }) | |
66 | .unwrap_or(0); | |
5099ac24 FG |
67 | let mut j = lines.len(); |
68 | ||
69 | while i < j && lines[i].trim().is_empty() { | |
70 | i += 1; | |
71 | } | |
72 | while j > i && lines[j - 1].trim().is_empty() { | |
73 | j -= 1; | |
74 | } | |
75 | &lines[i..j] | |
76 | } else { | |
77 | lines | |
78 | }; | |
79 | ||
fc512014 | 80 | for line in lines { |
1a4d82fc | 81 | for (j, c) in line.chars().enumerate() { |
c34b1796 | 82 | if j > i || !"* \t".contains(c) { |
fc512014 | 83 | return None; |
1a4d82fc JJ |
84 | } |
85 | if c == '*' { | |
86 | if first { | |
87 | i = j; | |
88 | first = false; | |
89 | } else if i != j { | |
fc512014 | 90 | return None; |
1a4d82fc JJ |
91 | } |
92 | break; | |
93 | } | |
94 | } | |
2c00a5a8 | 95 | if i >= line.len() { |
fc512014 | 96 | return None; |
1a4d82fc JJ |
97 | } |
98 | } | |
5099ac24 | 99 | if lines.is_empty() { None } else { Some(lines[0][..i].into()) } |
fc512014 | 100 | } |
1a4d82fc | 101 | |
fc512014 XL |
102 | let data_s = data.as_str(); |
103 | if data_s.contains('\n') { | |
104 | let mut lines = data_s.lines().collect::<Vec<&str>>(); | |
105 | let mut changes = false; | |
106 | let lines = if let Some((i, j)) = get_vertical_trim(&lines) { | |
107 | changes = true; | |
108 | // remove whitespace-only lines from the start/end of lines | |
109 | &mut lines[i..j] | |
1a4d82fc | 110 | } else { |
fc512014 XL |
111 | &mut lines |
112 | }; | |
5099ac24 | 113 | if let Some(horizontal) = get_horizontal_trim(&lines, kind) { |
fc512014 XL |
114 | changes = true; |
115 | // remove a "[ \t]*\*" block from each line, if possible | |
116 | for line in lines.iter_mut() { | |
5099ac24 FG |
117 | if let Some(tmp) = line.strip_prefix(&horizontal) { |
118 | *line = tmp; | |
119 | if kind == CommentKind::Block | |
120 | && (*line == "*" || line.starts_with("* ") || line.starts_with("**")) | |
121 | { | |
122 | *line = &line[1..]; | |
123 | } | |
124 | } | |
fc512014 XL |
125 | } |
126 | } | |
127 | if changes { | |
128 | return Symbol::intern(&lines.join("\n")); | |
1a4d82fc JJ |
129 | } |
130 | } | |
fc512014 | 131 | data |
1a4d82fc JJ |
132 | } |
133 | ||
9fa01778 XL |
134 | /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. |
135 | /// Otherwise returns `Some(k)` where `k` is first char offset after that leading | |
136 | /// whitespace. Note that `k` may be outside bounds of `s`. | |
85aaf69f | 137 | fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { |
0731742a XL |
138 | let mut idx = 0; |
139 | for (i, ch) in s.char_indices().take(col.to_usize()) { | |
c34b1796 | 140 | if !ch.is_whitespace() { |
1a4d82fc JJ |
141 | return None; |
142 | } | |
0731742a | 143 | idx = i + ch.len_utf8(); |
1a4d82fc | 144 | } |
0731742a | 145 | Some(idx) |
1a4d82fc JJ |
146 | } |
147 | ||
416331ca | 148 | fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str { |
1a4d82fc | 149 | let len = s.len(); |
416331ca | 150 | match all_whitespace(&s, col) { |
dfeec247 XL |
151 | Some(col) => { |
152 | if col < len { | |
153 | &s[col..] | |
154 | } else { | |
155 | "" | |
156 | } | |
157 | } | |
1a4d82fc | 158 | None => s, |
1a4d82fc | 159 | } |
1a4d82fc JJ |
160 | } |
161 | ||
dfeec247 | 162 | fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> { |
416331ca XL |
163 | let mut res: Vec<String> = vec![]; |
164 | let mut lines = text.lines(); | |
165 | // just push the first line | |
166 | res.extend(lines.next().map(|it| it.to_string())); | |
167 | // for other lines, strip common whitespace prefix | |
168 | for line in lines { | |
169 | res.push(trim_whitespace_prefix(line, col).to_string()) | |
170 | } | |
171 | res | |
1a4d82fc JJ |
172 | } |
173 | ||
// it appears this function is called only from pprust... that's
// probably not a good thing.
/// Collects the non-doc comments of `src` as a list of [`Comment`]s, in source order.
///
/// Besides ordinary line and block comments, the result contains an `Isolated` entry for a
/// leading shebang line (if any) and a `BlankLine` entry for every newline after the first
/// one inside a single whitespace token. Doc comments are skipped.
///
/// Only the path mapping of the passed-in `sm` is used: `src` is registered under `path` in a
/// fresh `SourceMap` built from that mapping.
///
/// # Panics
///
/// Panics if the newly created source file has no `src` attached (the `unwrap` below).
pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
    let sm = SourceMap::new(sm.path_mapping().clone());
    let source_file = sm.new_source_file(path, src);
    let text = (*source_file.src.as_ref().unwrap()).clone();

    let text: &str = text.as_str();
    let start_bpos = source_file.start_pos;
    // Byte offset into `text` of the token currently being looked at.
    let mut pos = 0;
    let mut comments: Vec<Comment> = Vec::new();
    // Whether a non-whitespace, non-comment token has been seen since the last newline.
    let mut code_to_the_left = false;

    // The shebang, if present, is recorded as an isolated comment and skipped, so tokenizing
    // starts right after it.
    if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
        comments.push(Comment {
            style: CommentStyle::Isolated,
            lines: vec![text[..shebang_len].to_string()],
            pos: start_bpos,
        });
        pos += shebang_len;
    }

    for token in rustc_lexer::tokenize(&text[pos..]) {
        let token_text = &text[pos..pos + token.len];
        match token.kind {
            rustc_lexer::TokenKind::Whitespace => {
                if let Some(mut idx) = token_text.find('\n') {
                    // A newline starts a fresh line: nothing is to the left any more.
                    code_to_the_left = false;
                    // Every further newline in the same whitespace run is a blank line;
                    // `idx` is advanced to that newline and used as the entry's position.
                    while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
                        idx += 1 + next_newline;
                        comments.push(Comment {
                            style: CommentStyle::BlankLine,
                            lines: vec![],
                            pos: start_bpos + BytePos((pos + idx) as u32),
                        });
                    }
                }
            }
            rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
                if doc_style.is_none() {
                    // NOTE(review): anything other than an immediate `\r`/`\n` counts as "code
                    // to the right" here, including trailing whitespace and end of input.
                    let code_to_the_right =
                        !matches!(text[pos + token.len..].chars().next(), Some('\r' | '\n'));
                    let style = match (code_to_the_left, code_to_the_right) {
                        (_, true) => CommentStyle::Mixed,
                        (false, false) => CommentStyle::Isolated,
                        (true, false) => CommentStyle::Trailing,
                    };

                    // Count the number of chars since the start of the line by rescanning.
                    // (presumably `line_begin_pos` yields the start of the line containing
                    // `pos_in_file` — confirm against `SourceFile`)
                    let pos_in_file = start_bpos + BytePos(pos as u32);
                    let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
                    let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
                    let col = CharPos(text[line_begin_pos..pos].chars().count());

                    // `col` is how much indentation gets stripped from the continuation lines.
                    let lines = split_block_comment_into_lines(token_text, col);
                    comments.push(Comment { style, lines, pos: pos_in_file })
                }
            }
            rustc_lexer::TokenKind::LineComment { doc_style } => {
                if doc_style.is_none() {
                    comments.push(Comment {
                        style: if code_to_the_left {
                            CommentStyle::Trailing
                        } else {
                            CommentStyle::Isolated
                        },
                        lines: vec![token_text.to_string()],
                        pos: start_bpos + BytePos(pos as u32),
                    })
                }
            }
            // Any other token is treated as code.
            _ => {
                code_to_the_left = true;
            }
        }
        pos += token.len;
    }

    comments
}