]>
Commit | Line | Data |
---|---|---|
f20569fa XL |
1 | use rustc_span::source_map::SourceMap; |
2 | use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol}; | |
3 | ||
4 | #[cfg(test)] | |
5 | mod tests; | |
6 | ||
/// Describes how a comment is placed relative to the code around it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// The comment stands alone: no code on either side of any of its lines.
    Isolated,
    /// The comment follows code on the same line (code is to its left).
    Trailing,
    /// The comment is embedded in code: code before `/* foo */` and after it.
    Mixed,
    /// Not an actual comment: just a manual blank line "\n\n", kept for layout.
    BlankLine,
}
18 | ||
19 | #[derive(Clone)] | |
20 | pub struct Comment { | |
21 | pub style: CommentStyle, | |
22 | pub lines: Vec<String>, | |
23 | pub pos: BytePos, | |
24 | } | |
25 | ||
26 | /// Makes a doc string more presentable to users. | |
27 | /// Used by rustdoc and perhaps other tools, but not by rustc. | |
28 | pub fn beautify_doc_string(data: Symbol) -> Symbol { | |
29 | fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> { | |
30 | let mut i = 0; | |
31 | let mut j = lines.len(); | |
32 | // first line of all-stars should be omitted | |
33 | if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { | |
34 | i += 1; | |
35 | } | |
36 | ||
37 | while i < j && lines[i].trim().is_empty() { | |
38 | i += 1; | |
39 | } | |
40 | // like the first, a last line of all stars should be omitted | |
41 | if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') { | |
42 | j -= 1; | |
43 | } | |
44 | ||
45 | while j > i && lines[j - 1].trim().is_empty() { | |
46 | j -= 1; | |
47 | } | |
48 | ||
49 | if i != 0 || j != lines.len() { Some((i, j)) } else { None } | |
50 | } | |
51 | ||
52 | fn get_horizontal_trim(lines: &[&str]) -> Option<usize> { | |
53 | let mut i = usize::MAX; | |
54 | let mut first = true; | |
55 | ||
56 | for line in lines { | |
57 | for (j, c) in line.chars().enumerate() { | |
58 | if j > i || !"* \t".contains(c) { | |
59 | return None; | |
60 | } | |
61 | if c == '*' { | |
62 | if first { | |
63 | i = j; | |
64 | first = false; | |
65 | } else if i != j { | |
66 | return None; | |
67 | } | |
68 | break; | |
69 | } | |
70 | } | |
71 | if i >= line.len() { | |
72 | return None; | |
73 | } | |
74 | } | |
75 | Some(i) | |
76 | } | |
77 | ||
78 | let data_s = data.as_str(); | |
79 | if data_s.contains('\n') { | |
80 | let mut lines = data_s.lines().collect::<Vec<&str>>(); | |
81 | let mut changes = false; | |
82 | let lines = if let Some((i, j)) = get_vertical_trim(&lines) { | |
83 | changes = true; | |
84 | // remove whitespace-only lines from the start/end of lines | |
85 | &mut lines[i..j] | |
86 | } else { | |
87 | &mut lines | |
88 | }; | |
89 | if let Some(horizontal) = get_horizontal_trim(&lines) { | |
90 | changes = true; | |
91 | // remove a "[ \t]*\*" block from each line, if possible | |
92 | for line in lines.iter_mut() { | |
93 | *line = &line[horizontal + 1..]; | |
94 | } | |
95 | } | |
96 | if changes { | |
97 | return Symbol::intern(&lines.join("\n")); | |
98 | } | |
99 | } | |
100 | data | |
101 | } | |
102 | ||
103 | /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. | |
104 | /// Otherwise returns `Some(k)` where `k` is first char offset after that leading | |
105 | /// whitespace. Note that `k` may be outside bounds of `s`. | |
106 | fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { | |
107 | let mut idx = 0; | |
108 | for (i, ch) in s.char_indices().take(col.to_usize()) { | |
109 | if !ch.is_whitespace() { | |
110 | return None; | |
111 | } | |
112 | idx = i + ch.len_utf8(); | |
113 | } | |
114 | Some(idx) | |
115 | } | |
116 | ||
117 | fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str { | |
118 | let len = s.len(); | |
119 | match all_whitespace(&s, col) { | |
120 | Some(col) => { | |
121 | if col < len { | |
122 | &s[col..] | |
123 | } else { | |
124 | "" | |
125 | } | |
126 | } | |
127 | None => s, | |
128 | } | |
129 | } | |
130 | ||
131 | fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> { | |
132 | let mut res: Vec<String> = vec![]; | |
133 | let mut lines = text.lines(); | |
134 | // just push the first line | |
135 | res.extend(lines.next().map(|it| it.to_string())); | |
136 | // for other lines, strip common whitespace prefix | |
137 | for line in lines { | |
138 | res.push(trim_whitespace_prefix(line, col).to_string()) | |
139 | } | |
140 | res | |
141 | } | |
142 | ||
143 | // it appears this function is called only from pprust... that's | |
144 | // probably not a good thing. | |
145 | pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> { | |
146 | let sm = SourceMap::new(sm.path_mapping().clone()); | |
147 | let source_file = sm.new_source_file(path, src); | |
148 | let text = (*source_file.src.as_ref().unwrap()).clone(); | |
149 | ||
150 | let text: &str = text.as_str(); | |
151 | let start_bpos = source_file.start_pos; | |
152 | let mut pos = 0; | |
153 | let mut comments: Vec<Comment> = Vec::new(); | |
154 | let mut code_to_the_left = false; | |
155 | ||
156 | if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { | |
157 | comments.push(Comment { | |
158 | style: CommentStyle::Isolated, | |
159 | lines: vec![text[..shebang_len].to_string()], | |
160 | pos: start_bpos, | |
161 | }); | |
162 | pos += shebang_len; | |
163 | } | |
164 | ||
165 | for token in rustc_lexer::tokenize(&text[pos..]) { | |
166 | let token_text = &text[pos..pos + token.len]; | |
167 | match token.kind { | |
168 | rustc_lexer::TokenKind::Whitespace => { | |
169 | if let Some(mut idx) = token_text.find('\n') { | |
170 | code_to_the_left = false; | |
171 | while let Some(next_newline) = &token_text[idx + 1..].find('\n') { | |
172 | idx = idx + 1 + next_newline; | |
173 | comments.push(Comment { | |
174 | style: CommentStyle::BlankLine, | |
175 | lines: vec![], | |
176 | pos: start_bpos + BytePos((pos + idx) as u32), | |
177 | }); | |
178 | } | |
179 | } | |
180 | } | |
181 | rustc_lexer::TokenKind::BlockComment { doc_style, .. } => { | |
182 | if doc_style.is_none() { | |
183 | let code_to_the_right = | |
184 | !matches!(text[pos + token.len..].chars().next(), Some('\r' | '\n')); | |
185 | let style = match (code_to_the_left, code_to_the_right) { | |
186 | (_, true) => CommentStyle::Mixed, | |
187 | (false, false) => CommentStyle::Isolated, | |
188 | (true, false) => CommentStyle::Trailing, | |
189 | }; | |
190 | ||
191 | // Count the number of chars since the start of the line by rescanning. | |
192 | let pos_in_file = start_bpos + BytePos(pos as u32); | |
193 | let line_begin_in_file = source_file.line_begin_pos(pos_in_file); | |
194 | let line_begin_pos = (line_begin_in_file - start_bpos).to_usize(); | |
195 | let col = CharPos(text[line_begin_pos..pos].chars().count()); | |
196 | ||
197 | let lines = split_block_comment_into_lines(token_text, col); | |
198 | comments.push(Comment { style, lines, pos: pos_in_file }) | |
199 | } | |
200 | } | |
201 | rustc_lexer::TokenKind::LineComment { doc_style } => { | |
202 | if doc_style.is_none() { | |
203 | comments.push(Comment { | |
204 | style: if code_to_the_left { | |
205 | CommentStyle::Trailing | |
206 | } else { | |
207 | CommentStyle::Isolated | |
208 | }, | |
209 | lines: vec![token_text.to_string()], | |
210 | pos: start_bpos + BytePos(pos as u32), | |
211 | }) | |
212 | } | |
213 | } | |
214 | _ => { | |
215 | code_to_the_left = true; | |
216 | } | |
217 | } | |
218 | pos += token.len; | |
219 | } | |
220 | ||
221 | comments | |
222 | } |