]> git.proxmox.com Git - rustc.git/blob - src/librustc_ast/util/comments.rs
New upstream version 1.44.1+dfsg1
[rustc.git] / src / librustc_ast / util / comments.rs
1 pub use CommentStyle::*;
2
3 use crate::ast;
4 use rustc_span::source_map::SourceMap;
5 use rustc_span::{BytePos, CharPos, FileName, Pos};
6
7 use log::debug;
8
9 #[cfg(test)]
10 mod tests;
11
/// Describes how a comment is placed relative to the code around it.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// Every line of the comment stands alone: no code on either side.
    Isolated,
    /// The comment has code to its left.
    Trailing,
    /// The comment sits inside code: code before `/* foo */` and after it.
    Mixed,
    /// Not a comment at all, just a manual blank line ("\n\n") kept for layout.
    BlankLine,
}
23
24 #[derive(Clone)]
25 pub struct Comment {
26 pub style: CommentStyle,
27 pub lines: Vec<String>,
28 pub pos: BytePos,
29 }
30
31 pub fn is_line_doc_comment(s: &str) -> bool {
32 let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
33 || s.starts_with("//!");
34 debug!("is {:?} a doc comment? {}", s, res);
35 res
36 }
37
38 pub fn is_block_doc_comment(s: &str) -> bool {
39 // Prevent `/**/` from being parsed as a doc comment
40 let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
41 || s.starts_with("/*!"))
42 && s.len() >= 5;
43 debug!("is {:?} a doc comment? {}", s, res);
44 res
45 }
46
47 // FIXME(#64197): Try to privatize this again.
48 pub fn is_doc_comment(s: &str) -> bool {
49 (s.starts_with("///") && is_line_doc_comment(s))
50 || s.starts_with("//!")
51 || (s.starts_with("/**") && is_block_doc_comment(s))
52 || s.starts_with("/*!")
53 }
54
55 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
56 assert!(is_doc_comment(comment));
57 if comment.starts_with("//!") || comment.starts_with("/*!") {
58 ast::AttrStyle::Inner
59 } else {
60 ast::AttrStyle::Outer
61 }
62 }
63
/// Strips the comment markers and decoration from a doc comment, leaving its text.
///
/// * A line comment loses its prefix (`///!`, `///`, `//!` or `//`, tried in that
///   order) and nothing else.
/// * A block comment loses its first three bytes (`/**` or `/*!`) and its last two
///   bytes (assumed to be the closing `*/`; this is not checked). Decoration-only
///   and blank lines are then dropped from the top and bottom, and a `[ \t]*\*`
///   prefix is removed from every line when all lines share it at the same column.
///
/// # Panics
///
/// Panics with "not a doc-comment" if `comment` starts with neither `//` nor `/*`,
/// or if it is a block comment with no room for a body (for example `/**/`).
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// remove decoration-only and whitespace-only lines from the start/end of `lines`
    fn vertical_trim<'a, 'b>(lines: &'b [&'a str]) -> &'b [&'a str] {
        let mut i = 0;
        let mut j = lines.len();
        // first line of all-stars should be omitted
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            i += 1;
        }

        while i < j && lines[i].trim().is_empty() {
            i += 1;
        }
        // Like the first, a last line of all stars should be omitted. Its first
        // character is skipped so that the usual ` *` (left over from ` */`) matches.
        // The line must really contain a `*`, though: `all` is vacuously true on an
        // empty iterator, so without that guard a one-character line such as `}`
        // would be thrown away.
        if j > i && lines[j - 1].contains('*') && lines[j - 1].chars().skip(1).all(|c| c == '*') {
            j -= 1;
        }

        while j > i && lines[j - 1].trim().is_empty() {
            j -= 1;
        }

        &lines[i..j]
    }

    /// remove a "[ \t]*\*" block from each line, if possible
    fn horizontal_trim<'a>(lines: &[&'a str]) -> Vec<&'a str> {
        // Column of the `*` on the first line; every other line must match it.
        let mut i = usize::MAX;
        let mut can_trim = true;
        let mut first = true;

        for line in lines {
            for (j, c) in line.chars().enumerate() {
                if j > i || !"* \t".contains(c) {
                    can_trim = false;
                    break;
                }
                if c == '*' {
                    if first {
                        i = j;
                        first = false;
                    } else if i != j {
                        can_trim = false;
                    }
                    break;
                }
            }
            // A line too short to reach the star column cannot be trimmed.
            if i >= line.len() {
                can_trim = false;
            }
            if !can_trim {
                break;
            }
        }

        if can_trim {
            // Only ASCII (` `, `\t`, `*`) precedes column `i`, so the char index
            // found above is also a valid byte offset.
            lines.iter().map(|line| &line[i + 1..]).collect()
        } else {
            lines.to_vec()
        }
    }

    // one-line comments lose their prefix
    const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];

    for prefix in ONELINERS {
        if comment.starts_with(*prefix) {
            return comment[prefix.len()..].to_string();
        }
    }

    if comment.starts_with("/*") {
        // `get` rather than indexing: a comment with no body (`/**/`, `/*`) yields
        // `None` here and falls through to the descriptive panic below instead of
        // dying on an out-of-range slice.
        if let Some(body) = comment.get(3..comment.len() - 2) {
            let lines: Vec<&str> = body.lines().collect();
            let lines = horizontal_trim(vertical_trim(&lines));
            return lines.join("\n");
        }
    }

    panic!("not a doc-comment: {}", comment);
}
147
148 /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
149 /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
150 /// whitespace. Note that `k` may be outside bounds of `s`.
151 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
152 let mut idx = 0;
153 for (i, ch) in s.char_indices().take(col.to_usize()) {
154 if !ch.is_whitespace() {
155 return None;
156 }
157 idx = i + ch.len_utf8();
158 }
159 Some(idx)
160 }
161
162 fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
163 let len = s.len();
164 match all_whitespace(&s, col) {
165 Some(col) => {
166 if col < len {
167 &s[col..]
168 } else {
169 ""
170 }
171 }
172 None => s,
173 }
174 }
175
176 fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
177 let mut res: Vec<String> = vec![];
178 let mut lines = text.lines();
179 // just push the first line
180 res.extend(lines.next().map(|it| it.to_string()));
181 // for other lines, strip common whitespace prefix
182 for line in lines {
183 res.push(trim_whitespace_prefix(line, col).to_string())
184 }
185 res
186 }
187
188 // it appears this function is called only from pprust... that's
189 // probably not a good thing.
190 pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
191 let sm = SourceMap::new(sm.path_mapping().clone());
192 let source_file = sm.new_source_file(path, src);
193 let text = (*source_file.src.as_ref().unwrap()).clone();
194
195 let text: &str = text.as_str();
196 let start_bpos = source_file.start_pos;
197 let mut pos = 0;
198 let mut comments: Vec<Comment> = Vec::new();
199 let mut code_to_the_left = false;
200
201 if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
202 comments.push(Comment {
203 style: Isolated,
204 lines: vec![text[..shebang_len].to_string()],
205 pos: start_bpos,
206 });
207 pos += shebang_len;
208 }
209
210 for token in rustc_lexer::tokenize(&text[pos..]) {
211 let token_text = &text[pos..pos + token.len];
212 match token.kind {
213 rustc_lexer::TokenKind::Whitespace => {
214 if let Some(mut idx) = token_text.find('\n') {
215 code_to_the_left = false;
216 while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
217 idx = idx + 1 + next_newline;
218 comments.push(Comment {
219 style: BlankLine,
220 lines: vec![],
221 pos: start_bpos + BytePos((pos + idx) as u32),
222 });
223 }
224 }
225 }
226 rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
227 if !is_block_doc_comment(token_text) {
228 let code_to_the_right = match text[pos + token.len..].chars().next() {
229 Some('\r' | '\n') => false,
230 _ => true,
231 };
232 let style = match (code_to_the_left, code_to_the_right) {
233 (_, true) => Mixed,
234 (false, false) => Isolated,
235 (true, false) => Trailing,
236 };
237
238 // Count the number of chars since the start of the line by rescanning.
239 let pos_in_file = start_bpos + BytePos(pos as u32);
240 let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
241 let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
242 let col = CharPos(text[line_begin_pos..pos].chars().count());
243
244 let lines = split_block_comment_into_lines(token_text, col);
245 comments.push(Comment { style, lines, pos: pos_in_file })
246 }
247 }
248 rustc_lexer::TokenKind::LineComment => {
249 if !is_doc_comment(token_text) {
250 comments.push(Comment {
251 style: if code_to_the_left { Trailing } else { Isolated },
252 lines: vec![token_text.to_string()],
253 pos: start_bpos + BytePos(pos as u32),
254 })
255 }
256 }
257 _ => {
258 code_to_the_left = true;
259 }
260 }
261 pos += token.len;
262 }
263
264 comments
265 }