]> git.proxmox.com Git - rustc.git/blob - src/libsyntax/parse/lexer/comments.rs
New upstream version 1.40.0+dfsg1
[rustc.git] / src / libsyntax / parse / lexer / comments.rs
1 pub use CommentStyle::*;
2
3 use crate::ast;
4 use crate::source_map::SourceMap;
5 use crate::parse::lexer::is_block_doc_comment;
6 use crate::parse::lexer::ParseSess;
7
8 use syntax_pos::{BytePos, CharPos, Pos, FileName};
9
10 use std::usize;
11
12 #[cfg(test)]
13 mod tests;
14
/// How a (non-doc) comment sits relative to the code around it.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum CommentStyle {
    /// The comment stands alone: there is no code on either side of any of
    /// its lines.
    Isolated,
    /// The comment follows code on the same line (code is to its left).
    Trailing,
    /// The comment is embedded in code, e.g. `code /* foo */ code`.
    Mixed,
    /// Not a real comment: records a manual blank line (`"\n\n"`) that was
    /// present in the source, kept for layout purposes.
    BlankLine,
}
26
27 #[derive(Clone)]
28 pub struct Comment {
29 pub style: CommentStyle,
30 pub lines: Vec<String>,
31 pub pos: BytePos,
32 }
33
34 fn is_doc_comment(s: &str) -> bool {
35 (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") ||
36 (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
37 }
38
39 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
40 assert!(is_doc_comment(comment));
41 if comment.starts_with("//!") || comment.starts_with("/*!") {
42 ast::AttrStyle::Inner
43 } else {
44 ast::AttrStyle::Outer
45 }
46 }
47
/// Strips the comment markers from a doc comment and returns the remaining
/// text.
///
/// * Line comments lose their prefix (`///!`, `///`, `//!` or `//`); the rest
///   of the text is returned unchanged.
/// * Block comments lose their three-byte opener and the closing `*/`. The
///   body is then trimmed vertically (decorative star lines and blank lines
///   at either end) and horizontally (a common `[ \t]*\*` gutter, when every
///   line has it in the same column). The lines are re-joined with `\n`.
///
/// # Panics
///
/// Panics if `comment` starts with neither `//` nor `/*`.
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// Drops decorative star lines and whitespace-only lines from the top
    /// and bottom of `lines`.
    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
        let mut start = 0;
        let mut end = lines.len();

        // An opening line consisting solely of `*` (or nothing) is decoration.
        if lines.first().map_or(false, |l| l.chars().all(|c| c == '*')) {
            start = 1;
        }
        while start < end && lines[start].trim().is_empty() {
            start += 1;
        }

        // Likewise for a closing line that is all stars after its first char.
        if end > start && lines[end - 1].chars().skip(1).all(|c| c == '*') {
            end -= 1;
        }
        while end > start && lines[end - 1].trim().is_empty() {
            end -= 1;
        }

        lines.into_iter().skip(start).take(end - start).collect()
    }

    /// Removes a leading `[ \t]*\*` gutter from every line, provided each
    /// line has its first `*` in the same column; otherwise returns `lines`
    /// untouched.
    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
        // Column of the gutter star, fixed by the first line that has one.
        let mut star_col: Option<usize> = None;
        let mut trimmable = true;

        'scan: for line in &lines {
            for (col, ch) in line.chars().enumerate() {
                let past_gutter = star_col.map_or(false, |star| col > star);
                if past_gutter || !"* \t".contains(ch) {
                    trimmable = false;
                    break 'scan;
                }
                if ch == '*' {
                    match star_col {
                        None => star_col = Some(col),
                        Some(star) if star != col => {
                            trimmable = false;
                            break 'scan;
                        }
                        Some(_) => {}
                    }
                    break;
                }
            }

            // The gutter column must lie inside this line; a line seen before
            // any star has been found can never satisfy that.
            let star_in_line = star_col.map_or(false, |star| star < line.len());
            if !star_in_line {
                trimmable = false;
                break;
            }
        }

        match star_col {
            Some(star) if trimmable => lines
                .iter()
                .map(|line| line[star + 1..].to_string())
                .collect(),
            _ => lines,
        }
    }

    // Longest prefixes first so that `///!` wins over `///`, and so on.
    const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];

    if let Some(prefix) = ONELINERS.iter().find(|p| comment.starts_with(**p)) {
        return comment[prefix.len()..].to_string();
    }

    if !comment.starts_with("/*") {
        panic!("not a doc-comment: {}", comment);
    }

    // Skip the three-byte opener (`/**` or `/*!`) and the trailing `*/`.
    let body: Vec<String> = comment[3..comment.len() - 2]
        .lines()
        .map(String::from)
        .collect();

    horizontal_trim(vertical_trim(body)).join("\n")
}
139
140 /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
141 /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
142 /// whitespace. Note that `k` may be outside bounds of `s`.
143 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
144 let mut idx = 0;
145 for (i, ch) in s.char_indices().take(col.to_usize()) {
146 if !ch.is_whitespace() {
147 return None;
148 }
149 idx = i + ch.len_utf8();
150 }
151 Some(idx)
152 }
153
154 fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
155 let len = s.len();
156 match all_whitespace(&s, col) {
157 Some(col) => if col < len { &s[col..] } else { "" },
158 None => s,
159 }
160 }
161
162 fn split_block_comment_into_lines(
163 text: &str,
164 col: CharPos,
165 ) -> Vec<String> {
166 let mut res: Vec<String> = vec![];
167 let mut lines = text.lines();
168 // just push the first line
169 res.extend(lines.next().map(|it| it.to_string()));
170 // for other lines, strip common whitespace prefix
171 for line in lines {
172 res.push(trim_whitespace_prefix(line, col).to_string())
173 }
174 res
175 }
176
177 // it appears this function is called only from pprust... that's
178 // probably not a good thing.
179 crate fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> {
180 let cm = SourceMap::new(sess.source_map().path_mapping().clone());
181 let source_file = cm.new_source_file(path, src);
182 let text = (*source_file.src.as_ref().unwrap()).clone();
183
184 let text: &str = text.as_str();
185 let start_bpos = source_file.start_pos;
186 let mut pos = 0;
187 let mut comments: Vec<Comment> = Vec::new();
188 let mut code_to_the_left = false;
189
190 if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
191 comments.push(Comment {
192 style: Isolated,
193 lines: vec![text[..shebang_len].to_string()],
194 pos: start_bpos,
195 });
196 pos += shebang_len;
197 }
198
199 for token in rustc_lexer::tokenize(&text[pos..]) {
200 let token_text = &text[pos..pos + token.len];
201 match token.kind {
202 rustc_lexer::TokenKind::Whitespace => {
203 if let Some(mut idx) = token_text.find('\n') {
204 code_to_the_left = false;
205 while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
206 idx = idx + 1 + next_newline;
207 comments.push(Comment {
208 style: BlankLine,
209 lines: vec![],
210 pos: start_bpos + BytePos((pos + idx) as u32),
211 });
212 }
213 }
214 }
215 rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
216 if !is_block_doc_comment(token_text) {
217 let code_to_the_right = match text[pos + token.len..].chars().next() {
218 Some('\r') | Some('\n') => false,
219 _ => true,
220 };
221 let style = match (code_to_the_left, code_to_the_right) {
222 (true, true) | (false, true) => Mixed,
223 (false, false) => Isolated,
224 (true, false) => Trailing,
225 };
226
227 // Count the number of chars since the start of the line by rescanning.
228 let pos_in_file = start_bpos + BytePos(pos as u32);
229 let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
230 let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
231 let col = CharPos(text[line_begin_pos..pos].chars().count());
232
233 let lines = split_block_comment_into_lines(token_text, col);
234 comments.push(Comment { style, lines, pos: pos_in_file })
235 }
236 }
237 rustc_lexer::TokenKind::LineComment => {
238 if !is_doc_comment(token_text) {
239 comments.push(Comment {
240 style: if code_to_the_left { Trailing } else { Isolated },
241 lines: vec![token_text.to_string()],
242 pos: start_bpos + BytePos(pos as u32),
243 })
244 }
245 }
246 _ => {
247 code_to_the_left = true;
248 }
249 }
250 pos += token.len;
251 }
252
253 comments
254 }