]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | pub use self::CommentStyle::*; | |
12 | ||
13 | use ast; | |
14 | use codemap::{BytePos, CharPos, CodeMap, Pos}; | |
9cc50fc6 | 15 | use errors; |
1a4d82fc | 16 | use parse::lexer::is_block_doc_comment; |
d9579d0f | 17 | use parse::lexer::{StringReader, TokenAndSpan}; |
54a0048b | 18 | use parse::lexer::{is_pattern_whitespace, Reader}; |
1a4d82fc JJ |
19 | use parse::lexer; |
20 | use print::pprust; | |
d9579d0f | 21 | use str::char_at; |
1a4d82fc | 22 | |
c34b1796 | 23 | use std::io::Read; |
85aaf69f | 24 | use std::usize; |
1a4d82fc JJ |
25 | |
/// Describes how a comment is placed relative to the code around it.
#[derive(Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// Every line of the comment stands alone, with no code on either side.
    Isolated,
    /// The comment follows code on its left.
    Trailing,
    /// The comment is embedded in a line: code, then `/* foo */`, then more code.
    Mixed,
    /// Not a textual comment: a manual blank line ("\n\n") kept for layout.
    BlankLine,
}
37 | ||
38 | #[derive(Clone)] | |
39 | pub struct Comment { | |
40 | pub style: CommentStyle, | |
41 | pub lines: Vec<String>, | |
42 | pub pos: BytePos, | |
43 | } | |
44 | ||
45 | pub fn is_doc_comment(s: &str) -> bool { | |
9cc50fc6 SL |
46 | (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") || |
47 | (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!") | |
1a4d82fc JJ |
48 | } |
49 | ||
50 | pub fn doc_comment_style(comment: &str) -> ast::AttrStyle { | |
51 | assert!(is_doc_comment(comment)); | |
52 | if comment.starts_with("//!") || comment.starts_with("/*!") { | |
b039eaaf | 53 | ast::AttrStyle::Inner |
1a4d82fc | 54 | } else { |
b039eaaf | 55 | ast::AttrStyle::Outer |
1a4d82fc JJ |
56 | } |
57 | } | |
58 | ||
/// Strips the comment markers and decoration from a doc comment, returning
/// only its text.
///
/// * Line comments: the first of `///!`, `///`, `//!`, `//` that prefixes
///   `comment` is removed and the remainder is returned unchanged.
/// * Block comments (starting with `/*`): the first three bytes and the last
///   two bytes (the closing `*/`) are dropped, the rest is split into lines,
///   blank or all-star lines at the top and bottom are removed, and a shared
///   leading `[ \t]*\*` column is removed from every line when all lines
///   have it. The surviving lines are joined with `"\n"`.
///
/// # Panics
///
/// Panics if `comment` starts with none of the markers above, or if the
/// block-comment slice bounds are invalid for `comment` (for example a block
/// comment shorter than five bytes).
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// remove whitespace-only lines from the start/end of lines
    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
        let mut i = 0;
        let mut j = lines.len();
        // first line of all-stars should be omitted
        // (an empty first line also satisfies `all`, so it is skipped too)
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            i += 1;
        }
        while i < j && lines[i].trim().is_empty() {
            i += 1;
        }
        // like the first, a last line of all stars should be omitted;
        // its first character is ignored so that " ****" qualifies as well
        if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
            j -= 1;
        }
        while j > i && lines[j - 1].trim().is_empty() {
            j -= 1;
        }
        lines[i..j].to_vec()
    }

    /// remove a "[ \t]*\*" block from each line, if possible
    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
        // Column of the leading `*`, fixed by the first line that has one.
        let mut i = usize::MAX;
        let mut can_trim = true;
        let mut first = true;
        for line in &lines {
            for (j, c) in line.chars().enumerate() {
                if j > i || !"* \t".contains(c) {
                    can_trim = false;
                    break;
                }
                if c == '*' {
                    if first {
                        i = j;
                        first = false;
                    } else if i != j {
                        can_trim = false;
                    }
                    break;
                }
            }
            // The slice below starts at `i + 1`, so the line must be strictly
            // longer than `i`. Using `>` here let a whitespace-only line of
            // exactly `i` bytes through, and slicing it then panicked.
            if i >= line.len() {
                can_trim = false;
            }
            if !can_trim {
                break;
            }
        }

        if can_trim {
            // Everything up to and including the star is ASCII, so the char
            // index `i` is also a valid byte index.
            lines.iter()
                 .map(|line| line[i + 1..].to_string())
                 .collect()
        } else {
            lines
        }
    }

    // one-line comments lose their prefix; longer prefixes are listed first
    // so that `///!` is not mistaken for `///` or `//`
    const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
    for prefix in ONELINERS {
        if comment.starts_with(*prefix) {
            return comment[prefix.len()..].to_string();
        }
    }

    if comment.starts_with("/*") {
        // Skip the three-byte opener (`/**` or `/*!`) and the closing `*/`.
        let lines = comment[3..comment.len() - 2]
                        .lines()
                        .map(|s| s.to_string())
                        .collect::<Vec<String>>();

        let lines = vertical_trim(lines);
        let lines = horizontal_trim(lines);

        return lines.join("\n");
    }

    panic!("not a doc-comment: {}", comment);
}
145 | ||
146 | fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) { | |
147 | debug!(">>> blank-line comment"); | |
148 | comments.push(Comment { | |
149 | style: BlankLine, | |
150 | lines: Vec::new(), | |
151 | pos: rdr.last_pos, | |
152 | }); | |
153 | } | |
154 | ||
9cc50fc6 | 155 | fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) { |
54a0048b | 156 | while is_pattern_whitespace(rdr.curr) && !rdr.is_eof() { |
85aaf69f | 157 | if rdr.col == CharPos(0) && rdr.curr_is('\n') { |
1a4d82fc JJ |
158 | push_blank_line_comment(rdr, &mut *comments); |
159 | } | |
160 | rdr.bump(); | |
161 | } | |
162 | } | |
163 | ||
164 | ||
9cc50fc6 SL |
165 | fn read_shebang_comment(rdr: &mut StringReader, |
166 | code_to_the_left: bool, | |
1a4d82fc JJ |
167 | comments: &mut Vec<Comment>) { |
168 | debug!(">>> shebang comment"); | |
169 | let p = rdr.last_pos; | |
170 | debug!("<<< shebang comment"); | |
171 | comments.push(Comment { | |
172 | style: if code_to_the_left { Trailing } else { Isolated }, | |
9cc50fc6 SL |
173 | lines: vec![rdr.read_one_line_comment()], |
174 | pos: p, | |
1a4d82fc JJ |
175 | }); |
176 | } | |
177 | ||
9cc50fc6 SL |
178 | fn read_line_comments(rdr: &mut StringReader, |
179 | code_to_the_left: bool, | |
1a4d82fc JJ |
180 | comments: &mut Vec<Comment>) { |
181 | debug!(">>> line comments"); | |
182 | let p = rdr.last_pos; | |
183 | let mut lines: Vec<String> = Vec::new(); | |
184 | while rdr.curr_is('/') && rdr.nextch_is('/') { | |
185 | let line = rdr.read_one_line_comment(); | |
186 | debug!("{}", line); | |
187 | // Doc comments are not put in comments. | |
85aaf69f | 188 | if is_doc_comment(&line[..]) { |
1a4d82fc JJ |
189 | break; |
190 | } | |
191 | lines.push(line); | |
192 | rdr.consume_non_eol_whitespace(); | |
193 | } | |
194 | debug!("<<< line comments"); | |
195 | if !lines.is_empty() { | |
196 | comments.push(Comment { | |
197 | style: if code_to_the_left { Trailing } else { Isolated }, | |
198 | lines: lines, | |
9cc50fc6 | 199 | pos: p, |
1a4d82fc JJ |
200 | }); |
201 | } | |
202 | } | |
203 | ||
204 | /// Returns None if the first col chars of s contain a non-whitespace char. | |
205 | /// Otherwise returns Some(k) where k is first char offset after that leading | |
206 | /// whitespace. Note k may be outside bounds of s. | |
85aaf69f | 207 | fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { |
1a4d82fc | 208 | let len = s.len(); |
85aaf69f SL |
209 | let mut col = col.to_usize(); |
210 | let mut cursor: usize = 0; | |
1a4d82fc | 211 | while col > 0 && cursor < len { |
d9579d0f | 212 | let ch = char_at(s, cursor); |
c34b1796 | 213 | if !ch.is_whitespace() { |
1a4d82fc JJ |
214 | return None; |
215 | } | |
c34b1796 | 216 | cursor += ch.len_utf8(); |
1a4d82fc JJ |
217 | col -= 1; |
218 | } | |
219 | return Some(cursor); | |
220 | } | |
221 | ||
9cc50fc6 | 222 | fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) { |
1a4d82fc | 223 | let len = s.len(); |
85aaf69f | 224 | let s1 = match all_whitespace(&s[..], col) { |
1a4d82fc JJ |
225 | Some(col) => { |
226 | if col < len { | |
227 | (&s[col..len]).to_string() | |
228 | } else { | |
229 | "".to_string() | |
230 | } | |
231 | } | |
232 | None => s, | |
233 | }; | |
234 | debug!("pushing line: {}", s1); | |
235 | lines.push(s1); | |
236 | } | |
237 | ||
238 | fn read_block_comment(rdr: &mut StringReader, | |
239 | code_to_the_left: bool, | |
9cc50fc6 | 240 | comments: &mut Vec<Comment>) { |
1a4d82fc JJ |
241 | debug!(">>> block comment"); |
242 | let p = rdr.last_pos; | |
243 | let mut lines: Vec<String> = Vec::new(); | |
244 | let col = rdr.col; | |
245 | rdr.bump(); | |
246 | rdr.bump(); | |
247 | ||
d9579d0f | 248 | let mut curr_line = String::from("/*"); |
1a4d82fc JJ |
249 | |
250 | // doc-comments are not really comments, they are attributes | |
251 | if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') { | |
252 | while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() { | |
253 | curr_line.push(rdr.curr.unwrap()); | |
254 | rdr.bump(); | |
255 | } | |
256 | if !rdr.is_eof() { | |
257 | curr_line.push_str("*/"); | |
258 | rdr.bump(); | |
259 | rdr.bump(); | |
260 | } | |
85aaf69f | 261 | if is_block_doc_comment(&curr_line[..]) { |
9cc50fc6 | 262 | return; |
1a4d82fc | 263 | } |
c34b1796 | 264 | assert!(!curr_line.contains('\n')); |
1a4d82fc JJ |
265 | lines.push(curr_line); |
266 | } else { | |
85aaf69f | 267 | let mut level: isize = 1; |
1a4d82fc JJ |
268 | while level > 0 { |
269 | debug!("=== block comment level {}", level); | |
270 | if rdr.is_eof() { | |
92a42be0 | 271 | panic!(rdr.fatal("unterminated block comment")); |
1a4d82fc JJ |
272 | } |
273 | if rdr.curr_is('\n') { | |
9cc50fc6 | 274 | trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); |
1a4d82fc JJ |
275 | curr_line = String::new(); |
276 | rdr.bump(); | |
277 | } else { | |
278 | curr_line.push(rdr.curr.unwrap()); | |
279 | if rdr.curr_is('/') && rdr.nextch_is('*') { | |
280 | rdr.bump(); | |
281 | rdr.bump(); | |
282 | curr_line.push('*'); | |
283 | level += 1; | |
284 | } else { | |
285 | if rdr.curr_is('*') && rdr.nextch_is('/') { | |
286 | rdr.bump(); | |
287 | rdr.bump(); | |
288 | curr_line.push('/'); | |
289 | level -= 1; | |
9cc50fc6 SL |
290 | } else { |
291 | rdr.bump(); | |
292 | } | |
1a4d82fc JJ |
293 | } |
294 | } | |
295 | } | |
9346a6ac | 296 | if !curr_line.is_empty() { |
9cc50fc6 | 297 | trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); |
1a4d82fc JJ |
298 | } |
299 | } | |
300 | ||
9cc50fc6 SL |
301 | let mut style = if code_to_the_left { |
302 | Trailing | |
303 | } else { | |
304 | Isolated | |
305 | }; | |
1a4d82fc | 306 | rdr.consume_non_eol_whitespace(); |
85aaf69f | 307 | if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 { |
1a4d82fc JJ |
308 | style = Mixed; |
309 | } | |
310 | debug!("<<< block comment"); | |
9cc50fc6 SL |
311 | comments.push(Comment { |
312 | style: style, | |
313 | lines: lines, | |
314 | pos: p, | |
315 | }); | |
1a4d82fc JJ |
316 | } |
317 | ||
318 | ||
9cc50fc6 | 319 | fn consume_comment(rdr: &mut StringReader, code_to_the_left: bool, comments: &mut Vec<Comment>) { |
1a4d82fc JJ |
320 | debug!(">>> consume comment"); |
321 | if rdr.curr_is('/') && rdr.nextch_is('/') { | |
322 | read_line_comments(rdr, code_to_the_left, comments); | |
323 | } else if rdr.curr_is('/') && rdr.nextch_is('*') { | |
324 | read_block_comment(rdr, code_to_the_left, comments); | |
325 | } else if rdr.curr_is('#') && rdr.nextch_is('!') { | |
326 | read_shebang_comment(rdr, code_to_the_left, comments); | |
9cc50fc6 SL |
327 | } else { |
328 | panic!(); | |
329 | } | |
1a4d82fc JJ |
330 | debug!("<<< consume comment"); |
331 | } | |
332 | ||
333 | #[derive(Clone)] | |
334 | pub struct Literal { | |
335 | pub lit: String, | |
336 | pub pos: BytePos, | |
337 | } | |
338 | ||
339 | // it appears this function is called only from pprust... that's | |
340 | // probably not a good thing. | |
9cc50fc6 | 341 | pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler, |
1a4d82fc | 342 | path: String, |
c34b1796 | 343 | srdr: &mut Read) |
9cc50fc6 | 344 | -> (Vec<Comment>, Vec<Literal>) { |
c34b1796 AL |
345 | let mut src = Vec::new(); |
346 | srdr.read_to_end(&mut src).unwrap(); | |
1a4d82fc JJ |
347 | let src = String::from_utf8(src).unwrap(); |
348 | let cm = CodeMap::new(); | |
349 | let filemap = cm.new_filemap(path, src); | |
350 | let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap); | |
351 | ||
352 | let mut comments: Vec<Comment> = Vec::new(); | |
353 | let mut literals: Vec<Literal> = Vec::new(); | |
354 | let mut first_read: bool = true; | |
355 | while !rdr.is_eof() { | |
356 | loop { | |
357 | let mut code_to_the_left = !first_read; | |
358 | rdr.consume_non_eol_whitespace(); | |
359 | if rdr.curr_is('\n') { | |
360 | code_to_the_left = false; | |
361 | consume_whitespace_counting_blank_lines(&mut rdr, &mut comments); | |
362 | } | |
363 | while rdr.peeking_at_comment() { | |
364 | consume_comment(&mut rdr, code_to_the_left, &mut comments); | |
365 | consume_whitespace_counting_blank_lines(&mut rdr, &mut comments); | |
366 | } | |
367 | break; | |
368 | } | |
369 | ||
370 | ||
371 | let bstart = rdr.last_pos; | |
372 | rdr.next_token(); | |
9cc50fc6 | 373 | // discard, and look ahead; we're working with internal state |
1a4d82fc JJ |
374 | let TokenAndSpan { tok, sp } = rdr.peek(); |
375 | if tok.is_lit() { | |
376 | rdr.with_str_from(bstart, |s| { | |
377 | debug!("tok lit: {}", s); | |
9cc50fc6 SL |
378 | literals.push(Literal { |
379 | lit: s.to_string(), | |
380 | pos: sp.lo, | |
381 | }); | |
1a4d82fc JJ |
382 | }) |
383 | } else { | |
384 | debug!("tok: {}", pprust::token_to_string(&tok)); | |
385 | } | |
386 | first_read = false; | |
387 | } | |
388 | ||
389 | (comments, literals) | |
390 | } | |
391 | ||
/// Unit tests for `strip_doc_comment_decoration`.
#[cfg(test)]
mod tests {
    use super::*;

    /// A shared ` *` column is removed; a second star on a line is kept.
    #[test]
    fn test_block_doc_comment_1() {
        let comment = "/**\n * Test \n ** Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test \n* Test\n Test");
    }

    /// A shared ` *` column is removed from every line.
    #[test]
    fn test_block_doc_comment_2() {
        let comment = "/**\n * Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test\n Test");
    }

    /// Lines without a common star column are left untouched.
    #[test]
    fn test_block_doc_comment_3() {
        let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " let a: *i32;\n *a = 5;");
    }

    /// All-star first and last lines are dropped.
    #[test]
    fn test_block_doc_comment_4() {
        let comment = "/*******************\n test\n *********************/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " test");
    }

    /// Each one-line prefix (`///`, `///!`, `//!`, `//`) is stripped, with and
    /// without a following space.
    #[test]
    fn test_line_doc_comment() {
        let stripped = strip_doc_comment_decoration("/// test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("///! test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("//! test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("// test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("///test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("///!test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("//!test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("//test");
        assert_eq!(stripped, "test");
    }
}