]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | pub use self::CommentStyle::*; | |
12 | ||
13 | use ast; | |
14 | use codemap::{BytePos, CharPos, CodeMap, Pos}; | |
15 | use diagnostic; | |
1a4d82fc | 16 | use parse::lexer::is_block_doc_comment; |
d9579d0f AL |
17 | use parse::lexer::{StringReader, TokenAndSpan}; |
18 | use parse::lexer::{is_whitespace, Reader}; | |
1a4d82fc JJ |
19 | use parse::lexer; |
20 | use print::pprust; | |
d9579d0f | 21 | use str::char_at; |
1a4d82fc | 22 | |
c34b1796 | 23 | use std::io::Read; |
85aaf69f | 24 | use std::usize; |
1a4d82fc JJ |
25 | |
/// How a comment is positioned relative to the code on its line(s).
#[derive(Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// The comment has its lines to itself: no code to the left or right.
    Isolated,
    /// The comment follows code on the same line.
    Trailing,
    /// The comment sits inside a line of code, e.g. `code /* foo */ code`.
    Mixed,
    /// Not a textual comment at all: a manual blank line ("\n\n") recorded so
    /// that layout can be reproduced.
    BlankLine,
}
37 | ||
38 | #[derive(Clone)] | |
39 | pub struct Comment { | |
40 | pub style: CommentStyle, | |
41 | pub lines: Vec<String>, | |
42 | pub pos: BytePos, | |
43 | } | |
44 | ||
45 | pub fn is_doc_comment(s: &str) -> bool { | |
46 | (s.starts_with("///") && super::is_doc_comment(s)) || | |
47 | s.starts_with("//!") || | |
48 | (s.starts_with("/**") && is_block_doc_comment(s)) || | |
49 | s.starts_with("/*!") | |
50 | } | |
51 | ||
52 | pub fn doc_comment_style(comment: &str) -> ast::AttrStyle { | |
53 | assert!(is_doc_comment(comment)); | |
54 | if comment.starts_with("//!") || comment.starts_with("/*!") { | |
55 | ast::AttrInner | |
56 | } else { | |
57 | ast::AttrOuter | |
58 | } | |
59 | } | |
60 | ||
/// Strips the comment markers and decoration from a doc comment and returns
/// the remaining text.
///
/// * One-line comments lose their prefix (`///!`, `///`, `//!` or `//`) and
///   are otherwise returned untouched.
/// * Block comments lose their three-byte opener and the closing `*/`. Border
///   and blank lines at the top and bottom are then dropped, and a
///   `[ \t]*\*` gutter is cut from every line when all lines carry it at the
///   same column. The surviving lines are joined with `\n`.
///
/// # Panics
///
/// Panics with "not a doc-comment" if `comment` starts with neither `//` nor
/// `/*`. A block comment shorter than its own delimiters (e.g. `/**/`) panics
/// on the slice that removes them.
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// remove whitespace-only lines from the start/end of lines
    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
        /// True for a closing border line: nothing but stars, optionally
        /// preceded by one whitespace character (the usual ` ****` alignment).
        /// An arbitrary first character is NOT skipped, so a last line such
        /// as `x` or `}` is kept rather than mistaken for a border.
        fn is_closing_border(line: &str) -> bool {
            let mut chars = line.chars();
            match chars.next() {
                None => true,
                Some(first) => {
                    (first == '*' || first.is_whitespace()) && chars.all(|c| c == '*')
                }
            }
        }

        let mut start = 0;
        let mut end = lines.len();
        // first line of all-stars should be omitted
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            start += 1;
        }
        while start < end && lines[start].trim().is_empty() {
            start += 1;
        }
        // like the first, a last line of all stars should be omitted
        if end > start && is_closing_border(&lines[end - 1]) {
            end -= 1;
        }
        while end > start && lines[end - 1].trim().is_empty() {
            end -= 1;
        }
        lines[start..end].to_vec()
    }

    /// remove a "[ \t]*\*" block from each line, if possible
    fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
        // Column of the gutter star, fixed by the first line that has one.
        let mut star_col: Option<usize> = None;
        let mut can_trim = true;
        for line in &lines {
            for (j, c) in line.chars().enumerate() {
                let past_gutter = star_col.map_or(false, |col| j > col);
                if past_gutter || !"* \t".contains(c) {
                    can_trim = false;
                    break;
                }
                if c == '*' {
                    match star_col {
                        None => star_col = Some(j),
                        Some(col) if col != j => can_trim = false,
                        Some(_) => {}
                    }
                    break;
                }
            }
            // The line must extend past the gutter column; otherwise slicing
            // at `col + 1` below would start beyond the end of the line.
            // (Everything up to the star is ASCII, so char and byte indices
            // agree here.)
            match star_col {
                Some(col) if col < line.len() => {}
                _ => can_trim = false,
            }
            if !can_trim {
                break;
            }
        }

        match star_col {
            Some(col) if can_trim => {
                lines.iter().map(|line| line[col + 1..].to_string()).collect()
            }
            _ => lines,
        }
    }

    // one-line comments lose their prefix
    const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
    for prefix in ONELINERS {
        if comment.starts_with(*prefix) {
            return (&comment[prefix.len()..]).to_string();
        }
    }

    if comment.starts_with("/*") {
        // Drop the three-byte opener (`/**` or `/*!`) and the trailing `*/`.
        let lines = comment[3..comment.len() - 2]
            .lines()
            .map(|s| s.to_string())
            .collect::<Vec<String> >();

        let lines = vertical_trim(lines);
        let lines = horizontal_trim(lines);

        return lines.join("\n");
    }

    panic!("not a doc-comment: {}", comment);
}
147 | ||
148 | fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) { | |
149 | debug!(">>> blank-line comment"); | |
150 | comments.push(Comment { | |
151 | style: BlankLine, | |
152 | lines: Vec::new(), | |
153 | pos: rdr.last_pos, | |
154 | }); | |
155 | } | |
156 | ||
157 | fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, | |
158 | comments: &mut Vec<Comment>) { | |
159 | while is_whitespace(rdr.curr) && !rdr.is_eof() { | |
85aaf69f | 160 | if rdr.col == CharPos(0) && rdr.curr_is('\n') { |
1a4d82fc JJ |
161 | push_blank_line_comment(rdr, &mut *comments); |
162 | } | |
163 | rdr.bump(); | |
164 | } | |
165 | } | |
166 | ||
167 | ||
168 | fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool, | |
169 | comments: &mut Vec<Comment>) { | |
170 | debug!(">>> shebang comment"); | |
171 | let p = rdr.last_pos; | |
172 | debug!("<<< shebang comment"); | |
173 | comments.push(Comment { | |
174 | style: if code_to_the_left { Trailing } else { Isolated }, | |
175 | lines: vec!(rdr.read_one_line_comment()), | |
176 | pos: p | |
177 | }); | |
178 | } | |
179 | ||
180 | fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool, | |
181 | comments: &mut Vec<Comment>) { | |
182 | debug!(">>> line comments"); | |
183 | let p = rdr.last_pos; | |
184 | let mut lines: Vec<String> = Vec::new(); | |
185 | while rdr.curr_is('/') && rdr.nextch_is('/') { | |
186 | let line = rdr.read_one_line_comment(); | |
187 | debug!("{}", line); | |
188 | // Doc comments are not put in comments. | |
85aaf69f | 189 | if is_doc_comment(&line[..]) { |
1a4d82fc JJ |
190 | break; |
191 | } | |
192 | lines.push(line); | |
193 | rdr.consume_non_eol_whitespace(); | |
194 | } | |
195 | debug!("<<< line comments"); | |
196 | if !lines.is_empty() { | |
197 | comments.push(Comment { | |
198 | style: if code_to_the_left { Trailing } else { Isolated }, | |
199 | lines: lines, | |
200 | pos: p | |
201 | }); | |
202 | } | |
203 | } | |
204 | ||
205 | /// Returns None if the first col chars of s contain a non-whitespace char. | |
206 | /// Otherwise returns Some(k) where k is first char offset after that leading | |
207 | /// whitespace. Note k may be outside bounds of s. | |
85aaf69f | 208 | fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { |
1a4d82fc | 209 | let len = s.len(); |
85aaf69f SL |
210 | let mut col = col.to_usize(); |
211 | let mut cursor: usize = 0; | |
1a4d82fc | 212 | while col > 0 && cursor < len { |
d9579d0f | 213 | let ch = char_at(s, cursor); |
c34b1796 | 214 | if !ch.is_whitespace() { |
1a4d82fc JJ |
215 | return None; |
216 | } | |
c34b1796 | 217 | cursor += ch.len_utf8(); |
1a4d82fc JJ |
218 | col -= 1; |
219 | } | |
220 | return Some(cursor); | |
221 | } | |
222 | ||
223 | fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> , | |
224 | s: String, col: CharPos) { | |
225 | let len = s.len(); | |
85aaf69f | 226 | let s1 = match all_whitespace(&s[..], col) { |
1a4d82fc JJ |
227 | Some(col) => { |
228 | if col < len { | |
229 | (&s[col..len]).to_string() | |
230 | } else { | |
231 | "".to_string() | |
232 | } | |
233 | } | |
234 | None => s, | |
235 | }; | |
236 | debug!("pushing line: {}", s1); | |
237 | lines.push(s1); | |
238 | } | |
239 | ||
240 | fn read_block_comment(rdr: &mut StringReader, | |
241 | code_to_the_left: bool, | |
242 | comments: &mut Vec<Comment> ) { | |
243 | debug!(">>> block comment"); | |
244 | let p = rdr.last_pos; | |
245 | let mut lines: Vec<String> = Vec::new(); | |
246 | let col = rdr.col; | |
247 | rdr.bump(); | |
248 | rdr.bump(); | |
249 | ||
d9579d0f | 250 | let mut curr_line = String::from("/*"); |
1a4d82fc JJ |
251 | |
252 | // doc-comments are not really comments, they are attributes | |
253 | if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') { | |
254 | while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() { | |
255 | curr_line.push(rdr.curr.unwrap()); | |
256 | rdr.bump(); | |
257 | } | |
258 | if !rdr.is_eof() { | |
259 | curr_line.push_str("*/"); | |
260 | rdr.bump(); | |
261 | rdr.bump(); | |
262 | } | |
85aaf69f | 263 | if is_block_doc_comment(&curr_line[..]) { |
1a4d82fc JJ |
264 | return |
265 | } | |
c34b1796 | 266 | assert!(!curr_line.contains('\n')); |
1a4d82fc JJ |
267 | lines.push(curr_line); |
268 | } else { | |
85aaf69f | 269 | let mut level: isize = 1; |
1a4d82fc JJ |
270 | while level > 0 { |
271 | debug!("=== block comment level {}", level); | |
272 | if rdr.is_eof() { | |
273 | rdr.fatal("unterminated block comment"); | |
274 | } | |
275 | if rdr.curr_is('\n') { | |
276 | trim_whitespace_prefix_and_push_line(&mut lines, | |
277 | curr_line, | |
278 | col); | |
279 | curr_line = String::new(); | |
280 | rdr.bump(); | |
281 | } else { | |
282 | curr_line.push(rdr.curr.unwrap()); | |
283 | if rdr.curr_is('/') && rdr.nextch_is('*') { | |
284 | rdr.bump(); | |
285 | rdr.bump(); | |
286 | curr_line.push('*'); | |
287 | level += 1; | |
288 | } else { | |
289 | if rdr.curr_is('*') && rdr.nextch_is('/') { | |
290 | rdr.bump(); | |
291 | rdr.bump(); | |
292 | curr_line.push('/'); | |
293 | level -= 1; | |
294 | } else { rdr.bump(); } | |
295 | } | |
296 | } | |
297 | } | |
9346a6ac | 298 | if !curr_line.is_empty() { |
1a4d82fc JJ |
299 | trim_whitespace_prefix_and_push_line(&mut lines, |
300 | curr_line, | |
301 | col); | |
302 | } | |
303 | } | |
304 | ||
305 | let mut style = if code_to_the_left { Trailing } else { Isolated }; | |
306 | rdr.consume_non_eol_whitespace(); | |
85aaf69f | 307 | if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 { |
1a4d82fc JJ |
308 | style = Mixed; |
309 | } | |
310 | debug!("<<< block comment"); | |
311 | comments.push(Comment {style: style, lines: lines, pos: p}); | |
312 | } | |
313 | ||
314 | ||
315 | fn consume_comment(rdr: &mut StringReader, | |
316 | code_to_the_left: bool, | |
317 | comments: &mut Vec<Comment> ) { | |
318 | debug!(">>> consume comment"); | |
319 | if rdr.curr_is('/') && rdr.nextch_is('/') { | |
320 | read_line_comments(rdr, code_to_the_left, comments); | |
321 | } else if rdr.curr_is('/') && rdr.nextch_is('*') { | |
322 | read_block_comment(rdr, code_to_the_left, comments); | |
323 | } else if rdr.curr_is('#') && rdr.nextch_is('!') { | |
324 | read_shebang_comment(rdr, code_to_the_left, comments); | |
325 | } else { panic!(); } | |
326 | debug!("<<< consume comment"); | |
327 | } | |
328 | ||
329 | #[derive(Clone)] | |
330 | pub struct Literal { | |
331 | pub lit: String, | |
332 | pub pos: BytePos, | |
333 | } | |
334 | ||
335 | // it appears this function is called only from pprust... that's | |
336 | // probably not a good thing. | |
337 | pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler, | |
338 | path: String, | |
c34b1796 | 339 | srdr: &mut Read) |
1a4d82fc | 340 | -> (Vec<Comment>, Vec<Literal>) { |
c34b1796 AL |
341 | let mut src = Vec::new(); |
342 | srdr.read_to_end(&mut src).unwrap(); | |
1a4d82fc JJ |
343 | let src = String::from_utf8(src).unwrap(); |
344 | let cm = CodeMap::new(); | |
345 | let filemap = cm.new_filemap(path, src); | |
346 | let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap); | |
347 | ||
348 | let mut comments: Vec<Comment> = Vec::new(); | |
349 | let mut literals: Vec<Literal> = Vec::new(); | |
350 | let mut first_read: bool = true; | |
351 | while !rdr.is_eof() { | |
352 | loop { | |
353 | let mut code_to_the_left = !first_read; | |
354 | rdr.consume_non_eol_whitespace(); | |
355 | if rdr.curr_is('\n') { | |
356 | code_to_the_left = false; | |
357 | consume_whitespace_counting_blank_lines(&mut rdr, &mut comments); | |
358 | } | |
359 | while rdr.peeking_at_comment() { | |
360 | consume_comment(&mut rdr, code_to_the_left, &mut comments); | |
361 | consume_whitespace_counting_blank_lines(&mut rdr, &mut comments); | |
362 | } | |
363 | break; | |
364 | } | |
365 | ||
366 | ||
367 | let bstart = rdr.last_pos; | |
368 | rdr.next_token(); | |
369 | //discard, and look ahead; we're working with internal state | |
370 | let TokenAndSpan { tok, sp } = rdr.peek(); | |
371 | if tok.is_lit() { | |
372 | rdr.with_str_from(bstart, |s| { | |
373 | debug!("tok lit: {}", s); | |
374 | literals.push(Literal {lit: s.to_string(), pos: sp.lo}); | |
375 | }) | |
376 | } else { | |
377 | debug!("tok: {}", pprust::token_to_string(&tok)); | |
378 | } | |
379 | first_read = false; | |
380 | } | |
381 | ||
382 | (comments, literals) | |
383 | } | |
384 | ||
#[cfg(test)]
mod tests {
    use super::*;

    // Star gutter in a common column is removed; extra stars stay in the text.
    #[test] fn test_block_doc_comment_1() {
        let comment = "/**\n * Test \n ** Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test \n* Test\n Test");
    }

    #[test] fn test_block_doc_comment_2() {
        let comment = "/**\n * Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test\n Test");
    }

    // Without a star gutter on every line, nothing is trimmed horizontally.
    #[test] fn test_block_doc_comment_3() {
        let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " let a: *i32;\n *a = 5;");
    }

    // All-star border lines at the top and bottom are dropped.
    #[test] fn test_block_doc_comment_4() {
        let comment = "/*******************\n test\n *********************/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " test");
    }

    // Every one-line prefix is covered, with and without a following space.
    #[test] fn test_line_doc_comment() {
        let stripped = strip_doc_comment_decoration("/// test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("///! test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("// test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("//! test");
        assert_eq!(stripped, " test");
        let stripped = strip_doc_comment_decoration("///test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("///!test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("//test");
        assert_eq!(stripped, "test");
        let stripped = strip_doc_comment_decoration("//!test");
        assert_eq!(stripped, "test");
    }
}