]> git.proxmox.com Git - rustc.git/blob - src/libsyntax/parse/lexer/comments.rs
Imported Upstream version 1.6.0+dfsg1
[rustc.git] / src / libsyntax / parse / lexer / comments.rs
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 pub use self::CommentStyle::*;
12
13 use ast;
14 use codemap::{BytePos, CharPos, CodeMap, Pos};
15 use diagnostic;
16 use parse::lexer::is_block_doc_comment;
17 use parse::lexer::{StringReader, TokenAndSpan};
18 use parse::lexer::{is_whitespace, Reader};
19 use parse::lexer;
20 use print::pprust;
21 use str::char_at;
22
23 use std::io::Read;
24 use std::usize;
25
/// Where a collected comment sits relative to the code around it.
#[derive(Clone, Copy, PartialEq)]
pub enum CommentStyle {
    /// No code on either side of each line of the comment
    Isolated,
    /// Code exists to the left of the comment
    Trailing,
    /// Code before /* foo */ and after the comment
    Mixed,
    /// Just a manual blank line "\n\n", for layout; carries no comment text
    BlankLine,
}
37
/// A comment (or blank-line marker) gathered from the source text.
#[derive(Clone)]
pub struct Comment {
    /// Placement of the comment relative to the surrounding code.
    pub style: CommentStyle,
    /// The comment's text, one entry per line; empty for `BlankLine` markers.
    pub lines: Vec<String>,
    /// The reader's `last_pos` recorded when the comment was encountered.
    pub pos: BytePos,
}
44
45 pub fn is_doc_comment(s: &str) -> bool {
46 (s.starts_with("///") && super::is_doc_comment(s)) ||
47 s.starts_with("//!") ||
48 (s.starts_with("/**") && is_block_doc_comment(s)) ||
49 s.starts_with("/*!")
50 }
51
52 pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
53 assert!(is_doc_comment(comment));
54 if comment.starts_with("//!") || comment.starts_with("/*!") {
55 ast::AttrStyle::Inner
56 } else {
57 ast::AttrStyle::Outer
58 }
59 }
60
/// Strips the comment markers and decorative framing from a doc comment and
/// returns the remaining text.
///
/// * Line comments (`///!`, `///`, `//!`, `//`) simply lose their prefix.
/// * Block comments lose their first three bytes and the closing `*/`.  Then
///   star-only / blank lines at the top and bottom are dropped, and if every
///   remaining line has its first `*` in the same column, preceded only by
///   spaces or tabs, everything up to and including that `*` is removed from
///   each line.
///
/// # Panics
///
/// Panics with "not a doc-comment: ..." if `comment` starts with neither `//`
/// nor `/*`.  A block comment is expected to carry a three-byte opener and a
/// two-byte closer; anything shorter makes the slice below panic.
pub fn strip_doc_comment_decoration(comment: &str) -> String {
    /// remove whitespace-only lines from the start/end of lines
    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
        /// True for a closing frame line: nothing at all, or one leading
        /// whitespace character or star followed only by stars.
        fn is_closing_frame(line: &str) -> bool {
            let mut chars = line.chars();
            match chars.next() {
                None => true,
                Some(first) => {
                    (first == '*' || first.is_whitespace()) && chars.all(|c| c == '*')
                }
            }
        }

        let mut i = 0;
        let mut j = lines.len();
        // first line of all-stars should be omitted
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            i += 1;
        }
        while i < j && lines[i].trim().is_empty() {
            i += 1;
        }
        // like the first, a last line of all stars should be omitted.  The
        // leading character is checked explicitly so that a last line of real
        // text such as "x" or "a**" is kept instead of being mistaken for a
        // frame and silently dropped.
        if j > i && is_closing_frame(&lines[j - 1]) {
            j -= 1;
        }
        while j > i && lines[j - 1].trim().is_empty() {
            j -= 1;
        }
        lines[i..j].to_vec()
    }

    /// remove a "[ \t]*\*" block from each line, if possible
    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
        // Column of the leading `*`, fixed by the first line that has one.
        // Every character before it is a space or tab (one byte each), so the
        // char index can also be used as a byte index.
        let mut i = usize::MAX;
        let mut can_trim = true;
        let mut first = true;
        for line in &lines {
            for (j, c) in line.chars().enumerate() {
                if j > i || !"* \t".contains(c) {
                    can_trim = false;
                    break;
                }
                if c == '*' {
                    if first {
                        i = j;
                        first = false;
                    } else if i != j {
                        can_trim = false;
                    }
                    break;
                }
            }
            // The line must extend past column `i`, otherwise `line[i + 1..]`
            // below is out of range.  `>=` (not `>`) is required: a line of
            // exactly `i` bytes has no byte at column `i` to strip.
            if i >= line.len() {
                can_trim = false;
            }
            if !can_trim {
                break;
            }
        }

        if can_trim {
            lines.iter().map(|line| line[i + 1..].to_string()).collect()
        } else {
            lines
        }
    }

    // one-line comments lose their prefix; longer prefixes are listed first so
    // that "///" is not mistaken for "//" followed by "/".
    const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
    for prefix in ONELINERS {
        if comment.starts_with(*prefix) {
            return comment[prefix.len()..].to_string();
        }
    }

    if comment.starts_with("/*") {
        // Drop the three-byte opener ("/**" or "/*!") and the trailing "*/".
        let lines = comment[3..comment.len() - 2]
            .lines()
            .map(|s| s.to_string())
            .collect::<Vec<String>>();

        let lines = vertical_trim(lines);
        let lines = horizontal_trim(lines);

        return lines.join("\n");
    }

    panic!("not a doc-comment: {}", comment);
}
147
148 fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
149 debug!(">>> blank-line comment");
150 comments.push(Comment {
151 style: BlankLine,
152 lines: Vec::new(),
153 pos: rdr.last_pos,
154 });
155 }
156
157 fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
158 comments: &mut Vec<Comment>) {
159 while is_whitespace(rdr.curr) && !rdr.is_eof() {
160 if rdr.col == CharPos(0) && rdr.curr_is('\n') {
161 push_blank_line_comment(rdr, &mut *comments);
162 }
163 rdr.bump();
164 }
165 }
166
167
168 fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
169 comments: &mut Vec<Comment>) {
170 debug!(">>> shebang comment");
171 let p = rdr.last_pos;
172 debug!("<<< shebang comment");
173 comments.push(Comment {
174 style: if code_to_the_left { Trailing } else { Isolated },
175 lines: vec!(rdr.read_one_line_comment()),
176 pos: p
177 });
178 }
179
180 fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
181 comments: &mut Vec<Comment>) {
182 debug!(">>> line comments");
183 let p = rdr.last_pos;
184 let mut lines: Vec<String> = Vec::new();
185 while rdr.curr_is('/') && rdr.nextch_is('/') {
186 let line = rdr.read_one_line_comment();
187 debug!("{}", line);
188 // Doc comments are not put in comments.
189 if is_doc_comment(&line[..]) {
190 break;
191 }
192 lines.push(line);
193 rdr.consume_non_eol_whitespace();
194 }
195 debug!("<<< line comments");
196 if !lines.is_empty() {
197 comments.push(Comment {
198 style: if code_to_the_left { Trailing } else { Isolated },
199 lines: lines,
200 pos: p
201 });
202 }
203 }
204
205 /// Returns None if the first col chars of s contain a non-whitespace char.
206 /// Otherwise returns Some(k) where k is first char offset after that leading
207 /// whitespace. Note k may be outside bounds of s.
208 fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
209 let len = s.len();
210 let mut col = col.to_usize();
211 let mut cursor: usize = 0;
212 while col > 0 && cursor < len {
213 let ch = char_at(s, cursor);
214 if !ch.is_whitespace() {
215 return None;
216 }
217 cursor += ch.len_utf8();
218 col -= 1;
219 }
220 return Some(cursor);
221 }
222
223 fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
224 s: String, col: CharPos) {
225 let len = s.len();
226 let s1 = match all_whitespace(&s[..], col) {
227 Some(col) => {
228 if col < len {
229 (&s[col..len]).to_string()
230 } else {
231 "".to_string()
232 }
233 }
234 None => s,
235 };
236 debug!("pushing line: {}", s1);
237 lines.push(s1);
238 }
239
240 fn read_block_comment(rdr: &mut StringReader,
241 code_to_the_left: bool,
242 comments: &mut Vec<Comment> ) {
243 debug!(">>> block comment");
244 let p = rdr.last_pos;
245 let mut lines: Vec<String> = Vec::new();
246 let col = rdr.col;
247 rdr.bump();
248 rdr.bump();
249
250 let mut curr_line = String::from("/*");
251
252 // doc-comments are not really comments, they are attributes
253 if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
254 while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
255 curr_line.push(rdr.curr.unwrap());
256 rdr.bump();
257 }
258 if !rdr.is_eof() {
259 curr_line.push_str("*/");
260 rdr.bump();
261 rdr.bump();
262 }
263 if is_block_doc_comment(&curr_line[..]) {
264 return
265 }
266 assert!(!curr_line.contains('\n'));
267 lines.push(curr_line);
268 } else {
269 let mut level: isize = 1;
270 while level > 0 {
271 debug!("=== block comment level {}", level);
272 if rdr.is_eof() {
273 panic!(rdr.fatal("unterminated block comment"));
274 }
275 if rdr.curr_is('\n') {
276 trim_whitespace_prefix_and_push_line(&mut lines,
277 curr_line,
278 col);
279 curr_line = String::new();
280 rdr.bump();
281 } else {
282 curr_line.push(rdr.curr.unwrap());
283 if rdr.curr_is('/') && rdr.nextch_is('*') {
284 rdr.bump();
285 rdr.bump();
286 curr_line.push('*');
287 level += 1;
288 } else {
289 if rdr.curr_is('*') && rdr.nextch_is('/') {
290 rdr.bump();
291 rdr.bump();
292 curr_line.push('/');
293 level -= 1;
294 } else { rdr.bump(); }
295 }
296 }
297 }
298 if !curr_line.is_empty() {
299 trim_whitespace_prefix_and_push_line(&mut lines,
300 curr_line,
301 col);
302 }
303 }
304
305 let mut style = if code_to_the_left { Trailing } else { Isolated };
306 rdr.consume_non_eol_whitespace();
307 if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 {
308 style = Mixed;
309 }
310 debug!("<<< block comment");
311 comments.push(Comment {style: style, lines: lines, pos: p});
312 }
313
314
315 fn consume_comment(rdr: &mut StringReader,
316 code_to_the_left: bool,
317 comments: &mut Vec<Comment> ) {
318 debug!(">>> consume comment");
319 if rdr.curr_is('/') && rdr.nextch_is('/') {
320 read_line_comments(rdr, code_to_the_left, comments);
321 } else if rdr.curr_is('/') && rdr.nextch_is('*') {
322 read_block_comment(rdr, code_to_the_left, comments);
323 } else if rdr.curr_is('#') && rdr.nextch_is('!') {
324 read_shebang_comment(rdr, code_to_the_left, comments);
325 } else { panic!(); }
326 debug!("<<< consume comment");
327 }
328
/// A literal token gathered from the source text.
#[derive(Clone)]
pub struct Literal {
    /// The literal's source text, exactly as it was written.
    pub lit: String,
    /// Start (`lo`) of the span the lexer reported for the literal.
    pub pos: BytePos,
}
334
335 // it appears this function is called only from pprust... that's
336 // probably not a good thing.
337 pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
338 path: String,
339 srdr: &mut Read)
340 -> (Vec<Comment>, Vec<Literal>) {
341 let mut src = Vec::new();
342 srdr.read_to_end(&mut src).unwrap();
343 let src = String::from_utf8(src).unwrap();
344 let cm = CodeMap::new();
345 let filemap = cm.new_filemap(path, src);
346 let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
347
348 let mut comments: Vec<Comment> = Vec::new();
349 let mut literals: Vec<Literal> = Vec::new();
350 let mut first_read: bool = true;
351 while !rdr.is_eof() {
352 loop {
353 let mut code_to_the_left = !first_read;
354 rdr.consume_non_eol_whitespace();
355 if rdr.curr_is('\n') {
356 code_to_the_left = false;
357 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
358 }
359 while rdr.peeking_at_comment() {
360 consume_comment(&mut rdr, code_to_the_left, &mut comments);
361 consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
362 }
363 break;
364 }
365
366
367 let bstart = rdr.last_pos;
368 rdr.next_token();
369 //discard, and look ahead; we're working with internal state
370 let TokenAndSpan { tok, sp } = rdr.peek();
371 if tok.is_lit() {
372 rdr.with_str_from(bstart, |s| {
373 debug!("tok lit: {}", s);
374 literals.push(Literal {lit: s.to_string(), pos: sp.lo});
375 })
376 } else {
377 debug!("tok: {}", pprust::token_to_string(&tok));
378 }
379 first_read = false;
380 }
381
382 (comments, literals)
383 }
384
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that stripping the decoration from `input` yields `expected`.
    fn check(input: &str, expected: &str) {
        let stripped = strip_doc_comment_decoration(input);
        assert_eq!(stripped, expected);
    }

    #[test]
    fn test_block_doc_comment_1() {
        check("/**\n * Test \n ** Test\n * Test\n*/", " Test \n* Test\n Test");
    }

    #[test]
    fn test_block_doc_comment_2() {
        check("/**\n * Test\n * Test\n*/", " Test\n Test");
    }

    #[test]
    fn test_block_doc_comment_3() {
        check("/**\n let a: *i32;\n *a = 5;\n*/", " let a: *i32;\n *a = 5;");
    }

    #[test]
    fn test_block_doc_comment_4() {
        check("/*******************\n test\n *********************/", " test");
    }

    #[test]
    fn test_line_doc_comment() {
        // (input, expected) pairs, checked in order.
        let cases = [
            ("/// test", " test"),
            ("///! test", " test"),
            ("// test", " test"),
            ("// test", " test"),
            ("///test", "test"),
            ("///!test", "test"),
            ("//test", "test"),
        ];
        for &(input, expected) in cases.iter() {
            check(input, expected);
        }
    }
}