]> git.proxmox.com Git - rustc.git/blame - src/libsyntax/parse/comments.rs
Imported Upstream version 0.7
[rustc.git] / src / libsyntax / parse / comments.rs
CommitLineData
223e47cc
LB
1// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
223e47cc
LB
11use ast;
12use codemap::{BytePos, CharPos, CodeMap, Pos};
13use diagnostic;
970d7e83 14use parse::lexer::{is_whitespace, with_str_from, reader};
223e47cc
LB
15use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan};
16use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
17use parse::lexer;
18use parse::token;
970d7e83 19use parse::token::{get_ident_interner};
223e47cc 20
970d7e83
LB
21use std::io;
22use std::str;
23use std::uint;
223e47cc
LB
24
25#[deriving(Eq)]
26pub enum cmnt_style {
27 isolated, // No code on either side of each line of the comment
28 trailing, // Code exists to the left of the comment
29 mixed, // Code before /* foo */ and after the comment
30 blank_line, // Just a manual blank line "\n\n", for layout
31}
32
33pub struct cmnt {
34 style: cmnt_style,
35 lines: ~[~str],
36 pos: BytePos
37}
38
39pub fn is_doc_comment(s: &str) -> bool {
970d7e83
LB
40 (s.starts_with("///") && !is_line_non_doc_comment(s)) ||
41 s.starts_with("//!") ||
42 (s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
43 s.starts_with("/*!")
223e47cc
LB
44}
45
46pub fn doc_comment_style(comment: &str) -> ast::attr_style {
47 assert!(is_doc_comment(comment));
970d7e83 48 if comment.starts_with("//!") || comment.starts_with("/*!") {
223e47cc
LB
49 ast::attr_inner
50 } else {
51 ast::attr_outer
52 }
53}
54
55pub fn strip_doc_comment_decoration(comment: &str) -> ~str {
56
57 /// remove whitespace-only lines from the start/end of lines
58 fn vertical_trim(lines: ~[~str]) -> ~[~str] {
970d7e83
LB
59 let mut i = 0u;
60 let mut j = lines.len();
223e47cc
LB
61 while i < j && lines[i].trim().is_empty() {
62 i += 1u;
63 }
64 while j > i && lines[j - 1u].trim().is_empty() {
65 j -= 1u;
66 }
67 return lines.slice(i, j).to_owned();
68 }
69
970d7e83
LB
70 /// remove a "[ \t]*\*" block from each line, if possible
71 fn horizontal_trim(lines: ~[~str]) -> ~[~str] {
72 let mut i = uint::max_value;
73 let mut can_trim = true;
74 let mut first = true;
75 for lines.iter().advance |line| {
76 for line.iter().enumerate().advance |(j, c)| {
77 if j > i || !"* \t".contains_char(c) {
78 can_trim = false;
223e47cc
LB
79 break;
80 }
970d7e83
LB
81 if c == '*' {
82 if first {
83 i = j;
84 first = false;
85 } else if i != j {
86 can_trim = false;
87 }
223e47cc
LB
88 break;
89 }
90 }
970d7e83
LB
91 if i > line.len() {
92 can_trim = false;
93 }
94 if !can_trim {
95 break;
96 }
223e47cc
LB
97 }
98
970d7e83
LB
99 if can_trim {
100 do lines.map |line| {
101 line.slice(i + 1, line.len()).to_owned()
223e47cc 102 }
970d7e83
LB
103 } else {
104 lines
105 }
223e47cc
LB
106 }
107
970d7e83 108 if comment.starts_with("//") {
223e47cc 109 // FIXME #5475:
970d7e83
LB
110 // return comment.slice(3u, comment.len()).to_owned();
111 let r = comment.slice(3u, comment.len()); return r.to_owned();
223e47cc
LB
112 }
113
970d7e83
LB
114 if comment.starts_with("/*") {
115 let lines = comment.slice(3u, comment.len() - 2u)
116 .any_line_iter()
117 .transform(|s| s.to_owned())
118 .collect::<~[~str]>();
119
223e47cc 120 let lines = vertical_trim(lines);
970d7e83
LB
121 let lines = horizontal_trim(lines);
122
123 return lines.connect("\n");
223e47cc
LB
124 }
125
970d7e83 126 fail!("not a doc-comment: %s", comment);
223e47cc
LB
127}
128
129fn read_to_eol(rdr: @mut StringReader) -> ~str {
130 let mut val = ~"";
131 while rdr.curr != '\n' && !is_eof(rdr) {
970d7e83 132 val.push_char(rdr.curr);
223e47cc
LB
133 bump(rdr);
134 }
135 if rdr.curr == '\n' { bump(rdr); }
136 return val;
137}
138
139fn read_one_line_comment(rdr: @mut StringReader) -> ~str {
140 let val = read_to_eol(rdr);
141 assert!((val[0] == '/' as u8 && val[1] == '/' as u8) ||
142 (val[0] == '#' as u8 && val[1] == '!' as u8));
143 return val;
144}
145
146fn consume_non_eol_whitespace(rdr: @mut StringReader) {
147 while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) {
148 bump(rdr);
149 }
150}
151
152fn push_blank_line_comment(rdr: @mut StringReader, comments: &mut ~[cmnt]) {
153 debug!(">>> blank-line comment");
154 let v: ~[~str] = ~[];
155 comments.push(cmnt {style: blank_line, lines: v, pos: rdr.last_pos});
156}
157
158fn consume_whitespace_counting_blank_lines(rdr: @mut StringReader,
159 comments: &mut ~[cmnt]) {
160 while is_whitespace(rdr.curr) && !is_eof(rdr) {
161 if rdr.col == CharPos(0u) && rdr.curr == '\n' {
162 push_blank_line_comment(rdr, &mut *comments);
163 }
164 bump(rdr);
165 }
166}
167
168
169fn read_shebang_comment(rdr: @mut StringReader, code_to_the_left: bool,
170 comments: &mut ~[cmnt]) {
171 debug!(">>> shebang comment");
172 let p = rdr.last_pos;
173 debug!("<<< shebang comment");
174 comments.push(cmnt {
175 style: if code_to_the_left { trailing } else { isolated },
176 lines: ~[read_one_line_comment(rdr)],
177 pos: p
178 });
179}
180
181fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
182 comments: &mut ~[cmnt]) {
183 debug!(">>> line comments");
184 let p = rdr.last_pos;
185 let mut lines: ~[~str] = ~[];
186 while rdr.curr == '/' && nextch(rdr) == '/' {
187 let line = read_one_line_comment(rdr);
188 debug!("%s", line);
189 if is_doc_comment(line) { // doc-comments are not put in comments
190 break;
191 }
192 lines.push(line);
193 consume_non_eol_whitespace(rdr);
194 }
195 debug!("<<< line comments");
196 if !lines.is_empty() {
197 comments.push(cmnt {
198 style: if code_to_the_left { trailing } else { isolated },
199 lines: lines,
200 pos: p
201 });
202 }
203}
204
970d7e83
LB
205// Returns None if the first col chars of s contain a non-whitespace char.
206// Otherwise returns Some(k) where k is first char offset after that leading
207// whitespace. Note k may be outside bounds of s.
208fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
209 let len = s.len();
210 let mut col = col.to_uint();
211 let mut cursor: uint = 0;
212 while col > 0 && cursor < len {
213 let r: str::CharRange = s.char_range_at(cursor);
214 if !r.ch.is_whitespace() {
215 return None;
216 }
217 cursor = r.next;
218 col -= 1;
223e47cc 219 }
970d7e83 220 return Some(cursor);
223e47cc
LB
221}
222
223fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
224 s: ~str, col: CharPos) {
970d7e83
LB
225 let len = s.len();
226 let s1 = match all_whitespace(s, col) {
227 Some(col) => {
228 if col < len {
229 s.slice(col, len).to_owned()
230 } else { ~"" }
231 }
232 None => s,
233 };
223e47cc
LB
234 debug!("pushing line: %s", s1);
235 lines.push(s1);
236}
237
238fn read_block_comment(rdr: @mut StringReader,
239 code_to_the_left: bool,
240 comments: &mut ~[cmnt]) {
241 debug!(">>> block comment");
242 let p = rdr.last_pos;
243 let mut lines: ~[~str] = ~[];
970d7e83 244 let col: CharPos = rdr.col;
223e47cc
LB
245 bump(rdr);
246 bump(rdr);
247
248 let mut curr_line = ~"/*";
249
250 // doc-comments are not really comments, they are attributes
251 if rdr.curr == '*' || rdr.curr == '!' {
252 while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
970d7e83 253 curr_line.push_char(rdr.curr);
223e47cc
LB
254 bump(rdr);
255 }
256 if !is_eof(rdr) {
970d7e83 257 curr_line.push_str("*/");
223e47cc
LB
258 bump(rdr);
259 bump(rdr);
260 }
261 if !is_block_non_doc_comment(curr_line) { return; }
262 assert!(!curr_line.contains_char('\n'));
263 lines.push(curr_line);
264 } else {
265 let mut level: int = 1;
266 while level > 0 {
267 debug!("=== block comment level %d", level);
268 if is_eof(rdr) {
269 (rdr as @reader).fatal(~"unterminated block comment");
270 }
271 if rdr.curr == '\n' {
272 trim_whitespace_prefix_and_push_line(&mut lines, curr_line,
273 col);
274 curr_line = ~"";
275 bump(rdr);
276 } else {
970d7e83 277 curr_line.push_char(rdr.curr);
223e47cc
LB
278 if rdr.curr == '/' && nextch(rdr) == '*' {
279 bump(rdr);
280 bump(rdr);
970d7e83 281 curr_line.push_char('*');
223e47cc
LB
282 level += 1;
283 } else {
284 if rdr.curr == '*' && nextch(rdr) == '/' {
285 bump(rdr);
286 bump(rdr);
970d7e83 287 curr_line.push_char('/');
223e47cc
LB
288 level -= 1;
289 } else { bump(rdr); }
290 }
291 }
292 }
970d7e83 293 if curr_line.len() != 0 {
223e47cc
LB
294 trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
295 }
296 }
297
298 let mut style = if code_to_the_left { trailing } else { isolated };
299 consume_non_eol_whitespace(rdr);
970d7e83 300 if !is_eof(rdr) && rdr.curr != '\n' && lines.len() == 1u {
223e47cc
LB
301 style = mixed;
302 }
303 debug!("<<< block comment");
304 comments.push(cmnt {style: style, lines: lines, pos: p});
305}
306
307fn peeking_at_comment(rdr: @mut StringReader) -> bool {
308 return ((rdr.curr == '/' && nextch(rdr) == '/') ||
309 (rdr.curr == '/' && nextch(rdr) == '*')) ||
310 (rdr.curr == '#' && nextch(rdr) == '!');
311}
312
313fn consume_comment(rdr: @mut StringReader,
314 code_to_the_left: bool,
315 comments: &mut ~[cmnt]) {
316 debug!(">>> consume comment");
317 if rdr.curr == '/' && nextch(rdr) == '/' {
318 read_line_comments(rdr, code_to_the_left, comments);
319 } else if rdr.curr == '/' && nextch(rdr) == '*' {
320 read_block_comment(rdr, code_to_the_left, comments);
321 } else if rdr.curr == '#' && nextch(rdr) == '!' {
322 read_shebang_comment(rdr, code_to_the_left, comments);
323 } else { fail!(); }
324 debug!("<<< consume comment");
325}
326
327pub struct lit {
328 lit: ~str,
329 pos: BytePos
330}
331
970d7e83
LB
332// it appears this function is called only from pprust... that's
333// probably not a good thing.
223e47cc
LB
334pub fn gather_comments_and_literals(span_diagnostic:
335 @diagnostic::span_handler,
970d7e83 336 path: @str,
223e47cc
LB
337 srdr: @io::Reader)
338 -> (~[cmnt], ~[lit]) {
970d7e83 339 let src = str::from_bytes(srdr.read_whole_stream()).to_managed();
223e47cc
LB
340 let cm = CodeMap::new();
341 let filemap = cm.new_filemap(path, src);
970d7e83 342 let rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
223e47cc
LB
343
344 let mut comments: ~[cmnt] = ~[];
345 let mut literals: ~[lit] = ~[];
346 let mut first_read: bool = true;
347 while !is_eof(rdr) {
348 loop {
349 let mut code_to_the_left = !first_read;
350 consume_non_eol_whitespace(rdr);
351 if rdr.curr == '\n' {
352 code_to_the_left = false;
353 consume_whitespace_counting_blank_lines(rdr, &mut comments);
354 }
355 while peeking_at_comment(rdr) {
356 consume_comment(rdr, code_to_the_left, &mut comments);
357 consume_whitespace_counting_blank_lines(rdr, &mut comments);
358 }
359 break;
360 }
361
362
970d7e83 363 let bstart = rdr.last_pos;
223e47cc
LB
364 rdr.next_token();
365 //discard, and look ahead; we're working with internal state
366 let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
367 if token::is_lit(&tok) {
970d7e83
LB
368 do with_str_from(rdr, bstart) |s| {
369 debug!("tok lit: %s", s);
370 literals.push(lit {lit: s.to_owned(), pos: sp.lo});
371 }
223e47cc 372 } else {
970d7e83 373 debug!("tok: %s", token::to_str(get_ident_interner(), &tok));
223e47cc
LB
374 }
375 first_read = false;
376 }
377
378 (comments, literals)
379}
970d7e83
LB
380
#[cfg(test)]
mod test {
    use super::*;

    // Strips `comment` and checks the result against `expected`.
    fn check_strip(comment: &str, expected: &str) {
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped.slice(0, stripped.len()), expected);
    }

    // common " *" prefix is removed; a second star on a line is kept
    #[test] fn test_block_doc_comment_1() {
        check_strip("/**\n * Test \n **  Test\n *   Test\n*/",
                    " Test \n*  Test\n   Test");
    }

    // common " *" prefix is removed from every line
    #[test] fn test_block_doc_comment_2() {
        check_strip("/**\n * Test\n *  Test\n*/",
                    " Test\n  Test");
    }

    // stars that are part of the text, not a shared prefix, are left alone
    #[test] fn test_block_doc_comment_3() {
        check_strip("/**\n let a: *int;\n *a = 5;\n*/",
                    " let a: *int;\n *a = 5;");
    }

    // line doc-comments only lose their three-character marker
    #[test] fn test_line_doc_comment() {
        check_strip("/// Test", " Test");
    }
}