]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
223e47cc LB |
11 | use ast; |
12 | use codemap::{BytePos, CharPos, CodeMap, Pos}; | |
13 | use diagnostic; | |
970d7e83 | 14 | use parse::lexer::{is_whitespace, with_str_from, reader}; |
223e47cc LB |
15 | use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan}; |
16 | use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment}; | |
17 | use parse::lexer; | |
18 | use parse::token; | |
970d7e83 | 19 | use parse::token::{get_ident_interner}; |
223e47cc | 20 | |
970d7e83 LB |
21 | use std::io; |
22 | use std::str; | |
23 | use std::uint; | |
223e47cc LB |
24 | |
25 | #[deriving(Eq)] | |
26 | pub enum cmnt_style { | |
27 | isolated, // No code on either side of each line of the comment | |
28 | trailing, // Code exists to the left of the comment | |
29 | mixed, // Code before /* foo */ and after the comment | |
30 | blank_line, // Just a manual blank line "\n\n", for layout | |
31 | } | |
32 | ||
33 | pub struct cmnt { | |
34 | style: cmnt_style, | |
35 | lines: ~[~str], | |
36 | pos: BytePos | |
37 | } | |
38 | ||
39 | pub fn is_doc_comment(s: &str) -> bool { | |
970d7e83 LB |
40 | (s.starts_with("///") && !is_line_non_doc_comment(s)) || |
41 | s.starts_with("//!") || | |
42 | (s.starts_with("/**") && !is_block_non_doc_comment(s)) || | |
43 | s.starts_with("/*!") | |
223e47cc LB |
44 | } |
45 | ||
46 | pub fn doc_comment_style(comment: &str) -> ast::attr_style { | |
47 | assert!(is_doc_comment(comment)); | |
970d7e83 | 48 | if comment.starts_with("//!") || comment.starts_with("/*!") { |
223e47cc LB |
49 | ast::attr_inner |
50 | } else { | |
51 | ast::attr_outer | |
52 | } | |
53 | } | |
54 | ||
55 | pub fn strip_doc_comment_decoration(comment: &str) -> ~str { | |
56 | ||
57 | /// remove whitespace-only lines from the start/end of lines | |
58 | fn vertical_trim(lines: ~[~str]) -> ~[~str] { | |
970d7e83 LB |
59 | let mut i = 0u; |
60 | let mut j = lines.len(); | |
223e47cc LB |
61 | while i < j && lines[i].trim().is_empty() { |
62 | i += 1u; | |
63 | } | |
64 | while j > i && lines[j - 1u].trim().is_empty() { | |
65 | j -= 1u; | |
66 | } | |
67 | return lines.slice(i, j).to_owned(); | |
68 | } | |
69 | ||
970d7e83 LB |
70 | /// remove a "[ \t]*\*" block from each line, if possible |
71 | fn horizontal_trim(lines: ~[~str]) -> ~[~str] { | |
72 | let mut i = uint::max_value; | |
73 | let mut can_trim = true; | |
74 | let mut first = true; | |
75 | for lines.iter().advance |line| { | |
76 | for line.iter().enumerate().advance |(j, c)| { | |
77 | if j > i || !"* \t".contains_char(c) { | |
78 | can_trim = false; | |
223e47cc LB |
79 | break; |
80 | } | |
970d7e83 LB |
81 | if c == '*' { |
82 | if first { | |
83 | i = j; | |
84 | first = false; | |
85 | } else if i != j { | |
86 | can_trim = false; | |
87 | } | |
223e47cc LB |
88 | break; |
89 | } | |
90 | } | |
970d7e83 LB |
91 | if i > line.len() { |
92 | can_trim = false; | |
93 | } | |
94 | if !can_trim { | |
95 | break; | |
96 | } | |
223e47cc LB |
97 | } |
98 | ||
970d7e83 LB |
99 | if can_trim { |
100 | do lines.map |line| { | |
101 | line.slice(i + 1, line.len()).to_owned() | |
223e47cc | 102 | } |
970d7e83 LB |
103 | } else { |
104 | lines | |
105 | } | |
223e47cc LB |
106 | } |
107 | ||
970d7e83 | 108 | if comment.starts_with("//") { |
223e47cc | 109 | // FIXME #5475: |
970d7e83 LB |
110 | // return comment.slice(3u, comment.len()).to_owned(); |
111 | let r = comment.slice(3u, comment.len()); return r.to_owned(); | |
223e47cc LB |
112 | } |
113 | ||
970d7e83 LB |
114 | if comment.starts_with("/*") { |
115 | let lines = comment.slice(3u, comment.len() - 2u) | |
116 | .any_line_iter() | |
117 | .transform(|s| s.to_owned()) | |
118 | .collect::<~[~str]>(); | |
119 | ||
223e47cc | 120 | let lines = vertical_trim(lines); |
970d7e83 LB |
121 | let lines = horizontal_trim(lines); |
122 | ||
123 | return lines.connect("\n"); | |
223e47cc LB |
124 | } |
125 | ||
970d7e83 | 126 | fail!("not a doc-comment: %s", comment); |
223e47cc LB |
127 | } |
128 | ||
129 | fn read_to_eol(rdr: @mut StringReader) -> ~str { | |
130 | let mut val = ~""; | |
131 | while rdr.curr != '\n' && !is_eof(rdr) { | |
970d7e83 | 132 | val.push_char(rdr.curr); |
223e47cc LB |
133 | bump(rdr); |
134 | } | |
135 | if rdr.curr == '\n' { bump(rdr); } | |
136 | return val; | |
137 | } | |
138 | ||
139 | fn read_one_line_comment(rdr: @mut StringReader) -> ~str { | |
140 | let val = read_to_eol(rdr); | |
141 | assert!((val[0] == '/' as u8 && val[1] == '/' as u8) || | |
142 | (val[0] == '#' as u8 && val[1] == '!' as u8)); | |
143 | return val; | |
144 | } | |
145 | ||
146 | fn consume_non_eol_whitespace(rdr: @mut StringReader) { | |
147 | while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) { | |
148 | bump(rdr); | |
149 | } | |
150 | } | |
151 | ||
152 | fn push_blank_line_comment(rdr: @mut StringReader, comments: &mut ~[cmnt]) { | |
153 | debug!(">>> blank-line comment"); | |
154 | let v: ~[~str] = ~[]; | |
155 | comments.push(cmnt {style: blank_line, lines: v, pos: rdr.last_pos}); | |
156 | } | |
157 | ||
158 | fn consume_whitespace_counting_blank_lines(rdr: @mut StringReader, | |
159 | comments: &mut ~[cmnt]) { | |
160 | while is_whitespace(rdr.curr) && !is_eof(rdr) { | |
161 | if rdr.col == CharPos(0u) && rdr.curr == '\n' { | |
162 | push_blank_line_comment(rdr, &mut *comments); | |
163 | } | |
164 | bump(rdr); | |
165 | } | |
166 | } | |
167 | ||
168 | ||
169 | fn read_shebang_comment(rdr: @mut StringReader, code_to_the_left: bool, | |
170 | comments: &mut ~[cmnt]) { | |
171 | debug!(">>> shebang comment"); | |
172 | let p = rdr.last_pos; | |
173 | debug!("<<< shebang comment"); | |
174 | comments.push(cmnt { | |
175 | style: if code_to_the_left { trailing } else { isolated }, | |
176 | lines: ~[read_one_line_comment(rdr)], | |
177 | pos: p | |
178 | }); | |
179 | } | |
180 | ||
181 | fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool, | |
182 | comments: &mut ~[cmnt]) { | |
183 | debug!(">>> line comments"); | |
184 | let p = rdr.last_pos; | |
185 | let mut lines: ~[~str] = ~[]; | |
186 | while rdr.curr == '/' && nextch(rdr) == '/' { | |
187 | let line = read_one_line_comment(rdr); | |
188 | debug!("%s", line); | |
189 | if is_doc_comment(line) { // doc-comments are not put in comments | |
190 | break; | |
191 | } | |
192 | lines.push(line); | |
193 | consume_non_eol_whitespace(rdr); | |
194 | } | |
195 | debug!("<<< line comments"); | |
196 | if !lines.is_empty() { | |
197 | comments.push(cmnt { | |
198 | style: if code_to_the_left { trailing } else { isolated }, | |
199 | lines: lines, | |
200 | pos: p | |
201 | }); | |
202 | } | |
203 | } | |
204 | ||
970d7e83 LB |
205 | // Returns None if the first col chars of s contain a non-whitespace char. |
206 | // Otherwise returns Some(k) where k is first char offset after that leading | |
207 | // whitespace. Note k may be outside bounds of s. | |
208 | fn all_whitespace(s: &str, col: CharPos) -> Option<uint> { | |
209 | let len = s.len(); | |
210 | let mut col = col.to_uint(); | |
211 | let mut cursor: uint = 0; | |
212 | while col > 0 && cursor < len { | |
213 | let r: str::CharRange = s.char_range_at(cursor); | |
214 | if !r.ch.is_whitespace() { | |
215 | return None; | |
216 | } | |
217 | cursor = r.next; | |
218 | col -= 1; | |
223e47cc | 219 | } |
970d7e83 | 220 | return Some(cursor); |
223e47cc LB |
221 | } |
222 | ||
223 | fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], | |
224 | s: ~str, col: CharPos) { | |
970d7e83 LB |
225 | let len = s.len(); |
226 | let s1 = match all_whitespace(s, col) { | |
227 | Some(col) => { | |
228 | if col < len { | |
229 | s.slice(col, len).to_owned() | |
230 | } else { ~"" } | |
231 | } | |
232 | None => s, | |
233 | }; | |
223e47cc LB |
234 | debug!("pushing line: %s", s1); |
235 | lines.push(s1); | |
236 | } | |
237 | ||
238 | fn read_block_comment(rdr: @mut StringReader, | |
239 | code_to_the_left: bool, | |
240 | comments: &mut ~[cmnt]) { | |
241 | debug!(">>> block comment"); | |
242 | let p = rdr.last_pos; | |
243 | let mut lines: ~[~str] = ~[]; | |
970d7e83 | 244 | let col: CharPos = rdr.col; |
223e47cc LB |
245 | bump(rdr); |
246 | bump(rdr); | |
247 | ||
248 | let mut curr_line = ~"/*"; | |
249 | ||
250 | // doc-comments are not really comments, they are attributes | |
251 | if rdr.curr == '*' || rdr.curr == '!' { | |
252 | while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { | |
970d7e83 | 253 | curr_line.push_char(rdr.curr); |
223e47cc LB |
254 | bump(rdr); |
255 | } | |
256 | if !is_eof(rdr) { | |
970d7e83 | 257 | curr_line.push_str("*/"); |
223e47cc LB |
258 | bump(rdr); |
259 | bump(rdr); | |
260 | } | |
261 | if !is_block_non_doc_comment(curr_line) { return; } | |
262 | assert!(!curr_line.contains_char('\n')); | |
263 | lines.push(curr_line); | |
264 | } else { | |
265 | let mut level: int = 1; | |
266 | while level > 0 { | |
267 | debug!("=== block comment level %d", level); | |
268 | if is_eof(rdr) { | |
269 | (rdr as @reader).fatal(~"unterminated block comment"); | |
270 | } | |
271 | if rdr.curr == '\n' { | |
272 | trim_whitespace_prefix_and_push_line(&mut lines, curr_line, | |
273 | col); | |
274 | curr_line = ~""; | |
275 | bump(rdr); | |
276 | } else { | |
970d7e83 | 277 | curr_line.push_char(rdr.curr); |
223e47cc LB |
278 | if rdr.curr == '/' && nextch(rdr) == '*' { |
279 | bump(rdr); | |
280 | bump(rdr); | |
970d7e83 | 281 | curr_line.push_char('*'); |
223e47cc LB |
282 | level += 1; |
283 | } else { | |
284 | if rdr.curr == '*' && nextch(rdr) == '/' { | |
285 | bump(rdr); | |
286 | bump(rdr); | |
970d7e83 | 287 | curr_line.push_char('/'); |
223e47cc LB |
288 | level -= 1; |
289 | } else { bump(rdr); } | |
290 | } | |
291 | } | |
292 | } | |
970d7e83 | 293 | if curr_line.len() != 0 { |
223e47cc LB |
294 | trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); |
295 | } | |
296 | } | |
297 | ||
298 | let mut style = if code_to_the_left { trailing } else { isolated }; | |
299 | consume_non_eol_whitespace(rdr); | |
970d7e83 | 300 | if !is_eof(rdr) && rdr.curr != '\n' && lines.len() == 1u { |
223e47cc LB |
301 | style = mixed; |
302 | } | |
303 | debug!("<<< block comment"); | |
304 | comments.push(cmnt {style: style, lines: lines, pos: p}); | |
305 | } | |
306 | ||
307 | fn peeking_at_comment(rdr: @mut StringReader) -> bool { | |
308 | return ((rdr.curr == '/' && nextch(rdr) == '/') || | |
309 | (rdr.curr == '/' && nextch(rdr) == '*')) || | |
310 | (rdr.curr == '#' && nextch(rdr) == '!'); | |
311 | } | |
312 | ||
313 | fn consume_comment(rdr: @mut StringReader, | |
314 | code_to_the_left: bool, | |
315 | comments: &mut ~[cmnt]) { | |
316 | debug!(">>> consume comment"); | |
317 | if rdr.curr == '/' && nextch(rdr) == '/' { | |
318 | read_line_comments(rdr, code_to_the_left, comments); | |
319 | } else if rdr.curr == '/' && nextch(rdr) == '*' { | |
320 | read_block_comment(rdr, code_to_the_left, comments); | |
321 | } else if rdr.curr == '#' && nextch(rdr) == '!' { | |
322 | read_shebang_comment(rdr, code_to_the_left, comments); | |
323 | } else { fail!(); } | |
324 | debug!("<<< consume comment"); | |
325 | } | |
326 | ||
327 | pub struct lit { | |
328 | lit: ~str, | |
329 | pos: BytePos | |
330 | } | |
331 | ||
970d7e83 LB |
332 | // it appears this function is called only from pprust... that's |
333 | // probably not a good thing. | |
223e47cc LB |
334 | pub fn gather_comments_and_literals(span_diagnostic: |
335 | @diagnostic::span_handler, | |
970d7e83 | 336 | path: @str, |
223e47cc LB |
337 | srdr: @io::Reader) |
338 | -> (~[cmnt], ~[lit]) { | |
970d7e83 | 339 | let src = str::from_bytes(srdr.read_whole_stream()).to_managed(); |
223e47cc LB |
340 | let cm = CodeMap::new(); |
341 | let filemap = cm.new_filemap(path, src); | |
970d7e83 | 342 | let rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap); |
223e47cc LB |
343 | |
344 | let mut comments: ~[cmnt] = ~[]; | |
345 | let mut literals: ~[lit] = ~[]; | |
346 | let mut first_read: bool = true; | |
347 | while !is_eof(rdr) { | |
348 | loop { | |
349 | let mut code_to_the_left = !first_read; | |
350 | consume_non_eol_whitespace(rdr); | |
351 | if rdr.curr == '\n' { | |
352 | code_to_the_left = false; | |
353 | consume_whitespace_counting_blank_lines(rdr, &mut comments); | |
354 | } | |
355 | while peeking_at_comment(rdr) { | |
356 | consume_comment(rdr, code_to_the_left, &mut comments); | |
357 | consume_whitespace_counting_blank_lines(rdr, &mut comments); | |
358 | } | |
359 | break; | |
360 | } | |
361 | ||
362 | ||
970d7e83 | 363 | let bstart = rdr.last_pos; |
223e47cc LB |
364 | rdr.next_token(); |
365 | //discard, and look ahead; we're working with internal state | |
366 | let TokenAndSpan {tok: tok, sp: sp} = rdr.peek(); | |
367 | if token::is_lit(&tok) { | |
970d7e83 LB |
368 | do with_str_from(rdr, bstart) |s| { |
369 | debug!("tok lit: %s", s); | |
370 | literals.push(lit {lit: s.to_owned(), pos: sp.lo}); | |
371 | } | |
223e47cc | 372 | } else { |
970d7e83 | 373 | debug!("tok: %s", token::to_str(get_ident_interner(), &tok)); |
223e47cc LB |
374 | } |
375 | first_read = false; | |
376 | } | |
377 | ||
378 | (comments, literals) | |
379 | } | |
970d7e83 LB |
380 | |
#[cfg(test)]
mod test {
    use super::*;

    // Strips `comment` and compares the result with `expected`.
    fn assert_stripped(comment: &str, expected: &str) {
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped.slice(0, stripped.len()), expected);
    }

    #[test] fn test_block_doc_comment_1() {
        assert_stripped("/**\n * Test \n ** Test\n * Test\n*/",
                        " Test \n* Test\n Test");
    }

    #[test] fn test_block_doc_comment_2() {
        assert_stripped("/**\n * Test\n * Test\n*/",
                        " Test\n Test");
    }

    #[test] fn test_block_doc_comment_3() {
        assert_stripped("/**\n let a: *int;\n *a = 5;\n*/",
                        " let a: *int;\n *a = 5;");
    }

    #[test] fn test_line_doc_comment() {
        assert_stripped("/// Test", " Test");
    }
}
412 | } |