]> git.proxmox.com Git - rustc.git/blame - src/tools/rustfmt/src/comment.rs
New upstream version 1.63.0+dfsg1
[rustc.git] / src / tools / rustfmt / src / comment.rs
CommitLineData
f20569fa
XL
1// Formatting and tools for comments.
2
3use std::{self, borrow::Cow, iter};
4
5use itertools::{multipeek, MultiPeek};
a2a8927a
XL
6use lazy_static::lazy_static;
7use regex::Regex;
f20569fa
XL
8use rustc_span::Span;
9
10use crate::config::Config;
11use crate::rewrite::RewriteContext;
12use crate::shape::{Indent, Shape};
13use crate::string::{rewrite_string, StringFormat};
14use crate::utils::{
3c0e092e
XL
15 count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
16 trimmed_last_line_width, unicode_str_width,
f20569fa
XL
17};
18use crate::{ErrorKind, FormattingError};
19
a2a8927a
XL
20lazy_static! {
21 /// A regex matching reference doc links.
22 ///
23 /// ```markdown
24 /// /// An [example].
25 /// ///
26 /// /// [example]: this::is::a::link
27 /// ```
28 static ref REFERENCE_LINK_URL: Regex = Regex::new(r"^\[.+\]\s?:").unwrap();
29}
30
f20569fa
XL
/// Returns `true` if `comment` starts with `//` and the character right after
/// the two slashes is neither alphanumeric nor whitespace (e.g. `//@` or `//-`).
///
/// A bare `//` with nothing after it is not a custom comment.
fn is_custom_comment(comment: &str) -> bool {
    comment
        .strip_prefix("//")
        .and_then(|after_slashes| after_slashes.chars().next())
        .map_or(false, |c| !(c.is_alphanumeric() || c.is_whitespace()))
}
40
/// The flavour of a comment, identified by the characters that open it.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment opened with `//`.
    DoubleSlash,
    /// Line comment opened with `///`.
    TripleSlash,
    /// Line comment opened with `//!`.
    Doc,
    /// Block comment opened with `/*`.
    SingleBullet,
    /// Block comment opened with `/**`.
    DoubleBullet,
    /// Block comment opened with `/*!`.
    Exclamation,
    /// Line comment with a non-standard opener; the opener text is carried along.
    Custom(&'a str),
}
51
/// Extracts the opener of a custom comment: the first line of `s` up to and
/// including its first space, or the whole first line when it has no space.
/// Returns `""` when `s` has no lines.
fn custom_opener(s: &str) -> &str {
    let first_line = match s.lines().next() {
        Some(line) => line,
        None => return "",
    };
    match first_line.find(' ') {
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
59
60impl<'a> CommentStyle<'a> {
61 /// Returns `true` if the commenting style covers a line only.
62 pub(crate) fn is_line_comment(&self) -> bool {
63 match *self {
64 CommentStyle::DoubleSlash
65 | CommentStyle::TripleSlash
66 | CommentStyle::Doc
67 | CommentStyle::Custom(_) => true,
68 _ => false,
69 }
70 }
71
72 /// Returns `true` if the commenting style can span over multiple lines.
73 pub(crate) fn is_block_comment(&self) -> bool {
74 match *self {
75 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
76 true
77 }
78 _ => false,
79 }
80 }
81
82 /// Returns `true` if the commenting style is for documentation.
83 pub(crate) fn is_doc_comment(&self) -> bool {
94222f64 84 matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
f20569fa
XL
85 }
86
87 pub(crate) fn opener(&self) -> &'a str {
88 match *self {
89 CommentStyle::DoubleSlash => "// ",
90 CommentStyle::TripleSlash => "/// ",
91 CommentStyle::Doc => "//! ",
92 CommentStyle::SingleBullet => "/* ",
93 CommentStyle::DoubleBullet => "/** ",
94 CommentStyle::Exclamation => "/*! ",
95 CommentStyle::Custom(opener) => opener,
96 }
97 }
98
99 pub(crate) fn closer(&self) -> &'a str {
100 match *self {
101 CommentStyle::DoubleSlash
102 | CommentStyle::TripleSlash
103 | CommentStyle::Custom(..)
104 | CommentStyle::Doc => "",
105 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
106 " */"
107 }
108 }
109 }
110
111 pub(crate) fn line_start(&self) -> &'a str {
112 match *self {
113 CommentStyle::DoubleSlash => "// ",
114 CommentStyle::TripleSlash => "/// ",
115 CommentStyle::Doc => "//! ",
116 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
117 " * "
118 }
119 CommentStyle::Custom(opener) => opener,
120 }
121 }
122
123 pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
124 (self.opener(), self.closer(), self.line_start())
125 }
126}
127
128pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
129 if !normalize_comments {
130 if orig.starts_with("/**") && !orig.starts_with("/**/") {
131 CommentStyle::DoubleBullet
132 } else if orig.starts_with("/*!") {
133 CommentStyle::Exclamation
134 } else if orig.starts_with("/*") {
135 CommentStyle::SingleBullet
136 } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
137 CommentStyle::TripleSlash
138 } else if orig.starts_with("//!") {
139 CommentStyle::Doc
140 } else if is_custom_comment(orig) {
141 CommentStyle::Custom(custom_opener(orig))
142 } else {
143 CommentStyle::DoubleSlash
144 }
145 } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
146 || (orig.starts_with("/**") && !orig.starts_with("/**/"))
147 {
148 CommentStyle::TripleSlash
149 } else if orig.starts_with("//!") || orig.starts_with("/*!") {
150 CommentStyle::Doc
151 } else if is_custom_comment(orig) {
152 CommentStyle::Custom(custom_opener(orig))
153 } else {
154 CommentStyle::DoubleSlash
155 }
156}
157
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the
/// block-comment terminator `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
162
163/// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
164/// comments between two strings. If there are such comments, then that will be
165/// recovered. If `allow_extend` is true and there is no comment between the two
166/// strings, then they will be put on a single line as long as doing so does not
167/// exceed max width.
168pub(crate) fn combine_strs_with_missing_comments(
169 context: &RewriteContext<'_>,
170 prev_str: &str,
171 next_str: &str,
172 span: Span,
173 shape: Shape,
174 allow_extend: bool,
175) -> Option<String> {
176 trace!(
177 "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
178 prev_str,
179 next_str,
180 span,
181 shape
182 );
183
184 let mut result =
185 String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
186 result.push_str(prev_str);
187 let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
3c0e092e
XL
188 let first_sep =
189 if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
190 ""
191 } else {
192 " "
193 };
f20569fa
XL
194 let mut one_line_width =
195 last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();
196
197 let config = context.config;
198 let indent = shape.indent;
199 let missing_comment = rewrite_missing_comment(span, shape, context)?;
200
201 if missing_comment.is_empty() {
3c0e092e 202 if allow_extend && one_line_width <= shape.width {
f20569fa
XL
203 result.push_str(first_sep);
204 } else if !prev_str.is_empty() {
205 result.push_str(&indent.to_string_with_newline(config))
206 }
207 result.push_str(next_str);
208 return Some(result);
209 }
210
211 // We have a missing comment between the first expression and the second expression.
212
213 // Peek the the original source code and find out whether there is a newline between the first
214 // expression and the second expression or the missing comment. We will preserve the original
215 // layout whenever possible.
216 let original_snippet = context.snippet(span);
217 let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
218 !original_snippet[..pos].contains('\n')
219 } else {
220 !original_snippet.contains('\n')
221 };
222
223 one_line_width -= first_sep.len();
224 let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
225 Cow::from("")
226 } else {
227 let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
228 if prefer_same_line && one_line_width <= shape.width {
229 Cow::from(" ")
230 } else {
231 indent.to_string_with_newline(config)
232 }
233 };
234 result.push_str(&first_sep);
235 result.push_str(&missing_comment);
236
237 let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
238 Cow::from("")
239 } else if missing_comment.starts_with("//") {
240 indent.to_string_with_newline(config)
241 } else {
242 one_line_width += missing_comment.len() + first_sep.len() + 1;
243 allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
244 if prefer_same_line && allow_one_line && one_line_width <= shape.width {
245 Cow::from(" ")
246 } else {
247 indent.to_string_with_newline(config)
248 }
249 };
250 result.push_str(&second_sep);
251 result.push_str(next_str);
252
253 Some(result)
254}
255
256pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
257 identify_comment(orig, false, shape, config, true)
258}
259
260pub(crate) fn rewrite_comment(
261 orig: &str,
262 block_style: bool,
263 shape: Shape,
264 config: &Config,
265) -> Option<String> {
266 identify_comment(orig, block_style, shape, config, false)
267}
268
269fn identify_comment(
270 orig: &str,
271 block_style: bool,
272 shape: Shape,
273 config: &Config,
274 is_doc_comment: bool,
275) -> Option<String> {
276 let style = comment_style(orig, false);
277
278 // Computes the byte length of line taking into account a newline if the line is part of a
279 // paragraph.
280 fn compute_len(orig: &str, line: &str) -> usize {
281 if orig.len() > line.len() {
282 if orig.as_bytes()[line.len()] == b'\r' {
283 line.len() + 2
284 } else {
285 line.len() + 1
286 }
287 } else {
288 line.len()
289 }
290 }
291
292 // Get the first group of line comments having the same commenting style.
293 //
294 // Returns a tuple with:
295 // - a boolean indicating if there is a blank line
296 // - a number indicating the size of the first group of comments
297 fn consume_same_line_comments(
298 style: CommentStyle<'_>,
299 orig: &str,
300 line_start: &str,
301 ) -> (bool, usize) {
302 let mut first_group_ending = 0;
303 let mut hbl = false;
304
305 for line in orig.lines() {
306 let trimmed_line = line.trim_start();
307 if trimmed_line.is_empty() {
308 hbl = true;
309 break;
310 } else if trimmed_line.starts_with(line_start)
311 || comment_style(trimmed_line, false) == style
312 {
313 first_group_ending += compute_len(&orig[first_group_ending..], line);
314 } else {
315 break;
316 }
317 }
318 (hbl, first_group_ending)
319 }
320
321 let (has_bare_lines, first_group_ending) = match style {
322 CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
323 let line_start = style.line_start().trim_start();
324 consume_same_line_comments(style, orig, line_start)
325 }
326 CommentStyle::Custom(opener) => {
327 let trimmed_opener = opener.trim_end();
328 consume_same_line_comments(style, orig, trimmed_opener)
329 }
330 // for a block comment, search for the closing symbol
331 CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
332 let closer = style.closer().trim_start();
333 let mut count = orig.matches(closer).count();
334 let mut closing_symbol_offset = 0;
335 let mut hbl = false;
336 let mut first = true;
337 for line in orig.lines() {
338 closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
339 let mut trimmed_line = line.trim_start();
340 if !trimmed_line.starts_with('*')
341 && !trimmed_line.starts_with("//")
342 && !trimmed_line.starts_with("/*")
343 {
344 hbl = true;
345 }
346
347 // Remove opener from consideration when searching for closer
348 if first {
349 let opener = style.opener().trim_end();
350 trimmed_line = &trimmed_line[opener.len()..];
351 first = false;
352 }
353 if trimmed_line.ends_with(closer) {
354 count -= 1;
355 if count == 0 {
356 break;
357 }
358 }
359 }
360 (hbl, closing_symbol_offset)
361 }
362 };
363
364 let (first_group, rest) = orig.split_at(first_group_ending);
365 let rewritten_first_group =
366 if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
367 trim_left_preserve_layout(first_group, shape.indent, config)?
368 } else if !config.normalize_comments()
369 && !config.wrap_comments()
370 && !config.format_code_in_doc_comments()
371 {
372 light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
373 } else {
374 rewrite_comment_inner(
375 first_group,
376 block_style,
377 style,
378 shape,
379 config,
380 is_doc_comment || style.is_doc_comment(),
381 )?
382 };
383 if rest.is_empty() {
384 Some(rewritten_first_group)
385 } else {
386 identify_comment(
387 rest.trim_start(),
388 block_style,
389 shape,
390 config,
391 is_doc_comment,
392 )
393 .map(|rest_str| {
394 format!(
395 "{}\n{}{}{}",
396 rewritten_first_group,
397 // insert back the blank line
398 if has_bare_lines && style.is_line_comment() {
399 "\n"
400 } else {
401 ""
402 },
403 shape.indent.to_string(config),
404 rest_str
405 )
406 })
407 }
408}
409
/// Whether a fenced code block contains Rust, as judged from its attribute list.
enum CodeBlockAttribute {
    Rust,
    NotRust,
}

impl CodeBlockAttribute {
    /// Parses a comma separated attribute list. Yields `Rust` only if every
    /// attribute (after trimming) is empty or one of the recognised Rust
    /// attributes; any other attribute makes the block `NotRust`.
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    fn new(attributes: &str) -> CodeBlockAttribute {
        let all_rust = attributes.split(',').all(|attribute| {
            matches!(
                attribute.trim(),
                "" | "rust"
                    | "should_panic"
                    | "no_run"
                    | "edition2015"
                    | "edition2018"
                    | "edition2021"
            )
        });

        if all_rust {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
432
/// A run of comment lines that is formatted as one list item or block quote.
///
/// An item starts with either a star `*`, a dash `-` or a greater-than `>`.
/// Different levels of indentation are handled by shrinking the shape accordingly.
struct ItemizedBlock {
    /// The lines identified as belonging to the itemized block.
    lines: Vec<String>,
    /// The number of characters (typically whitespace) up to and including the item sigil.
    indent: usize,
    /// The string that marks the start of the item.
    opener: String,
    /// Sequence of characters (typically whitespace) prefixed to the continuation
    /// lines of the item.
    line_start: String,
}
447
448impl ItemizedBlock {
449 /// Returns `true` if the line is formatted as an item
450 fn is_itemized_line(line: &str) -> bool {
451 let trimmed = line.trim_start();
5e7ed085 452 trimmed.starts_with("* ") || trimmed.starts_with("- ") || trimmed.starts_with("> ")
f20569fa
XL
453 }
454
455 /// Creates a new ItemizedBlock described with the given line.
456 /// The `is_itemized_line` needs to be called first.
457 fn new(line: &str) -> ItemizedBlock {
458 let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
5e7ed085
FG
459 // +2 = '* ', which will add the appropriate amount of whitespace to keep itemized
460 // content formatted correctly.
461 let mut indent = space_to_sigil + 2;
462 let mut line_start = " ".repeat(indent);
463
464 // Markdown blockquote start with a "> "
465 if line.trim_start().starts_with(">") {
466 // remove the original +2 indent because there might be multiple nested block quotes
467 // and it's easier to reason about the final indent by just taking the length
468 // of th new line_start. We update the indent because it effects the max width
469 // of each formatted line.
470 line_start = itemized_block_quote_start(line, line_start, 2);
471 indent = line_start.len();
472 }
f20569fa
XL
473 ItemizedBlock {
474 lines: vec![line[indent..].to_string()],
475 indent,
476 opener: line[..indent].to_string(),
5e7ed085 477 line_start,
f20569fa
XL
478 }
479 }
480
481 /// Returns a `StringFormat` used for formatting the content of an item.
482 fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
483 StringFormat {
484 opener: "",
485 closer: "",
486 line_start: "",
487 line_end: "",
488 shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
489 trim_end: true,
490 config: fmt.config,
491 }
492 }
493
494 /// Returns `true` if the line is part of the current itemized block.
495 /// If it is, then it is added to the internal lines list.
496 fn add_line(&mut self, line: &str) -> bool {
497 if !ItemizedBlock::is_itemized_line(line)
498 && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
499 {
500 self.lines.push(line.to_string());
501 return true;
502 }
503 false
504 }
505
506 /// Returns the block as a string, with each line trimmed at the start.
507 fn trimmed_block_as_string(&self) -> String {
508 self.lines
509 .iter()
510 .map(|line| format!("{} ", line.trim_start()))
511 .collect::<String>()
512 }
513
514 /// Returns the block as a string under its original form.
515 fn original_block_as_string(&self) -> String {
516 self.lines.join("\n")
517 }
518}
519
5e7ed085
FG
/// Determine the line_start when formatting markdown block quotes.
///
/// Drops the last `remove_indent` characters of the given `line_start`
/// (typically indentation whitespace) and appends one `"> "` for every `>`
/// found in `line` before its first alphanumeric character.
fn itemized_block_quote_start(line: &str, mut line_start: String, remove_indent: usize) -> String {
    let quote_level = line
        .chars()
        .take_while(|c| !c.is_alphanumeric())
        .filter(|&c| c == '>')
        .count();

    (0..remove_indent).for_each(|_| {
        line_start.pop();
    });

    line_start.push_str(&"> ".repeat(quote_level));
    line_start
}
538
f20569fa
XL
539struct CommentRewrite<'a> {
540 result: String,
541 code_block_buffer: String,
542 is_prev_line_multi_line: bool,
543 code_block_attr: Option<CodeBlockAttribute>,
544 item_block: Option<ItemizedBlock>,
545 comment_line_separator: String,
546 indent_str: String,
547 max_width: usize,
548 fmt_indent: Indent,
549 fmt: StringFormat<'a>,
550
551 opener: String,
552 closer: String,
553 line_start: String,
a2a8927a 554 style: CommentStyle<'a>,
f20569fa
XL
555}
556
557impl<'a> CommentRewrite<'a> {
558 fn new(
559 orig: &'a str,
560 block_style: bool,
561 shape: Shape,
562 config: &'a Config,
563 ) -> CommentRewrite<'a> {
a2a8927a
XL
564 let ((opener, closer, line_start), style) = if block_style {
565 (
566 CommentStyle::SingleBullet.to_str_tuplet(),
567 CommentStyle::SingleBullet,
568 )
f20569fa 569 } else {
a2a8927a
XL
570 let style = comment_style(orig, config.normalize_comments());
571 (style.to_str_tuplet(), style)
f20569fa
XL
572 };
573
574 let max_width = shape
575 .width
576 .checked_sub(closer.len() + opener.len())
577 .unwrap_or(1);
578 let indent_str = shape.indent.to_string_with_newline(config).to_string();
579
580 let mut cr = CommentRewrite {
581 result: String::with_capacity(orig.len() * 2),
582 code_block_buffer: String::with_capacity(128),
583 is_prev_line_multi_line: false,
584 code_block_attr: None,
585 item_block: None,
586 comment_line_separator: format!("{}{}", indent_str, line_start),
587 max_width,
588 indent_str,
589 fmt_indent: shape.indent,
590
591 fmt: StringFormat {
592 opener: "",
593 closer: "",
594 line_start,
595 line_end: "",
596 shape: Shape::legacy(max_width, shape.indent),
597 trim_end: true,
598 config,
599 },
600
601 opener: opener.to_owned(),
602 closer: closer.to_owned(),
603 line_start: line_start.to_owned(),
a2a8927a 604 style,
f20569fa
XL
605 };
606 cr.result.push_str(opener);
607 cr
608 }
609
610 fn join_block(s: &str, sep: &str) -> String {
611 let mut result = String::with_capacity(s.len() + 128);
612 let mut iter = s.lines().peekable();
613 while let Some(line) = iter.next() {
614 result.push_str(line);
615 result.push_str(match iter.peek() {
616 Some(next_line) if next_line.is_empty() => sep.trim_end(),
3c0e092e 617 Some(..) => sep,
f20569fa
XL
618 None => "",
619 });
620 }
621 result
622 }
623
a2a8927a
XL
624 /// Check if any characters were written to the result buffer after the start of the comment.
625 /// when calling [`CommentRewrite::new()`] the result buffer is initiazlied with the opening
626 /// characters for the comment.
627 fn buffer_contains_comment(&self) -> bool {
628 // if self.result.len() < self.opener.len() then an empty comment is in the buffer
629 // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
630 self.result.len() != self.opener.len()
631 }
632
f20569fa
XL
633 fn finish(mut self) -> String {
634 if !self.code_block_buffer.is_empty() {
635 // There is a code block that is not properly enclosed by backticks.
636 // We will leave them untouched.
637 self.result.push_str(&self.comment_line_separator);
638 self.result.push_str(&Self::join_block(
639 &trim_custom_comment_prefix(&self.code_block_buffer),
640 &self.comment_line_separator,
641 ));
642 }
643
644 if let Some(ref ib) = self.item_block {
645 // the last few lines are part of an itemized block
646 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
647 let item_fmt = ib.create_string_format(&self.fmt);
a2a8927a
XL
648
649 // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
650 if self.buffer_contains_comment() {
651 self.result.push_str(&self.comment_line_separator);
652 }
653
f20569fa
XL
654 self.result.push_str(&ib.opener);
655 match rewrite_string(
656 &ib.trimmed_block_as_string(),
657 &item_fmt,
658 self.max_width.saturating_sub(ib.indent),
659 ) {
660 Some(s) => self.result.push_str(&Self::join_block(
661 &s,
662 &format!("{}{}", self.comment_line_separator, ib.line_start),
663 )),
664 None => self.result.push_str(&Self::join_block(
665 &ib.original_block_as_string(),
666 &self.comment_line_separator,
667 )),
668 };
669 }
670
671 self.result.push_str(&self.closer);
672 if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
673 // Trailing space.
674 self.result.pop();
675 }
676
677 self.result
678 }
679
680 fn handle_line(
681 &mut self,
682 orig: &'a str,
683 i: usize,
684 line: &'a str,
685 has_leading_whitespace: bool,
5e7ed085 686 is_doc_comment: bool,
f20569fa 687 ) -> bool {
a2a8927a
XL
688 let num_newlines = count_newlines(orig);
689 let is_last = i == num_newlines;
690 let needs_new_comment_line = if self.style.is_block_comment() {
691 num_newlines > 0 || self.buffer_contains_comment()
692 } else {
693 self.buffer_contains_comment()
694 };
f20569fa
XL
695
696 if let Some(ref mut ib) = self.item_block {
3c0e092e 697 if ib.add_line(line) {
f20569fa
XL
698 return false;
699 }
700 self.is_prev_line_multi_line = false;
701 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
702 let item_fmt = ib.create_string_format(&self.fmt);
a2a8927a
XL
703
704 // only push a comment_line_separator if we need to start a new comment line
705 if needs_new_comment_line {
706 self.result.push_str(&self.comment_line_separator);
707 }
708
f20569fa
XL
709 self.result.push_str(&ib.opener);
710 match rewrite_string(
711 &ib.trimmed_block_as_string(),
712 &item_fmt,
713 self.max_width.saturating_sub(ib.indent),
714 ) {
715 Some(s) => self.result.push_str(&Self::join_block(
716 &s,
717 &format!("{}{}", self.comment_line_separator, ib.line_start),
718 )),
719 None => self.result.push_str(&Self::join_block(
720 &ib.original_block_as_string(),
721 &self.comment_line_separator,
722 )),
723 };
724 } else if self.code_block_attr.is_some() {
725 if line.starts_with("```") {
726 let code_block = match self.code_block_attr.as_ref().unwrap() {
3c0e092e
XL
727 CodeBlockAttribute::Rust
728 if self.fmt.config.format_code_in_doc_comments()
729 && !self.code_block_buffer.is_empty() =>
730 {
f20569fa
XL
731 let mut config = self.fmt.config.clone();
732 config.set().wrap_comments(false);
923072b8
FG
733 let comment_max_width = config
734 .doc_comment_code_block_width()
735 .min(config.max_width());
736 config.set().max_width(comment_max_width);
3c0e092e
XL
737 if let Some(s) =
738 crate::format_code_block(&self.code_block_buffer, &config, false)
739 {
740 trim_custom_comment_prefix(&s.snippet)
f20569fa
XL
741 } else {
742 trim_custom_comment_prefix(&self.code_block_buffer)
743 }
744 }
3c0e092e 745 _ => trim_custom_comment_prefix(&self.code_block_buffer),
f20569fa
XL
746 };
747 if !code_block.is_empty() {
748 self.result.push_str(&self.comment_line_separator);
749 self.result
750 .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
751 }
752 self.code_block_buffer.clear();
753 self.result.push_str(&self.comment_line_separator);
754 self.result.push_str(line);
755 self.code_block_attr = None;
756 } else {
757 self.code_block_buffer
758 .push_str(&hide_sharp_behind_comment(line));
759 self.code_block_buffer.push('\n');
760 }
761 return false;
762 }
763
764 self.code_block_attr = None;
765 self.item_block = None;
94222f64
XL
766 if let Some(stripped) = line.strip_prefix("```") {
767 self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
3c0e092e
XL
768 } else if self.fmt.config.wrap_comments() && ItemizedBlock::is_itemized_line(line) {
769 let ib = ItemizedBlock::new(line);
f20569fa
XL
770 self.item_block = Some(ib);
771 return false;
772 }
773
774 if self.result == self.opener {
775 let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
776 if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
777 self.result.pop();
778 }
779 if line.is_empty() {
780 return false;
781 }
782 } else if self.is_prev_line_multi_line && !line.is_empty() {
783 self.result.push(' ')
784 } else if is_last && line.is_empty() {
785 // trailing blank lines are unwanted
786 if !self.closer.is_empty() {
787 self.result.push_str(&self.indent_str);
788 }
789 return true;
790 } else {
791 self.result.push_str(&self.comment_line_separator);
792 if !has_leading_whitespace && self.result.ends_with(' ') {
793 self.result.pop();
794 }
795 }
796
5e7ed085
FG
797 let is_markdown_header_doc_comment = is_doc_comment && line.starts_with("#");
798
799 // We only want to wrap the comment if:
800 // 1) wrap_comments = true is configured
801 // 2) The comment is not the start of a markdown header doc comment
802 // 3) The comment width exceeds the shape's width
923072b8 803 // 4) No URLS were found in the comment
5e7ed085
FG
804 let should_wrap_comment = self.fmt.config.wrap_comments()
805 && !is_markdown_header_doc_comment
f20569fa 806 && unicode_str_width(line) > self.fmt.shape.width
5e7ed085
FG
807 && !has_url(line);
808
809 if should_wrap_comment {
f20569fa
XL
810 match rewrite_string(line, &self.fmt, self.max_width) {
811 Some(ref s) => {
812 self.is_prev_line_multi_line = s.contains('\n');
813 self.result.push_str(s);
814 }
815 None if self.is_prev_line_multi_line => {
816 // We failed to put the current `line` next to the previous `line`.
817 // Remove the trailing space, then start rewrite on the next line.
818 self.result.pop();
819 self.result.push_str(&self.comment_line_separator);
820 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
821 match rewrite_string(line, &self.fmt, self.max_width) {
822 Some(ref s) => {
823 self.is_prev_line_multi_line = s.contains('\n');
824 self.result.push_str(s);
825 }
826 None => {
827 self.is_prev_line_multi_line = false;
828 self.result.push_str(line);
829 }
830 }
831 }
832 None => {
833 self.is_prev_line_multi_line = false;
834 self.result.push_str(line);
835 }
836 }
837
838 self.fmt.shape = if self.is_prev_line_multi_line {
839 // 1 = " "
840 let offset = 1 + last_line_width(&self.result) - self.line_start.len();
841 Shape {
842 width: self.max_width.saturating_sub(offset),
843 indent: self.fmt_indent,
844 offset: self.fmt.shape.offset + offset,
845 }
846 } else {
847 Shape::legacy(self.max_width, self.fmt_indent)
848 };
849 } else {
850 if line.is_empty() && self.result.ends_with(' ') && !is_last {
851 // Remove space if this is an empty comment or a doc comment.
852 self.result.pop();
853 }
854 self.result.push_str(line);
855 self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
856 self.is_prev_line_multi_line = false;
857 }
858
859 false
860 }
861}
862
863fn rewrite_comment_inner(
864 orig: &str,
865 block_style: bool,
866 style: CommentStyle<'_>,
867 shape: Shape,
868 config: &Config,
869 is_doc_comment: bool,
870) -> Option<String> {
871 let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
872
873 let line_breaks = count_newlines(orig.trim_end());
874 let lines = orig
875 .lines()
876 .enumerate()
877 .map(|(i, mut line)| {
878 line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
879 // Drop old closer.
880 if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
881 line = line[..(line.len() - 2)].trim_end();
882 }
883
884 line
885 })
886 .map(|s| left_trim_comment_line(s, &style))
887 .map(|(line, has_leading_whitespace)| {
888 if orig.starts_with("/*") && line_breaks == 0 {
889 (
890 line.trim_start(),
891 has_leading_whitespace || config.normalize_comments(),
892 )
893 } else {
894 (line, has_leading_whitespace || config.normalize_comments())
895 }
896 });
897
898 for (i, (line, has_leading_whitespace)) in lines.enumerate() {
5e7ed085 899 if rewriter.handle_line(orig, i, line, has_leading_whitespace, is_doc_comment) {
f20569fa
XL
900 break;
901 }
902 }
903
904 Some(rewriter.finish())
905}
906
/// Comment prefix used to hide `#`-prefixed code block lines from the formatter.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";

/// If `s`, once trimmed, is `#` or starts with `# `, returns `s` prefixed with
/// `RUSTFMT_CUSTOM_COMMENT_PREFIX`; otherwise returns `s` unchanged (borrowed).
fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
    let trimmed = s.trim();
    let is_hidden_line = trimmed == "#" || trimmed.starts_with("# ");
    if is_hidden_line {
        Cow::Owned(format!("{}{}", RUSTFMT_CUSTOM_COMMENT_PREFIX, s))
    } else {
        Cow::Borrowed(s)
    }
}
917
918fn trim_custom_comment_prefix(s: &str) -> String {
919 s.lines()
920 .map(|line| {
921 let left_trimmed = line.trim_start();
922 if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
923 left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
924 } else {
925 line
926 }
927 })
928 .collect::<Vec<_>>()
929 .join("\n")
930}
931
932/// Returns `true` if the given string MAY include URLs or alike.
933fn has_url(s: &str) -> bool {
934 // This function may return false positive, but should get its job done in most cases.
a2a8927a
XL
935 s.contains("https://")
936 || s.contains("http://")
937 || s.contains("ftp://")
938 || s.contains("file://")
939 || REFERENCE_LINK_URL.is_match(s)
f20569fa
XL
940}
941
942/// Given the span, rewrite the missing comment inside it if available.
943/// Note that the given span must only include comments (or leading/trailing whitespaces).
944pub(crate) fn rewrite_missing_comment(
945 span: Span,
946 shape: Shape,
947 context: &RewriteContext<'_>,
948) -> Option<String> {
949 let missing_snippet = context.snippet(span);
950 let trimmed_snippet = missing_snippet.trim();
951 // check the span starts with a comment
952 let pos = trimmed_snippet.find('/');
953 if !trimmed_snippet.is_empty() && pos.is_some() {
954 rewrite_comment(trimmed_snippet, false, shape, context.config)
955 } else {
956 Some(String::new())
957 }
958}
959
960/// Recover the missing comments in the specified span, if available.
961/// The layout of the comments will be preserved as long as it does not break the code
962/// and its total width does not exceed the max width.
963pub(crate) fn recover_missing_comment_in_span(
964 span: Span,
965 shape: Shape,
966 context: &RewriteContext<'_>,
967 used_width: usize,
968) -> Option<String> {
969 let missing_comment = rewrite_missing_comment(span, shape, context)?;
970 if missing_comment.is_empty() {
971 Some(String::new())
972 } else {
973 let missing_snippet = context.snippet(span);
974 let pos = missing_snippet.find('/')?;
975 // 1 = ` `
976 let total_width = missing_comment.len() + used_width + 1;
977 let force_new_line_before_comment =
978 missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
979 let sep = if force_new_line_before_comment {
980 shape.indent.to_string_with_newline(context.config)
981 } else {
982 Cow::from(" ")
983 };
984 Some(format!("{}{}", sep, missing_comment))
985 }
986}
987
/// Trim trailing whitespaces unless they consist of two or more whitespaces.
///
/// In a doc comment, a line ending with two spaces is markdown's hard line
/// break and is returned untouched. Every other line, including a doc comment
/// line ending with a single space, has its trailing whitespace removed.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    // Two spaces, not one: a single trailing space carries no meaning in markdown.
    if is_doc_comment && s.ends_with("  ") {
        s
    } else {
        s.trim_end()
    }
}
996
997/// Trims whitespace and aligns to indent, but otherwise does not change comments.
998fn light_rewrite_comment(
999 orig: &str,
1000 offset: Indent,
1001 config: &Config,
1002 is_doc_comment: bool,
1003) -> String {
1004 let lines: Vec<&str> = orig
1005 .lines()
1006 .map(|l| {
1007 // This is basically just l.trim(), but in the case that a line starts
1008 // with `*` we want to leave one space before it, so it aligns with the
1009 // `*` in `/*`.
1010 let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
1011 let left_trimmed = if let Some(fnw) = first_non_whitespace {
1012 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
1013 &l[fnw - 1..]
1014 } else {
1015 &l[fnw..]
1016 }
1017 } else {
1018 ""
1019 };
1020 // Preserve markdown's double-space line break syntax in doc comment.
1021 trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
1022 })
1023 .collect();
1024 lines.join(&format!("\n{}", offset.to_string(config)))
1025}
1026
1027/// Trims comment characters and possibly a single space from the left of a string.
1028/// Does not trim all whitespace. If a single space is trimmed from the left of the string,
1029/// this function returns true.
1030fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
1031 if line.starts_with("//! ")
1032 || line.starts_with("/// ")
1033 || line.starts_with("/*! ")
1034 || line.starts_with("/** ")
1035 {
1036 (&line[4..], true)
1037 } else if let CommentStyle::Custom(opener) = *style {
3c0e092e 1038 if let Some(stripped) = line.strip_prefix(opener) {
94222f64 1039 (stripped, true)
f20569fa
XL
1040 } else {
1041 (&line[opener.trim_end().len()..], false)
1042 }
1043 } else if line.starts_with("/* ")
1044 || line.starts_with("// ")
1045 || line.starts_with("//!")
1046 || line.starts_with("///")
1047 || line.starts_with("** ")
1048 || line.starts_with("/*!")
1049 || (line.starts_with("/**") && !line.starts_with("/**/"))
1050 {
1051 (&line[3..], line.chars().nth(2).unwrap() == ' ')
1052 } else if line.starts_with("/*")
1053 || line.starts_with("* ")
1054 || line.starts_with("//")
1055 || line.starts_with("**")
1056 {
1057 (&line[2..], line.chars().nth(1).unwrap() == ' ')
94222f64
XL
1058 } else if let Some(stripped) = line.strip_prefix('*') {
1059 (stripped, false)
f20569fa
XL
1060 } else {
1061 (line, line.starts_with(' '))
1062 }
1063}
1064
/// Pattern search that ignores text located inside comments.
pub(crate) trait FindUncommented {
    /// Returns the byte offset of the first occurrence of `pat` that lies
    /// outside of any comment, or `None` if there is no such occurrence.
    fn find_uncommented(&self, pat: &str) -> Option<usize>;
    /// Returns the byte offset of the last occurrence of `pat` that lies
    /// outside of any comment, or `None` if there is no such occurrence.
    fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
}
1069
1070impl FindUncommented for str {
1071 fn find_uncommented(&self, pat: &str) -> Option<usize> {
1072 let mut needle_iter = pat.chars();
1073 for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1074 match needle_iter.next() {
1075 None => {
1076 return Some(i - pat.len());
1077 }
1078 Some(c) => match kind {
1079 FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1080 _ => {
1081 needle_iter = pat.chars();
1082 }
1083 },
1084 }
1085 }
1086
1087 // Handle case where the pattern is a suffix of the search string
1088 match needle_iter.next() {
1089 Some(_) => None,
1090 None => Some(self.len() - pat.len()),
1091 }
1092 }
cdc7bbd5
XL
1093
1094 fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1095 if let Some(left) = self.find_uncommented(pat) {
1096 let mut result = left;
1097 // add 1 to use find_last_uncommented for &str after pat
1098 while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1099 result += next + 1;
1100 }
1101 Some(result)
1102 } else {
1103 None
1104 }
1105 }
f20569fa
XL
1106}
1107
1108// Returns the first byte position after the first comment. The given string
1109// is expected to be prefixed by a comment, including delimiters.
1110// Good: `/* /* inner */ outer */ code();`
1111// Bad: `code(); // hello\n world!`
1112pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1113 let mut iter = CharClasses::new(s.char_indices());
1114 for (kind, (i, _c)) in &mut iter {
1115 if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1116 return Some(i);
1117 }
1118 }
1119
1120 // Handle case where the comment ends at the end of `s`.
1121 if iter.status == CharClassesStatus::Normal {
1122 Some(s.len())
1123 } else {
1124 None
1125 }
1126}
1127
1128/// Returns `true` if text contains any comment.
1129pub(crate) fn contains_comment(text: &str) -> bool {
1130 CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1131}
1132
/// Iterator adaptor that tags every character of the underlying iterator with
/// a `FullCodeCharKind`, telling whether it is code, string content or part of
/// a comment.
pub(crate) struct CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    /// Underlying character source; multi-peek is needed to look ahead at the
    /// characters following the current one.
    base: MultiPeek<T>,
    /// State of the classifier after the last character that was yielded.
    status: CharClassesStatus,
}
1141
/// An item that carries a character, possibly along with extra data such as
/// its index.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}
1145
/// A plain `char` is its own character.
impl RichChar for char {
    fn get_char(&self) -> char {
        *self
    }
}
1151
/// For `(index, char)` pairs, as produced by `str::char_indices`, the
/// character is the second element.
impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        self.1
    }
}
1157
/// Internal state of the `CharClasses` state machine.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Character is plain code, outside of any literal or comment
    Normal,
    /// Character is within a string
    LitString,
    /// A backslash was just seen inside a string; the next character is escaped
    LitStringEscape,
    /// Character is within a raw string; the integer is the number of `#`
    /// delimiting the raw string
    LitRawString(u32),
    /// Inside the `r#…#"` opener of a raw string; the integer is the number of
    /// `#` counted so far
    RawStringPrefix(u32),
    /// Inside the `"#…#` closer of a raw string; the integer is the number of
    /// `#` that remain to be consumed
    RawStringSuffix(u32),
    /// Character is within a char literal
    LitChar,
    /// A backslash was just seen inside a char literal
    LitCharEscape,
    /// Character inside a block comment, with the integer indicating the nesting deepness of the
    /// comment
    BlockComment(u32),
    /// Character inside a block-commented string, with the integer indicating the nesting deepness
    /// of the comment
    StringInBlockComment(u32),
    /// Status when the '/' has been consumed, but not yet the '*', deepness is
    /// the new deepness (after the comment opening).
    BlockCommentOpening(u32),
    /// Status when the '*' has been consumed, but not yet the '/', deepness is
    /// the new deepness (after the comment closing).
    BlockCommentClosing(u32),
    /// Character is within a line comment
    LineComment,
}
1185
/// Distinguish between functional part of code and comments
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    /// Anything that is not part of a comment (code, literals, whitespace)
    Normal,
    /// Part of a comment
    Comment,
}
1192
/// Distinguish between functional part of code and comments,
/// describing opening and closing of comments for ease when chunking
/// code from tagged characters
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    /// Functional code, outside of comments and string content
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}
1219
1220impl FullCodeCharKind {
1221 pub(crate) fn is_comment(self) -> bool {
1222 match self {
1223 FullCodeCharKind::StartComment
1224 | FullCodeCharKind::InComment
1225 | FullCodeCharKind::EndComment
1226 | FullCodeCharKind::StartStringCommented
1227 | FullCodeCharKind::InStringCommented
1228 | FullCodeCharKind::EndStringCommented => true,
1229 _ => false,
1230 }
1231 }
1232
1233 /// Returns true if the character is inside a comment
1234 pub(crate) fn inside_comment(self) -> bool {
1235 match self {
1236 FullCodeCharKind::InComment
1237 | FullCodeCharKind::StartStringCommented
1238 | FullCodeCharKind::InStringCommented
1239 | FullCodeCharKind::EndStringCommented => true,
1240 _ => false,
1241 }
1242 }
1243
1244 pub(crate) fn is_string(self) -> bool {
1245 self == FullCodeCharKind::InString || self == FullCodeCharKind::StartString
1246 }
1247
1248 /// Returns true if the character is within a commented string
1249 pub(crate) fn is_commented_string(self) -> bool {
1250 self == FullCodeCharKind::InStringCommented
1251 || self == FullCodeCharKind::StartStringCommented
1252 }
1253
1254 fn to_codecharkind(self) -> CodeCharKind {
1255 if self.is_comment() {
1256 CodeCharKind::Comment
1257 } else {
1258 CodeCharKind::Normal
1259 }
1260 }
1261}
1262
1263impl<T> CharClasses<T>
1264where
1265 T: Iterator,
1266 T::Item: RichChar,
1267{
1268 pub(crate) fn new(base: T) -> CharClasses<T> {
1269 CharClasses {
1270 base: multipeek(base),
1271 status: CharClassesStatus::Normal,
1272 }
1273 }
1274}
1275
1276fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1277where
1278 T: Iterator,
1279 T::Item: RichChar,
1280{
1281 for _ in 0..count {
1282 match iter.peek() {
1283 Some(c) if c.get_char() == '#' => continue,
1284 _ => return false,
1285 }
1286 }
1287 true
1288}
1289
/// Yields every item of the underlying iterator together with its
/// classification, advancing the internal state machine by one character per
/// call.
impl<T> Iterator for CharClasses<T>
where
    T: Iterator,
    T::Item: RichChar,
{
    type Item = (FullCodeCharKind, T::Item);

    /// Consumes one item, computes the state reached after it and returns the
    /// item tagged with its kind. Branches that need to report a comment kind
    /// set `self.status` themselves and return early; all other branches yield
    /// the new status as the value of the `match` and report `char_kind`.
    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = self.base.next()?;
        let chr = item.get_char();
        // Kind reported when the branch below does not override it.
        let mut char_kind = FullCodeCharKind::Normal;
        self.status = match self.status {
            CharClassesStatus::LitRawString(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => {
                        if sharps == 0 {
                            // `r"…"`: the quote closes the raw string right away.
                            char_kind = FullCodeCharKind::Normal;
                            CharClassesStatus::Normal
                        } else if is_raw_string_suffix(&mut self.base, sharps) {
                            // The quote is followed by enough `#` to close the string.
                            CharClassesStatus::RawStringSuffix(sharps)
                        } else {
                            // A quote that is part of the raw string's content.
                            CharClassesStatus::LitRawString(sharps)
                        }
                    }
                    _ => CharClassesStatus::LitRawString(sharps),
                }
            }
            CharClassesStatus::RawStringPrefix(sharps) => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    // Count the `#` of the opener until the opening quote.
                    '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
                    '"' => CharClassesStatus::LitRawString(sharps),
                    _ => CharClassesStatus::Normal, // Unreachable.
                }
            }
            CharClassesStatus::RawStringSuffix(sharps) => {
                match chr {
                    '#' => {
                        if sharps == 1 {
                            // Last `#` of the closer; it is reported as `Normal`.
                            CharClassesStatus::Normal
                        } else {
                            char_kind = FullCodeCharKind::InString;
                            CharClassesStatus::RawStringSuffix(sharps - 1)
                        }
                    }
                    _ => CharClassesStatus::Normal, // Unreachable
                }
            }
            CharClassesStatus::LitString => {
                char_kind = FullCodeCharKind::InString;
                match chr {
                    '"' => CharClassesStatus::Normal,
                    '\\' => CharClassesStatus::LitStringEscape,
                    _ => CharClassesStatus::LitString,
                }
            }
            CharClassesStatus::LitStringEscape => {
                // The escaped character never terminates the string.
                char_kind = FullCodeCharKind::InString;
                CharClassesStatus::LitString
            }
            CharClassesStatus::LitChar => match chr {
                '\\' => CharClassesStatus::LitCharEscape,
                '\'' => CharClassesStatus::Normal,
                _ => CharClassesStatus::LitChar,
            },
            CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
            CharClassesStatus::Normal => match chr {
                // `r` directly followed by `#` or `"` opens a raw string.
                'r' => match self.base.peek().map(RichChar::get_char) {
                    Some('#') | Some('"') => {
                        char_kind = FullCodeCharKind::InString;
                        CharClassesStatus::RawStringPrefix(0)
                    }
                    _ => CharClassesStatus::Normal,
                },
                '"' => {
                    char_kind = FullCodeCharKind::InString;
                    CharClassesStatus::LitString
                }
                '\'' => {
                    // A quote followed by a backslash starts an escaped char literal.
                    // HACK: Work around mut borrow.
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\\' => {
                            self.status = CharClassesStatus::LitChar;
                            return Some((char_kind, item));
                        }
                        _ => (),
                    }

                    // This is the second consecutive `peek`, so (MultiPeek semantics) it
                    // looks two characters ahead: a closing quote there means `'x'`, a
                    // char literal; anything else is treated as not being a char
                    // literal (e.g. a lifetime).
                    match self.base.peek() {
                        Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
                        _ => CharClassesStatus::Normal,
                    }
                }
                '/' => match self.base.peek() {
                    Some(next) if next.get_char() == '*' => {
                        self.status = CharClassesStatus::BlockCommentOpening(1);
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    Some(next) if next.get_char() == '/' => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::StartComment, item));
                    }
                    _ => CharClassesStatus::Normal,
                },
                _ => CharClassesStatus::Normal,
            },
            CharClassesStatus::StringInBlockComment(deepness) => {
                char_kind = FullCodeCharKind::InStringCommented;
                if chr == '"' {
                    CharClassesStatus::BlockComment(deepness)
                } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
                    // `*/` closes the comment even while inside a commented string.
                    char_kind = FullCodeCharKind::InComment;
                    CharClassesStatus::BlockCommentClosing(deepness - 1)
                } else {
                    CharClassesStatus::StringInBlockComment(deepness)
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
                assert_ne!(deepness, 0);
                char_kind = FullCodeCharKind::InComment;
                match self.base.peek() {
                    // `*/`: leave one nesting level.
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
                    }
                    // `/*`: enter a nested block comment.
                    Some(next) if next.get_char() == '*' && chr == '/' => {
                        CharClassesStatus::BlockCommentOpening(deepness + 1)
                    }
                    _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
                    _ => self.status,
                }
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                // The previous character was the `/` of `/*`.
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                // The previous character was the `*` of `*/`.
                assert_eq!(chr, '/');
                if deepness == 0 {
                    // Outermost comment closed.
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
                    self.status = CharClassesStatus::BlockComment(deepness);
                    return Some((FullCodeCharKind::InComment, item));
                }
            }
            CharClassesStatus::LineComment => match chr {
                // The newline is the last character of a line comment.
                '\n' => {
                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                }
                _ => {
                    self.status = CharClassesStatus::LineComment;
                    return Some((FullCodeCharKind::InComment, item));
                }
            },
        };
        Some((char_kind, item))
    }
}
1451
/// An iterator over the lines of a string, paired with the char kind at the
/// end of the line.
pub(crate) struct LineClasses<'a> {
    /// Classified characters of the whole input.
    base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
    /// Kind associated with the line that was produced last.
    kind: FullCodeCharKind,
}
1458
1459impl<'a> LineClasses<'a> {
1460 pub(crate) fn new(s: &'a str) -> Self {
1461 LineClasses {
1462 base: CharClasses::new(s.chars()).peekable(),
1463 kind: FullCodeCharKind::Normal,
1464 }
1465 }
1466}
1467
1468impl<'a> Iterator for LineClasses<'a> {
1469 type Item = (FullCodeCharKind, String);
1470
1471 fn next(&mut self) -> Option<Self::Item> {
1472 self.base.peek()?;
1473
1474 let mut line = String::new();
1475
1476 let start_kind = match self.base.peek() {
1477 Some((kind, _)) => *kind,
1478 None => unreachable!(),
1479 };
1480
3c0e092e 1481 for (kind, c) in self.base.by_ref() {
f20569fa
XL
1482 // needed to set the kind of the ending character on the last line
1483 self.kind = kind;
1484 if c == '\n' {
1485 self.kind = match (start_kind, kind) {
1486 (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1487 FullCodeCharKind::StartString
1488 }
1489 (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1490 FullCodeCharKind::EndString
1491 }
1492 (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1493 FullCodeCharKind::StartStringCommented
1494 }
1495 (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1496 FullCodeCharKind::EndStringCommented
1497 }
1498 _ => kind,
1499 };
1500 break;
1501 }
1502 line.push(c);
1503 }
1504
1505 // Workaround for CRLF newline.
1506 if line.ends_with('\r') {
1507 line.pop();
1508 }
1509
1510 Some((self.kind, line))
1511 }
1512}
1513
/// Iterator over functional and commented parts of a string. Any part of a string is either
/// functional code, *one* block comment, or *one* line comment. Whitespace between
/// comments is functional code. Line comments contain their ending newlines.
struct UngroupedCommentCodeSlices<'a> {
    /// The text being split.
    slice: &'a str,
    /// Classified characters of `slice`, with their byte offsets.
    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
}
1521
1522impl<'a> UngroupedCommentCodeSlices<'a> {
1523 fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1524 UngroupedCommentCodeSlices {
1525 slice: code,
1526 iter: CharClasses::new(code.char_indices()).peekable(),
1527 }
1528 }
1529}
1530
1531impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1532 type Item = (CodeCharKind, usize, &'a str);
1533
1534 fn next(&mut self) -> Option<Self::Item> {
1535 let (kind, (start_idx, _)) = self.iter.next()?;
1536 match kind {
1537 FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1538 // Consume all the Normal code
1539 while let Some(&(char_kind, _)) = self.iter.peek() {
1540 if char_kind.is_comment() {
1541 break;
1542 }
1543 let _ = self.iter.next();
1544 }
1545 }
1546 FullCodeCharKind::StartComment => {
1547 // Consume the whole comment
1548 loop {
1549 match self.iter.next() {
1550 Some((kind, ..)) if kind.inside_comment() => continue,
1551 _ => break,
1552 }
1553 }
1554 }
1555 _ => panic!(),
1556 }
1557 let slice = match self.iter.peek() {
1558 Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1559 None => &self.slice[start_idx..],
1560 };
1561 Some((
1562 if kind.is_comment() {
1563 CodeCharKind::Comment
1564 } else {
1565 CodeCharKind::Normal
1566 },
1567 start_idx,
1568 slice,
1569 ))
1570 }
1571}
1572
/// Iterator over an alternating sequence of functional and commented parts of
/// a string. The first item is always a, possibly zero length, subslice of
/// functional text. Line style comments contain their ending newlines.
pub(crate) struct CommentCodeSlices<'a> {
    /// The text being split.
    slice: &'a str,
    /// Kind of the part that was returned last.
    last_slice_kind: CodeCharKind,
    /// Byte offset in `slice` at which the next part starts.
    last_slice_end: usize,
}
1581
1582impl<'a> CommentCodeSlices<'a> {
1583 pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1584 CommentCodeSlices {
1585 slice,
1586 last_slice_kind: CodeCharKind::Comment,
1587 last_slice_end: 0,
1588 }
1589 }
1590}
1591
impl<'a> Iterator for CommentCodeSlices<'a> {
    type Item = (CodeCharKind, usize, &'a str);

    /// Returns the next part as `(kind, start offset, text)`; the kind
    /// alternates between code and comment on every call.
    fn next(&mut self) -> Option<Self::Item> {
        if self.last_slice_end == self.slice.len() {
            return None;
        }

        // End of the part being built; stays equal to `last_slice_end` until
        // a character of the other kind is found.
        let mut sub_slice_end = self.last_slice_end;
        // Start of the current run of "connector" whitespace, if any.
        let mut first_whitespace = None;
        let subslice = &self.slice[self.last_slice_end..];
        let mut iter = CharClasses::new(subslice.char_indices());

        for (kind, (i, c)) in &mut iter {
            // While collecting a comment that begins with `//`, spaces and tabs
            // do not end the part: they may connect two consecutive line
            // comments, which are then returned as a single part.
            let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
                && &subslice[..2] == "//"
                && [' ', '\t'].contains(&c);

            if is_comment_connector && first_whitespace.is_none() {
                first_whitespace = Some(i);
            }

            // A character of the same kind as the previous part marks the end
            // of the part being built.
            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
                // Whitespace that turned out not to connect two comments is
                // left for the following part.
                let last_index = match first_whitespace {
                    Some(j) => j,
                    None => i,
                };
                sub_slice_end = self.last_slice_end + last_index;
                break;
            }

            if !is_comment_connector {
                first_whitespace = None;
            }
        }

        // Note: `iter.next()` consumes one more character here; `iter` is not
        // used afterwards.
        if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
            // This was the last subslice.
            sub_slice_end = match first_whitespace {
                Some(i) => self.last_slice_end + i,
                None => self.slice.len(),
            };
        }

        // Parts alternate between code and comment.
        let kind = match self.last_slice_kind {
            CodeCharKind::Comment => CodeCharKind::Normal,
            CodeCharKind::Normal => CodeCharKind::Comment,
        };
        let res = (
            kind,
            self.last_slice_end,
            &self.slice[self.last_slice_end..sub_slice_end],
        );
        self.last_slice_end = sub_slice_end;
        self.last_slice_kind = kind;

        Some(res)
    }
}
1651
1652/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1653/// (if it fits in the width/offset, else return `None`), else return `new`
1654pub(crate) fn recover_comment_removed(
1655 new: String,
1656 span: Span,
1657 context: &RewriteContext<'_>,
1658) -> Option<String> {
1659 let snippet = context.snippet(span);
1660 if snippet != new && changed_comment_content(snippet, &new) {
1661 // We missed some comments. Warn and keep the original text.
1662 if context.config.error_on_unformatted() {
1663 context.report.append(
1664 context.parse_sess.span_to_filename(span),
1665 vec![FormattingError::from_span(
1666 span,
3c0e092e 1667 context.parse_sess,
f20569fa
XL
1668 ErrorKind::LostComment,
1669 )],
1670 );
1671 }
1672 Some(snippet.to_owned())
1673 } else {
1674 Some(new)
1675 }
1676}
1677
1678pub(crate) fn filter_normal_code(code: &str) -> String {
1679 let mut buffer = String::with_capacity(code.len());
1680 LineClasses::new(code).for_each(|(kind, line)| match kind {
1681 FullCodeCharKind::Normal
1682 | FullCodeCharKind::StartString
1683 | FullCodeCharKind::InString
1684 | FullCodeCharKind::EndString => {
1685 buffer.push_str(&line);
1686 buffer.push('\n');
1687 }
1688 _ => (),
1689 });
1690 if !code.ends_with('\n') && buffer.ends_with('\n') {
1691 buffer.pop();
1692 }
1693 buffer
1694}
1695
1696/// Returns `true` if the two strings of code have the same payload of comments.
1697/// The payload of comments is everything in the string except:
1698/// - actual code (not comments),
1699/// - comment start/end marks,
1700/// - whitespace,
1701/// - '*' at the beginning of lines in block comments.
1702fn changed_comment_content(orig: &str, new: &str) -> bool {
1703 // Cannot write this as a fn since we cannot return types containing closures.
1704 let code_comment_content = |code| {
1705 let slices = UngroupedCommentCodeSlices::new(code);
1706 slices
1707 .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1708 .flat_map(|(_, _, s)| CommentReducer::new(s))
1709 };
1710 let res = code_comment_content(orig).ne(code_comment_content(new));
1711 debug!(
1712 "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1713 res,
1714 orig,
1715 new,
1716 code_comment_content(orig).collect::<String>(),
1717 code_comment_content(new).collect::<String>()
1718 );
1719 res
1720}
1721
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (`/* ... */`).
    is_block: bool,
    /// Whether the next character read is the first one of a line (only
    /// tracked after the first newline has been seen).
    at_start_line: bool,
    /// Remaining characters of the comment, header and closer removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with a comment opener.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The line prefix has been consumed. Only a newline (a line
                // made of a lone '*') puts us back at the start of a line;
                // otherwise any later '*' on this line is payload and must
                // not be skipped.
                self.at_start_line = c == '\n';
            } else if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the comment opener (`//`, `///`, `//!`, `/*`, `/**`, `/*!`) and, for
/// block comments, the closing `*/` from `comment`.
///
/// An unterminated block comment is returned without its opener only, instead
/// of losing its last two characters or panicking on an out-of-range slice.
///
/// # Panics
///
/// Panics if `comment` does not start with a comment opener.
fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        return &comment[3..];
    }
    if let Some(stripped) = comment.strip_prefix("//") {
        return stripped;
    }

    // `/**/` is an empty plain block comment, not a doc comment opener.
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let body = if is_doc_block {
        &comment[3..]
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{}' is not a comment",
            comment
        );
        &comment[2..]
    };
    body.strip_suffix("*/").unwrap_or(body)
}
1787
#[cfg(test)]
mod test {
    use super::*;
    use crate::shape::{Indent, Shape};

    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());

        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    #[test]
    fn comment_code_slices() {
        let input = "code(); /* test */ 1 + 1";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "code(); "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 8, "/* test */"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 18, " 1 + 1"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    #[test]
    fn comment_code_slices_two() {
        let input = "// comment\n test();";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, ""), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 0, "// comment\n"),
            iter.next().unwrap()
        );
        assert_eq!(
            (CodeCharKind::Normal, 11, " test();"),
            iter.next().unwrap()
        );
        assert_eq!(None, iter.next());
    }

    #[test]
    fn comment_code_slices_three() {
        // The second comment is indented by four spaces: 2 ("1 ") + 11
        // ("// comment\n") + 16 ("    // comment2\n") = 29, the offset
        // expected for the final slice below.
        let input = "1 // comment\n    // comment2\n\n";
        let mut iter = CommentCodeSlices::new(input);

        assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
        assert_eq!(
            (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
            iter.next().unwrap()
        );
        assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }

    #[test]
    #[rustfmt::skip]
    fn format_doc_comments() {
        let mut wrap_normalize_config: crate::config::Config = Default::default();
        wrap_normalize_config.set().wrap_comments(true);
        wrap_normalize_config.set().normalize_comments(true);

        let mut wrap_config: crate::config::Config = Default::default();
        wrap_config.set().wrap_comments(true);

        let comment = rewrite_comment(" //test",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/* test */", comment);

        let comment = rewrite_comment("// comment on a",
                                      false,
                                      Shape::legacy(10, Indent::empty()),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// comment\n// on a", comment);

        // Continuation lines are aligned with the shape's indentation, i.e.
        // 12 columns here.
        let comment = rewrite_comment("// A multi line comment\n // between args.",
                                      false,
                                      Shape::legacy(60, Indent::new(0, 12)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// A multi line comment\n            // between args.", comment);

        let input = "// comment";
        let expected =
            "/* comment */";
        let comment = rewrite_comment(input,
                                      true,
                                      Shape::legacy(9, Indent::new(0, 69)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!(expected, comment);

        let comment = rewrite_comment("/* trimmed */",
                                      true,
                                      Shape::legacy(100, Indent::new(0, 100)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/* trimmed */", comment);

        // Check that different comment style are properly recognised.
        let comment = rewrite_comment(r#"/// test1
 /// test2
 /*
 * test3
 */"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("/// test1\n/// test2\n// test3", comment);

        // Check that the blank line marks the end of a commented paragraph.
        let comment = rewrite_comment(r#"// test1

 // test2"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("// test1\n\n// test2", comment);

        // Check that the blank line marks the end of a custom-commented paragraph.
        let comment = rewrite_comment(r#"//@ test1

 //@ test2"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_normalize_config).unwrap();
        assert_eq!("//@ test1\n\n//@ test2", comment);

        // Check that bare lines are just indented but otherwise left unchanged.
        // NOTE(review): the relative indentation of the bare lines in the input
        // and in the expected output could not be confirmed from the available
        // text -- verify against the upstream test before relying on it.
        let comment = rewrite_comment(r#"// test1
 /*
 a bare line!

 another bare line!
 */"#,
                                      false,
                                      Shape::legacy(100, Indent::new(0, 0)),
                                      &wrap_config).unwrap();
        assert_eq!("// test1\n/*\n a bare line!\n\n another bare line!\n*/", comment);
    }

    // This is probably intended to be a non-test fn, but it is not used.
    // We should keep this around unless it helps us test stuff to remove it.
    fn uncommented(text: &str) -> String {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| match s {
                FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
                _ => None,
            })
            .collect()
    }

    #[test]
    fn test_uncommented() {
        assert_eq!(&uncommented("abc/*...*/"), "abc");
        assert_eq!(
            &uncommented("// .... /* \n../* /* *** / */ */a/* // */c\n"),
            "..ac\n"
        );
        assert_eq!(&uncommented("abc \" /* */\" qsdf"), "abc \" /* */\" qsdf");
    }

    #[test]
    fn test_contains_comment() {
        assert_eq!(contains_comment("abc"), false);
        assert_eq!(contains_comment("abc // qsdf"), true);
        assert_eq!(contains_comment("abc /* kqsdf"), true);
        assert_eq!(contains_comment("abc \" /* */\" qsdf"), false);
    }

    #[test]
    fn test_find_uncommented() {
        fn check(haystack: &str, needle: &str, expected: Option<usize>) {
            assert_eq!(expected, haystack.find_uncommented(needle));
        }

        check("/*/ */test", "test", Some(6));
        check("//test\ntest", "test", Some(7));
        check("/* comment only */", "whatever", None);
        check(
            "/* comment */ some text /* more commentary */ result",
            "result",
            Some(46),
        );
        check("sup // sup", "p", Some(2));
        check("sup", "x", None);
        check(r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9));
        check("/*sup yo? \n sup*/ sup", "p", Some(20));
        check("hel/*lohello*/lo", "hello", None);
        check("acb", "ab", None);
        check(",/*A*/ ", ",", Some(0));
        check("abc", "abc", Some(0));
        check("/* abc */", "abc", None);
        check("/**/abc/* */", "abc", Some(4));
        check("\"/* abc */\"", "abc", Some(4));
        check("\"/* abc", "abc", Some(4));
    }

    #[test]
    fn test_filter_normal_code() {
        let s = r#"
fn main() {
 println!("hello, world");
}
"#;
        assert_eq!(s, filter_normal_code(s));
        let s_with_comment = r#"
fn main() {
 // hello, world
 println!("hello, world");
}
"#;
        assert_eq!(s, filter_normal_code(s_with_comment));
    }
}