]> git.proxmox.com Git - rustc.git/blame - src/tools/rustfmt/src/comment.rs
New upstream version 1.59.0+dfsg1
[rustc.git] / src / tools / rustfmt / src / comment.rs
CommitLineData
f20569fa
XL
1// Formatting and tools for comments.
2
3use std::{self, borrow::Cow, iter};
4
5use itertools::{multipeek, MultiPeek};
a2a8927a
XL
6use lazy_static::lazy_static;
7use regex::Regex;
f20569fa
XL
8use rustc_span::Span;
9
10use crate::config::Config;
11use crate::rewrite::RewriteContext;
12use crate::shape::{Indent, Shape};
13use crate::string::{rewrite_string, StringFormat};
14use crate::utils::{
3c0e092e
XL
15 count_newlines, first_line_width, last_line_width, trim_left_preserve_layout,
16 trimmed_last_line_width, unicode_str_width,
f20569fa
XL
17};
18use crate::{ErrorKind, FormattingError};
19
a2a8927a
XL
lazy_static! {
    /// A regex matching reference doc links.
    ///
    /// ```markdown
    /// /// An [example].
    /// ///
    /// /// [example]: this::is::a::link
    /// ```
    ///
    /// The pattern is anchored at the start of the text: a bracketed, non-empty label,
    /// at most one whitespace character, and then a colon.
    static ref REFERENCE_LINK_URL: Regex = Regex::new(r"^\[.+\]\s?:").unwrap();
}
30
f20569fa
XL
/// Returns `true` if `comment` opens with `//` immediately followed by a character that is
/// neither alphanumeric nor whitespace (for example `//@` or `//#`).
///
/// A bare `//` with nothing after it is not a custom comment.
fn is_custom_comment(comment: &str) -> bool {
    match comment.strip_prefix("//") {
        Some(rest) => rest
            .chars()
            .next()
            .map_or(false, |marker| !(marker.is_alphanumeric() || marker.is_whitespace())),
        None => false,
    }
}
40
/// The syntactic flavour of a comment, identified by the characters that open it.
#[derive(Copy, Clone, PartialEq, Eq)]
pub(crate) enum CommentStyle<'a> {
    /// Line comment opened with `//`.
    DoubleSlash,
    /// Line comment opened with `///`.
    TripleSlash,
    /// Line comment opened with `//!`.
    Doc,
    /// Block comment opened with `/*`.
    SingleBullet,
    /// Block comment opened with `/**`.
    DoubleBullet,
    /// Block comment opened with `/*!`.
    Exclamation,
    /// Line comment with a custom opener; the borrowed string is that opener.
    Custom(&'a str),
}
51
/// Extracts the opener of a custom comment: the first line of `s` up to and including its
/// first space, or the whole first line when it contains no space.
///
/// Returns an empty string when `s` has no lines.
fn custom_opener(s: &str) -> &str {
    let first_line = match s.lines().next() {
        Some(line) => line,
        None => return "",
    };
    match first_line.find(' ') {
        Some(space_index) => &first_line[..=space_index],
        None => first_line,
    }
}
59
60impl<'a> CommentStyle<'a> {
61 /// Returns `true` if the commenting style covers a line only.
62 pub(crate) fn is_line_comment(&self) -> bool {
63 match *self {
64 CommentStyle::DoubleSlash
65 | CommentStyle::TripleSlash
66 | CommentStyle::Doc
67 | CommentStyle::Custom(_) => true,
68 _ => false,
69 }
70 }
71
72 /// Returns `true` if the commenting style can span over multiple lines.
73 pub(crate) fn is_block_comment(&self) -> bool {
74 match *self {
75 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
76 true
77 }
78 _ => false,
79 }
80 }
81
82 /// Returns `true` if the commenting style is for documentation.
83 pub(crate) fn is_doc_comment(&self) -> bool {
94222f64 84 matches!(*self, CommentStyle::TripleSlash | CommentStyle::Doc)
f20569fa
XL
85 }
86
87 pub(crate) fn opener(&self) -> &'a str {
88 match *self {
89 CommentStyle::DoubleSlash => "// ",
90 CommentStyle::TripleSlash => "/// ",
91 CommentStyle::Doc => "//! ",
92 CommentStyle::SingleBullet => "/* ",
93 CommentStyle::DoubleBullet => "/** ",
94 CommentStyle::Exclamation => "/*! ",
95 CommentStyle::Custom(opener) => opener,
96 }
97 }
98
99 pub(crate) fn closer(&self) -> &'a str {
100 match *self {
101 CommentStyle::DoubleSlash
102 | CommentStyle::TripleSlash
103 | CommentStyle::Custom(..)
104 | CommentStyle::Doc => "",
105 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
106 " */"
107 }
108 }
109 }
110
111 pub(crate) fn line_start(&self) -> &'a str {
112 match *self {
113 CommentStyle::DoubleSlash => "// ",
114 CommentStyle::TripleSlash => "/// ",
115 CommentStyle::Doc => "//! ",
116 CommentStyle::SingleBullet | CommentStyle::DoubleBullet | CommentStyle::Exclamation => {
117 " * "
118 }
119 CommentStyle::Custom(opener) => opener,
120 }
121 }
122
123 pub(crate) fn to_str_tuplet(&self) -> (&'a str, &'a str, &'a str) {
124 (self.opener(), self.closer(), self.line_start())
125 }
126}
127
128pub(crate) fn comment_style(orig: &str, normalize_comments: bool) -> CommentStyle<'_> {
129 if !normalize_comments {
130 if orig.starts_with("/**") && !orig.starts_with("/**/") {
131 CommentStyle::DoubleBullet
132 } else if orig.starts_with("/*!") {
133 CommentStyle::Exclamation
134 } else if orig.starts_with("/*") {
135 CommentStyle::SingleBullet
136 } else if orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/') {
137 CommentStyle::TripleSlash
138 } else if orig.starts_with("//!") {
139 CommentStyle::Doc
140 } else if is_custom_comment(orig) {
141 CommentStyle::Custom(custom_opener(orig))
142 } else {
143 CommentStyle::DoubleSlash
144 }
145 } else if (orig.starts_with("///") && orig.chars().nth(3).map_or(true, |c| c != '/'))
146 || (orig.starts_with("/**") && !orig.starts_with("/**/"))
147 {
148 CommentStyle::TripleSlash
149 } else if orig.starts_with("//!") || orig.starts_with("/*!") {
150 CommentStyle::Doc
151 } else if is_custom_comment(orig) {
152 CommentStyle::Custom(custom_opener(orig))
153 } else {
154 CommentStyle::DoubleSlash
155 }
156}
157
/// Returns `true` if `s`, ignoring trailing whitespace, ends with the block-comment
/// terminator `*/`.
pub(crate) fn is_last_comment_block(s: &str) -> bool {
    let without_trailing_ws = s.trim_end();
    without_trailing_ws.strip_suffix("*/").is_some()
}
162
/// Combine `prev_str` and `next_str` into a single `String`. `span` may contain
/// comments between two strings. If there are such comments, then that will be
/// recovered. If `allow_extend` is true and there is no comment between the two
/// strings, then they will be put on a single line as long as doing so does not
/// exceed `shape.width`.
///
/// Returns `None` when rewriting the comment found in `span` fails.
pub(crate) fn combine_strs_with_missing_comments(
    context: &RewriteContext<'_>,
    prev_str: &str,
    next_str: &str,
    span: Span,
    shape: Shape,
    allow_extend: bool,
) -> Option<String> {
    trace!(
        "combine_strs_with_missing_comments `{}` `{}` {:?} {:?}",
        prev_str,
        next_str,
        span,
        shape
    );

    let mut result =
        String::with_capacity(prev_str.len() + next_str.len() + shape.indent.width() + 128);
    result.push_str(prev_str);
    // A single-line result is only possible when neither input already spans several lines.
    let mut allow_one_line = !prev_str.contains('\n') && !next_str.contains('\n');
    // No separating space is needed when either side is empty or when the last line of
    // `prev_str` has a trimmed width of zero.
    let first_sep =
        if prev_str.is_empty() || next_str.is_empty() || trimmed_last_line_width(prev_str) == 0 {
            ""
        } else {
            " "
        };
    let mut one_line_width =
        last_line_width(prev_str) + first_line_width(next_str) + first_sep.len();

    let config = context.config;
    let indent = shape.indent;
    let missing_comment = rewrite_missing_comment(span, shape, context)?;

    // Fast path: nothing to recover, so just decide between a space and a line break.
    if missing_comment.is_empty() {
        if allow_extend && one_line_width <= shape.width {
            result.push_str(first_sep);
        } else if !prev_str.is_empty() {
            result.push_str(&indent.to_string_with_newline(config))
        }
        result.push_str(next_str);
        return Some(result);
    }

    // We have a missing comment between the first expression and the second expression.

    // Peek the original source code and find out whether there is a newline between the first
    // expression and the second expression or the missing comment. We will preserve the original
    // layout whenever possible.
    let original_snippet = context.snippet(span);
    let prefer_same_line = if let Some(pos) = original_snippet.find('/') {
        !original_snippet[..pos].contains('\n')
    } else {
        !original_snippet.contains('\n')
    };

    // The separator counted above is replaced by the ones chosen below.
    one_line_width -= first_sep.len();
    // Separator between `prev_str` and the comment.
    let first_sep = if prev_str.is_empty() || missing_comment.is_empty() {
        Cow::from("")
    } else {
        let one_line_width = last_line_width(prev_str) + first_line_width(&missing_comment) + 1;
        if prefer_same_line && one_line_width <= shape.width {
            Cow::from(" ")
        } else {
            indent.to_string_with_newline(config)
        }
    };
    result.push_str(&first_sep);
    result.push_str(&missing_comment);

    // Separator between the comment and `next_str`. A `//` comment always forces a line
    // break, otherwise the code that follows would become part of the comment.
    let second_sep = if missing_comment.is_empty() || next_str.is_empty() {
        Cow::from("")
    } else if missing_comment.starts_with("//") {
        indent.to_string_with_newline(config)
    } else {
        one_line_width += missing_comment.len() + first_sep.len() + 1;
        allow_one_line &= !missing_comment.starts_with("//") && !missing_comment.contains('\n');
        if prefer_same_line && allow_one_line && one_line_width <= shape.width {
            Cow::from(" ")
        } else {
            indent.to_string_with_newline(config)
        }
    };
    result.push_str(&second_sep);
    result.push_str(next_str);

    Some(result)
}
255
/// Rewrites `orig` as a doc comment: delegates to `identify_comment` with `block_style`
/// disabled and `is_doc_comment` enabled.
pub(crate) fn rewrite_doc_comment(orig: &str, shape: Shape, config: &Config) -> Option<String> {
    identify_comment(orig, false, shape, config, true)
}
259
/// Rewrites `orig` as a regular (non-doc) comment: delegates to `identify_comment` with the
/// given `block_style` and `is_doc_comment` disabled.
pub(crate) fn rewrite_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
) -> Option<String> {
    identify_comment(orig, block_style, shape, config, false)
}
268
/// Rewrites the comments in `orig` one group at a time.
///
/// The style of the first comment is detected, the leading group of comments sharing that
/// style is split off and rewritten, and the function recurses on the remainder. The rewritten
/// groups are joined with a newline followed by the indentation of `shape`.
///
/// Returns `None` if rewriting any group fails.
fn identify_comment(
    orig: &str,
    block_style: bool,
    shape: Shape,
    config: &Config,
    is_doc_comment: bool,
) -> Option<String> {
    let style = comment_style(orig, false);

    // Computes the byte length of line taking into account a newline if the line is part of a
    // paragraph.
    fn compute_len(orig: &str, line: &str) -> usize {
        if orig.len() > line.len() {
            // `\r\n` line endings take two bytes, `\n` takes one.
            if orig.as_bytes()[line.len()] == b'\r' {
                line.len() + 2
            } else {
                line.len() + 1
            }
        } else {
            line.len()
        }
    }

    // Get the first group of line comments having the same commenting style.
    //
    // Returns a tuple with:
    // - a boolean indicating if there is a blank line
    // - a number indicating the size of the first group of comments
    fn consume_same_line_comments(
        style: CommentStyle<'_>,
        orig: &str,
        line_start: &str,
    ) -> (bool, usize) {
        let mut first_group_ending = 0;
        let mut hbl = false;

        for line in orig.lines() {
            let trimmed_line = line.trim_start();
            if trimmed_line.is_empty() {
                // A blank line terminates the group.
                hbl = true;
                break;
            } else if trimmed_line.starts_with(line_start)
                || comment_style(trimmed_line, false) == style
            {
                first_group_ending += compute_len(&orig[first_group_ending..], line);
            } else {
                // A comment of another style starts here.
                break;
            }
        }
        (hbl, first_group_ending)
    }

    let (has_bare_lines, first_group_ending) = match style {
        CommentStyle::DoubleSlash | CommentStyle::TripleSlash | CommentStyle::Doc => {
            let line_start = style.line_start().trim_start();
            consume_same_line_comments(style, orig, line_start)
        }
        CommentStyle::Custom(opener) => {
            let trimmed_opener = opener.trim_end();
            consume_same_line_comments(style, orig, trimmed_opener)
        }
        // for a block comment, search for the closing symbol
        CommentStyle::DoubleBullet | CommentStyle::SingleBullet | CommentStyle::Exclamation => {
            let closer = style.closer().trim_start();
            // Number of closers still to be seen before the scan stops.
            let mut count = orig.matches(closer).count();
            let mut closing_symbol_offset = 0;
            let mut hbl = false;
            let mut first = true;
            for line in orig.lines() {
                closing_symbol_offset += compute_len(&orig[closing_symbol_offset..], line);
                let mut trimmed_line = line.trim_start();
                // A "bare" line is one that starts with none of `*`, `//` and `/*`.
                if !trimmed_line.starts_with('*')
                    && !trimmed_line.starts_with("//")
                    && !trimmed_line.starts_with("/*")
                {
                    hbl = true;
                }

                // Remove opener from consideration when searching for closer
                if first {
                    let opener = style.opener().trim_end();
                    trimmed_line = &trimmed_line[opener.len()..];
                    first = false;
                }
                if trimmed_line.ends_with(closer) {
                    count -= 1;
                    if count == 0 {
                        break;
                    }
                }
            }
            (hbl, closing_symbol_offset)
        }
    };

    let (first_group, rest) = orig.split_at(first_group_ending);
    // Pick the least invasive rewrite that the configuration allows.
    let rewritten_first_group =
        if !config.normalize_comments() && has_bare_lines && style.is_block_comment() {
            trim_left_preserve_layout(first_group, shape.indent, config)?
        } else if !config.normalize_comments()
            && !config.wrap_comments()
            && !config.format_code_in_doc_comments()
        {
            light_rewrite_comment(first_group, shape.indent, config, is_doc_comment)
        } else {
            rewrite_comment_inner(
                first_group,
                block_style,
                style,
                shape,
                config,
                is_doc_comment || style.is_doc_comment(),
            )?
        };
    if rest.is_empty() {
        Some(rewritten_first_group)
    } else {
        // Rewrite the remaining groups recursively and append them below this one.
        identify_comment(
            rest.trim_start(),
            block_style,
            shape,
            config,
            is_doc_comment,
        )
        .map(|rest_str| {
            format!(
                "{}\n{}{}{}",
                rewritten_first_group,
                // insert back the blank line
                if has_bare_lines && style.is_line_comment() {
                    "\n"
                } else {
                    ""
                },
                shape.indent.to_string(config),
                rest_str
            )
        })
    }
}
409
/// Tells whether a fenced code block holds Rust code, judging by the attributes on its fence.
enum CodeBlockAttribute {
    Rust,
    NotRust,
}

impl CodeBlockAttribute {
    /// Parses a comma separated attribute list. The block counts as Rust only when every
    /// attribute (after trimming) is empty or one of the recognized Rust attributes; any
    /// other attribute, including `ignore`, `compile_fail` and `text`, makes it `NotRust`.
    /// See <https://doc.rust-lang.org/rustdoc/print.html#attributes>
    fn new(attributes: &str) -> CodeBlockAttribute {
        const RUST_ATTRIBUTES: [&str; 7] = [
            "",
            "rust",
            "should_panic",
            "no_run",
            "edition2015",
            "edition2018",
            "edition2021",
        ];

        let only_rust_attributes = attributes
            .split(',')
            .map(str::trim)
            .all(|attribute| RUST_ATTRIBUTES.contains(&attribute));

        if only_rust_attributes {
            CodeBlockAttribute::Rust
        } else {
            CodeBlockAttribute::NotRust
        }
    }
}
432
433/// Block that is formatted as an item.
434///
435/// An item starts with either a star `*` or a dash `-`. Different level of indentation are
436/// handled by shrinking the shape accordingly.
437struct ItemizedBlock {
438 /// the lines that are identified as part of an itemized block
439 lines: Vec<String>,
440 /// the number of whitespaces up to the item sigil
441 indent: usize,
442 /// the string that marks the start of an item
443 opener: String,
444 /// sequence of whitespaces to prefix new lines that are part of the item
445 line_start: String,
446}
447
448impl ItemizedBlock {
449 /// Returns `true` if the line is formatted as an item
450 fn is_itemized_line(line: &str) -> bool {
451 let trimmed = line.trim_start();
452 trimmed.starts_with("* ") || trimmed.starts_with("- ")
453 }
454
455 /// Creates a new ItemizedBlock described with the given line.
456 /// The `is_itemized_line` needs to be called first.
457 fn new(line: &str) -> ItemizedBlock {
458 let space_to_sigil = line.chars().take_while(|c| c.is_whitespace()).count();
459 let indent = space_to_sigil + 2;
460 ItemizedBlock {
461 lines: vec![line[indent..].to_string()],
462 indent,
463 opener: line[..indent].to_string(),
464 line_start: " ".repeat(indent),
465 }
466 }
467
468 /// Returns a `StringFormat` used for formatting the content of an item.
469 fn create_string_format<'a>(&'a self, fmt: &'a StringFormat<'_>) -> StringFormat<'a> {
470 StringFormat {
471 opener: "",
472 closer: "",
473 line_start: "",
474 line_end: "",
475 shape: Shape::legacy(fmt.shape.width.saturating_sub(self.indent), Indent::empty()),
476 trim_end: true,
477 config: fmt.config,
478 }
479 }
480
481 /// Returns `true` if the line is part of the current itemized block.
482 /// If it is, then it is added to the internal lines list.
483 fn add_line(&mut self, line: &str) -> bool {
484 if !ItemizedBlock::is_itemized_line(line)
485 && self.indent <= line.chars().take_while(|c| c.is_whitespace()).count()
486 {
487 self.lines.push(line.to_string());
488 return true;
489 }
490 false
491 }
492
493 /// Returns the block as a string, with each line trimmed at the start.
494 fn trimmed_block_as_string(&self) -> String {
495 self.lines
496 .iter()
497 .map(|line| format!("{} ", line.trim_start()))
498 .collect::<String>()
499 }
500
501 /// Returns the block as a string under its original form.
502 fn original_block_as_string(&self) -> String {
503 self.lines.join("\n")
504 }
505}
506
/// Accumulates the rewritten form of a single comment, one line at a time.
///
/// Lines are fed through `handle_line` and the final text is obtained with `finish`.
struct CommentRewrite<'a> {
    /// The rewritten comment built so far; starts out holding the opener.
    result: String,
    /// Lines of the fenced code block currently being read.
    code_block_buffer: String,
    /// Whether the previously rewritten line was wrapped over several lines.
    is_prev_line_multi_line: bool,
    /// Attribute of the fenced code block currently being read, if any.
    code_block_attr: Option<CodeBlockAttribute>,
    /// The itemized block currently being read, if any.
    item_block: Option<ItemizedBlock>,
    /// Newline with indentation, followed by the line prefix of the comment style.
    comment_line_separator: String,
    /// Newline with indentation.
    indent_str: String,
    /// Width of the shape minus the lengths of the opener and the closer.
    max_width: usize,
    /// Indentation of the shape the comment is rewritten into.
    fmt_indent: Indent,
    /// Format used when wrapping lines.
    fmt: StringFormat<'a>,

    opener: String,
    closer: String,
    line_start: String,
    style: CommentStyle<'a>,
}

impl<'a> CommentRewrite<'a> {
    /// Creates a rewriter for `orig` and seeds the result buffer with the comment opener.
    ///
    /// With `block_style` the comment is emitted as a `/* */` block; otherwise the style is
    /// detected from `orig`, honouring `normalize_comments`.
    fn new(
        orig: &'a str,
        block_style: bool,
        shape: Shape,
        config: &'a Config,
    ) -> CommentRewrite<'a> {
        let ((opener, closer, line_start), style) = if block_style {
            (
                CommentStyle::SingleBullet.to_str_tuplet(),
                CommentStyle::SingleBullet,
            )
        } else {
            let style = comment_style(orig, config.normalize_comments());
            (style.to_str_tuplet(), style)
        };

        // Falls back to a width of 1 when the opener and closer do not fit in the shape.
        let max_width = shape
            .width
            .checked_sub(closer.len() + opener.len())
            .unwrap_or(1);
        let indent_str = shape.indent.to_string_with_newline(config).to_string();

        let mut cr = CommentRewrite {
            result: String::with_capacity(orig.len() * 2),
            code_block_buffer: String::with_capacity(128),
            is_prev_line_multi_line: false,
            code_block_attr: None,
            item_block: None,
            comment_line_separator: format!("{}{}", indent_str, line_start),
            max_width,
            indent_str,
            fmt_indent: shape.indent,

            fmt: StringFormat {
                opener: "",
                closer: "",
                line_start,
                line_end: "",
                shape: Shape::legacy(max_width, shape.indent),
                trim_end: true,
                config,
            },

            opener: opener.to_owned(),
            closer: closer.to_owned(),
            line_start: line_start.to_owned(),
            style,
        };
        cr.result.push_str(opener);
        cr
    }

    /// Joins the lines of `s` with `sep`. When the next line is empty, the separator is
    /// written without its trailing whitespace. Nothing is appended after the last line.
    fn join_block(s: &str, sep: &str) -> String {
        let mut result = String::with_capacity(s.len() + 128);
        let mut iter = s.lines().peekable();
        while let Some(line) = iter.next() {
            result.push_str(line);
            result.push_str(match iter.peek() {
                Some(next_line) if next_line.is_empty() => sep.trim_end(),
                Some(..) => sep,
                None => "",
            });
        }
        result
    }

    /// Check if any characters were written to the result buffer after the start of the comment.
    /// when calling [`CommentRewrite::new()`] the result buffer is initialized with the opening
    /// characters for the comment.
    fn buffer_contains_comment(&self) -> bool {
        // if self.result.len() < self.opener.len() then an empty comment is in the buffer
        // if self.result.len() > self.opener.len() then a non empty comment is in the buffer
        self.result.len() != self.opener.len()
    }

    /// Flushes any pending code block or itemized block, appends the closer and returns the
    /// rewritten comment.
    fn finish(mut self) -> String {
        if !self.code_block_buffer.is_empty() {
            // There is a code block that is not properly enclosed by backticks.
            // We will leave them untouched.
            self.result.push_str(&self.comment_line_separator);
            self.result.push_str(&Self::join_block(
                &trim_custom_comment_prefix(&self.code_block_buffer),
                &self.comment_line_separator,
            ));
        }

        if let Some(ref ib) = self.item_block {
            // the last few lines are part of an itemized block
            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
            let item_fmt = ib.create_string_format(&self.fmt);

            // only push a comment_line_separator for ItemizedBlocks if the comment is not empty
            if self.buffer_contains_comment() {
                self.result.push_str(&self.comment_line_separator);
            }

            self.result.push_str(&ib.opener);
            // Wrap the item; if wrapping fails, emit its lines as they were.
            match rewrite_string(
                &ib.trimmed_block_as_string(),
                &item_fmt,
                self.max_width.saturating_sub(ib.indent),
            ) {
                Some(s) => self.result.push_str(&Self::join_block(
                    &s,
                    &format!("{}{}", self.comment_line_separator, ib.line_start),
                )),
                None => self.result.push_str(&Self::join_block(
                    &ib.original_block_as_string(),
                    &self.comment_line_separator,
                )),
            };
        }

        self.result.push_str(&self.closer);
        if self.result.ends_with(&self.opener) && self.opener.ends_with(' ') {
            // Trailing space.
            self.result.pop();
        }

        self.result
    }

    /// Processes one line of the comment and appends its rewritten form to the result.
    ///
    /// `orig` is the whole comment, `i` the index of `line` within it, and
    /// `has_leading_whitespace` tells whether whitespace followed the comment marker.
    ///
    /// Returns `true` when the caller should stop feeding lines, which happens for a trailing
    /// blank line; returns `false` otherwise.
    fn handle_line(
        &mut self,
        orig: &'a str,
        i: usize,
        line: &'a str,
        has_leading_whitespace: bool,
    ) -> bool {
        let num_newlines = count_newlines(orig);
        let is_last = i == num_newlines;
        let needs_new_comment_line = if self.style.is_block_comment() {
            num_newlines > 0 || self.buffer_contains_comment()
        } else {
            self.buffer_contains_comment()
        };

        if let Some(ref mut ib) = self.item_block {
            // Keep collecting while the line still belongs to the current item.
            if ib.add_line(line) {
                return false;
            }
            // The item is complete: rewrite it, then process `line` as a regular line below.
            self.is_prev_line_multi_line = false;
            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
            let item_fmt = ib.create_string_format(&self.fmt);

            // only push a comment_line_separator if we need to start a new comment line
            if needs_new_comment_line {
                self.result.push_str(&self.comment_line_separator);
            }

            self.result.push_str(&ib.opener);
            match rewrite_string(
                &ib.trimmed_block_as_string(),
                &item_fmt,
                self.max_width.saturating_sub(ib.indent),
            ) {
                Some(s) => self.result.push_str(&Self::join_block(
                    &s,
                    &format!("{}{}", self.comment_line_separator, ib.line_start),
                )),
                None => self.result.push_str(&Self::join_block(
                    &ib.original_block_as_string(),
                    &self.comment_line_separator,
                )),
            };
        } else if self.code_block_attr.is_some() {
            if line.starts_with("```") {
                // Closing fence: emit the buffered code, formatted when it is Rust and
                // `format_code_in_doc_comments` is enabled.
                let code_block = match self.code_block_attr.as_ref().unwrap() {
                    CodeBlockAttribute::Rust
                        if self.fmt.config.format_code_in_doc_comments()
                            && !self.code_block_buffer.is_empty() =>
                    {
                        let mut config = self.fmt.config.clone();
                        config.set().wrap_comments(false);
                        if let Some(s) =
                            crate::format_code_block(&self.code_block_buffer, &config, false)
                        {
                            trim_custom_comment_prefix(&s.snippet)
                        } else {
                            trim_custom_comment_prefix(&self.code_block_buffer)
                        }
                    }
                    _ => trim_custom_comment_prefix(&self.code_block_buffer),
                };
                if !code_block.is_empty() {
                    self.result.push_str(&self.comment_line_separator);
                    self.result
                        .push_str(&Self::join_block(&code_block, &self.comment_line_separator));
                }
                self.code_block_buffer.clear();
                self.result.push_str(&self.comment_line_separator);
                self.result.push_str(line);
                self.code_block_attr = None;
            } else {
                // Inside the code block: buffer the line, disguising `#` lines as comments.
                self.code_block_buffer
                    .push_str(&hide_sharp_behind_comment(line));
                self.code_block_buffer.push('\n');
            }
            return false;
        }

        self.code_block_attr = None;
        self.item_block = None;
        if let Some(stripped) = line.strip_prefix("```") {
            // Opening fence: remember the attributes; the fence line itself is emitted below.
            self.code_block_attr = Some(CodeBlockAttribute::new(stripped))
        } else if self.fmt.config.wrap_comments() && ItemizedBlock::is_itemized_line(line) {
            // Start of an item: it is emitted once the item is known to be complete.
            let ib = ItemizedBlock::new(line);
            self.item_block = Some(ib);
            return false;
        }

        if self.result == self.opener {
            // First content of the comment.
            let force_leading_whitespace = &self.opener == "/* " && count_newlines(orig) == 0;
            if !has_leading_whitespace && !force_leading_whitespace && self.result.ends_with(' ') {
                self.result.pop();
            }
            if line.is_empty() {
                return false;
            }
        } else if self.is_prev_line_multi_line && !line.is_empty() {
            // Continue on the last line of the previously wrapped text.
            self.result.push(' ')
        } else if is_last && line.is_empty() {
            // trailing blank lines are unwanted
            if !self.closer.is_empty() {
                self.result.push_str(&self.indent_str);
            }
            return true;
        } else {
            self.result.push_str(&self.comment_line_separator);
            if !has_leading_whitespace && self.result.ends_with(' ') {
                self.result.pop();
            }
        }

        // Wrap only lines that are too wide and do not appear to contain a URL.
        if self.fmt.config.wrap_comments()
            && unicode_str_width(line) > self.fmt.shape.width
            && !has_url(line)
        {
            match rewrite_string(line, &self.fmt, self.max_width) {
                Some(ref s) => {
                    self.is_prev_line_multi_line = s.contains('\n');
                    self.result.push_str(s);
                }
                None if self.is_prev_line_multi_line => {
                    // We failed to put the current `line` next to the previous `line`.
                    // Remove the trailing space, then start rewrite on the next line.
                    self.result.pop();
                    self.result.push_str(&self.comment_line_separator);
                    self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
                    match rewrite_string(line, &self.fmt, self.max_width) {
                        Some(ref s) => {
                            self.is_prev_line_multi_line = s.contains('\n');
                            self.result.push_str(s);
                        }
                        None => {
                            self.is_prev_line_multi_line = false;
                            self.result.push_str(line);
                        }
                    }
                }
                None => {
                    self.is_prev_line_multi_line = false;
                    self.result.push_str(line);
                }
            }

            // After a wrapped line, the next line may continue where this one ended, so
            // the available width is reduced accordingly.
            self.fmt.shape = if self.is_prev_line_multi_line {
                // 1 = " "
                let offset = 1 + last_line_width(&self.result) - self.line_start.len();
                Shape {
                    width: self.max_width.saturating_sub(offset),
                    indent: self.fmt_indent,
                    offset: self.fmt.shape.offset + offset,
                }
            } else {
                Shape::legacy(self.max_width, self.fmt_indent)
            };
        } else {
            if line.is_empty() && self.result.ends_with(' ') && !is_last {
                // Remove space if this is an empty comment or a doc comment.
                self.result.pop();
            }
            self.result.push_str(line);
            self.fmt.shape = Shape::legacy(self.max_width, self.fmt_indent);
            self.is_prev_line_multi_line = false;
        }

        false
    }
}
816
817fn rewrite_comment_inner(
818 orig: &str,
819 block_style: bool,
820 style: CommentStyle<'_>,
821 shape: Shape,
822 config: &Config,
823 is_doc_comment: bool,
824) -> Option<String> {
825 let mut rewriter = CommentRewrite::new(orig, block_style, shape, config);
826
827 let line_breaks = count_newlines(orig.trim_end());
828 let lines = orig
829 .lines()
830 .enumerate()
831 .map(|(i, mut line)| {
832 line = trim_end_unless_two_whitespaces(line.trim_start(), is_doc_comment);
833 // Drop old closer.
834 if i == line_breaks && line.ends_with("*/") && !line.starts_with("//") {
835 line = line[..(line.len() - 2)].trim_end();
836 }
837
838 line
839 })
840 .map(|s| left_trim_comment_line(s, &style))
841 .map(|(line, has_leading_whitespace)| {
842 if orig.starts_with("/*") && line_breaks == 0 {
843 (
844 line.trim_start(),
845 has_leading_whitespace || config.normalize_comments(),
846 )
847 } else {
848 (line, has_leading_whitespace || config.normalize_comments())
849 }
850 });
851
852 for (i, (line, has_leading_whitespace)) in lines.enumerate() {
853 if rewriter.handle_line(orig, i, line, has_leading_whitespace) {
854 break;
855 }
856 }
857
858 Some(rewriter.finish())
859}
860
/// Marker prepended to hidden doc-test lines (`# ...`) so that they look like comments.
const RUSTFMT_CUSTOM_COMMENT_PREFIX: &str = "//#### ";

/// Disguises a hidden doc-test line as a comment.
///
/// A line whose trimmed form is `#` or starts with `# ` is returned with
/// `RUSTFMT_CUSTOM_COMMENT_PREFIX` prepended to the untrimmed line; any other line is
/// returned unchanged and without allocating.
fn hide_sharp_behind_comment(s: &str) -> Cow<'_, str> {
    let trimmed = s.trim();
    let is_hidden_line = trimmed == "#" || trimmed.starts_with("# ");
    if is_hidden_line {
        Cow::Owned([RUSTFMT_CUSTOM_COMMENT_PREFIX, s].concat())
    } else {
        Cow::Borrowed(s)
    }
}
871
872fn trim_custom_comment_prefix(s: &str) -> String {
873 s.lines()
874 .map(|line| {
875 let left_trimmed = line.trim_start();
876 if left_trimmed.starts_with(RUSTFMT_CUSTOM_COMMENT_PREFIX) {
877 left_trimmed.trim_start_matches(RUSTFMT_CUSTOM_COMMENT_PREFIX)
878 } else {
879 line
880 }
881 })
882 .collect::<Vec<_>>()
883 .join("\n")
884}
885
886/// Returns `true` if the given string MAY include URLs or alike.
887fn has_url(s: &str) -> bool {
888 // This function may return false positive, but should get its job done in most cases.
a2a8927a
XL
889 s.contains("https://")
890 || s.contains("http://")
891 || s.contains("ftp://")
892 || s.contains("file://")
893 || REFERENCE_LINK_URL.is_match(s)
f20569fa
XL
894}
895
896/// Given the span, rewrite the missing comment inside it if available.
897/// Note that the given span must only include comments (or leading/trailing whitespaces).
898pub(crate) fn rewrite_missing_comment(
899 span: Span,
900 shape: Shape,
901 context: &RewriteContext<'_>,
902) -> Option<String> {
903 let missing_snippet = context.snippet(span);
904 let trimmed_snippet = missing_snippet.trim();
905 // check the span starts with a comment
906 let pos = trimmed_snippet.find('/');
907 if !trimmed_snippet.is_empty() && pos.is_some() {
908 rewrite_comment(trimmed_snippet, false, shape, context.config)
909 } else {
910 Some(String::new())
911 }
912}
913
914/// Recover the missing comments in the specified span, if available.
915/// The layout of the comments will be preserved as long as it does not break the code
916/// and its total width does not exceed the max width.
917pub(crate) fn recover_missing_comment_in_span(
918 span: Span,
919 shape: Shape,
920 context: &RewriteContext<'_>,
921 used_width: usize,
922) -> Option<String> {
923 let missing_comment = rewrite_missing_comment(span, shape, context)?;
924 if missing_comment.is_empty() {
925 Some(String::new())
926 } else {
927 let missing_snippet = context.snippet(span);
928 let pos = missing_snippet.find('/')?;
929 // 1 = ` `
930 let total_width = missing_comment.len() + used_width + 1;
931 let force_new_line_before_comment =
932 missing_snippet[..pos].contains('\n') || total_width > context.config.max_width();
933 let sep = if force_new_line_before_comment {
934 shape.indent.to_string_with_newline(context.config)
935 } else {
936 Cow::from(" ")
937 };
938 Some(format!("{}{}", sep, missing_comment))
939 }
940}
941
/// Trim trailing whitespaces unless they consist of two or more whitespaces.
///
/// In a doc comment, a line ending with two spaces is a markdown hard line break, so such a
/// line is returned untouched when `is_doc_comment` is set. In every other case, including a
/// doc-comment line that ends with a single space, all trailing whitespace is removed.
fn trim_end_unless_two_whitespaces(s: &str, is_doc_comment: bool) -> &str {
    // The literal below is two spaces; a single trailing space must still be trimmed.
    if is_doc_comment && s.ends_with("  ") {
        s
    } else {
        s.trim_end()
    }
}
950
951/// Trims whitespace and aligns to indent, but otherwise does not change comments.
952fn light_rewrite_comment(
953 orig: &str,
954 offset: Indent,
955 config: &Config,
956 is_doc_comment: bool,
957) -> String {
958 let lines: Vec<&str> = orig
959 .lines()
960 .map(|l| {
961 // This is basically just l.trim(), but in the case that a line starts
962 // with `*` we want to leave one space before it, so it aligns with the
963 // `*` in `/*`.
964 let first_non_whitespace = l.find(|c| !char::is_whitespace(c));
965 let left_trimmed = if let Some(fnw) = first_non_whitespace {
966 if l.as_bytes()[fnw] == b'*' && fnw > 0 {
967 &l[fnw - 1..]
968 } else {
969 &l[fnw..]
970 }
971 } else {
972 ""
973 };
974 // Preserve markdown's double-space line break syntax in doc comment.
975 trim_end_unless_two_whitespaces(left_trimmed, is_doc_comment)
976 })
977 .collect();
978 lines.join(&format!("\n{}", offset.to_string(config)))
979}
980
981/// Trims comment characters and possibly a single space from the left of a string.
982/// Does not trim all whitespace. If a single space is trimmed from the left of the string,
983/// this function returns true.
984fn left_trim_comment_line<'a>(line: &'a str, style: &CommentStyle<'_>) -> (&'a str, bool) {
985 if line.starts_with("//! ")
986 || line.starts_with("/// ")
987 || line.starts_with("/*! ")
988 || line.starts_with("/** ")
989 {
990 (&line[4..], true)
991 } else if let CommentStyle::Custom(opener) = *style {
3c0e092e 992 if let Some(stripped) = line.strip_prefix(opener) {
94222f64 993 (stripped, true)
f20569fa
XL
994 } else {
995 (&line[opener.trim_end().len()..], false)
996 }
997 } else if line.starts_with("/* ")
998 || line.starts_with("// ")
999 || line.starts_with("//!")
1000 || line.starts_with("///")
1001 || line.starts_with("** ")
1002 || line.starts_with("/*!")
1003 || (line.starts_with("/**") && !line.starts_with("/**/"))
1004 {
1005 (&line[3..], line.chars().nth(2).unwrap() == ' ')
1006 } else if line.starts_with("/*")
1007 || line.starts_with("* ")
1008 || line.starts_with("//")
1009 || line.starts_with("**")
1010 {
1011 (&line[2..], line.chars().nth(1).unwrap() == ' ')
94222f64
XL
1012 } else if let Some(stripped) = line.strip_prefix('*') {
1013 (stripped, false)
f20569fa
XL
1014 } else {
1015 (line, line.starts_with(' '))
1016 }
1017}
1018
1019pub(crate) trait FindUncommented {
1020 fn find_uncommented(&self, pat: &str) -> Option<usize>;
cdc7bbd5 1021 fn find_last_uncommented(&self, pat: &str) -> Option<usize>;
f20569fa
XL
1022}
1023
1024impl FindUncommented for str {
1025 fn find_uncommented(&self, pat: &str) -> Option<usize> {
1026 let mut needle_iter = pat.chars();
1027 for (kind, (i, b)) in CharClasses::new(self.char_indices()) {
1028 match needle_iter.next() {
1029 None => {
1030 return Some(i - pat.len());
1031 }
1032 Some(c) => match kind {
1033 FullCodeCharKind::Normal | FullCodeCharKind::InString if b == c => {}
1034 _ => {
1035 needle_iter = pat.chars();
1036 }
1037 },
1038 }
1039 }
1040
1041 // Handle case where the pattern is a suffix of the search string
1042 match needle_iter.next() {
1043 Some(_) => None,
1044 None => Some(self.len() - pat.len()),
1045 }
1046 }
cdc7bbd5
XL
1047
1048 fn find_last_uncommented(&self, pat: &str) -> Option<usize> {
1049 if let Some(left) = self.find_uncommented(pat) {
1050 let mut result = left;
1051 // add 1 to use find_last_uncommented for &str after pat
1052 while let Some(next) = self[(result + 1)..].find_last_uncommented(pat) {
1053 result += next + 1;
1054 }
1055 Some(result)
1056 } else {
1057 None
1058 }
1059 }
f20569fa
XL
1060}
1061
1062// Returns the first byte position after the first comment. The given string
1063// is expected to be prefixed by a comment, including delimiters.
1064// Good: `/* /* inner */ outer */ code();`
1065// Bad: `code(); // hello\n world!`
1066pub(crate) fn find_comment_end(s: &str) -> Option<usize> {
1067 let mut iter = CharClasses::new(s.char_indices());
1068 for (kind, (i, _c)) in &mut iter {
1069 if kind == FullCodeCharKind::Normal || kind == FullCodeCharKind::InString {
1070 return Some(i);
1071 }
1072 }
1073
1074 // Handle case where the comment ends at the end of `s`.
1075 if iter.status == CharClassesStatus::Normal {
1076 Some(s.len())
1077 } else {
1078 None
1079 }
1080}
1081
1082/// Returns `true` if text contains any comment.
1083pub(crate) fn contains_comment(text: &str) -> bool {
1084 CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
1085}
1086
1087pub(crate) struct CharClasses<T>
1088where
1089 T: Iterator,
1090 T::Item: RichChar,
1091{
1092 base: MultiPeek<T>,
1093 status: CharClassesStatus,
1094}
1095
/// An item that carries a `char`, possibly alongside extra data such as its index.
pub(crate) trait RichChar {
    /// Returns the character carried by this item.
    fn get_char(&self) -> char;
}

impl RichChar for char {
    fn get_char(&self) -> char {
        let chr = *self;
        chr
    }
}

impl RichChar for (usize, char) {
    fn get_char(&self) -> char {
        let (_, chr) = *self;
        chr
    }
}
1111
/// Internal state of the `CharClasses` classifier.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum CharClassesStatus {
    /// Ordinary code.
    Normal,
    /// Inside a string literal.
    LitString,
    /// Right after a backslash inside a string literal.
    LitStringEscape,
    /// Inside a raw string literal delimited by the given number of `#`.
    LitRawString(u32),
    /// Reading the `#`s / opening quote of a raw string; the integer counts the `#` seen so far.
    RawStringPrefix(u32),
    /// Reading the closing `#`s of a raw string; the integer counts the `#` still expected.
    RawStringSuffix(u32),
    /// Inside a character literal.
    LitChar,
    /// Right after a backslash inside a character literal.
    LitCharEscape,
    /// Inside a block comment; the integer is the nesting depth of the comment.
    BlockComment(u32),
    /// Inside a string within a block comment; the integer is the nesting depth of the comment.
    StringInBlockComment(u32),
    /// The '/' of a comment opener has been consumed but not yet its '*'; the integer is the
    /// depth that applies once the opener is complete.
    BlockCommentOpening(u32),
    /// The '*' of a comment closer has been consumed but not yet its '/'; the integer is the
    /// depth that applies once the closer is complete.
    BlockCommentClosing(u32),
    /// Inside a line comment.
    LineComment,
}
1139
/// Coarse classification of a character: functional code or comment.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum CodeCharKind {
    Normal,
    Comment,
}

/// Fine-grained classification of a character: functional code or comment, with the opening
/// and closing of comments and strings marked explicitly to ease chunking of tagged text.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub(crate) enum FullCodeCharKind {
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
    /// Start of a multiline string inside a comment
    StartStringCommented,
    /// End of a multiline string inside a comment
    EndStringCommented,
    /// Inside a commented string
    InStringCommented,
    /// Start of a multiline string
    StartString,
    /// End of a multiline string
    EndString,
    /// Inside a string.
    InString,
}

impl FullCodeCharKind {
    /// Returns `true` for every kind that belongs to a comment, delimiters included.
    pub(crate) fn is_comment(self) -> bool {
        matches!(
            self,
            Self::StartComment
                | Self::InComment
                | Self::EndComment
                | Self::StartStringCommented
                | Self::InStringCommented
                | Self::EndStringCommented
        )
    }

    /// Returns `true` if the character is inside a comment, i.e. it is neither the first nor
    /// the last character of that comment.
    pub(crate) fn inside_comment(self) -> bool {
        matches!(
            self,
            Self::InComment
                | Self::StartStringCommented
                | Self::InStringCommented
                | Self::EndStringCommented
        )
    }

    /// Returns `true` for `InString` and `StartString`.
    pub(crate) fn is_string(self) -> bool {
        matches!(self, Self::InString | Self::StartString)
    }

    /// Returns `true` for `InStringCommented` and `StartStringCommented`.
    pub(crate) fn is_commented_string(self) -> bool {
        matches!(self, Self::InStringCommented | Self::StartStringCommented)
    }

    /// Collapses the fine-grained kind into the coarse code/comment distinction.
    fn to_codecharkind(self) -> CodeCharKind {
        match self.is_comment() {
            true => CodeCharKind::Comment,
            false => CodeCharKind::Normal,
        }
    }
}
1216
1217impl<T> CharClasses<T>
1218where
1219 T: Iterator,
1220 T::Item: RichChar,
1221{
1222 pub(crate) fn new(base: T) -> CharClasses<T> {
1223 CharClasses {
1224 base: multipeek(base),
1225 status: CharClassesStatus::Normal,
1226 }
1227 }
1228}
1229
1230fn is_raw_string_suffix<T>(iter: &mut MultiPeek<T>, count: u32) -> bool
1231where
1232 T: Iterator,
1233 T::Item: RichChar,
1234{
1235 for _ in 0..count {
1236 match iter.peek() {
1237 Some(c) if c.get_char() == '#' => continue,
1238 _ => return false,
1239 }
1240 }
1241 true
1242}
1243
1244impl<T> Iterator for CharClasses<T>
1245where
1246 T: Iterator,
1247 T::Item: RichChar,
1248{
1249 type Item = (FullCodeCharKind, T::Item);
1250
1251 fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
1252 let item = self.base.next()?;
1253 let chr = item.get_char();
1254 let mut char_kind = FullCodeCharKind::Normal;
1255 self.status = match self.status {
1256 CharClassesStatus::LitRawString(sharps) => {
1257 char_kind = FullCodeCharKind::InString;
1258 match chr {
1259 '"' => {
1260 if sharps == 0 {
1261 char_kind = FullCodeCharKind::Normal;
1262 CharClassesStatus::Normal
1263 } else if is_raw_string_suffix(&mut self.base, sharps) {
1264 CharClassesStatus::RawStringSuffix(sharps)
1265 } else {
1266 CharClassesStatus::LitRawString(sharps)
1267 }
1268 }
1269 _ => CharClassesStatus::LitRawString(sharps),
1270 }
1271 }
1272 CharClassesStatus::RawStringPrefix(sharps) => {
1273 char_kind = FullCodeCharKind::InString;
1274 match chr {
1275 '#' => CharClassesStatus::RawStringPrefix(sharps + 1),
1276 '"' => CharClassesStatus::LitRawString(sharps),
1277 _ => CharClassesStatus::Normal, // Unreachable.
1278 }
1279 }
1280 CharClassesStatus::RawStringSuffix(sharps) => {
1281 match chr {
1282 '#' => {
1283 if sharps == 1 {
1284 CharClassesStatus::Normal
1285 } else {
1286 char_kind = FullCodeCharKind::InString;
1287 CharClassesStatus::RawStringSuffix(sharps - 1)
1288 }
1289 }
1290 _ => CharClassesStatus::Normal, // Unreachable
1291 }
1292 }
1293 CharClassesStatus::LitString => {
1294 char_kind = FullCodeCharKind::InString;
1295 match chr {
1296 '"' => CharClassesStatus::Normal,
1297 '\\' => CharClassesStatus::LitStringEscape,
1298 _ => CharClassesStatus::LitString,
1299 }
1300 }
1301 CharClassesStatus::LitStringEscape => {
1302 char_kind = FullCodeCharKind::InString;
1303 CharClassesStatus::LitString
1304 }
1305 CharClassesStatus::LitChar => match chr {
1306 '\\' => CharClassesStatus::LitCharEscape,
1307 '\'' => CharClassesStatus::Normal,
1308 _ => CharClassesStatus::LitChar,
1309 },
1310 CharClassesStatus::LitCharEscape => CharClassesStatus::LitChar,
1311 CharClassesStatus::Normal => match chr {
1312 'r' => match self.base.peek().map(RichChar::get_char) {
1313 Some('#') | Some('"') => {
1314 char_kind = FullCodeCharKind::InString;
1315 CharClassesStatus::RawStringPrefix(0)
1316 }
1317 _ => CharClassesStatus::Normal,
1318 },
1319 '"' => {
1320 char_kind = FullCodeCharKind::InString;
1321 CharClassesStatus::LitString
1322 }
1323 '\'' => {
1324 // HACK: Work around mut borrow.
1325 match self.base.peek() {
1326 Some(next) if next.get_char() == '\\' => {
1327 self.status = CharClassesStatus::LitChar;
1328 return Some((char_kind, item));
1329 }
1330 _ => (),
1331 }
1332
1333 match self.base.peek() {
1334 Some(next) if next.get_char() == '\'' => CharClassesStatus::LitChar,
1335 _ => CharClassesStatus::Normal,
1336 }
1337 }
1338 '/' => match self.base.peek() {
1339 Some(next) if next.get_char() == '*' => {
1340 self.status = CharClassesStatus::BlockCommentOpening(1);
1341 return Some((FullCodeCharKind::StartComment, item));
1342 }
1343 Some(next) if next.get_char() == '/' => {
1344 self.status = CharClassesStatus::LineComment;
1345 return Some((FullCodeCharKind::StartComment, item));
1346 }
1347 _ => CharClassesStatus::Normal,
1348 },
1349 _ => CharClassesStatus::Normal,
1350 },
1351 CharClassesStatus::StringInBlockComment(deepness) => {
1352 char_kind = FullCodeCharKind::InStringCommented;
1353 if chr == '"' {
1354 CharClassesStatus::BlockComment(deepness)
cdc7bbd5
XL
1355 } else if chr == '*' && self.base.peek().map(RichChar::get_char) == Some('/') {
1356 char_kind = FullCodeCharKind::InComment;
1357 CharClassesStatus::BlockCommentClosing(deepness - 1)
f20569fa
XL
1358 } else {
1359 CharClassesStatus::StringInBlockComment(deepness)
1360 }
1361 }
1362 CharClassesStatus::BlockComment(deepness) => {
1363 assert_ne!(deepness, 0);
1364 char_kind = FullCodeCharKind::InComment;
1365 match self.base.peek() {
1366 Some(next) if next.get_char() == '/' && chr == '*' => {
1367 CharClassesStatus::BlockCommentClosing(deepness - 1)
1368 }
1369 Some(next) if next.get_char() == '*' && chr == '/' => {
1370 CharClassesStatus::BlockCommentOpening(deepness + 1)
1371 }
1372 _ if chr == '"' => CharClassesStatus::StringInBlockComment(deepness),
1373 _ => self.status,
1374 }
1375 }
1376 CharClassesStatus::BlockCommentOpening(deepness) => {
1377 assert_eq!(chr, '*');
1378 self.status = CharClassesStatus::BlockComment(deepness);
1379 return Some((FullCodeCharKind::InComment, item));
1380 }
1381 CharClassesStatus::BlockCommentClosing(deepness) => {
1382 assert_eq!(chr, '/');
1383 if deepness == 0 {
1384 self.status = CharClassesStatus::Normal;
1385 return Some((FullCodeCharKind::EndComment, item));
1386 } else {
1387 self.status = CharClassesStatus::BlockComment(deepness);
1388 return Some((FullCodeCharKind::InComment, item));
1389 }
1390 }
1391 CharClassesStatus::LineComment => match chr {
1392 '\n' => {
1393 self.status = CharClassesStatus::Normal;
1394 return Some((FullCodeCharKind::EndComment, item));
1395 }
1396 _ => {
1397 self.status = CharClassesStatus::LineComment;
1398 return Some((FullCodeCharKind::InComment, item));
1399 }
1400 },
1401 };
1402 Some((char_kind, item))
1403 }
1404}
1405
1406/// An iterator over the lines of a string, paired with the char kind at the
1407/// end of the line.
1408pub(crate) struct LineClasses<'a> {
1409 base: iter::Peekable<CharClasses<std::str::Chars<'a>>>,
1410 kind: FullCodeCharKind,
1411}
1412
1413impl<'a> LineClasses<'a> {
1414 pub(crate) fn new(s: &'a str) -> Self {
1415 LineClasses {
1416 base: CharClasses::new(s.chars()).peekable(),
1417 kind: FullCodeCharKind::Normal,
1418 }
1419 }
1420}
1421
1422impl<'a> Iterator for LineClasses<'a> {
1423 type Item = (FullCodeCharKind, String);
1424
1425 fn next(&mut self) -> Option<Self::Item> {
1426 self.base.peek()?;
1427
1428 let mut line = String::new();
1429
1430 let start_kind = match self.base.peek() {
1431 Some((kind, _)) => *kind,
1432 None => unreachable!(),
1433 };
1434
3c0e092e 1435 for (kind, c) in self.base.by_ref() {
f20569fa
XL
1436 // needed to set the kind of the ending character on the last line
1437 self.kind = kind;
1438 if c == '\n' {
1439 self.kind = match (start_kind, kind) {
1440 (FullCodeCharKind::Normal, FullCodeCharKind::InString) => {
1441 FullCodeCharKind::StartString
1442 }
1443 (FullCodeCharKind::InString, FullCodeCharKind::Normal) => {
1444 FullCodeCharKind::EndString
1445 }
1446 (FullCodeCharKind::InComment, FullCodeCharKind::InStringCommented) => {
1447 FullCodeCharKind::StartStringCommented
1448 }
1449 (FullCodeCharKind::InStringCommented, FullCodeCharKind::InComment) => {
1450 FullCodeCharKind::EndStringCommented
1451 }
1452 _ => kind,
1453 };
1454 break;
1455 }
1456 line.push(c);
1457 }
1458
1459 // Workaround for CRLF newline.
1460 if line.ends_with('\r') {
1461 line.pop();
1462 }
1463
1464 Some((self.kind, line))
1465 }
1466}
1467
1468/// Iterator over functional and commented parts of a string. Any part of a string is either
1469/// functional code, either *one* block comment, either *one* line comment. Whitespace between
1470/// comments is functional code. Line comments contain their ending newlines.
1471struct UngroupedCommentCodeSlices<'a> {
1472 slice: &'a str,
1473 iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
1474}
1475
1476impl<'a> UngroupedCommentCodeSlices<'a> {
1477 fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
1478 UngroupedCommentCodeSlices {
1479 slice: code,
1480 iter: CharClasses::new(code.char_indices()).peekable(),
1481 }
1482 }
1483}
1484
1485impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
1486 type Item = (CodeCharKind, usize, &'a str);
1487
1488 fn next(&mut self) -> Option<Self::Item> {
1489 let (kind, (start_idx, _)) = self.iter.next()?;
1490 match kind {
1491 FullCodeCharKind::Normal | FullCodeCharKind::InString => {
1492 // Consume all the Normal code
1493 while let Some(&(char_kind, _)) = self.iter.peek() {
1494 if char_kind.is_comment() {
1495 break;
1496 }
1497 let _ = self.iter.next();
1498 }
1499 }
1500 FullCodeCharKind::StartComment => {
1501 // Consume the whole comment
1502 loop {
1503 match self.iter.next() {
1504 Some((kind, ..)) if kind.inside_comment() => continue,
1505 _ => break,
1506 }
1507 }
1508 }
1509 _ => panic!(),
1510 }
1511 let slice = match self.iter.peek() {
1512 Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
1513 None => &self.slice[start_idx..],
1514 };
1515 Some((
1516 if kind.is_comment() {
1517 CodeCharKind::Comment
1518 } else {
1519 CodeCharKind::Normal
1520 },
1521 start_idx,
1522 slice,
1523 ))
1524 }
1525}
1526
1527/// Iterator over an alternating sequence of functional and commented parts of
1528/// a string. The first item is always a, possibly zero length, subslice of
1529/// functional text. Line style comments contain their ending newlines.
1530pub(crate) struct CommentCodeSlices<'a> {
1531 slice: &'a str,
1532 last_slice_kind: CodeCharKind,
1533 last_slice_end: usize,
1534}
1535
1536impl<'a> CommentCodeSlices<'a> {
1537 pub(crate) fn new(slice: &'a str) -> CommentCodeSlices<'a> {
1538 CommentCodeSlices {
1539 slice,
1540 last_slice_kind: CodeCharKind::Comment,
1541 last_slice_end: 0,
1542 }
1543 }
1544}
1545
1546impl<'a> Iterator for CommentCodeSlices<'a> {
1547 type Item = (CodeCharKind, usize, &'a str);
1548
1549 fn next(&mut self) -> Option<Self::Item> {
1550 if self.last_slice_end == self.slice.len() {
1551 return None;
1552 }
1553
1554 let mut sub_slice_end = self.last_slice_end;
1555 let mut first_whitespace = None;
1556 let subslice = &self.slice[self.last_slice_end..];
1557 let mut iter = CharClasses::new(subslice.char_indices());
1558
1559 for (kind, (i, c)) in &mut iter {
1560 let is_comment_connector = self.last_slice_kind == CodeCharKind::Normal
1561 && &subslice[..2] == "//"
1562 && [' ', '\t'].contains(&c);
1563
1564 if is_comment_connector && first_whitespace.is_none() {
1565 first_whitespace = Some(i);
1566 }
1567
1568 if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
1569 let last_index = match first_whitespace {
1570 Some(j) => j,
1571 None => i,
1572 };
1573 sub_slice_end = self.last_slice_end + last_index;
1574 break;
1575 }
1576
1577 if !is_comment_connector {
1578 first_whitespace = None;
1579 }
1580 }
1581
1582 if let (None, true) = (iter.next(), sub_slice_end == self.last_slice_end) {
1583 // This was the last subslice.
1584 sub_slice_end = match first_whitespace {
1585 Some(i) => self.last_slice_end + i,
1586 None => self.slice.len(),
1587 };
1588 }
1589
1590 let kind = match self.last_slice_kind {
1591 CodeCharKind::Comment => CodeCharKind::Normal,
1592 CodeCharKind::Normal => CodeCharKind::Comment,
1593 };
1594 let res = (
1595 kind,
1596 self.last_slice_end,
1597 &self.slice[self.last_slice_end..sub_slice_end],
1598 );
1599 self.last_slice_end = sub_slice_end;
1600 self.last_slice_kind = kind;
1601
1602 Some(res)
1603 }
1604}
1605
1606/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
1607/// (if it fits in the width/offset, else return `None`), else return `new`
1608pub(crate) fn recover_comment_removed(
1609 new: String,
1610 span: Span,
1611 context: &RewriteContext<'_>,
1612) -> Option<String> {
1613 let snippet = context.snippet(span);
1614 if snippet != new && changed_comment_content(snippet, &new) {
1615 // We missed some comments. Warn and keep the original text.
1616 if context.config.error_on_unformatted() {
1617 context.report.append(
1618 context.parse_sess.span_to_filename(span),
1619 vec![FormattingError::from_span(
1620 span,
3c0e092e 1621 context.parse_sess,
f20569fa
XL
1622 ErrorKind::LostComment,
1623 )],
1624 );
1625 }
1626 Some(snippet.to_owned())
1627 } else {
1628 Some(new)
1629 }
1630}
1631
1632pub(crate) fn filter_normal_code(code: &str) -> String {
1633 let mut buffer = String::with_capacity(code.len());
1634 LineClasses::new(code).for_each(|(kind, line)| match kind {
1635 FullCodeCharKind::Normal
1636 | FullCodeCharKind::StartString
1637 | FullCodeCharKind::InString
1638 | FullCodeCharKind::EndString => {
1639 buffer.push_str(&line);
1640 buffer.push('\n');
1641 }
1642 _ => (),
1643 });
1644 if !code.ends_with('\n') && buffer.ends_with('\n') {
1645 buffer.pop();
1646 }
1647 buffer
1648}
1649
1650/// Returns `true` if the two strings of code have the same payload of comments.
1651/// The payload of comments is everything in the string except:
1652/// - actual code (not comments),
1653/// - comment start/end marks,
1654/// - whitespace,
1655/// - '*' at the beginning of lines in block comments.
1656fn changed_comment_content(orig: &str, new: &str) -> bool {
1657 // Cannot write this as a fn since we cannot return types containing closures.
1658 let code_comment_content = |code| {
1659 let slices = UngroupedCommentCodeSlices::new(code);
1660 slices
1661 .filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
1662 .flat_map(|(_, _, s)| CommentReducer::new(s))
1663 };
1664 let res = code_comment_content(orig).ne(code_comment_content(new));
1665 debug!(
1666 "comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
1667 res,
1668 orig,
1669 new,
1670 code_comment_content(orig).collect::<String>(),
1671 code_comment_content(new).collect::<String>()
1672 );
1673 res
1674}
1675
/// Iterator over the 'payload' characters of a comment.
/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
/// for example).
struct CommentReducer<'a> {
    /// Whether the comment is a block comment (starts with `/*`).
    is_block: bool,
    /// Whether the next character read is still part of a line's leading decoration.
    at_start_line: bool,
    /// Characters of the comment with its start/end marks removed.
    iter: std::str::Chars<'a>,
}

impl<'a> CommentReducer<'a> {
    /// Creates a reducer for `comment`, which must start with a comment opener.
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block,
            // There are no supplementary '*' on the first line.
            at_start_line: false,
            iter: comment.chars(),
        }
    }
}

impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;

    /// Returns the next non-whitespace payload character.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = self.iter.next()?;
            if self.is_block && self.at_start_line {
                while c.is_whitespace() {
                    c = self.iter.next()?;
                }
                // Ignore leading '*'.
                if c == '*' {
                    c = self.iter.next()?;
                }
                // The decoration of this line has been consumed. Unless the line ends right
                // here, everything up to the next newline is payload, so a '*' in the middle
                // of a line must no longer be dropped.
                self.at_start_line = c == '\n';
            } else if c == '\n' {
                self.at_start_line = true;
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
}

/// Strips the opening mark (`///`, `//!`, `//`, `/**`, `/*!` or `/*`) from `comment` and,
/// for block comments, the closing `*/` if present.
///
/// Panics if `comment` does not start with a comment opener.
fn remove_comment_header(comment: &str) -> &str {
    if let Some(rest) = comment
        .strip_prefix("///")
        .or_else(|| comment.strip_prefix("//!"))
    {
        return rest;
    }
    if let Some(rest) = comment.strip_prefix("//") {
        return rest;
    }

    // `/**/` is an empty plain block comment, not a doc comment.
    let is_doc_block = (comment.starts_with("/**") && !comment.starts_with("/**/"))
        || comment.starts_with("/*!");
    let opener_len = if is_doc_block {
        3
    } else {
        assert!(
            comment.starts_with("/*"),
            "string '{}' is not a comment",
            comment
        );
        2
    };

    // Only drop the closer when it is really there: blindly cutting two bytes would panic on
    // inputs such as `/*!` and could truncate (or split a multi-byte character of) an
    // unterminated comment.
    let body = &comment[opener_len..];
    body.strip_suffix("*/").unwrap_or(body)
}
1741
1742#[cfg(test)]
1743mod test {
1744 use super::*;
1745 use crate::shape::{Indent, Shape};
1746
#[test]
fn char_classes() {
    // An empty line comment followed by a blank line.
    let classified: Vec<_> = CharClasses::new("//\n\n".chars()).collect();

    let expected = vec![
        (FullCodeCharKind::StartComment, '/'),
        (FullCodeCharKind::InComment, '/'),
        (FullCodeCharKind::EndComment, '\n'),
        (FullCodeCharKind::Normal, '\n'),
    ];
    assert_eq!(expected, classified);
}
1757
#[test]
fn comment_code_slices() {
    // Code, then a block comment, then code again.
    let input = "code(); /* test */ 1 + 1";
    let slices: Vec<_> = CommentCodeSlices::new(input).collect();

    let expected = vec![
        (CodeCharKind::Normal, 0, "code(); "),
        (CodeCharKind::Comment, 8, "/* test */"),
        (CodeCharKind::Normal, 18, " 1 + 1"),
    ];
    assert_eq!(expected, slices);
}
1771
#[test]
fn comment_code_slices_two() {
    // Input starting with a comment: the first slice is an empty piece of code.
    let input = "// comment\n test();";
    let slices: Vec<_> = CommentCodeSlices::new(input).collect();

    let expected = vec![
        (CodeCharKind::Normal, 0, ""),
        (CodeCharKind::Comment, 0, "// comment\n"),
        (CodeCharKind::Normal, 11, " test();"),
    ];
    assert_eq!(expected, slices);
}
1788
#[test]
fn comment_code_slices_three() {
    // Two line comments separated only by indentation are reported as a single comment slice.
    // The second comment is indented by four spaces: "1 " (2 bytes) + "// comment\n" (11) +
    // four spaces + "// comment2\n" (12) puts the trailing code slice at byte offset 29, which
    // is what the last assertion expects.
    let input = "1 // comment\n    // comment2\n\n";
    let mut iter = CommentCodeSlices::new(input);

    assert_eq!((CodeCharKind::Normal, 0, "1 "), iter.next().unwrap());
    assert_eq!(
        (CodeCharKind::Comment, 2, "// comment\n    // comment2\n"),
        iter.next().unwrap()
    );
    assert_eq!((CodeCharKind::Normal, 29, "\n"), iter.next().unwrap());
    assert_eq!(None, iter.next());
}
1802
// Exercises `rewrite_comment` with two configurations: one enabling both `wrap_comments` and
// `normalize_comments`, and one enabling `wrap_comments` only. Each case passes a comment, a
// flag and a `Shape`, then compares the rewritten text with an expected literal.
// NOTE(review): runs of spaces inside the string literals below look collapsed in this copy
// (e.g. the `Indent::new(0, 12)` case presumably expects an aligned continuation line);
// confirm the exact whitespace against the upstream file before relying on these literals.
1803 #[test]
1804 #[rustfmt::skip]
1805 fn format_doc_comments() {
1806 let mut wrap_normalize_config: crate::config::Config = Default::default();
1807 wrap_normalize_config.set().wrap_comments(true);
1808 wrap_normalize_config.set().normalize_comments(true);
1809
1810 let mut wrap_config: crate::config::Config = Default::default();
1811 wrap_config.set().wrap_comments(true);
1812
1813 let comment = rewrite_comment(" //test",
1814 true,
1815 Shape::legacy(100, Indent::new(0, 100)),
1816 &wrap_normalize_config).unwrap();
1817 assert_eq!("/* test */", comment);
1818
1819 let comment = rewrite_comment("// comment on a",
1820 false,
1821 Shape::legacy(10, Indent::empty()),
1822 &wrap_normalize_config).unwrap();
1823 assert_eq!("// comment\n// on a", comment);
1824
1825 let comment = rewrite_comment("// A multi line comment\n // between args.",
1826 false,
1827 Shape::legacy(60, Indent::new(0, 12)),
1828 &wrap_normalize_config).unwrap();
1829 assert_eq!("// A multi line comment\n // between args.", comment);
1830
1831 let input = "// comment";
1832 let expected =
1833 "/* comment */";
1834 let comment = rewrite_comment(input,
1835 true,
1836 Shape::legacy(9, Indent::new(0, 69)),
1837 &wrap_normalize_config).unwrap();
1838 assert_eq!(expected, comment);
1839
1840 let comment = rewrite_comment("/* trimmed */",
1841 true,
1842 Shape::legacy(100, Indent::new(0, 100)),
1843 &wrap_normalize_config).unwrap();
1844 assert_eq!("/* trimmed */", comment);
1845
1846 // Check that different comment style are properly recognised.
1847 let comment = rewrite_comment(r#"/// test1
1848 /// test2
1849 /*
1850 * test3
1851 */"#,
1852 false,
1853 Shape::legacy(100, Indent::new(0, 0)),
1854 &wrap_normalize_config).unwrap();
1855 assert_eq!("/// test1\n/// test2\n// test3", comment);
1856
1857 // Check that the blank line marks the end of a commented paragraph.
1858 let comment = rewrite_comment(r#"// test1
1859
1860 // test2"#,
1861 false,
1862 Shape::legacy(100, Indent::new(0, 0)),
1863 &wrap_normalize_config).unwrap();
1864 assert_eq!("// test1\n\n// test2", comment);
1865
1866 // Check that the blank line marks the end of a custom-commented paragraph.
1867 let comment = rewrite_comment(r#"//@ test1
1868
1869 //@ test2"#,
1870 false,
1871 Shape::legacy(100, Indent::new(0, 0)),
1872 &wrap_normalize_config).unwrap();
1873 assert_eq!("//@ test1\n\n//@ test2", comment);
1874
1875 // Check that bare lines are just indented but otherwise left unchanged.
1876 let comment = rewrite_comment(r#"// test1
1877 /*
1878 a bare line!
1879
1880 another bare line!
1881 */"#,
1882 false,
1883 Shape::legacy(100, Indent::new(0, 0)),
1884 &wrap_config).unwrap();
1885 assert_eq!("// test1\n/*\n a bare line!\n\n another bare line!\n*/", comment);
1886 }
1887
1888 // This is probably intended to be a non-test fn, but it is not used.
1889 // We should keep this around unless it helps us test stuff to remove it.
1890 fn uncommented(text: &str) -> String {
1891 CharClasses::new(text.chars())
1892 .filter_map(|(s, c)| match s {
1893 FullCodeCharKind::Normal | FullCodeCharKind::InString => Some(c),
1894 _ => None,
1895 })
1896 .collect()
1897 }
1898
#[test]
fn test_uncommented() {
    // (input, input with its comments stripped)
    let cases = [
        ("abc/*...*/", "abc"),
        (
            "// .... /* \n../* /* *** / */ */a/* // */c\n",
            "..ac\n",
        ),
        // Comment markers inside a string literal are not comments.
        ("abc \" /* */\" qsdf", "abc \" /* */\" qsdf"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(&uncommented(input), expected);
    }
}
1908
#[test]
fn test_contains_comment() {
    assert!(!contains_comment("abc"));
    assert!(contains_comment("abc // qsdf"));
    // An unterminated block comment still counts.
    assert!(contains_comment("abc /* kqsdf"));
    // Comment markers inside a string literal do not.
    assert!(!contains_comment("abc \" /* */\" qsdf"));
}
1916
#[test]
fn test_find_uncommented() {
    // (haystack, needle, expected byte offset of the first uncommented match)
    let cases: &[(&str, &str, Option<usize>)] = &[
        ("/*/ */test", "test", Some(6)),
        ("//test\ntest", "test", Some(7)),
        ("/* comment only */", "whatever", None),
        (
            "/* comment */ some text /* more commentary */ result",
            "result",
            Some(46),
        ),
        ("sup // sup", "p", Some(2)),
        ("sup", "x", None),
        (r#"π? /**/ π is nice!"#, r#"π is nice"#, Some(9)),
        ("/*sup yo? \n sup*/ sup", "p", Some(20)),
        ("hel/*lohello*/lo", "hello", None),
        ("acb", "ab", None),
        (",/*A*/ ", ",", Some(0)),
        ("abc", "abc", Some(0)),
        ("/* abc */", "abc", None),
        ("/**/abc/* */", "abc", Some(4)),
        ("\"/* abc */\"", "abc", Some(4)),
        ("\"/* abc", "abc", Some(4)),
    ];

    for &(haystack, needle, expected) in cases {
        assert_eq!(expected, haystack.find_uncommented(needle));
    }
}
1944
#[test]
fn test_filter_normal_code() {
    let plain = r#"
fn main() {
    println!("hello, world");
}
"#;
    // Code without comments comes back unchanged.
    assert_eq!(plain, filter_normal_code(plain));

    let commented = r#"
fn main() {
    // hello, world
    println!("hello, world");
}
"#;
    // The comment line is dropped; everything else is kept.
    assert_eq!(plain, filter_normal_code(commented));
}
1961}