[rustc.git] / vendor / rustc-ap-rustc_ast / src / util / comments.rs

use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};

#[cfg(test)]
mod tests;

/// Describes how a collected comment is positioned relative to surrounding code.
///
/// `gather_comments` in this file assigns one of these to every entry it produces.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum CommentStyle {
    /// No code on either side of each line of the comment.
    /// Also used by `gather_comments` for a leading shebang line.
    Isolated,
    /// Code exists to the left of the comment, and nothing but a line break follows it.
    Trailing,
    /// Code before /* foo */ and after the comment.
    /// As classified by `gather_comments`: a block comment that is not immediately
    /// followed by `\r` or `\n`, whatever is (or is not) to its left.
    Mixed,
    /// Just a manual blank line "\n\n", for layout.
    /// Entries of this style carry no text.
    BlankLine,
}

/// One comment (or blank-line marker) collected from a source text.
#[derive(Clone)]
pub struct Comment {
    /// How the comment is positioned relative to the code around it.
    pub style: CommentStyle,
    /// The comment text split into lines; empty for `CommentStyle::BlankLine` entries.
    pub lines: Vec<String>,
    /// Byte position of the comment, including the start offset of the source file
    /// it was collected from.
    pub pos: BytePos,
}

/// Makes a doc string more presentable to users.
/// Used by rustdoc and perhaps other tools, but not by rustc.
///
/// Input without a line break is returned untouched. For multi-line input two
/// clean-ups are attempted:
///
/// * vertical: drop a first line consisting only of `*`, a last line consisting only
///   of `*` (optionally preceded by one whitespace character), and blank lines at
///   both ends;
/// * horizontal: strip a leading `[ \t]*\*` prefix from every remaining line when the
///   `*` sits in the same column on each of them.
///
/// If neither clean-up changes anything, `data` itself is returned; otherwise the
/// remaining lines are joined with `\n` and interned as a new symbol.
pub fn beautify_doc_string(data: Symbol) -> Symbol {
    /// Returns `true` if `line` is pure closing decoration: empty, or an optional
    /// single leading whitespace/`*` character followed only by `*`.
    ///
    /// Only a whitespace or `*` first character is ignored. Skipping the first
    /// character unconditionally would classify real text such as `b` or `b**` as
    /// decoration and silently drop it from the documentation.
    fn is_closing_star_line(line: &str) -> bool {
        let mut chars = line.chars();
        match chars.next() {
            None => true,
            Some(first) if first == '*' || first.is_whitespace() => chars.all(|c| c == '*'),
            Some(_) => false,
        }
    }

    /// Computes the half-open range `(start, end)` of lines to keep, or `None` when
    /// no line would be removed.
    fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
        let mut i = 0;
        let mut j = lines.len();
        // first line of all-stars should be omitted
        // (an empty first line also satisfies `all` and is omitted here)
        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
            i += 1;
        }

        // skip blank lines at the top
        while i < j && lines[i].trim().is_empty() {
            i += 1;
        }
        // like the first, a last line of all stars should be omitted
        if j > i && is_closing_star_line(lines[j - 1]) {
            j -= 1;
        }

        // skip blank lines at the bottom
        while j > i && lines[j - 1].trim().is_empty() {
            j -= 1;
        }

        if i != 0 || j != lines.len() { Some((i, j)) } else { None }
    }

    /// Finds the column of the leading `*` shared by all `lines`.
    ///
    /// Returns `None` as soon as a line has anything other than spaces, tabs or `*`
    /// before its first `*`, has its first `*` in a different column than the first
    /// line, or is too short to reach that column.
    fn get_horizontal_trim(lines: &[&str]) -> Option<usize> {
        // Column of the first `*`; `usize::MAX` until the first line has set it.
        let mut i = usize::MAX;
        let mut first = true;

        for line in lines {
            for (j, c) in line.chars().enumerate() {
                if j > i || !"* \t".contains(c) {
                    return None;
                }
                if c == '*' {
                    if first {
                        i = j;
                        first = false;
                    } else if i != j {
                        return None;
                    }
                    break;
                }
            }
            // Everything scanned up to the `*` is ASCII, so the char column `i` can be
            // compared with (and later used as) a byte offset.
            if i >= line.len() {
                return None;
            }
        }
        Some(i)
    }

    let data_s = data.as_str();
    if data_s.contains('\n') {
        let mut lines = data_s.lines().collect::<Vec<&str>>();
        let mut changes = false;
        let lines = if let Some((i, j)) = get_vertical_trim(&lines) {
            changes = true;
            // remove whitespace-only lines from the start/end of lines
            &mut lines[i..j]
        } else {
            &mut lines
        };
        if let Some(horizontal) = get_horizontal_trim(&lines) {
            changes = true;
            // remove a "[ \t]*\*" block from each line, if possible
            for line in lines.iter_mut() {
                *line = &line[horizontal + 1..];
            }
        }
        if changes {
            return Symbol::intern(&lines.join("\n"));
        }
    }
    data
}

/// Checks that the first `col` chars of `s` (or all of `s`, if it is shorter) are
/// whitespace.
///
/// Returns `None` when a non-whitespace char is found among them. Otherwise returns
/// `Some(k)`, where `k` is the byte offset just past the last char examined; `k` is
/// `0` when nothing was examined and may be equal to `s.len()`.
fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
    s.char_indices().take(col.to_usize()).try_fold(0, |_, (start, ch)| {
        if ch.is_whitespace() { Some(start + ch.len_utf8()) } else { None }
    })
}

/// Strips up to `col` leading whitespace chars from `s`.
///
/// If a non-whitespace char occurs within the first `col` chars, `s` is returned
/// unchanged; if the whitespace prefix covers the whole string, `""` is returned.
fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
    match all_whitespace(s, col) {
        // Something other than whitespace sits in the prefix: leave the line alone.
        None => s,
        Some(offset) if offset < s.len() => &s[offset..],
        // The prefix swallowed the entire line.
        Some(_) => "",
    }
}

/// Splits the text of a block comment into owned lines.
///
/// The first line is kept verbatim; every following line has up to `col` chars of
/// leading whitespace removed via `trim_whitespace_prefix`.
fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
    text.lines()
        .enumerate()
        .map(|(idx, line)| {
            // Only continuation lines carry the indentation to be stripped.
            let kept = if idx == 0 { line } else { trim_whitespace_prefix(line, col) };
            kept.to_string()
        })
        .collect()
}

/// Collects the non-doc comments and blank-line markers found in `src`.
///
/// `src` is registered under `path` in a fresh `SourceMap` that reuses only the path
/// mapping of `sm`, and is then tokenized with `rustc_lexer`. The result contains,
/// in source order:
///
/// * a leading shebang line, if `rustc_lexer::strip_shebang` reports one (`Isolated`);
/// * one `BlankLine` entry for every newline after the first within a single
///   whitespace token;
/// * every block and line comment whose `doc_style` is `None`, classified by what
///   surrounds it on its line.
///
/// Doc comments are skipped. All positions are offset by the start position of the
/// newly created source file.
// it appears this function is called only from pprust... that's
// probably not a good thing.
pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
    // Shadow `sm` with a new map so that `src` becomes a source file of its own.
    let sm = SourceMap::new(sm.path_mapping().clone());
    let source_file = sm.new_source_file(path, src);
    let text = (*source_file.src.as_ref().unwrap()).clone();

    let text: &str = text.as_str();
    let start_bpos = source_file.start_pos;
    // Byte offset into `text` of the token currently being examined.
    let mut pos = 0;
    let mut comments: Vec<Comment> = Vec::new();
    // Whether a non-whitespace, non-comment token has been seen since the last line
    // break found in a whitespace token.
    let mut code_to_the_left = false;

    // A shebang is recorded as an isolated comment and excluded from tokenization.
    if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
        comments.push(Comment {
            style: CommentStyle::Isolated,
            lines: vec![text[..shebang_len].to_string()],
            pos: start_bpos,
        });
        pos += shebang_len;
    }

    for token in rustc_lexer::tokenize(&text[pos..]) {
        let token_text = &text[pos..pos + token.len];
        match token.kind {
            rustc_lexer::TokenKind::Whitespace => {
                // The first newline merely ends the current line.
                if let Some(mut idx) = token_text.find('\n') {
                    code_to_the_left = false;
                    // Each further newline in the same whitespace run is a blank line,
                    // recorded at the position of that newline.
                    while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
                        idx = idx + 1 + next_newline;
                        comments.push(Comment {
                            style: CommentStyle::BlankLine,
                            lines: vec![],
                            // NOTE(review): `as u32` truncates offsets above `u32::MAX`;
                            // presumably source files never get that large - confirm.
                            pos: start_bpos + BytePos((pos + idx) as u32),
                        });
                    }
                }
            }
            rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
                if doc_style.is_none() {
                    // Anything other than `\r` or `\n` right after the comment counts
                    // as "code to the right"; this includes the end of the input.
                    let code_to_the_right =
                        !matches!(text[pos + token.len..].chars().next(), Some('\r' | '\n'));
                    let style = match (code_to_the_left, code_to_the_right) {
                        (_, true) => CommentStyle::Mixed,
                        (false, false) => CommentStyle::Isolated,
                        (true, false) => CommentStyle::Trailing,
                    };

                    // Count the number of chars since the start of the line by rescanning.
                    let pos_in_file = start_bpos + BytePos(pos as u32);
                    let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
                    let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
                    let col = CharPos(text[line_begin_pos..pos].chars().count());

                    // `col` is how much leading whitespace may be stripped from the
                    // comment's continuation lines.
                    let lines = split_block_comment_into_lines(token_text, col);
                    comments.push(Comment { style, lines, pos: pos_in_file })
                }
            }
            rustc_lexer::TokenKind::LineComment { doc_style } => {
                if doc_style.is_none() {
                    comments.push(Comment {
                        style: if code_to_the_left {
                            CommentStyle::Trailing
                        } else {
                            CommentStyle::Isolated
                        },
                        lines: vec![token_text.to_string()],
                        pos: start_bpos + BytePos(pos as u32),
                    })
                }
            }
            // Every other token kind is treated as code on the current line.
            _ => {
                code_to_the_left = true;
            }
        }
        pos += token.len;
    }

    comments
}
Commit	Line	Data
f20569fa XL	1	use rustc_span::source_map::SourceMap;
	2	use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
	3
	4	#[cfg(test)]
	5	mod tests;
	6
	7	#[derive(Clone, Copy, PartialEq, Debug)]
	8	pub enum CommentStyle {
	9	/// No code on either side of each line of the comment
	10	Isolated,
	11	/// Code exists to the left of the comment
	12	Trailing,
	13	/// Code before /* foo */ and after the comment
	14	Mixed,
	15	/// Just a manual blank line "\n\n", for layout
	16	BlankLine,
	17	}
	18
	19	#[derive(Clone)]
	20	pub struct Comment {
	21	pub style: CommentStyle,
	22	pub lines: Vec<String>,
	23	pub pos: BytePos,
	24	}
	25
	26	/// Makes a doc string more presentable to users.
	27	/// Used by rustdoc and perhaps other tools, but not by rustc.
	28	pub fn beautify_doc_string(data: Symbol) -> Symbol {
	29	fn get_vertical_trim(lines: &[&str]) -> Option<(usize, usize)> {
	30	let mut i = 0;
	31	let mut j = lines.len();
	32	// first line of all-stars should be omitted
	33	if !lines.is_empty() && lines[0].chars().all(\|c\| c == '*') {
	34	i += 1;
	35	}
	36
	37	while i < j && lines[i].trim().is_empty() {
	38	i += 1;
	39	}
	40	// like the first, a last line of all stars should be omitted
	41	if j > i && lines[j - 1].chars().skip(1).all(\|c\| c == '*') {
	42	j -= 1;
	43	}
	44
	45	while j > i && lines[j - 1].trim().is_empty() {
	46	j -= 1;
	47	}
	48
	49	if i != 0 \|\| j != lines.len() { Some((i, j)) } else { None }
	50	}
	51
	52	fn get_horizontal_trim(lines: &[&str]) -> Option<usize> {
	53	let mut i = usize::MAX;
	54	let mut first = true;
	55
	56	for line in lines {
	57	for (j, c) in line.chars().enumerate() {
	58	if j > i \|\| !"* \t".contains(c) {
	59	return None;
	60	}
	61	if c == '*' {
	62	if first {
	63	i = j;
	64	first = false;
65	} else if i != j {
66	return None;
67	}
68	break;
69	}
70	}
71	if i >= line.len() {
72	return None;
73	}
74	}
75	Some(i)
76	}
77
78	let data_s = data.as_str();
79	if data_s.contains('\n') {
80	let mut lines = data_s.lines().collect::<Vec<&str>>();
81	let mut changes = false;
82	let lines = if let Some((i, j)) = get_vertical_trim(&lines) {
83	changes = true;
84	// remove whitespace-only lines from the start/end of lines
85	&mut lines[i..j]
86	} else {
87	&mut lines
88	};
89	if let Some(horizontal) = get_horizontal_trim(&lines) {
90	changes = true;
	91	// remove a "[ \t]*\*" block from each line, if possible
92	for line in lines.iter_mut() {
93	*line = &line[horizontal + 1..];
94	}
95	}
96	if changes {
97	return Symbol::intern(&lines.join("\n"));
98	}
99	}
100	data
101	}
102
103	/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
104	/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
105	/// whitespace. Note that `k` may be outside bounds of `s`.
106	fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
107	let mut idx = 0;
108	for (i, ch) in s.char_indices().take(col.to_usize()) {
109	if !ch.is_whitespace() {
110	return None;
111	}
112	idx = i + ch.len_utf8();
113	}
114	Some(idx)
115	}
116
117	fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
118	let len = s.len();
119	match all_whitespace(&s, col) {
120	Some(col) => {
121	if col < len {
122	&s[col..]
123	} else {
124	""
125	}
126	}
127	None => s,
128	}
129	}
130
131	fn split_block_comment_into_lines(text: &str, col: CharPos) -> Vec<String> {
132	let mut res: Vec<String> = vec![];
133	let mut lines = text.lines();
134	// just push the first line
135	res.extend(lines.next().map(\|it\| it.to_string()));
136	// for other lines, strip common whitespace prefix
137	for line in lines {
138	res.push(trim_whitespace_prefix(line, col).to_string())
139	}
140	res
141	}
142
143	// it appears this function is called only from pprust... that's
144	// probably not a good thing.
145	pub fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment> {
146	let sm = SourceMap::new(sm.path_mapping().clone());
147	let source_file = sm.new_source_file(path, src);
148	let text = (*source_file.src.as_ref().unwrap()).clone();
149
150	let text: &str = text.as_str();
151	let start_bpos = source_file.start_pos;
152	let mut pos = 0;
153	let mut comments: Vec<Comment> = Vec::new();
154	let mut code_to_the_left = false;
155
156	if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
157	comments.push(Comment {
158	style: CommentStyle::Isolated,
159	lines: vec![text[..shebang_len].to_string()],
160	pos: start_bpos,
161	});
162	pos += shebang_len;
163	}
164
165	for token in rustc_lexer::tokenize(&text[pos..]) {
166	let token_text = &text[pos..pos + token.len];
167	match token.kind {
168	rustc_lexer::TokenKind::Whitespace => {
169	if let Some(mut idx) = token_text.find('\n') {
170	code_to_the_left = false;
171	while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
172	idx = idx + 1 + next_newline;
173	comments.push(Comment {
174	style: CommentStyle::BlankLine,
175	lines: vec![],
176	pos: start_bpos + BytePos((pos + idx) as u32),
177	});
178	}
179	}
180	}
181	rustc_lexer::TokenKind::BlockComment { doc_style, .. } => {
182	if doc_style.is_none() {
183	let code_to_the_right =
184	!matches!(text[pos + token.len..].chars().next(), Some('\r' \| '\n'));
185	let style = match (code_to_the_left, code_to_the_right) {
186	(_, true) => CommentStyle::Mixed,
187	(false, false) => CommentStyle::Isolated,
188	(true, false) => CommentStyle::Trailing,
189	};
190
191	// Count the number of chars since the start of the line by rescanning.
192	let pos_in_file = start_bpos + BytePos(pos as u32);
193	let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
194	let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
195	let col = CharPos(text[line_begin_pos..pos].chars().count());
196
197	let lines = split_block_comment_into_lines(token_text, col);
198	comments.push(Comment { style, lines, pos: pos_in_file })
199	}
200	}
201	rustc_lexer::TokenKind::LineComment { doc_style } => {
202	if doc_style.is_none() {
203	comments.push(Comment {
204	style: if code_to_the_left {
205	CommentStyle::Trailing
206	} else {
207	CommentStyle::Isolated
208	},
209	lines: vec![token_text.to_string()],
210	pos: start_bpos + BytePos(pos as u32),
211	})
212	}
213	}
214	_ => {
215	code_to_the_left = true;
216	}
217	}
218	pos += token.len;
219	}
220
221	comments
222	}