]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
New upstream version 1.73.0+dfsg1
[rustc.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
11
12 pub(crate) fn emit_unescape_error(
13 handler: &Handler,
14 // interior part of the literal, without quotes
15 lit: &str,
16 // full span of the literal, including quotes
17 span_with_quotes: Span,
18 // interior span of the literal, without quotes
19 span: Span,
20 mode: Mode,
21 // range of the error inside `lit`
22 range: Range<usize>,
23 error: EscapeError,
24 ) {
25 debug!(
26 "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
27 lit, span_with_quotes, mode, range, error
28 );
29 let last_char = || {
30 let c = lit[range.clone()].chars().next_back().unwrap();
31 let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
32 (c, span)
33 };
34 match error {
35 EscapeError::LoneSurrogateUnicodeEscape => {
36 handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: true });
37 }
38 EscapeError::OutOfRangeUnicodeEscape => {
39 handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: false });
40 }
41 EscapeError::MoreThanOneChar => {
42 use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
43 let mut sugg = None;
44 let mut note = None;
45
46 let lit_chars = lit.chars().collect::<Vec<_>>();
47 let (first, rest) = lit_chars.split_first().unwrap();
48 if rest.iter().copied().all(is_combining_mark) {
49 let normalized = lit.nfc().to_string();
50 if normalized.chars().count() == 1 {
51 let ch = normalized.chars().next().unwrap().escape_default().to_string();
52 sugg = Some(MoreThanOneCharSugg::NormalizedForm { span, ch, normalized });
53 }
54 let escaped_marks =
55 rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
56 note = Some(MoreThanOneCharNote::AllCombining {
57 span,
58 chr: format!("{first}"),
59 len: escaped_marks.len(),
60 escaped_marks: escaped_marks.join(""),
61 });
62 } else {
63 let printable: Vec<char> = lit
64 .chars()
65 .filter(|&x| {
66 unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
67 && !x.is_whitespace()
68 })
69 .collect();
70
71 if let &[ch] = printable.as_slice() {
72 sugg =
73 Some(MoreThanOneCharSugg::RemoveNonPrinting { span, ch: ch.to_string() });
74 note = Some(MoreThanOneCharNote::NonPrinting {
75 span,
76 escaped: lit.escape_default().to_string(),
77 });
78 }
79 };
80 let sugg = sugg.unwrap_or_else(|| {
81 let prefix = mode.prefix_noraw();
82 let mut escaped = String::with_capacity(lit.len());
83 let mut in_escape = false;
84 for c in lit.chars() {
85 match c {
86 '\\' => in_escape = !in_escape,
87 '"' if !in_escape => escaped.push('\\'),
88 _ => in_escape = false,
89 }
90 escaped.push(c);
91 }
92 let sugg = format!("{prefix}\"{escaped}\"");
93 MoreThanOneCharSugg::Quotes {
94 span: span_with_quotes,
95 is_byte: mode == Mode::Byte,
96 sugg,
97 }
98 });
99 handler.emit_err(UnescapeError::MoreThanOneChar {
100 span: span_with_quotes,
101 note,
102 suggestion: sugg,
103 });
104 }
105 EscapeError::EscapeOnlyChar => {
106 let (c, char_span) = last_char();
107 handler.emit_err(UnescapeError::EscapeOnlyChar {
108 span,
109 char_span,
110 escaped_sugg: c.escape_default().to_string(),
111 escaped_msg: escaped_char(c),
112 byte: mode == Mode::Byte,
113 });
114 }
115 EscapeError::BareCarriageReturn => {
116 let double_quotes = mode.in_double_quotes();
117 handler.emit_err(UnescapeError::BareCr { span, double_quotes });
118 }
119 EscapeError::BareCarriageReturnInRawString => {
120 assert!(mode.in_double_quotes());
121 handler.emit_err(UnescapeError::BareCrRawString(span));
122 }
123 EscapeError::InvalidEscape => {
124 let (c, span) = last_char();
125
126 let label = if mode == Mode::Byte || mode == Mode::ByteStr {
127 "unknown byte escape"
128 } else {
129 "unknown character escape"
130 };
131 let ec = escaped_char(c);
132 let mut diag = handler.struct_span_err(span, format!("{label}: `{ec}`"));
133 diag.span_label(span, label);
134 if c == '{' || c == '}' && matches!(mode, Mode::Str | Mode::RawStr) {
135 diag.help(
136 "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
137 );
138 } else if c == '\r' {
139 diag.help(
140 "this is an isolated carriage return; consider checking your editor and \
141 version control settings",
142 );
143 } else {
144 if mode == Mode::Str || mode == Mode::Char {
145 diag.span_suggestion(
146 span_with_quotes,
147 "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
148 format!("r\"{lit}\""),
149 Applicability::MaybeIncorrect,
150 );
151 }
152
153 diag.help(
154 "for more information, visit \
155 <https://doc.rust-lang.org/reference/tokens.html#literals>",
156 );
157 }
158 diag.emit();
159 }
160 EscapeError::TooShortHexEscape => {
161 handler.emit_err(UnescapeError::TooShortHexEscape(span));
162 }
163 EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
164 let (c, span) = last_char();
165 let is_hex = error == EscapeError::InvalidCharInHexEscape;
166 let ch = escaped_char(c);
167 handler.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch });
168 }
169 EscapeError::NonAsciiCharInByte => {
170 let (c, span) = last_char();
171 let desc = match mode {
172 Mode::Byte => "byte literal",
173 Mode::ByteStr => "byte string literal",
174 Mode::RawByteStr => "raw byte string literal",
175 _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
176 };
177 let mut err = handler.struct_span_err(span, format!("non-ASCII character in {desc}"));
178 let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
179 format!(" but is {c:?}")
180 } else {
181 String::new()
182 };
183 err.span_label(span, format!("must be ASCII{postfix}"));
184 // Note: the \\xHH suggestions are not given for raw byte string
185 // literals, because they are araw and so cannot use any escapes.
186 if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
187 err.span_suggestion(
188 span,
189 format!(
190 "if you meant to use the unicode code point for {c:?}, use a \\xHH escape"
191 ),
192 format!("\\x{:X}", c as u32),
193 Applicability::MaybeIncorrect,
194 );
195 } else if mode == Mode::Byte {
196 err.span_label(span, "this multibyte character does not fit into a single byte");
197 } else if mode != Mode::RawByteStr {
198 let mut utf8 = String::new();
199 utf8.push(c);
200 err.span_suggestion(
201 span,
202 format!("if you meant to use the UTF-8 encoding of {c:?}, use \\xHH escapes"),
203 utf8.as_bytes()
204 .iter()
205 .map(|b: &u8| format!("\\x{:X}", *b))
206 .fold("".to_string(), |a, c| a + &c),
207 Applicability::MaybeIncorrect,
208 );
209 }
210 err.emit();
211 }
212 EscapeError::OutOfRangeHexEscape => {
213 handler.emit_err(UnescapeError::OutOfRangeHexEscape(span));
214 }
215 EscapeError::LeadingUnderscoreUnicodeEscape => {
216 let (c, span) = last_char();
217 handler.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
218 span,
219 ch: escaped_char(c),
220 });
221 }
222 EscapeError::OverlongUnicodeEscape => {
223 handler.emit_err(UnescapeError::OverlongUnicodeEscape(span));
224 }
225 EscapeError::UnclosedUnicodeEscape => {
226 handler.emit_err(UnescapeError::UnclosedUnicodeEscape(span, span.shrink_to_hi()));
227 }
228 EscapeError::NoBraceInUnicodeEscape => {
229 let mut suggestion = "\\u{".to_owned();
230 let mut suggestion_len = 0;
231 let (c, char_span) = last_char();
232 let chars = once(c).chain(lit[range.end..].chars());
233 for c in chars.take(6).take_while(|c| c.is_digit(16)) {
234 suggestion.push(c);
235 suggestion_len += c.len_utf8();
236 }
237
238 let (label, sub) = if suggestion_len > 0 {
239 suggestion.push('}');
240 let hi = char_span.lo() + BytePos(suggestion_len as u32);
241 (None, NoBraceUnicodeSub::Suggestion { span: span.with_hi(hi), suggestion })
242 } else {
243 (Some(span), NoBraceUnicodeSub::Help)
244 };
245 handler.emit_err(UnescapeError::NoBraceInUnicodeEscape { span, label, sub });
246 }
247 EscapeError::UnicodeEscapeInByte => {
248 handler.emit_err(UnescapeError::UnicodeEscapeInByte(span));
249 }
250 EscapeError::EmptyUnicodeEscape => {
251 handler.emit_err(UnescapeError::EmptyUnicodeEscape(span));
252 }
253 EscapeError::ZeroChars => {
254 handler.emit_err(UnescapeError::ZeroChars(span));
255 }
256 EscapeError::LoneSlash => {
257 handler.emit_err(UnescapeError::LoneSlash(span));
258 }
259 EscapeError::UnskippedWhitespaceWarning => {
260 let (c, char_span) = last_char();
261 handler.emit_warning(UnescapeError::UnskippedWhitespace {
262 span,
263 ch: escaped_char(c),
264 char_span,
265 });
266 }
267 EscapeError::MultipleSkippedLinesWarning => {
268 handler.emit_warning(UnescapeError::MultipleSkippedLinesWarning(span));
269 }
270 }
271 }
272
/// Renders `c` as a `String` suitable for embedding in a user-facing diagnostic message.
///
/// Characters in the printable ASCII range (U+0020 through U+007E) are returned verbatim,
/// which deliberately leaves `\`, `'` and `"` unescaped. Every other character is rendered
/// through `char::escape_default`.
pub(crate) fn escaped_char(c: char) -> String {
    let is_printable_ascii = ('\u{20}'..='\u{7e}').contains(&c);
    if is_printable_ascii {
        // Don't escape \, ' or " for user-facing messages
        String::from(c)
    } else {
        c.escape_default().collect()
    }
}