]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
New upstream version 1.67.1+dfsg1
[rustc.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
CommitLineData
//! Utilities for rendering escape sequence errors as diagnostics.

use std::iter::once;
use std::ops::Range;

use rustc_errors::{pluralize, Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};

10pub(crate) fn emit_unescape_error(
11 handler: &Handler,
12 // interior part of the literal, without quotes
13 lit: &str,
14 // full span of the literal, including quotes
15 span_with_quotes: Span,
fc512014
XL
16 // interior span of the literal, without quotes
17 span: Span,
48663c56
XL
18 mode: Mode,
19 // range of the error inside `lit`
20 range: Range<usize>,
21 error: EscapeError,
22) {
f2b60f7d 23 debug!(
dfeec247 24 "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
f2b60f7d 25 lit, span_with_quotes, mode, range, error
dfeec247 26 );
48663c56
XL
27 let last_char = || {
28 let c = lit[range.clone()].chars().rev().next().unwrap();
29 let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
30 (c, span)
31 };
32 match error {
33 EscapeError::LoneSurrogateUnicodeEscape => {
dfeec247
XL
34 handler
35 .struct_span_err(span, "invalid unicode character escape")
5869c6ff 36 .span_label(span, "invalid escape")
48663c56
XL
37 .help("unicode escape must not be a surrogate")
38 .emit();
39 }
40 EscapeError::OutOfRangeUnicodeEscape => {
dfeec247
XL
41 handler
42 .struct_span_err(span, "invalid unicode character escape")
5869c6ff 43 .span_label(span, "invalid escape")
48663c56
XL
44 .help("unicode escape must be at most 10FFFF")
45 .emit();
46 }
47 EscapeError::MoreThanOneChar => {
c295e0f8 48 use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
e74abb32 49
c295e0f8
XL
50 let mut has_help = false;
51 let mut handler = handler.struct_span_err(
52 span_with_quotes,
53 "character literal may only contain one codepoint",
54 );
55
56 if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
57 let escaped_marks =
58 lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
59 handler.span_note(
60 span,
61 &format!(
62 "this `{}` is followed by the combining mark{} `{}`",
63 lit.chars().next().unwrap(),
64 pluralize!(escaped_marks.len()),
65 escaped_marks.join(""),
66 ),
67 );
68 let normalized = lit.nfc().to_string();
69 if normalized.chars().count() == 1 {
70 has_help = true;
71 handler.span_suggestion(
72 span,
73 &format!(
74 "consider using the normalized form `{}` of this character",
75 normalized.chars().next().unwrap().escape_default()
76 ),
77 normalized,
78 Applicability::MachineApplicable,
79 );
80 }
3c0e092e
XL
81 } else {
82 let printable: Vec<char> = lit
83 .chars()
84 .filter(|&x| {
85 unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
86 && !x.is_whitespace()
87 })
88 .collect();
89
90 if let [ch] = printable.as_slice() {
91 has_help = true;
92
93 handler.span_note(
94 span,
95 &format!(
96 "there are non-printing characters, the full sequence is `{}`",
97 lit.escape_default(),
98 ),
99 );
100
101 handler.span_suggestion(
102 span,
103 "consider removing the non-printing characters",
923072b8 104 ch,
3c0e092e
XL
105 Applicability::MaybeIncorrect,
106 );
107 }
c295e0f8
XL
108 }
109
110 if !has_help {
487cf647 111 let (prefix, msg) = if mode.is_byte() {
c295e0f8
XL
112 ("b", "if you meant to write a byte string literal, use double quotes")
113 } else {
114 ("", "if you meant to write a `str` literal, use double quotes")
115 };
2b03887a
FG
116 let mut escaped = String::with_capacity(lit.len());
117 let mut chrs = lit.chars().peekable();
118 while let Some(first) = chrs.next() {
119 match (first, chrs.peek()) {
120 ('\\', Some('"')) => {
121 escaped.push('\\');
122 escaped.push('"');
123 chrs.next();
124 }
125 ('"', _) => {
126 escaped.push('\\');
127 escaped.push('"')
128 }
129 (c, _) => escaped.push(c),
130 };
131 }
c295e0f8 132 handler.span_suggestion(
48663c56 133 span_with_quotes,
e74abb32 134 msg,
2b03887a 135 format!("{prefix}\"{escaped}\""),
48663c56 136 Applicability::MachineApplicable,
c295e0f8
XL
137 );
138 }
139
140 handler.emit();
48663c56
XL
141 }
142 EscapeError::EscapeOnlyChar => {
5869c6ff 143 let (c, char_span) = last_char();
48663c56 144
487cf647 145 let msg = if mode.is_byte() {
5869c6ff 146 "byte constant must be escaped"
48663c56 147 } else {
5869c6ff
XL
148 "character constant must be escaped"
149 };
150 handler
151 .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
152 .span_suggestion(
153 char_span,
154 "escape the character",
923072b8 155 c.escape_default(),
5869c6ff
XL
156 Applicability::MachineApplicable,
157 )
5e7ed085 158 .emit();
48663c56
XL
159 }
160 EscapeError::BareCarriageReturn => {
161 let msg = if mode.in_double_quotes() {
5869c6ff 162 "bare CR not allowed in string, use `\\r` instead"
48663c56 163 } else {
5869c6ff 164 "character constant must be escaped: `\\r`"
48663c56 165 };
5869c6ff
XL
166 handler
167 .struct_span_err(span, msg)
168 .span_suggestion(
169 span,
170 "escape the character",
923072b8 171 "\\r",
5869c6ff
XL
172 Applicability::MachineApplicable,
173 )
174 .emit();
48663c56 175 }
dc9dc135
XL
176 EscapeError::BareCarriageReturnInRawString => {
177 assert!(mode.in_double_quotes());
178 let msg = "bare CR not allowed in raw string";
179 handler.span_err(span, msg);
180 }
48663c56
XL
181 EscapeError::InvalidEscape => {
182 let (c, span) = last_char();
183
dfeec247 184 let label =
487cf647 185 if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
5869c6ff
XL
186 let ec = escaped_char(c);
187 let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
48663c56 188 diag.span_label(span, label);
487cf647 189 if c == '{' || c == '}' && !mode.is_byte() {
dfeec247 190 diag.help(
5869c6ff 191 "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
dfeec247 192 );
48663c56 193 } else if c == '\r' {
dfeec247 194 diag.help(
5869c6ff
XL
195 "this is an isolated carriage return; consider checking your editor and \
196 version control settings",
197 );
198 } else {
487cf647 199 if !mode.is_byte() {
5099ac24
FG
200 diag.span_suggestion(
201 span_with_quotes,
202 "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
203 format!("r\"{}\"", lit),
204 Applicability::MaybeIncorrect,
205 );
206 }
207
5869c6ff
XL
208 diag.help(
209 "for more information, visit \
210 <https://static.rust-lang.org/doc/master/reference.html#literals>",
dfeec247 211 );
48663c56
XL
212 }
213 diag.emit();
214 }
215 EscapeError::TooShortHexEscape => {
5e7ed085 216 handler.span_err(span, "numeric character escape is too short");
48663c56
XL
217 }
218 EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
219 let (c, span) = last_char();
220
5869c6ff
XL
221 let msg = if error == EscapeError::InvalidCharInHexEscape {
222 "invalid character in numeric character escape"
48663c56 223 } else {
5869c6ff
XL
224 "invalid character in unicode escape"
225 };
226 let c = escaped_char(c);
48663c56 227
5869c6ff
XL
228 handler
229 .struct_span_err(span, &format!("{}: `{}`", msg, c))
230 .span_label(span, msg)
231 .emit();
48663c56
XL
232 }
233 EscapeError::NonAsciiCharInByte => {
5869c6ff 234 let (c, span) = last_char();
487cf647
FG
235 let desc = match mode {
236 Mode::Byte => "byte literal",
237 Mode::ByteStr => "byte string literal",
238 Mode::RawByteStr => "raw byte string literal",
239 _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
240 };
241 let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
c295e0f8
XL
242 let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
243 format!(" but is {:?}", c)
244 } else {
245 String::new()
246 };
487cf647
FG
247 err.span_label(span, &format!("must be ASCII{}", postfix));
248 // Note: the \\xHH suggestions are not given for raw byte string
249 // literals, because they are araw and so cannot use any escapes.
250 if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
94222f64 251 err.span_suggestion(
5869c6ff 252 span,
94222f64 253 &format!(
c295e0f8 254 "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
94222f64
XL
255 c
256 ),
5869c6ff 257 format!("\\x{:X}", c as u32),
94222f64
XL
258 Applicability::MaybeIncorrect,
259 );
487cf647 260 } else if mode == Mode::Byte {
94222f64 261 err.span_label(span, "this multibyte character does not fit into a single byte");
487cf647 262 } else if mode != Mode::RawByteStr {
94222f64
XL
263 let mut utf8 = String::new();
264 utf8.push(c);
265 err.span_suggestion(
266 span,
267 &format!(
c295e0f8 268 "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
94222f64
XL
269 c
270 ),
271 utf8.as_bytes()
272 .iter()
273 .map(|b: &u8| format!("\\x{:X}", *b))
274 .fold("".to_string(), |a, c| a + &c),
275 Applicability::MaybeIncorrect,
276 );
277 }
278 err.emit();
48663c56 279 }
5869c6ff
XL
280 EscapeError::OutOfRangeHexEscape => {
281 handler
282 .struct_span_err(span, "out of range hex escape")
283 .span_label(span, "must be a character in the range [\\x00-\\x7f]")
284 .emit();
dc9dc135 285 }
48663c56 286 EscapeError::LeadingUnderscoreUnicodeEscape => {
5869c6ff
XL
287 let (c, span) = last_char();
288 let msg = "invalid start of unicode escape";
289 handler
290 .struct_span_err(span, &format!("{}: `{}`", msg, c))
291 .span_label(span, msg)
292 .emit();
48663c56
XL
293 }
294 EscapeError::OverlongUnicodeEscape => {
5869c6ff
XL
295 handler
296 .struct_span_err(span, "overlong unicode escape")
297 .span_label(span, "must have at most 6 hex digits")
298 .emit();
48663c56 299 }
5e7ed085
FG
300 EscapeError::UnclosedUnicodeEscape => {
301 handler
302 .struct_span_err(span, "unterminated unicode escape")
303 .span_label(span, "missing a closing `}`")
304 .span_suggestion_verbose(
305 span.shrink_to_hi(),
306 "terminate the unicode escape",
923072b8 307 "}",
5e7ed085
FG
308 Applicability::MaybeIncorrect,
309 )
310 .emit();
311 }
48663c56
XL
312 EscapeError::NoBraceInUnicodeEscape => {
313 let msg = "incorrect unicode escape sequence";
314 let mut diag = handler.struct_span_err(span, msg);
315
316 let mut suggestion = "\\u{".to_owned();
317 let mut suggestion_len = 0;
318 let (c, char_span) = last_char();
319 let chars = once(c).chain(lit[range.end..].chars());
320 for c in chars.take(6).take_while(|c| c.is_digit(16)) {
321 suggestion.push(c);
322 suggestion_len += c.len_utf8();
323 }
324
325 if suggestion_len > 0 {
326 suggestion.push('}');
29967ef6 327 let hi = char_span.lo() + BytePos(suggestion_len as u32);
48663c56 328 diag.span_suggestion(
29967ef6 329 span.with_hi(hi),
48663c56
XL
330 "format of unicode escape sequences uses braces",
331 suggestion,
332 Applicability::MaybeIncorrect,
333 );
334 } else {
335 diag.span_label(span, msg);
dfeec247 336 diag.help("format of unicode escape sequences is `\\u{...}`");
48663c56
XL
337 }
338
339 diag.emit();
340 }
5869c6ff
XL
341 EscapeError::UnicodeEscapeInByte => {
342 let msg = "unicode escape in byte string";
343 handler
344 .struct_span_err(span, msg)
345 .span_label(span, msg)
346 .help("unicode escape sequences cannot be used as a byte or in a byte string")
347 .emit();
348 }
48663c56 349 EscapeError::EmptyUnicodeEscape => {
5869c6ff
XL
350 handler
351 .struct_span_err(span, "empty unicode escape")
352 .span_label(span, "this escape must have at least 1 hex digit")
353 .emit();
354 }
355 EscapeError::ZeroChars => {
356 let msg = "empty character literal";
5e7ed085 357 handler.struct_span_err(span, msg).span_label(span, msg).emit();
5869c6ff
XL
358 }
359 EscapeError::LoneSlash => {
360 let msg = "invalid trailing slash in literal";
361 handler.struct_span_err(span, msg).span_label(span, msg).emit();
48663c56 362 }
94222f64
XL
363 EscapeError::UnskippedWhitespaceWarning => {
364 let (c, char_span) = last_char();
365 let msg =
366 format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
367 handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
368 }
369 EscapeError::MultipleSkippedLinesWarning => {
370 let msg = "multiple lines skipped by escaped newline";
371 let bottom_msg = "skipping everything up to and including this point";
372 handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
373 }
48663c56
XL
374 }
375}
376
/// Renders `c` as a string suitable for embedding in a user-facing diagnostic message.
///
/// Printable ASCII (`' '` through `'~'`) is returned unchanged. Every other character is
/// returned in its [`char::escape_default`] form, e.g. `\n` or `\u{e9}`.
pub(crate) fn escaped_char(c: char) -> String {
    match c {
        '\u{20}'..='\u{7e}' => {
            // Don't escape \, ' or " for user-facing messages
            c.to_string()
        }
        _ => c.escape_default().to_string(),
    }
}