]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
New upstream version 1.58.1+dfsg1
[rustc.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
CommitLineData
48663c56
XL
//! Utilities for rendering escape sequence errors as diagnostics.
2
48663c56 3use std::iter::once;
dfeec247 4use std::ops::Range;
48663c56 5
c295e0f8 6use rustc_errors::{pluralize, Applicability, Handler};
416331ca 7use rustc_lexer::unescape::{EscapeError, Mode};
dfeec247 8use rustc_span::{BytePos, Span};
48663c56 9
48663c56
XL
10pub(crate) fn emit_unescape_error(
11 handler: &Handler,
12 // interior part of the literal, without quotes
13 lit: &str,
14 // full span of the literal, including quotes
15 span_with_quotes: Span,
fc512014
XL
16 // interior span of the literal, without quotes
17 span: Span,
48663c56
XL
18 mode: Mode,
19 // range of the error inside `lit`
20 range: Range<usize>,
21 error: EscapeError,
22) {
3dfed10e 23 tracing::debug!(
dfeec247
XL
24 "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
25 lit,
26 span_with_quotes,
27 mode,
28 range,
29 error
30 );
48663c56
XL
31 let last_char = || {
32 let c = lit[range.clone()].chars().rev().next().unwrap();
33 let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
34 (c, span)
35 };
36 match error {
37 EscapeError::LoneSurrogateUnicodeEscape => {
dfeec247
XL
38 handler
39 .struct_span_err(span, "invalid unicode character escape")
5869c6ff 40 .span_label(span, "invalid escape")
48663c56
XL
41 .help("unicode escape must not be a surrogate")
42 .emit();
43 }
44 EscapeError::OutOfRangeUnicodeEscape => {
dfeec247
XL
45 handler
46 .struct_span_err(span, "invalid unicode character escape")
5869c6ff 47 .span_label(span, "invalid escape")
48663c56
XL
48 .help("unicode escape must be at most 10FFFF")
49 .emit();
50 }
51 EscapeError::MoreThanOneChar => {
c295e0f8 52 use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
e74abb32 53
c295e0f8
XL
54 let mut has_help = false;
55 let mut handler = handler.struct_span_err(
56 span_with_quotes,
57 "character literal may only contain one codepoint",
58 );
59
60 if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
61 let escaped_marks =
62 lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
63 handler.span_note(
64 span,
65 &format!(
66 "this `{}` is followed by the combining mark{} `{}`",
67 lit.chars().next().unwrap(),
68 pluralize!(escaped_marks.len()),
69 escaped_marks.join(""),
70 ),
71 );
72 let normalized = lit.nfc().to_string();
73 if normalized.chars().count() == 1 {
74 has_help = true;
75 handler.span_suggestion(
76 span,
77 &format!(
78 "consider using the normalized form `{}` of this character",
79 normalized.chars().next().unwrap().escape_default()
80 ),
81 normalized,
82 Applicability::MachineApplicable,
83 );
84 }
3c0e092e
XL
85 } else {
86 let printable: Vec<char> = lit
87 .chars()
88 .filter(|&x| {
89 unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
90 && !x.is_whitespace()
91 })
92 .collect();
93
94 if let [ch] = printable.as_slice() {
95 has_help = true;
96
97 handler.span_note(
98 span,
99 &format!(
100 "there are non-printing characters, the full sequence is `{}`",
101 lit.escape_default(),
102 ),
103 );
104
105 handler.span_suggestion(
106 span,
107 "consider removing the non-printing characters",
108 ch.to_string(),
109 Applicability::MaybeIncorrect,
110 );
111 }
c295e0f8
XL
112 }
113
114 if !has_help {
115 let (prefix, msg) = if mode.is_bytes() {
116 ("b", "if you meant to write a byte string literal, use double quotes")
117 } else {
118 ("", "if you meant to write a `str` literal, use double quotes")
119 };
120
121 handler.span_suggestion(
48663c56 122 span_with_quotes,
e74abb32 123 msg,
5869c6ff 124 format!("{}\"{}\"", prefix, lit),
48663c56 125 Applicability::MachineApplicable,
c295e0f8
XL
126 );
127 }
128
129 handler.emit();
48663c56
XL
130 }
131 EscapeError::EscapeOnlyChar => {
5869c6ff 132 let (c, char_span) = last_char();
48663c56 133
5869c6ff
XL
134 let msg = if mode.is_bytes() {
135 "byte constant must be escaped"
48663c56 136 } else {
5869c6ff
XL
137 "character constant must be escaped"
138 };
139 handler
140 .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
141 .span_suggestion(
142 char_span,
143 "escape the character",
144 c.escape_default().to_string(),
145 Applicability::MachineApplicable,
146 )
147 .emit()
48663c56
XL
148 }
149 EscapeError::BareCarriageReturn => {
150 let msg = if mode.in_double_quotes() {
5869c6ff 151 "bare CR not allowed in string, use `\\r` instead"
48663c56 152 } else {
5869c6ff 153 "character constant must be escaped: `\\r`"
48663c56 154 };
5869c6ff
XL
155 handler
156 .struct_span_err(span, msg)
157 .span_suggestion(
158 span,
159 "escape the character",
160 "\\r".to_string(),
161 Applicability::MachineApplicable,
162 )
163 .emit();
48663c56 164 }
dc9dc135
XL
165 EscapeError::BareCarriageReturnInRawString => {
166 assert!(mode.in_double_quotes());
167 let msg = "bare CR not allowed in raw string";
168 handler.span_err(span, msg);
169 }
48663c56
XL
170 EscapeError::InvalidEscape => {
171 let (c, span) = last_char();
172
dfeec247
XL
173 let label =
174 if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
5869c6ff
XL
175 let ec = escaped_char(c);
176 let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
48663c56
XL
177 diag.span_label(span, label);
178 if c == '{' || c == '}' && !mode.is_bytes() {
dfeec247 179 diag.help(
5869c6ff 180 "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
dfeec247 181 );
48663c56 182 } else if c == '\r' {
dfeec247 183 diag.help(
5869c6ff
XL
184 "this is an isolated carriage return; consider checking your editor and \
185 version control settings",
186 );
187 } else {
188 diag.help(
189 "for more information, visit \
190 <https://static.rust-lang.org/doc/master/reference.html#literals>",
dfeec247 191 );
48663c56
XL
192 }
193 diag.emit();
194 }
195 EscapeError::TooShortHexEscape => {
196 handler.span_err(span, "numeric character escape is too short")
197 }
198 EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
199 let (c, span) = last_char();
200
5869c6ff
XL
201 let msg = if error == EscapeError::InvalidCharInHexEscape {
202 "invalid character in numeric character escape"
48663c56 203 } else {
5869c6ff
XL
204 "invalid character in unicode escape"
205 };
206 let c = escaped_char(c);
48663c56 207
5869c6ff
XL
208 handler
209 .struct_span_err(span, &format!("{}: `{}`", msg, c))
210 .span_label(span, msg)
211 .emit();
48663c56
XL
212 }
213 EscapeError::NonAsciiCharInByte => {
214 assert!(mode.is_bytes());
5869c6ff 215 let (c, span) = last_char();
94222f64 216 let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant");
c295e0f8
XL
217 let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
218 format!(" but is {:?}", c)
219 } else {
220 String::new()
221 };
222 err.span_label(span, &format!("byte constant must be ASCII{}", postfix));
94222f64
XL
223 if (c as u32) <= 0xFF {
224 err.span_suggestion(
5869c6ff 225 span,
94222f64 226 &format!(
c295e0f8 227 "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
94222f64
XL
228 c
229 ),
5869c6ff 230 format!("\\x{:X}", c as u32),
94222f64
XL
231 Applicability::MaybeIncorrect,
232 );
233 } else if matches!(mode, Mode::Byte) {
234 err.span_label(span, "this multibyte character does not fit into a single byte");
235 } else if matches!(mode, Mode::ByteStr) {
236 let mut utf8 = String::new();
237 utf8.push(c);
238 err.span_suggestion(
239 span,
240 &format!(
c295e0f8 241 "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
94222f64
XL
242 c
243 ),
244 utf8.as_bytes()
245 .iter()
246 .map(|b: &u8| format!("\\x{:X}", *b))
247 .fold("".to_string(), |a, c| a + &c),
248 Applicability::MaybeIncorrect,
249 );
250 }
251 err.emit();
48663c56 252 }
dc9dc135
XL
253 EscapeError::NonAsciiCharInByteString => {
254 assert!(mode.is_bytes());
c295e0f8
XL
255 let (c, span) = last_char();
256 let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
257 format!(" but is {:?}", c)
258 } else {
259 String::new()
260 };
5869c6ff
XL
261 handler
262 .struct_span_err(span, "raw byte string must be ASCII")
c295e0f8 263 .span_label(span, &format!("must be ASCII{}", postfix))
5869c6ff
XL
264 .emit();
265 }
266 EscapeError::OutOfRangeHexEscape => {
267 handler
268 .struct_span_err(span, "out of range hex escape")
269 .span_label(span, "must be a character in the range [\\x00-\\x7f]")
270 .emit();
dc9dc135 271 }
48663c56 272 EscapeError::LeadingUnderscoreUnicodeEscape => {
5869c6ff
XL
273 let (c, span) = last_char();
274 let msg = "invalid start of unicode escape";
275 handler
276 .struct_span_err(span, &format!("{}: `{}`", msg, c))
277 .span_label(span, msg)
278 .emit();
48663c56
XL
279 }
280 EscapeError::OverlongUnicodeEscape => {
5869c6ff
XL
281 handler
282 .struct_span_err(span, "overlong unicode escape")
283 .span_label(span, "must have at most 6 hex digits")
284 .emit();
48663c56 285 }
5869c6ff
XL
286 EscapeError::UnclosedUnicodeEscape => handler
287 .struct_span_err(span, "unterminated unicode escape")
288 .span_label(span, "missing a closing `}`")
289 .span_suggestion_verbose(
290 span.shrink_to_hi(),
291 "terminate the unicode escape",
292 "}".to_string(),
293 Applicability::MaybeIncorrect,
294 )
295 .emit(),
48663c56
XL
296 EscapeError::NoBraceInUnicodeEscape => {
297 let msg = "incorrect unicode escape sequence";
298 let mut diag = handler.struct_span_err(span, msg);
299
300 let mut suggestion = "\\u{".to_owned();
301 let mut suggestion_len = 0;
302 let (c, char_span) = last_char();
303 let chars = once(c).chain(lit[range.end..].chars());
304 for c in chars.take(6).take_while(|c| c.is_digit(16)) {
305 suggestion.push(c);
306 suggestion_len += c.len_utf8();
307 }
308
309 if suggestion_len > 0 {
310 suggestion.push('}');
29967ef6 311 let hi = char_span.lo() + BytePos(suggestion_len as u32);
48663c56 312 diag.span_suggestion(
29967ef6 313 span.with_hi(hi),
48663c56
XL
314 "format of unicode escape sequences uses braces",
315 suggestion,
316 Applicability::MaybeIncorrect,
317 );
318 } else {
319 diag.span_label(span, msg);
dfeec247 320 diag.help("format of unicode escape sequences is `\\u{...}`");
48663c56
XL
321 }
322
323 diag.emit();
324 }
5869c6ff
XL
325 EscapeError::UnicodeEscapeInByte => {
326 let msg = "unicode escape in byte string";
327 handler
328 .struct_span_err(span, msg)
329 .span_label(span, msg)
330 .help("unicode escape sequences cannot be used as a byte or in a byte string")
331 .emit();
332 }
48663c56 333 EscapeError::EmptyUnicodeEscape => {
5869c6ff
XL
334 handler
335 .struct_span_err(span, "empty unicode escape")
336 .span_label(span, "this escape must have at least 1 hex digit")
337 .emit();
338 }
339 EscapeError::ZeroChars => {
340 let msg = "empty character literal";
341 handler.struct_span_err(span, msg).span_label(span, msg).emit()
342 }
343 EscapeError::LoneSlash => {
344 let msg = "invalid trailing slash in literal";
345 handler.struct_span_err(span, msg).span_label(span, msg).emit();
48663c56 346 }
94222f64
XL
347 EscapeError::UnskippedWhitespaceWarning => {
348 let (c, char_span) = last_char();
349 let msg =
350 format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
351 handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
352 }
353 EscapeError::MultipleSkippedLinesWarning => {
354 let msg = "multiple lines skipped by escaped newline";
355 let bottom_msg = "skipping everything up to and including this point";
356 handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit();
357 }
48663c56
XL
358 }
359}
360
/// Renders `c` for inclusion in a user-facing diagnostic message.
///
/// Printable ASCII (`' '` through `'~'`) is returned verbatim, so `\`, `'` and `"`
/// are deliberately left unescaped; any other character is rendered through
/// `char::escape_default`.
pub(crate) fn escaped_char(c: char) -> String {
    if (' '..='~').contains(&c) {
        c.to_string()
    } else {
        c.escape_default().collect()
    }
}