]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | //! Utilities for rendering escape sequence errors as diagnostics. |
2 | ||
48663c56 | 3 | use std::iter::once; |
dfeec247 | 4 | use std::ops::Range; |
48663c56 | 5 | |
c295e0f8 | 6 | use rustc_errors::{pluralize, Applicability, Handler}; |
416331ca | 7 | use rustc_lexer::unescape::{EscapeError, Mode}; |
dfeec247 | 8 | use rustc_span::{BytePos, Span}; |
48663c56 | 9 | |
48663c56 XL |
10 | pub(crate) fn emit_unescape_error( |
11 | handler: &Handler, | |
12 | // interior part of the literal, without quotes | |
13 | lit: &str, | |
14 | // full span of the literal, including quotes | |
15 | span_with_quotes: Span, | |
fc512014 XL |
16 | // interior span of the literal, without quotes |
17 | span: Span, | |
48663c56 XL |
18 | mode: Mode, |
19 | // range of the error inside `lit` | |
20 | range: Range<usize>, | |
21 | error: EscapeError, | |
22 | ) { | |
f2b60f7d | 23 | debug!( |
dfeec247 | 24 | "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", |
f2b60f7d | 25 | lit, span_with_quotes, mode, range, error |
dfeec247 | 26 | ); |
48663c56 XL |
27 | let last_char = || { |
28 | let c = lit[range.clone()].chars().rev().next().unwrap(); | |
29 | let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); | |
30 | (c, span) | |
31 | }; | |
32 | match error { | |
33 | EscapeError::LoneSurrogateUnicodeEscape => { | |
dfeec247 XL |
34 | handler |
35 | .struct_span_err(span, "invalid unicode character escape") | |
5869c6ff | 36 | .span_label(span, "invalid escape") |
48663c56 XL |
37 | .help("unicode escape must not be a surrogate") |
38 | .emit(); | |
39 | } | |
40 | EscapeError::OutOfRangeUnicodeEscape => { | |
dfeec247 XL |
41 | handler |
42 | .struct_span_err(span, "invalid unicode character escape") | |
5869c6ff | 43 | .span_label(span, "invalid escape") |
48663c56 XL |
44 | .help("unicode escape must be at most 10FFFF") |
45 | .emit(); | |
46 | } | |
47 | EscapeError::MoreThanOneChar => { | |
c295e0f8 | 48 | use unicode_normalization::{char::is_combining_mark, UnicodeNormalization}; |
e74abb32 | 49 | |
c295e0f8 XL |
50 | let mut has_help = false; |
51 | let mut handler = handler.struct_span_err( | |
52 | span_with_quotes, | |
53 | "character literal may only contain one codepoint", | |
54 | ); | |
55 | ||
56 | if lit.chars().skip(1).all(|c| is_combining_mark(c)) { | |
57 | let escaped_marks = | |
58 | lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>(); | |
59 | handler.span_note( | |
60 | span, | |
61 | &format!( | |
62 | "this `{}` is followed by the combining mark{} `{}`", | |
63 | lit.chars().next().unwrap(), | |
64 | pluralize!(escaped_marks.len()), | |
65 | escaped_marks.join(""), | |
66 | ), | |
67 | ); | |
68 | let normalized = lit.nfc().to_string(); | |
69 | if normalized.chars().count() == 1 { | |
70 | has_help = true; | |
71 | handler.span_suggestion( | |
72 | span, | |
73 | &format!( | |
74 | "consider using the normalized form `{}` of this character", | |
75 | normalized.chars().next().unwrap().escape_default() | |
76 | ), | |
77 | normalized, | |
78 | Applicability::MachineApplicable, | |
79 | ); | |
80 | } | |
3c0e092e XL |
81 | } else { |
82 | let printable: Vec<char> = lit | |
83 | .chars() | |
84 | .filter(|&x| { | |
85 | unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0 | |
86 | && !x.is_whitespace() | |
87 | }) | |
88 | .collect(); | |
89 | ||
90 | if let [ch] = printable.as_slice() { | |
91 | has_help = true; | |
92 | ||
93 | handler.span_note( | |
94 | span, | |
95 | &format!( | |
96 | "there are non-printing characters, the full sequence is `{}`", | |
97 | lit.escape_default(), | |
98 | ), | |
99 | ); | |
100 | ||
101 | handler.span_suggestion( | |
102 | span, | |
103 | "consider removing the non-printing characters", | |
923072b8 | 104 | ch, |
3c0e092e XL |
105 | Applicability::MaybeIncorrect, |
106 | ); | |
107 | } | |
c295e0f8 XL |
108 | } |
109 | ||
110 | if !has_help { | |
487cf647 | 111 | let (prefix, msg) = if mode.is_byte() { |
c295e0f8 XL |
112 | ("b", "if you meant to write a byte string literal, use double quotes") |
113 | } else { | |
114 | ("", "if you meant to write a `str` literal, use double quotes") | |
115 | }; | |
2b03887a FG |
116 | let mut escaped = String::with_capacity(lit.len()); |
117 | let mut chrs = lit.chars().peekable(); | |
118 | while let Some(first) = chrs.next() { | |
119 | match (first, chrs.peek()) { | |
120 | ('\\', Some('"')) => { | |
121 | escaped.push('\\'); | |
122 | escaped.push('"'); | |
123 | chrs.next(); | |
124 | } | |
125 | ('"', _) => { | |
126 | escaped.push('\\'); | |
127 | escaped.push('"') | |
128 | } | |
129 | (c, _) => escaped.push(c), | |
130 | }; | |
131 | } | |
c295e0f8 | 132 | handler.span_suggestion( |
48663c56 | 133 | span_with_quotes, |
e74abb32 | 134 | msg, |
2b03887a | 135 | format!("{prefix}\"{escaped}\""), |
48663c56 | 136 | Applicability::MachineApplicable, |
c295e0f8 XL |
137 | ); |
138 | } | |
139 | ||
140 | handler.emit(); | |
48663c56 XL |
141 | } |
142 | EscapeError::EscapeOnlyChar => { | |
5869c6ff | 143 | let (c, char_span) = last_char(); |
48663c56 | 144 | |
487cf647 | 145 | let msg = if mode.is_byte() { |
5869c6ff | 146 | "byte constant must be escaped" |
48663c56 | 147 | } else { |
5869c6ff XL |
148 | "character constant must be escaped" |
149 | }; | |
150 | handler | |
151 | .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c))) | |
152 | .span_suggestion( | |
153 | char_span, | |
154 | "escape the character", | |
923072b8 | 155 | c.escape_default(), |
5869c6ff XL |
156 | Applicability::MachineApplicable, |
157 | ) | |
5e7ed085 | 158 | .emit(); |
48663c56 XL |
159 | } |
160 | EscapeError::BareCarriageReturn => { | |
161 | let msg = if mode.in_double_quotes() { | |
5869c6ff | 162 | "bare CR not allowed in string, use `\\r` instead" |
48663c56 | 163 | } else { |
5869c6ff | 164 | "character constant must be escaped: `\\r`" |
48663c56 | 165 | }; |
5869c6ff XL |
166 | handler |
167 | .struct_span_err(span, msg) | |
168 | .span_suggestion( | |
169 | span, | |
170 | "escape the character", | |
923072b8 | 171 | "\\r", |
5869c6ff XL |
172 | Applicability::MachineApplicable, |
173 | ) | |
174 | .emit(); | |
48663c56 | 175 | } |
dc9dc135 XL |
176 | EscapeError::BareCarriageReturnInRawString => { |
177 | assert!(mode.in_double_quotes()); | |
178 | let msg = "bare CR not allowed in raw string"; | |
179 | handler.span_err(span, msg); | |
180 | } | |
48663c56 XL |
181 | EscapeError::InvalidEscape => { |
182 | let (c, span) = last_char(); | |
183 | ||
dfeec247 | 184 | let label = |
487cf647 | 185 | if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" }; |
5869c6ff XL |
186 | let ec = escaped_char(c); |
187 | let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); | |
48663c56 | 188 | diag.span_label(span, label); |
487cf647 | 189 | if c == '{' || c == '}' && !mode.is_byte() { |
dfeec247 | 190 | diag.help( |
5869c6ff | 191 | "if used in a formatting string, curly braces are escaped with `{{` and `}}`", |
dfeec247 | 192 | ); |
48663c56 | 193 | } else if c == '\r' { |
dfeec247 | 194 | diag.help( |
5869c6ff XL |
195 | "this is an isolated carriage return; consider checking your editor and \ |
196 | version control settings", | |
197 | ); | |
198 | } else { | |
487cf647 | 199 | if !mode.is_byte() { |
5099ac24 FG |
200 | diag.span_suggestion( |
201 | span_with_quotes, | |
202 | "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", | |
203 | format!("r\"{}\"", lit), | |
204 | Applicability::MaybeIncorrect, | |
205 | ); | |
206 | } | |
207 | ||
5869c6ff XL |
208 | diag.help( |
209 | "for more information, visit \ | |
210 | <https://static.rust-lang.org/doc/master/reference.html#literals>", | |
dfeec247 | 211 | ); |
48663c56 XL |
212 | } |
213 | diag.emit(); | |
214 | } | |
215 | EscapeError::TooShortHexEscape => { | |
5e7ed085 | 216 | handler.span_err(span, "numeric character escape is too short"); |
48663c56 XL |
217 | } |
218 | EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { | |
219 | let (c, span) = last_char(); | |
220 | ||
5869c6ff XL |
221 | let msg = if error == EscapeError::InvalidCharInHexEscape { |
222 | "invalid character in numeric character escape" | |
48663c56 | 223 | } else { |
5869c6ff XL |
224 | "invalid character in unicode escape" |
225 | }; | |
226 | let c = escaped_char(c); | |
48663c56 | 227 | |
5869c6ff XL |
228 | handler |
229 | .struct_span_err(span, &format!("{}: `{}`", msg, c)) | |
230 | .span_label(span, msg) | |
231 | .emit(); | |
48663c56 XL |
232 | } |
233 | EscapeError::NonAsciiCharInByte => { | |
5869c6ff | 234 | let (c, span) = last_char(); |
487cf647 FG |
235 | let desc = match mode { |
236 | Mode::Byte => "byte literal", | |
237 | Mode::ByteStr => "byte string literal", | |
238 | Mode::RawByteStr => "raw byte string literal", | |
239 | _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"), | |
240 | }; | |
241 | let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc)); | |
c295e0f8 XL |
242 | let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { |
243 | format!(" but is {:?}", c) | |
244 | } else { | |
245 | String::new() | |
246 | }; | |
487cf647 FG |
247 | err.span_label(span, &format!("must be ASCII{}", postfix)); |
248 | // Note: the \\xHH suggestions are not given for raw byte string | |
249 | // literals, because they are araw and so cannot use any escapes. | |
250 | if (c as u32) <= 0xFF && mode != Mode::RawByteStr { | |
94222f64 | 251 | err.span_suggestion( |
5869c6ff | 252 | span, |
94222f64 | 253 | &format!( |
c295e0f8 | 254 | "if you meant to use the unicode code point for {:?}, use a \\xHH escape", |
94222f64 XL |
255 | c |
256 | ), | |
5869c6ff | 257 | format!("\\x{:X}", c as u32), |
94222f64 XL |
258 | Applicability::MaybeIncorrect, |
259 | ); | |
487cf647 | 260 | } else if mode == Mode::Byte { |
94222f64 | 261 | err.span_label(span, "this multibyte character does not fit into a single byte"); |
487cf647 | 262 | } else if mode != Mode::RawByteStr { |
94222f64 XL |
263 | let mut utf8 = String::new(); |
264 | utf8.push(c); | |
265 | err.span_suggestion( | |
266 | span, | |
267 | &format!( | |
c295e0f8 | 268 | "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", |
94222f64 XL |
269 | c |
270 | ), | |
271 | utf8.as_bytes() | |
272 | .iter() | |
273 | .map(|b: &u8| format!("\\x{:X}", *b)) | |
274 | .fold("".to_string(), |a, c| a + &c), | |
275 | Applicability::MaybeIncorrect, | |
276 | ); | |
277 | } | |
278 | err.emit(); | |
48663c56 | 279 | } |
5869c6ff XL |
280 | EscapeError::OutOfRangeHexEscape => { |
281 | handler | |
282 | .struct_span_err(span, "out of range hex escape") | |
283 | .span_label(span, "must be a character in the range [\\x00-\\x7f]") | |
284 | .emit(); | |
dc9dc135 | 285 | } |
48663c56 | 286 | EscapeError::LeadingUnderscoreUnicodeEscape => { |
5869c6ff XL |
287 | let (c, span) = last_char(); |
288 | let msg = "invalid start of unicode escape"; | |
289 | handler | |
290 | .struct_span_err(span, &format!("{}: `{}`", msg, c)) | |
291 | .span_label(span, msg) | |
292 | .emit(); | |
48663c56 XL |
293 | } |
294 | EscapeError::OverlongUnicodeEscape => { | |
5869c6ff XL |
295 | handler |
296 | .struct_span_err(span, "overlong unicode escape") | |
297 | .span_label(span, "must have at most 6 hex digits") | |
298 | .emit(); | |
48663c56 | 299 | } |
5e7ed085 FG |
300 | EscapeError::UnclosedUnicodeEscape => { |
301 | handler | |
302 | .struct_span_err(span, "unterminated unicode escape") | |
303 | .span_label(span, "missing a closing `}`") | |
304 | .span_suggestion_verbose( | |
305 | span.shrink_to_hi(), | |
306 | "terminate the unicode escape", | |
923072b8 | 307 | "}", |
5e7ed085 FG |
308 | Applicability::MaybeIncorrect, |
309 | ) | |
310 | .emit(); | |
311 | } | |
48663c56 XL |
312 | EscapeError::NoBraceInUnicodeEscape => { |
313 | let msg = "incorrect unicode escape sequence"; | |
314 | let mut diag = handler.struct_span_err(span, msg); | |
315 | ||
316 | let mut suggestion = "\\u{".to_owned(); | |
317 | let mut suggestion_len = 0; | |
318 | let (c, char_span) = last_char(); | |
319 | let chars = once(c).chain(lit[range.end..].chars()); | |
320 | for c in chars.take(6).take_while(|c| c.is_digit(16)) { | |
321 | suggestion.push(c); | |
322 | suggestion_len += c.len_utf8(); | |
323 | } | |
324 | ||
325 | if suggestion_len > 0 { | |
326 | suggestion.push('}'); | |
29967ef6 | 327 | let hi = char_span.lo() + BytePos(suggestion_len as u32); |
48663c56 | 328 | diag.span_suggestion( |
29967ef6 | 329 | span.with_hi(hi), |
48663c56 XL |
330 | "format of unicode escape sequences uses braces", |
331 | suggestion, | |
332 | Applicability::MaybeIncorrect, | |
333 | ); | |
334 | } else { | |
335 | diag.span_label(span, msg); | |
dfeec247 | 336 | diag.help("format of unicode escape sequences is `\\u{...}`"); |
48663c56 XL |
337 | } |
338 | ||
339 | diag.emit(); | |
340 | } | |
5869c6ff XL |
341 | EscapeError::UnicodeEscapeInByte => { |
342 | let msg = "unicode escape in byte string"; | |
343 | handler | |
344 | .struct_span_err(span, msg) | |
345 | .span_label(span, msg) | |
346 | .help("unicode escape sequences cannot be used as a byte or in a byte string") | |
347 | .emit(); | |
348 | } | |
48663c56 | 349 | EscapeError::EmptyUnicodeEscape => { |
5869c6ff XL |
350 | handler |
351 | .struct_span_err(span, "empty unicode escape") | |
352 | .span_label(span, "this escape must have at least 1 hex digit") | |
353 | .emit(); | |
354 | } | |
355 | EscapeError::ZeroChars => { | |
356 | let msg = "empty character literal"; | |
5e7ed085 | 357 | handler.struct_span_err(span, msg).span_label(span, msg).emit(); |
5869c6ff XL |
358 | } |
359 | EscapeError::LoneSlash => { | |
360 | let msg = "invalid trailing slash in literal"; | |
361 | handler.struct_span_err(span, msg).span_label(span, msg).emit(); | |
48663c56 | 362 | } |
94222f64 XL |
363 | EscapeError::UnskippedWhitespaceWarning => { |
364 | let (c, char_span) = last_char(); | |
365 | let msg = | |
366 | format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode()); | |
367 | handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit(); | |
368 | } | |
369 | EscapeError::MultipleSkippedLinesWarning => { | |
370 | let msg = "multiple lines skipped by escaped newline"; | |
371 | let bottom_msg = "skipping everything up to and including this point"; | |
372 | handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit(); | |
373 | } | |
48663c56 XL |
374 | } |
375 | } | |
376 | ||
/// Renders `c` as a `String` for use in a user-facing error message.
///
/// Characters from `U+0020` (space) through `U+007E` (`~`) are returned unchanged, so `\`,
/// `'` and `"` appear without a leading backslash. Every other character is rendered with
/// [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    match c {
        '\u{20}'..='\u{7e}' => {
            // Don't escape \, ' or " for user-facing messages
            c.to_string()
        }
        _ => c.escape_default().to_string(),
    }
}