]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | //! Utilities for rendering escape sequence errors as diagnostics. |
2 | ||
48663c56 | 3 | use std::iter::once; |
dfeec247 | 4 | use std::ops::Range; |
48663c56 | 5 | |
c295e0f8 | 6 | use rustc_errors::{pluralize, Applicability, Handler}; |
416331ca | 7 | use rustc_lexer::unescape::{EscapeError, Mode}; |
dfeec247 | 8 | use rustc_span::{BytePos, Span}; |
48663c56 | 9 | |
48663c56 XL |
10 | pub(crate) fn emit_unescape_error( |
11 | handler: &Handler, | |
12 | // interior part of the literal, without quotes | |
13 | lit: &str, | |
14 | // full span of the literal, including quotes | |
15 | span_with_quotes: Span, | |
fc512014 XL |
16 | // interior span of the literal, without quotes |
17 | span: Span, | |
48663c56 XL |
18 | mode: Mode, |
19 | // range of the error inside `lit` | |
20 | range: Range<usize>, | |
21 | error: EscapeError, | |
22 | ) { | |
f2b60f7d | 23 | debug!( |
dfeec247 | 24 | "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", |
f2b60f7d | 25 | lit, span_with_quotes, mode, range, error |
dfeec247 | 26 | ); |
48663c56 XL |
27 | let last_char = || { |
28 | let c = lit[range.clone()].chars().rev().next().unwrap(); | |
29 | let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); | |
30 | (c, span) | |
31 | }; | |
32 | match error { | |
33 | EscapeError::LoneSurrogateUnicodeEscape => { | |
dfeec247 XL |
34 | handler |
35 | .struct_span_err(span, "invalid unicode character escape") | |
5869c6ff | 36 | .span_label(span, "invalid escape") |
48663c56 XL |
37 | .help("unicode escape must not be a surrogate") |
38 | .emit(); | |
39 | } | |
40 | EscapeError::OutOfRangeUnicodeEscape => { | |
dfeec247 XL |
41 | handler |
42 | .struct_span_err(span, "invalid unicode character escape") | |
5869c6ff | 43 | .span_label(span, "invalid escape") |
48663c56 XL |
44 | .help("unicode escape must be at most 10FFFF") |
45 | .emit(); | |
46 | } | |
47 | EscapeError::MoreThanOneChar => { | |
c295e0f8 | 48 | use unicode_normalization::{char::is_combining_mark, UnicodeNormalization}; |
e74abb32 | 49 | |
c295e0f8 XL |
50 | let mut has_help = false; |
51 | let mut handler = handler.struct_span_err( | |
52 | span_with_quotes, | |
53 | "character literal may only contain one codepoint", | |
54 | ); | |
55 | ||
56 | if lit.chars().skip(1).all(|c| is_combining_mark(c)) { | |
57 | let escaped_marks = | |
58 | lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>(); | |
59 | handler.span_note( | |
60 | span, | |
61 | &format!( | |
62 | "this `{}` is followed by the combining mark{} `{}`", | |
63 | lit.chars().next().unwrap(), | |
64 | pluralize!(escaped_marks.len()), | |
65 | escaped_marks.join(""), | |
66 | ), | |
67 | ); | |
68 | let normalized = lit.nfc().to_string(); | |
69 | if normalized.chars().count() == 1 { | |
70 | has_help = true; | |
71 | handler.span_suggestion( | |
72 | span, | |
73 | &format!( | |
74 | "consider using the normalized form `{}` of this character", | |
75 | normalized.chars().next().unwrap().escape_default() | |
76 | ), | |
77 | normalized, | |
78 | Applicability::MachineApplicable, | |
79 | ); | |
80 | } | |
3c0e092e XL |
81 | } else { |
82 | let printable: Vec<char> = lit | |
83 | .chars() | |
84 | .filter(|&x| { | |
85 | unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0 | |
86 | && !x.is_whitespace() | |
87 | }) | |
88 | .collect(); | |
89 | ||
90 | if let [ch] = printable.as_slice() { | |
91 | has_help = true; | |
92 | ||
93 | handler.span_note( | |
94 | span, | |
95 | &format!( | |
96 | "there are non-printing characters, the full sequence is `{}`", | |
97 | lit.escape_default(), | |
98 | ), | |
99 | ); | |
100 | ||
101 | handler.span_suggestion( | |
102 | span, | |
103 | "consider removing the non-printing characters", | |
923072b8 | 104 | ch, |
3c0e092e XL |
105 | Applicability::MaybeIncorrect, |
106 | ); | |
107 | } | |
c295e0f8 XL |
108 | } |
109 | ||
110 | if !has_help { | |
111 | let (prefix, msg) = if mode.is_bytes() { | |
112 | ("b", "if you meant to write a byte string literal, use double quotes") | |
113 | } else { | |
114 | ("", "if you meant to write a `str` literal, use double quotes") | |
115 | }; | |
116 | ||
117 | handler.span_suggestion( | |
48663c56 | 118 | span_with_quotes, |
e74abb32 | 119 | msg, |
5869c6ff | 120 | format!("{}\"{}\"", prefix, lit), |
48663c56 | 121 | Applicability::MachineApplicable, |
c295e0f8 XL |
122 | ); |
123 | } | |
124 | ||
125 | handler.emit(); | |
48663c56 XL |
126 | } |
127 | EscapeError::EscapeOnlyChar => { | |
5869c6ff | 128 | let (c, char_span) = last_char(); |
48663c56 | 129 | |
5869c6ff XL |
130 | let msg = if mode.is_bytes() { |
131 | "byte constant must be escaped" | |
48663c56 | 132 | } else { |
5869c6ff XL |
133 | "character constant must be escaped" |
134 | }; | |
135 | handler | |
136 | .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c))) | |
137 | .span_suggestion( | |
138 | char_span, | |
139 | "escape the character", | |
923072b8 | 140 | c.escape_default(), |
5869c6ff XL |
141 | Applicability::MachineApplicable, |
142 | ) | |
5e7ed085 | 143 | .emit(); |
48663c56 XL |
144 | } |
145 | EscapeError::BareCarriageReturn => { | |
146 | let msg = if mode.in_double_quotes() { | |
5869c6ff | 147 | "bare CR not allowed in string, use `\\r` instead" |
48663c56 | 148 | } else { |
5869c6ff | 149 | "character constant must be escaped: `\\r`" |
48663c56 | 150 | }; |
5869c6ff XL |
151 | handler |
152 | .struct_span_err(span, msg) | |
153 | .span_suggestion( | |
154 | span, | |
155 | "escape the character", | |
923072b8 | 156 | "\\r", |
5869c6ff XL |
157 | Applicability::MachineApplicable, |
158 | ) | |
159 | .emit(); | |
48663c56 | 160 | } |
dc9dc135 XL |
161 | EscapeError::BareCarriageReturnInRawString => { |
162 | assert!(mode.in_double_quotes()); | |
163 | let msg = "bare CR not allowed in raw string"; | |
164 | handler.span_err(span, msg); | |
165 | } | |
48663c56 XL |
166 | EscapeError::InvalidEscape => { |
167 | let (c, span) = last_char(); | |
168 | ||
dfeec247 XL |
169 | let label = |
170 | if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; | |
5869c6ff XL |
171 | let ec = escaped_char(c); |
172 | let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); | |
48663c56 XL |
173 | diag.span_label(span, label); |
174 | if c == '{' || c == '}' && !mode.is_bytes() { | |
dfeec247 | 175 | diag.help( |
5869c6ff | 176 | "if used in a formatting string, curly braces are escaped with `{{` and `}}`", |
dfeec247 | 177 | ); |
48663c56 | 178 | } else if c == '\r' { |
dfeec247 | 179 | diag.help( |
5869c6ff XL |
180 | "this is an isolated carriage return; consider checking your editor and \ |
181 | version control settings", | |
182 | ); | |
183 | } else { | |
5099ac24 FG |
184 | if !mode.is_bytes() { |
185 | diag.span_suggestion( | |
186 | span_with_quotes, | |
187 | "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal", | |
188 | format!("r\"{}\"", lit), | |
189 | Applicability::MaybeIncorrect, | |
190 | ); | |
191 | } | |
192 | ||
5869c6ff XL |
193 | diag.help( |
194 | "for more information, visit \ | |
195 | <https://static.rust-lang.org/doc/master/reference.html#literals>", | |
dfeec247 | 196 | ); |
48663c56 XL |
197 | } |
198 | diag.emit(); | |
199 | } | |
200 | EscapeError::TooShortHexEscape => { | |
5e7ed085 | 201 | handler.span_err(span, "numeric character escape is too short"); |
48663c56 XL |
202 | } |
203 | EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { | |
204 | let (c, span) = last_char(); | |
205 | ||
5869c6ff XL |
206 | let msg = if error == EscapeError::InvalidCharInHexEscape { |
207 | "invalid character in numeric character escape" | |
48663c56 | 208 | } else { |
5869c6ff XL |
209 | "invalid character in unicode escape" |
210 | }; | |
211 | let c = escaped_char(c); | |
48663c56 | 212 | |
5869c6ff XL |
213 | handler |
214 | .struct_span_err(span, &format!("{}: `{}`", msg, c)) | |
215 | .span_label(span, msg) | |
216 | .emit(); | |
48663c56 XL |
217 | } |
218 | EscapeError::NonAsciiCharInByte => { | |
219 | assert!(mode.is_bytes()); | |
5869c6ff | 220 | let (c, span) = last_char(); |
94222f64 | 221 | let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); |
c295e0f8 XL |
222 | let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { |
223 | format!(" but is {:?}", c) | |
224 | } else { | |
225 | String::new() | |
226 | }; | |
227 | err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); | |
94222f64 XL |
228 | if (c as u32) <= 0xFF { |
229 | err.span_suggestion( | |
5869c6ff | 230 | span, |
94222f64 | 231 | &format!( |
c295e0f8 | 232 | "if you meant to use the unicode code point for {:?}, use a \\xHH escape", |
94222f64 XL |
233 | c |
234 | ), | |
5869c6ff | 235 | format!("\\x{:X}", c as u32), |
94222f64 XL |
236 | Applicability::MaybeIncorrect, |
237 | ); | |
238 | } else if matches!(mode, Mode::Byte) { | |
239 | err.span_label(span, "this multibyte character does not fit into a single byte"); | |
240 | } else if matches!(mode, Mode::ByteStr) { | |
241 | let mut utf8 = String::new(); | |
242 | utf8.push(c); | |
243 | err.span_suggestion( | |
244 | span, | |
245 | &format!( | |
c295e0f8 | 246 | "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", |
94222f64 XL |
247 | c |
248 | ), | |
249 | utf8.as_bytes() | |
250 | .iter() | |
251 | .map(|b: &u8| format!("\\x{:X}", *b)) | |
252 | .fold("".to_string(), |a, c| a + &c), | |
253 | Applicability::MaybeIncorrect, | |
254 | ); | |
255 | } | |
256 | err.emit(); | |
48663c56 | 257 | } |
dc9dc135 XL |
258 | EscapeError::NonAsciiCharInByteString => { |
259 | assert!(mode.is_bytes()); | |
c295e0f8 XL |
260 | let (c, span) = last_char(); |
261 | let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { | |
262 | format!(" but is {:?}", c) | |
263 | } else { | |
264 | String::new() | |
265 | }; | |
5869c6ff XL |
266 | handler |
267 | .struct_span_err(span, "raw byte string must be ASCII") | |
c295e0f8 | 268 | .span_label(span, &format!("must be ASCII{}", postfix)) |
5869c6ff XL |
269 | .emit(); |
270 | } | |
271 | EscapeError::OutOfRangeHexEscape => { | |
272 | handler | |
273 | .struct_span_err(span, "out of range hex escape") | |
274 | .span_label(span, "must be a character in the range [\\x00-\\x7f]") | |
275 | .emit(); | |
dc9dc135 | 276 | } |
48663c56 | 277 | EscapeError::LeadingUnderscoreUnicodeEscape => { |
5869c6ff XL |
278 | let (c, span) = last_char(); |
279 | let msg = "invalid start of unicode escape"; | |
280 | handler | |
281 | .struct_span_err(span, &format!("{}: `{}`", msg, c)) | |
282 | .span_label(span, msg) | |
283 | .emit(); | |
48663c56 XL |
284 | } |
285 | EscapeError::OverlongUnicodeEscape => { | |
5869c6ff XL |
286 | handler |
287 | .struct_span_err(span, "overlong unicode escape") | |
288 | .span_label(span, "must have at most 6 hex digits") | |
289 | .emit(); | |
48663c56 | 290 | } |
5e7ed085 FG |
291 | EscapeError::UnclosedUnicodeEscape => { |
292 | handler | |
293 | .struct_span_err(span, "unterminated unicode escape") | |
294 | .span_label(span, "missing a closing `}`") | |
295 | .span_suggestion_verbose( | |
296 | span.shrink_to_hi(), | |
297 | "terminate the unicode escape", | |
923072b8 | 298 | "}", |
5e7ed085 FG |
299 | Applicability::MaybeIncorrect, |
300 | ) | |
301 | .emit(); | |
302 | } | |
48663c56 XL |
303 | EscapeError::NoBraceInUnicodeEscape => { |
304 | let msg = "incorrect unicode escape sequence"; | |
305 | let mut diag = handler.struct_span_err(span, msg); | |
306 | ||
307 | let mut suggestion = "\\u{".to_owned(); | |
308 | let mut suggestion_len = 0; | |
309 | let (c, char_span) = last_char(); | |
310 | let chars = once(c).chain(lit[range.end..].chars()); | |
311 | for c in chars.take(6).take_while(|c| c.is_digit(16)) { | |
312 | suggestion.push(c); | |
313 | suggestion_len += c.len_utf8(); | |
314 | } | |
315 | ||
316 | if suggestion_len > 0 { | |
317 | suggestion.push('}'); | |
29967ef6 | 318 | let hi = char_span.lo() + BytePos(suggestion_len as u32); |
48663c56 | 319 | diag.span_suggestion( |
29967ef6 | 320 | span.with_hi(hi), |
48663c56 XL |
321 | "format of unicode escape sequences uses braces", |
322 | suggestion, | |
323 | Applicability::MaybeIncorrect, | |
324 | ); | |
325 | } else { | |
326 | diag.span_label(span, msg); | |
dfeec247 | 327 | diag.help("format of unicode escape sequences is `\\u{...}`"); |
48663c56 XL |
328 | } |
329 | ||
330 | diag.emit(); | |
331 | } | |
5869c6ff XL |
332 | EscapeError::UnicodeEscapeInByte => { |
333 | let msg = "unicode escape in byte string"; | |
334 | handler | |
335 | .struct_span_err(span, msg) | |
336 | .span_label(span, msg) | |
337 | .help("unicode escape sequences cannot be used as a byte or in a byte string") | |
338 | .emit(); | |
339 | } | |
48663c56 | 340 | EscapeError::EmptyUnicodeEscape => { |
5869c6ff XL |
341 | handler |
342 | .struct_span_err(span, "empty unicode escape") | |
343 | .span_label(span, "this escape must have at least 1 hex digit") | |
344 | .emit(); | |
345 | } | |
346 | EscapeError::ZeroChars => { | |
347 | let msg = "empty character literal"; | |
5e7ed085 | 348 | handler.struct_span_err(span, msg).span_label(span, msg).emit(); |
5869c6ff XL |
349 | } |
350 | EscapeError::LoneSlash => { | |
351 | let msg = "invalid trailing slash in literal"; | |
352 | handler.struct_span_err(span, msg).span_label(span, msg).emit(); | |
48663c56 | 353 | } |
94222f64 XL |
354 | EscapeError::UnskippedWhitespaceWarning => { |
355 | let (c, char_span) = last_char(); | |
356 | let msg = | |
357 | format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode()); | |
358 | handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit(); | |
359 | } | |
360 | EscapeError::MultipleSkippedLinesWarning => { | |
361 | let msg = "multiple lines skipped by escaped newline"; | |
362 | let bottom_msg = "skipping everything up to and including this point"; | |
363 | handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit(); | |
364 | } | |
48663c56 XL |
365 | } |
366 | } | |
367 | ||
/// Renders `c` as text suitable for embedding in a user-facing diagnostic message.
///
/// Characters in the range `'\u{20}'..='\u{7e}'` are returned verbatim, so `\`, `'` and
/// `"` are deliberately left unescaped. Every other character is rendered through
/// `char::escape_default`.
pub(crate) fn escaped_char(c: char) -> String {
    // Don't escape \, ' or " for user-facing messages
    if matches!(c, '\u{20}'..='\u{7e}') {
        return String::from(c);
    }
    c.escape_default().collect()
}