]>
Commit | Line | Data |
---|---|---|
48663c56 XL |
1 | //! Utilities for rendering escape sequence errors as diagnostics. |
2 | ||
48663c56 | 3 | use std::iter::once; |
dfeec247 | 4 | use std::ops::Range; |
48663c56 | 5 | |
c295e0f8 | 6 | use rustc_errors::{pluralize, Applicability, Handler}; |
416331ca | 7 | use rustc_lexer::unescape::{EscapeError, Mode}; |
dfeec247 | 8 | use rustc_span::{BytePos, Span}; |
48663c56 | 9 | |
48663c56 XL |
10 | pub(crate) fn emit_unescape_error( |
11 | handler: &Handler, | |
12 | // interior part of the literal, without quotes | |
13 | lit: &str, | |
14 | // full span of the literal, including quotes | |
15 | span_with_quotes: Span, | |
fc512014 XL |
16 | // interior span of the literal, without quotes |
17 | span: Span, | |
48663c56 XL |
18 | mode: Mode, |
19 | // range of the error inside `lit` | |
20 | range: Range<usize>, | |
21 | error: EscapeError, | |
22 | ) { | |
3dfed10e | 23 | tracing::debug!( |
dfeec247 XL |
24 | "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}", |
25 | lit, | |
26 | span_with_quotes, | |
27 | mode, | |
28 | range, | |
29 | error | |
30 | ); | |
48663c56 XL |
31 | let last_char = || { |
32 | let c = lit[range.clone()].chars().rev().next().unwrap(); | |
33 | let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32)); | |
34 | (c, span) | |
35 | }; | |
36 | match error { | |
37 | EscapeError::LoneSurrogateUnicodeEscape => { | |
dfeec247 XL |
38 | handler |
39 | .struct_span_err(span, "invalid unicode character escape") | |
5869c6ff | 40 | .span_label(span, "invalid escape") |
48663c56 XL |
41 | .help("unicode escape must not be a surrogate") |
42 | .emit(); | |
43 | } | |
44 | EscapeError::OutOfRangeUnicodeEscape => { | |
dfeec247 XL |
45 | handler |
46 | .struct_span_err(span, "invalid unicode character escape") | |
5869c6ff | 47 | .span_label(span, "invalid escape") |
48663c56 XL |
48 | .help("unicode escape must be at most 10FFFF") |
49 | .emit(); | |
50 | } | |
51 | EscapeError::MoreThanOneChar => { | |
c295e0f8 | 52 | use unicode_normalization::{char::is_combining_mark, UnicodeNormalization}; |
e74abb32 | 53 | |
c295e0f8 XL |
54 | let mut has_help = false; |
55 | let mut handler = handler.struct_span_err( | |
56 | span_with_quotes, | |
57 | "character literal may only contain one codepoint", | |
58 | ); | |
59 | ||
60 | if lit.chars().skip(1).all(|c| is_combining_mark(c)) { | |
61 | let escaped_marks = | |
62 | lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>(); | |
63 | handler.span_note( | |
64 | span, | |
65 | &format!( | |
66 | "this `{}` is followed by the combining mark{} `{}`", | |
67 | lit.chars().next().unwrap(), | |
68 | pluralize!(escaped_marks.len()), | |
69 | escaped_marks.join(""), | |
70 | ), | |
71 | ); | |
72 | let normalized = lit.nfc().to_string(); | |
73 | if normalized.chars().count() == 1 { | |
74 | has_help = true; | |
75 | handler.span_suggestion( | |
76 | span, | |
77 | &format!( | |
78 | "consider using the normalized form `{}` of this character", | |
79 | normalized.chars().next().unwrap().escape_default() | |
80 | ), | |
81 | normalized, | |
82 | Applicability::MachineApplicable, | |
83 | ); | |
84 | } | |
3c0e092e XL |
85 | } else { |
86 | let printable: Vec<char> = lit | |
87 | .chars() | |
88 | .filter(|&x| { | |
89 | unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0 | |
90 | && !x.is_whitespace() | |
91 | }) | |
92 | .collect(); | |
93 | ||
94 | if let [ch] = printable.as_slice() { | |
95 | has_help = true; | |
96 | ||
97 | handler.span_note( | |
98 | span, | |
99 | &format!( | |
100 | "there are non-printing characters, the full sequence is `{}`", | |
101 | lit.escape_default(), | |
102 | ), | |
103 | ); | |
104 | ||
105 | handler.span_suggestion( | |
106 | span, | |
107 | "consider removing the non-printing characters", | |
108 | ch.to_string(), | |
109 | Applicability::MaybeIncorrect, | |
110 | ); | |
111 | } | |
c295e0f8 XL |
112 | } |
113 | ||
114 | if !has_help { | |
115 | let (prefix, msg) = if mode.is_bytes() { | |
116 | ("b", "if you meant to write a byte string literal, use double quotes") | |
117 | } else { | |
118 | ("", "if you meant to write a `str` literal, use double quotes") | |
119 | }; | |
120 | ||
121 | handler.span_suggestion( | |
48663c56 | 122 | span_with_quotes, |
e74abb32 | 123 | msg, |
5869c6ff | 124 | format!("{}\"{}\"", prefix, lit), |
48663c56 | 125 | Applicability::MachineApplicable, |
c295e0f8 XL |
126 | ); |
127 | } | |
128 | ||
129 | handler.emit(); | |
48663c56 XL |
130 | } |
131 | EscapeError::EscapeOnlyChar => { | |
5869c6ff | 132 | let (c, char_span) = last_char(); |
48663c56 | 133 | |
5869c6ff XL |
134 | let msg = if mode.is_bytes() { |
135 | "byte constant must be escaped" | |
48663c56 | 136 | } else { |
5869c6ff XL |
137 | "character constant must be escaped" |
138 | }; | |
139 | handler | |
140 | .struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c))) | |
141 | .span_suggestion( | |
142 | char_span, | |
143 | "escape the character", | |
144 | c.escape_default().to_string(), | |
145 | Applicability::MachineApplicable, | |
146 | ) | |
147 | .emit() | |
48663c56 XL |
148 | } |
149 | EscapeError::BareCarriageReturn => { | |
150 | let msg = if mode.in_double_quotes() { | |
5869c6ff | 151 | "bare CR not allowed in string, use `\\r` instead" |
48663c56 | 152 | } else { |
5869c6ff | 153 | "character constant must be escaped: `\\r`" |
48663c56 | 154 | }; |
5869c6ff XL |
155 | handler |
156 | .struct_span_err(span, msg) | |
157 | .span_suggestion( | |
158 | span, | |
159 | "escape the character", | |
160 | "\\r".to_string(), | |
161 | Applicability::MachineApplicable, | |
162 | ) | |
163 | .emit(); | |
48663c56 | 164 | } |
dc9dc135 XL |
165 | EscapeError::BareCarriageReturnInRawString => { |
166 | assert!(mode.in_double_quotes()); | |
167 | let msg = "bare CR not allowed in raw string"; | |
168 | handler.span_err(span, msg); | |
169 | } | |
48663c56 XL |
170 | EscapeError::InvalidEscape => { |
171 | let (c, span) = last_char(); | |
172 | ||
dfeec247 XL |
173 | let label = |
174 | if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" }; | |
5869c6ff XL |
175 | let ec = escaped_char(c); |
176 | let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec)); | |
48663c56 XL |
177 | diag.span_label(span, label); |
178 | if c == '{' || c == '}' && !mode.is_bytes() { | |
dfeec247 | 179 | diag.help( |
5869c6ff | 180 | "if used in a formatting string, curly braces are escaped with `{{` and `}}`", |
dfeec247 | 181 | ); |
48663c56 | 182 | } else if c == '\r' { |
dfeec247 | 183 | diag.help( |
5869c6ff XL |
184 | "this is an isolated carriage return; consider checking your editor and \ |
185 | version control settings", | |
186 | ); | |
187 | } else { | |
188 | diag.help( | |
189 | "for more information, visit \ | |
190 | <https://static.rust-lang.org/doc/master/reference.html#literals>", | |
dfeec247 | 191 | ); |
48663c56 XL |
192 | } |
193 | diag.emit(); | |
194 | } | |
195 | EscapeError::TooShortHexEscape => { | |
196 | handler.span_err(span, "numeric character escape is too short") | |
197 | } | |
198 | EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => { | |
199 | let (c, span) = last_char(); | |
200 | ||
5869c6ff XL |
201 | let msg = if error == EscapeError::InvalidCharInHexEscape { |
202 | "invalid character in numeric character escape" | |
48663c56 | 203 | } else { |
5869c6ff XL |
204 | "invalid character in unicode escape" |
205 | }; | |
206 | let c = escaped_char(c); | |
48663c56 | 207 | |
5869c6ff XL |
208 | handler |
209 | .struct_span_err(span, &format!("{}: `{}`", msg, c)) | |
210 | .span_label(span, msg) | |
211 | .emit(); | |
48663c56 XL |
212 | } |
213 | EscapeError::NonAsciiCharInByte => { | |
214 | assert!(mode.is_bytes()); | |
5869c6ff | 215 | let (c, span) = last_char(); |
94222f64 | 216 | let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); |
c295e0f8 XL |
217 | let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { |
218 | format!(" but is {:?}", c) | |
219 | } else { | |
220 | String::new() | |
221 | }; | |
222 | err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); | |
94222f64 XL |
223 | if (c as u32) <= 0xFF { |
224 | err.span_suggestion( | |
5869c6ff | 225 | span, |
94222f64 | 226 | &format!( |
c295e0f8 | 227 | "if you meant to use the unicode code point for {:?}, use a \\xHH escape", |
94222f64 XL |
228 | c |
229 | ), | |
5869c6ff | 230 | format!("\\x{:X}", c as u32), |
94222f64 XL |
231 | Applicability::MaybeIncorrect, |
232 | ); | |
233 | } else if matches!(mode, Mode::Byte) { | |
234 | err.span_label(span, "this multibyte character does not fit into a single byte"); | |
235 | } else if matches!(mode, Mode::ByteStr) { | |
236 | let mut utf8 = String::new(); | |
237 | utf8.push(c); | |
238 | err.span_suggestion( | |
239 | span, | |
240 | &format!( | |
c295e0f8 | 241 | "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", |
94222f64 XL |
242 | c |
243 | ), | |
244 | utf8.as_bytes() | |
245 | .iter() | |
246 | .map(|b: &u8| format!("\\x{:X}", *b)) | |
247 | .fold("".to_string(), |a, c| a + &c), | |
248 | Applicability::MaybeIncorrect, | |
249 | ); | |
250 | } | |
251 | err.emit(); | |
48663c56 | 252 | } |
dc9dc135 XL |
253 | EscapeError::NonAsciiCharInByteString => { |
254 | assert!(mode.is_bytes()); | |
c295e0f8 XL |
255 | let (c, span) = last_char(); |
256 | let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { | |
257 | format!(" but is {:?}", c) | |
258 | } else { | |
259 | String::new() | |
260 | }; | |
5869c6ff XL |
261 | handler |
262 | .struct_span_err(span, "raw byte string must be ASCII") | |
c295e0f8 | 263 | .span_label(span, &format!("must be ASCII{}", postfix)) |
5869c6ff XL |
264 | .emit(); |
265 | } | |
266 | EscapeError::OutOfRangeHexEscape => { | |
267 | handler | |
268 | .struct_span_err(span, "out of range hex escape") | |
269 | .span_label(span, "must be a character in the range [\\x00-\\x7f]") | |
270 | .emit(); | |
dc9dc135 | 271 | } |
48663c56 | 272 | EscapeError::LeadingUnderscoreUnicodeEscape => { |
5869c6ff XL |
273 | let (c, span) = last_char(); |
274 | let msg = "invalid start of unicode escape"; | |
275 | handler | |
276 | .struct_span_err(span, &format!("{}: `{}`", msg, c)) | |
277 | .span_label(span, msg) | |
278 | .emit(); | |
48663c56 XL |
279 | } |
280 | EscapeError::OverlongUnicodeEscape => { | |
5869c6ff XL |
281 | handler |
282 | .struct_span_err(span, "overlong unicode escape") | |
283 | .span_label(span, "must have at most 6 hex digits") | |
284 | .emit(); | |
48663c56 | 285 | } |
5869c6ff XL |
286 | EscapeError::UnclosedUnicodeEscape => handler |
287 | .struct_span_err(span, "unterminated unicode escape") | |
288 | .span_label(span, "missing a closing `}`") | |
289 | .span_suggestion_verbose( | |
290 | span.shrink_to_hi(), | |
291 | "terminate the unicode escape", | |
292 | "}".to_string(), | |
293 | Applicability::MaybeIncorrect, | |
294 | ) | |
295 | .emit(), | |
48663c56 XL |
296 | EscapeError::NoBraceInUnicodeEscape => { |
297 | let msg = "incorrect unicode escape sequence"; | |
298 | let mut diag = handler.struct_span_err(span, msg); | |
299 | ||
300 | let mut suggestion = "\\u{".to_owned(); | |
301 | let mut suggestion_len = 0; | |
302 | let (c, char_span) = last_char(); | |
303 | let chars = once(c).chain(lit[range.end..].chars()); | |
304 | for c in chars.take(6).take_while(|c| c.is_digit(16)) { | |
305 | suggestion.push(c); | |
306 | suggestion_len += c.len_utf8(); | |
307 | } | |
308 | ||
309 | if suggestion_len > 0 { | |
310 | suggestion.push('}'); | |
29967ef6 | 311 | let hi = char_span.lo() + BytePos(suggestion_len as u32); |
48663c56 | 312 | diag.span_suggestion( |
29967ef6 | 313 | span.with_hi(hi), |
48663c56 XL |
314 | "format of unicode escape sequences uses braces", |
315 | suggestion, | |
316 | Applicability::MaybeIncorrect, | |
317 | ); | |
318 | } else { | |
319 | diag.span_label(span, msg); | |
dfeec247 | 320 | diag.help("format of unicode escape sequences is `\\u{...}`"); |
48663c56 XL |
321 | } |
322 | ||
323 | diag.emit(); | |
324 | } | |
5869c6ff XL |
325 | EscapeError::UnicodeEscapeInByte => { |
326 | let msg = "unicode escape in byte string"; | |
327 | handler | |
328 | .struct_span_err(span, msg) | |
329 | .span_label(span, msg) | |
330 | .help("unicode escape sequences cannot be used as a byte or in a byte string") | |
331 | .emit(); | |
332 | } | |
48663c56 | 333 | EscapeError::EmptyUnicodeEscape => { |
5869c6ff XL |
334 | handler |
335 | .struct_span_err(span, "empty unicode escape") | |
336 | .span_label(span, "this escape must have at least 1 hex digit") | |
337 | .emit(); | |
338 | } | |
339 | EscapeError::ZeroChars => { | |
340 | let msg = "empty character literal"; | |
341 | handler.struct_span_err(span, msg).span_label(span, msg).emit() | |
342 | } | |
343 | EscapeError::LoneSlash => { | |
344 | let msg = "invalid trailing slash in literal"; | |
345 | handler.struct_span_err(span, msg).span_label(span, msg).emit(); | |
48663c56 | 346 | } |
94222f64 XL |
347 | EscapeError::UnskippedWhitespaceWarning => { |
348 | let (c, char_span) = last_char(); | |
349 | let msg = | |
350 | format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode()); | |
351 | handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit(); | |
352 | } | |
353 | EscapeError::MultipleSkippedLinesWarning => { | |
354 | let msg = "multiple lines skipped by escaped newline"; | |
355 | let bottom_msg = "skipping everything up to and including this point"; | |
356 | handler.struct_span_warn(span, msg).span_label(span, bottom_msg).emit(); | |
357 | } | |
48663c56 XL |
358 | } |
359 | } | |
360 | ||
/// Renders `c` as a `String` suitable for embedding in a user-facing diagnostic message.
///
/// Characters in the range `U+0020..=U+007E` are returned verbatim, so `\`, `'` and `"`
/// appear unescaped. Every other character is rendered with [`char::escape_default`].
pub(crate) fn escaped_char(c: char) -> String {
    match c {
        // Don't escape \, ' or " for user-facing messages
        '\u{20}'..='\u{7e}' => c.to_string(),
        _ => c.escape_default().to_string(),
    }
}