]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
New upstream version 1.69.0+dfsg1
[rustc.git] / compiler / rustc_parse / src / lexer / unescape_error_reporting.rs
1 //! Utilities for rendering escape sequence errors as diagnostics.
2
3 use std::iter::once;
4 use std::ops::Range;
5
6 use rustc_errors::{Applicability, Handler};
7 use rustc_lexer::unescape::{EscapeError, Mode};
8 use rustc_span::{BytePos, Span};
9
10 use crate::errors::{MoreThanOneCharNote, MoreThanOneCharSugg, NoBraceUnicodeSub, UnescapeError};
11
12 pub(crate) fn emit_unescape_error(
13 handler: &Handler,
14 // interior part of the literal, without quotes
15 lit: &str,
16 // full span of the literal, including quotes
17 span_with_quotes: Span,
18 // interior span of the literal, without quotes
19 span: Span,
20 mode: Mode,
21 // range of the error inside `lit`
22 range: Range<usize>,
23 error: EscapeError,
24 ) {
25 debug!(
26 "emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
27 lit, span_with_quotes, mode, range, error
28 );
29 let last_char = || {
30 let c = lit[range.clone()].chars().rev().next().unwrap();
31 let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
32 (c, span)
33 };
34 match error {
35 EscapeError::LoneSurrogateUnicodeEscape => {
36 handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: true });
37 }
38 EscapeError::OutOfRangeUnicodeEscape => {
39 handler.emit_err(UnescapeError::InvalidUnicodeEscape { span, surrogate: false });
40 }
41 EscapeError::MoreThanOneChar => {
42 use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
43 let mut sugg = None;
44 let mut note = None;
45
46 let lit_chars = lit.chars().collect::<Vec<_>>();
47 let (first, rest) = lit_chars.split_first().unwrap();
48 if rest.iter().copied().all(is_combining_mark) {
49 let normalized = lit.nfc().to_string();
50 if normalized.chars().count() == 1 {
51 let ch = normalized.chars().next().unwrap().escape_default().to_string();
52 sugg = Some(MoreThanOneCharSugg::NormalizedForm { span, ch, normalized });
53 }
54 let escaped_marks =
55 rest.iter().map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
56 note = Some(MoreThanOneCharNote::AllCombining {
57 span,
58 chr: format!("{first}"),
59 len: escaped_marks.len(),
60 escaped_marks: escaped_marks.join(""),
61 });
62 } else {
63 let printable: Vec<char> = lit
64 .chars()
65 .filter(|&x| {
66 unicode_width::UnicodeWidthChar::width(x).unwrap_or(0) != 0
67 && !x.is_whitespace()
68 })
69 .collect();
70
71 if let &[ch] = printable.as_slice() {
72 sugg =
73 Some(MoreThanOneCharSugg::RemoveNonPrinting { span, ch: ch.to_string() });
74 note = Some(MoreThanOneCharNote::NonPrinting {
75 span,
76 escaped: lit.escape_default().to_string(),
77 });
78 }
79 };
80 let sugg = sugg.unwrap_or_else(|| {
81 let is_byte = mode.is_byte();
82 let prefix = if is_byte { "b" } else { "" };
83 let mut escaped = String::with_capacity(lit.len());
84 let mut chrs = lit.chars().peekable();
85 while let Some(first) = chrs.next() {
86 match (first, chrs.peek()) {
87 ('\\', Some('"')) => {
88 escaped.push('\\');
89 escaped.push('"');
90 chrs.next();
91 }
92 ('"', _) => {
93 escaped.push('\\');
94 escaped.push('"')
95 }
96 (c, _) => escaped.push(c),
97 };
98 }
99 let sugg = format!("{prefix}\"{escaped}\"");
100 MoreThanOneCharSugg::Quotes { span: span_with_quotes, is_byte, sugg }
101 });
102 handler.emit_err(UnescapeError::MoreThanOneChar {
103 span: span_with_quotes,
104 note,
105 suggestion: sugg,
106 });
107 }
108 EscapeError::EscapeOnlyChar => {
109 let (c, char_span) = last_char();
110 handler.emit_err(UnescapeError::EscapeOnlyChar {
111 span,
112 char_span,
113 escaped_sugg: c.escape_default().to_string(),
114 escaped_msg: escaped_char(c),
115 byte: mode.is_byte(),
116 });
117 }
118 EscapeError::BareCarriageReturn => {
119 let double_quotes = mode.in_double_quotes();
120 handler.emit_err(UnescapeError::BareCr { span, double_quotes });
121 }
122 EscapeError::BareCarriageReturnInRawString => {
123 assert!(mode.in_double_quotes());
124 handler.emit_err(UnescapeError::BareCrRawString(span));
125 }
126 EscapeError::InvalidEscape => {
127 let (c, span) = last_char();
128
129 let label =
130 if mode.is_byte() { "unknown byte escape" } else { "unknown character escape" };
131 let ec = escaped_char(c);
132 let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
133 diag.span_label(span, label);
134 if c == '{' || c == '}' && !mode.is_byte() {
135 diag.help(
136 "if used in a formatting string, curly braces are escaped with `{{` and `}}`",
137 );
138 } else if c == '\r' {
139 diag.help(
140 "this is an isolated carriage return; consider checking your editor and \
141 version control settings",
142 );
143 } else {
144 if !mode.is_byte() {
145 diag.span_suggestion(
146 span_with_quotes,
147 "if you meant to write a literal backslash (perhaps escaping in a regular expression), consider a raw string literal",
148 format!("r\"{}\"", lit),
149 Applicability::MaybeIncorrect,
150 );
151 }
152
153 diag.help(
154 "for more information, visit \
155 <https://static.rust-lang.org/doc/master/reference.html#literals>",
156 );
157 }
158 diag.emit();
159 }
160 EscapeError::TooShortHexEscape => {
161 handler.emit_err(UnescapeError::TooShortHexEscape(span));
162 }
163 EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
164 let (c, span) = last_char();
165 let is_hex = error == EscapeError::InvalidCharInHexEscape;
166 let ch = escaped_char(c);
167 handler.emit_err(UnescapeError::InvalidCharInEscape { span, is_hex, ch });
168 }
169 EscapeError::NonAsciiCharInByte => {
170 let (c, span) = last_char();
171 let desc = match mode {
172 Mode::Byte => "byte literal",
173 Mode::ByteStr => "byte string literal",
174 Mode::RawByteStr => "raw byte string literal",
175 _ => panic!("non-is_byte literal paired with NonAsciiCharInByte"),
176 };
177 let mut err = handler.struct_span_err(span, format!("non-ASCII character in {}", desc));
178 let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 {
179 format!(" but is {:?}", c)
180 } else {
181 String::new()
182 };
183 err.span_label(span, &format!("must be ASCII{}", postfix));
184 // Note: the \\xHH suggestions are not given for raw byte string
185 // literals, because they are araw and so cannot use any escapes.
186 if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
187 err.span_suggestion(
188 span,
189 &format!(
190 "if you meant to use the unicode code point for {:?}, use a \\xHH escape",
191 c
192 ),
193 format!("\\x{:X}", c as u32),
194 Applicability::MaybeIncorrect,
195 );
196 } else if mode == Mode::Byte {
197 err.span_label(span, "this multibyte character does not fit into a single byte");
198 } else if mode != Mode::RawByteStr {
199 let mut utf8 = String::new();
200 utf8.push(c);
201 err.span_suggestion(
202 span,
203 &format!(
204 "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes",
205 c
206 ),
207 utf8.as_bytes()
208 .iter()
209 .map(|b: &u8| format!("\\x{:X}", *b))
210 .fold("".to_string(), |a, c| a + &c),
211 Applicability::MaybeIncorrect,
212 );
213 }
214 err.emit();
215 }
216 EscapeError::OutOfRangeHexEscape => {
217 handler.emit_err(UnescapeError::OutOfRangeHexEscape(span));
218 }
219 EscapeError::LeadingUnderscoreUnicodeEscape => {
220 let (c, span) = last_char();
221 handler.emit_err(UnescapeError::LeadingUnderscoreUnicodeEscape {
222 span,
223 ch: escaped_char(c),
224 });
225 }
226 EscapeError::OverlongUnicodeEscape => {
227 handler.emit_err(UnescapeError::OverlongUnicodeEscape(span));
228 }
229 EscapeError::UnclosedUnicodeEscape => {
230 handler.emit_err(UnescapeError::UnclosedUnicodeEscape(span, span.shrink_to_hi()));
231 }
232 EscapeError::NoBraceInUnicodeEscape => {
233 let mut suggestion = "\\u{".to_owned();
234 let mut suggestion_len = 0;
235 let (c, char_span) = last_char();
236 let chars = once(c).chain(lit[range.end..].chars());
237 for c in chars.take(6).take_while(|c| c.is_digit(16)) {
238 suggestion.push(c);
239 suggestion_len += c.len_utf8();
240 }
241
242 let (label, sub) = if suggestion_len > 0 {
243 suggestion.push('}');
244 let hi = char_span.lo() + BytePos(suggestion_len as u32);
245 (None, NoBraceUnicodeSub::Suggestion { span: span.with_hi(hi), suggestion })
246 } else {
247 (Some(span), NoBraceUnicodeSub::Help)
248 };
249 handler.emit_err(UnescapeError::NoBraceInUnicodeEscape { span, label, sub });
250 }
251 EscapeError::UnicodeEscapeInByte => {
252 handler.emit_err(UnescapeError::UnicodeEscapeInByte(span));
253 }
254 EscapeError::EmptyUnicodeEscape => {
255 handler.emit_err(UnescapeError::EmptyUnicodeEscape(span));
256 }
257 EscapeError::ZeroChars => {
258 handler.emit_err(UnescapeError::ZeroChars(span));
259 }
260 EscapeError::LoneSlash => {
261 handler.emit_err(UnescapeError::LoneSlash(span));
262 }
263 EscapeError::UnskippedWhitespaceWarning => {
264 let (c, char_span) = last_char();
265 handler.emit_warning(UnescapeError::UnskippedWhitespace {
266 span,
267 ch: escaped_char(c),
268 char_span,
269 });
270 }
271 EscapeError::MultipleSkippedLinesWarning => {
272 handler.emit_warning(UnescapeError::MultipleSkippedLinesWarning(span));
273 }
274 }
275 }
276
/// Renders `c` as a `String` suitable for embedding in a user-facing diagnostic message.
///
/// Characters from space (`U+0020`) through `~` (`U+007E`) are returned verbatim; every
/// other character is rendered through `char::escape_default`.
pub(crate) fn escaped_char(c: char) -> String {
    // Inside this range \, ' and " are deliberately left unescaped, since the text is shown
    // to the user rather than pasted back into source code.
    if matches!(c, ' '..='~') {
        return c.to_string();
    }
    c.escape_default().collect()
}