]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //! Macro support for format strings |
2 | //! | |
3 | //! These structures are used when parsing format strings for the compiler. | |
4 | //! Parsing does not happen at runtime: structures of `std::fmt::rt` are | |
5 | //! generated instead. | |
6 | ||
dfeec247 | 7 | #![doc( |
1b1a35ee | 8 | html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/", |
dfeec247 XL |
9 | html_playground_url = "https://play.rust-lang.org/", |
10 | test(attr(deny(warnings))) | |
11 | )] | |
f2b60f7d FG |
12 | #![deny(rustc::untranslatable_diagnostic)] |
13 | #![deny(rustc::diagnostic_outside_of_impl)] | |
04454e1e FG |
14 | // We want to be able to build this crate with a stable compiler, so no |
15 | // `#![feature]` attributes should be added. | |
b7449926 | 16 | |
9fa01778 | 17 | pub use Alignment::*; |
9fa01778 | 18 | pub use Count::*; |
dfeec247 XL |
19 | pub use Piece::*; |
20 | pub use Position::*; | |
1a4d82fc | 21 | |
dfeec247 | 22 | use std::iter; |
1a4d82fc JJ |
23 | use std::str; |
24 | use std::string; | |
25 | ||
04454e1e FG |
// Note: copied from rustc_span
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct InnerSpan {
    /// Offset at which the range starts.
    pub start: usize,
    /// Offset at which the range ends.
    pub end: usize,
}

impl InnerSpan {
    /// Creates a span from the given `start` and `end` offsets.
    pub fn new(start: usize, end: usize) -> InnerSpan {
        InnerSpan { start, end }
    }
}
dc9dc135 | 39 | |
9c376795 FG |
/// The location and before/after width of a character whose width has changed from its source code
/// representation
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct InnerWidthMapping {
    /// Index of the character in the source
    pub position: usize,
    /// The inner width in characters
    pub before: usize,
    /// The transformed width in characters
    pub after: usize,
}

impl InnerWidthMapping {
    /// Creates a mapping for the character at `position` whose width changed from `before` to
    /// `after`.
    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
        InnerWidthMapping { position, before, after }
    }
}
57 | ||
58 | /// Whether the input string is a literal. If yes, it contains the inner width mappings. | |
59 | #[derive(Clone, PartialEq, Eq)] | |
60 | enum InputStringKind { | |
61 | NotALiteral, | |
62 | Literal { width_mappings: Vec<InnerWidthMapping> }, | |
63 | } | |
64 | ||
f9f354fc XL |
/// The type of format string that we are parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseMode {
    /// A normal format string as per `format_args!`.
    Format,
    /// An inline assembly template string for `asm!`.
    InlineAsm,
}
73 | ||
dc9dc135 XL |
74 | #[derive(Copy, Clone)] |
75 | struct InnerOffset(usize); | |
76 | ||
77 | impl InnerOffset { | |
78 | fn to(self, end: InnerOffset) -> InnerSpan { | |
79 | InnerSpan::new(self.0, end.0) | |
80 | } | |
81 | } | |
82 | ||
1a4d82fc JJ |
83 | /// A piece is a portion of the format string which represents the next part |
84 | /// to emit. These are emitted as a stream by the `Parser` class. | |
9c376795 | 85 | #[derive(Clone, Debug, PartialEq)] |
1a4d82fc JJ |
86 | pub enum Piece<'a> { |
87 | /// A literal string which should directly be emitted | |
88 | String(&'a str), | |
89 | /// This describes that formatting should process the next argument (as | |
90 | /// specified inside) for emission. | |
9c376795 | 91 | NextArgument(Box<Argument<'a>>), |
1a4d82fc JJ |
92 | } |
93 | ||
94 | /// Representation of an argument specification. | |
60c5eb7d | 95 | #[derive(Copy, Clone, Debug, PartialEq)] |
1a4d82fc JJ |
96 | pub struct Argument<'a> { |
97 | /// Where to find this argument | |
04454e1e | 98 | pub position: Position<'a>, |
064997fb FG |
99 | /// The span of the position indicator. Includes any whitespace in implicit |
100 | /// positions (`{ }`). | |
101 | pub position_span: InnerSpan, | |
1a4d82fc JJ |
102 | /// How to format the argument |
103 | pub format: FormatSpec<'a>, | |
104 | } | |
105 | ||
106 | /// Specification for the formatting of an argument in the format string. | |
60c5eb7d | 107 | #[derive(Copy, Clone, Debug, PartialEq)] |
1a4d82fc | 108 | pub struct FormatSpec<'a> { |
416331ca | 109 | /// Optionally specified character to fill alignment with. |
1a4d82fc | 110 | pub fill: Option<char>, |
416331ca | 111 | /// Optionally specified alignment. |
1a4d82fc | 112 | pub align: Alignment, |
9ffffee4 FG |
113 | /// The `+` or `-` flag. |
114 | pub sign: Option<Sign>, | |
115 | /// The `#` flag. | |
116 | pub alternate: bool, | |
117 | /// The `0` flag. | |
118 | pub zero_pad: bool, | |
119 | /// The `x` or `X` flag. (Only for `Debug`.) | |
120 | pub debug_hex: Option<DebugHex>, | |
416331ca | 121 | /// The integer precision to use. |
04454e1e | 122 | pub precision: Count<'a>, |
416331ca XL |
123 | /// The span of the precision formatting flag (for diagnostics). |
124 | pub precision_span: Option<InnerSpan>, | |
125 | /// The string width requested for the resulting format. | |
04454e1e | 126 | pub width: Count<'a>, |
416331ca XL |
127 | /// The span of the width formatting flag (for diagnostics). |
128 | pub width_span: Option<InnerSpan>, | |
1a4d82fc JJ |
129 | /// The descriptor string representing the name of the format desired for |
130 | /// this argument, this can be empty or any number of characters, although | |
131 | /// it is required to be one word. | |
b039eaaf | 132 | pub ty: &'a str, |
60c5eb7d XL |
133 | /// The span of the descriptor string (for diagnostics). |
134 | pub ty_span: Option<InnerSpan>, | |
1a4d82fc JJ |
135 | } |
136 | ||
/// Enum describing where an argument for a format can be located.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Position<'a> {
    /// The argument is implied to be located at an index
    ArgumentImplicitlyIs(usize),
    /// The argument is located at a specific index given in the format.
    ArgumentIs(usize),
    /// The argument has a name.
    ArgumentNamed(&'a str),
}

impl Position<'_> {
    /// Returns the argument index for the two index-based variants, and `None`
    /// for a named argument.
    pub fn index(&self) -> Option<usize> {
        // Exhaustive on purpose: adding a variant should force a decision here.
        match *self {
            Position::ArgumentIs(i) | Position::ArgumentImplicitlyIs(i) => Some(i),
            Position::ArgumentNamed(_) => None,
        }
    }
}
156 | ||
/// Enum of alignments which are supported.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Alignment {
    /// The value will be aligned to the left.
    AlignLeft,
    /// The value will be aligned to the right.
    AlignRight,
    /// The value will be aligned in the center.
    AlignCenter,
    /// The value will take on a default alignment.
    AlignUnknown,
}
169 | ||
/// Enum for the sign flags.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Sign {
    /// The `+` flag.
    Plus,
    /// The `-` flag.
    Minus,
}
178 | ||
/// Enum for the debug hex flags.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DebugHex {
    /// The `x` flag in `{:x?}`.
    Lower,
    /// The `X` flag in `{:X?}`.
    Upper,
}
187 | ||
188 | /// A count is used for the precision and width parameters of an integer, and | |
189 | /// can reference either an argument or a literal integer. | |
60c5eb7d | 190 | #[derive(Copy, Clone, Debug, PartialEq)] |
04454e1e | 191 | pub enum Count<'a> { |
1a4d82fc | 192 | /// The count is specified explicitly. |
c34b1796 | 193 | CountIs(usize), |
1a4d82fc | 194 | /// The count is specified by the argument with the given name. |
04454e1e | 195 | CountIsName(&'a str, InnerSpan), |
1a4d82fc | 196 | /// The count is specified by the argument at the given index. |
c34b1796 | 197 | CountIsParam(usize), |
f2b60f7d FG |
198 | /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index. |
199 | CountIsStar(usize), | |
1a4d82fc JJ |
200 | /// The count is implied and cannot be explicitly specified. |
201 | CountImplied, | |
202 | } | |
203 | ||
94b46f34 XL |
204 | pub struct ParseError { |
205 | pub description: string::String, | |
206 | pub note: Option<string::String>, | |
207 | pub label: string::String, | |
dc9dc135 XL |
208 | pub span: InnerSpan, |
209 | pub secondary_label: Option<(string::String, InnerSpan)>, | |
064997fb | 210 | pub should_be_replaced_with_positional_argument: bool, |
94b46f34 XL |
211 | } |
212 | ||
1a4d82fc | 213 | /// The parser structure for interpreting the input format string. This is |
b039eaaf | 214 | /// modeled as an iterator over `Piece` structures to form a stream of tokens |
1a4d82fc JJ |
215 | /// being output. |
216 | /// | |
217 | /// This is a recursive-descent parser for the sake of simplicity, and if | |
218 | /// necessary there's probably lots of room for improvement performance-wise. | |
219 | pub struct Parser<'a> { | |
f9f354fc | 220 | mode: ParseMode, |
1a4d82fc | 221 | input: &'a str, |
e9174d1e | 222 | cur: iter::Peekable<str::CharIndices<'a>>, |
1a4d82fc | 223 | /// Error messages accumulated during parsing |
94b46f34 | 224 | pub errors: Vec<ParseError>, |
5bcae85e | 225 | /// Current position of implicit positional argument pointer |
f035d41b | 226 | pub curarg: usize, |
8faf50e0 XL |
227 | /// `Some(raw count)` when the string is "raw", used to position spans correctly |
228 | style: Option<usize>, | |
b7449926 | 229 | /// Start and end byte offset of every successfully parsed argument |
dc9dc135 | 230 | pub arg_places: Vec<InnerSpan>, |
9c376795 FG |
231 | /// Characters whose length has been changed from their in-code representation |
232 | width_map: Vec<InnerWidthMapping>, | |
dc9dc135 XL |
233 | /// Span of the last opening brace seen, used for error reporting |
234 | last_opening_brace: Option<InnerSpan>, | |
74b04a01 | 235 | /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` |
0731742a | 236 | append_newline: bool, |
f9f354fc | 237 | /// Whether this formatting string is a literal or it comes from a macro. |
f035d41b | 238 | pub is_literal: bool, |
f9f354fc XL |
239 | /// Start position of the current line. |
240 | cur_line_start: usize, | |
241 | /// Start and end byte offset of every line of the format string. Excludes | |
242 | /// newline characters and leading whitespace. | |
243 | pub line_spans: Vec<InnerSpan>, | |
0731742a XL |
244 | } |
245 | ||
1a4d82fc JJ |
246 | impl<'a> Iterator for Parser<'a> { |
247 | type Item = Piece<'a>; | |
248 | ||
249 | fn next(&mut self) -> Option<Piece<'a>> { | |
e9174d1e SL |
250 | if let Some(&(pos, c)) = self.cur.peek() { |
251 | match c { | |
252 | '{' => { | |
dc9dc135 XL |
253 | let curr_last_brace = self.last_opening_brace; |
254 | let byte_pos = self.to_span_index(pos); | |
9c376795 | 255 | let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos)); |
064997fb | 256 | self.last_opening_brace = Some(byte_pos.to(lbrace_end)); |
e9174d1e SL |
257 | self.cur.next(); |
258 | if self.consume('{') { | |
dc9dc135 | 259 | self.last_opening_brace = curr_last_brace; |
0731742a | 260 | |
e9174d1e SL |
261 | Some(String(self.string(pos + 1))) |
262 | } else { | |
064997fb | 263 | let arg = self.argument(lbrace_end); |
9c376795 | 264 | if let Some(rbrace_pos) = self.must_consume('}') { |
f9f354fc | 265 | if self.is_literal { |
9c376795 FG |
266 | let lbrace_byte_pos = self.to_span_index(pos); |
267 | let rbrace_byte_pos = self.to_span_index(rbrace_pos); | |
268 | ||
269 | let width = self.to_span_width(rbrace_pos); | |
270 | ||
cdc7bbd5 | 271 | self.arg_places.push( |
9c376795 | 272 | lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)), |
cdc7bbd5 | 273 | ); |
f9f354fc | 274 | } |
064997fb | 275 | } else { |
9ffffee4 FG |
276 | if let Some(&(_, maybe)) = self.cur.peek() { |
277 | if maybe == '?' { | |
278 | self.suggest_format(); | |
279 | } else { | |
280 | self.suggest_positional_arg_instead_of_captured_arg(arg); | |
281 | } | |
282 | } | |
8faf50e0 | 283 | } |
9c376795 | 284 | Some(NextArgument(Box::new(arg))) |
e9174d1e | 285 | } |
1a4d82fc | 286 | } |
e9174d1e SL |
287 | '}' => { |
288 | self.cur.next(); | |
289 | if self.consume('}') { | |
8faf50e0 | 290 | Some(String(self.string(pos + 1))) |
e9174d1e | 291 | } else { |
0731742a | 292 | let err_pos = self.to_span_index(pos); |
94b46f34 XL |
293 | self.err_with_note( |
294 | "unmatched `}` found", | |
295 | "unmatched `}`", | |
296 | "if you intended to print `}`, you can escape it using `}}`", | |
dc9dc135 | 297 | err_pos.to(err_pos), |
94b46f34 | 298 | ); |
e9174d1e SL |
299 | None |
300 | } | |
1a4d82fc | 301 | } |
e9174d1e | 302 | _ => Some(String(self.string(pos))), |
1a4d82fc | 303 | } |
e9174d1e | 304 | } else { |
f035d41b | 305 | if self.is_literal { |
f2b60f7d | 306 | let span = self.span(self.cur_line_start, self.input.len()); |
f035d41b XL |
307 | if self.line_spans.last() != Some(&span) { |
308 | self.line_spans.push(span); | |
309 | } | |
f9f354fc | 310 | } |
e9174d1e | 311 | None |
1a4d82fc JJ |
312 | } |
313 | } | |
314 | } | |
315 | ||
316 | impl<'a> Parser<'a> { | |
317 | /// Creates a new parser for the given format string | |
0731742a XL |
318 | pub fn new( |
319 | s: &'a str, | |
320 | style: Option<usize>, | |
f9f354fc | 321 | snippet: Option<string::String>, |
0731742a | 322 | append_newline: bool, |
f9f354fc | 323 | mode: ParseMode, |
0731742a | 324 | ) -> Parser<'a> { |
9c376795 FG |
325 | let input_string_kind = find_width_map_from_snippet(snippet, style); |
326 | let (width_map, is_literal) = match input_string_kind { | |
327 | InputStringKind::Literal { width_mappings } => (width_mappings, true), | |
328 | InputStringKind::NotALiteral => (Vec::new(), false), | |
329 | }; | |
330 | ||
1a4d82fc | 331 | Parser { |
f9f354fc | 332 | mode, |
1a4d82fc | 333 | input: s, |
e9174d1e | 334 | cur: s.char_indices().peekable(), |
92a42be0 | 335 | errors: vec![], |
5bcae85e | 336 | curarg: 0, |
8faf50e0 | 337 | style, |
8faf50e0 | 338 | arg_places: vec![], |
9c376795 | 339 | width_map, |
dc9dc135 | 340 | last_opening_brace: None, |
0731742a | 341 | append_newline, |
f9f354fc XL |
342 | is_literal, |
343 | cur_line_start: 0, | |
344 | line_spans: vec![], | |
1a4d82fc JJ |
345 | } |
346 | } | |
347 | ||
348 | /// Notifies of an error. The message doesn't actually need to be of type | |
349 | /// String, but I think it does when this eventually uses conditions so it | |
350 | /// might as well start using it now. | |
94b46f34 XL |
351 | fn err<S1: Into<string::String>, S2: Into<string::String>>( |
352 | &mut self, | |
353 | description: S1, | |
354 | label: S2, | |
dc9dc135 | 355 | span: InnerSpan, |
94b46f34 XL |
356 | ) { |
357 | self.errors.push(ParseError { | |
358 | description: description.into(), | |
359 | note: None, | |
360 | label: label.into(), | |
dc9dc135 | 361 | span, |
0731742a | 362 | secondary_label: None, |
064997fb | 363 | should_be_replaced_with_positional_argument: false, |
94b46f34 | 364 | }); |
476ff2be SL |
365 | } |
366 | ||
367 | /// Notifies of an error. The message doesn't actually need to be of type | |
368 | /// String, but I think it does when this eventually uses conditions so it | |
369 | /// might as well start using it now. | |
dfeec247 XL |
370 | fn err_with_note< |
371 | S1: Into<string::String>, | |
372 | S2: Into<string::String>, | |
373 | S3: Into<string::String>, | |
374 | >( | |
94b46f34 XL |
375 | &mut self, |
376 | description: S1, | |
377 | label: S2, | |
378 | note: S3, | |
dc9dc135 | 379 | span: InnerSpan, |
94b46f34 XL |
380 | ) { |
381 | self.errors.push(ParseError { | |
382 | description: description.into(), | |
383 | note: Some(note.into()), | |
384 | label: label.into(), | |
dc9dc135 | 385 | span, |
0731742a | 386 | secondary_label: None, |
064997fb | 387 | should_be_replaced_with_positional_argument: false, |
94b46f34 | 388 | }); |
1a4d82fc JJ |
389 | } |
390 | ||
391 | /// Optionally consumes the specified character. If the character is not at | |
416331ca XL |
392 | /// the current position, then the current iterator isn't moved and `false` is |
393 | /// returned, otherwise the character is consumed and `true` is returned. | |
1a4d82fc | 394 | fn consume(&mut self, c: char) -> bool { |
416331ca XL |
395 | self.consume_pos(c).is_some() |
396 | } | |
397 | ||
398 | /// Optionally consumes the specified character. If the character is not at | |
399 | /// the current position, then the current iterator isn't moved and `None` is | |
400 | /// returned, otherwise the character is consumed and the current position is | |
401 | /// returned. | |
402 | fn consume_pos(&mut self, c: char) -> Option<usize> { | |
403 | if let Some(&(pos, maybe)) = self.cur.peek() { | |
b039eaaf SL |
404 | if c == maybe { |
405 | self.cur.next(); | |
416331ca | 406 | return Some(pos); |
b039eaaf | 407 | } |
1a4d82fc | 408 | } |
416331ca | 409 | None |
1a4d82fc JJ |
410 | } |
411 | ||
9c376795 FG |
412 | fn remap_pos(&self, mut pos: usize) -> InnerOffset { |
413 | for width in &self.width_map { | |
414 | if pos > width.position { | |
415 | pos += width.before - width.after; | |
416 | } else if pos == width.position && width.after == 0 { | |
417 | pos += width.before; | |
418 | } else { | |
419 | break; | |
420 | } | |
421 | } | |
422 | ||
423 | InnerOffset(pos) | |
424 | } | |
425 | ||
dc9dc135 | 426 | fn to_span_index(&self, pos: usize) -> InnerOffset { |
dc9dc135 XL |
427 | // This handles the raw string case, the raw argument is the number of # |
428 | // in r###"..."### (we need to add one because of the `r`). | |
5869c6ff | 429 | let raw = self.style.map_or(0, |raw| raw + 1); |
9c376795 FG |
430 | let pos = self.remap_pos(pos); |
431 | InnerOffset(raw + pos.0 + 1) | |
432 | } | |
433 | ||
434 | fn to_span_width(&self, pos: usize) -> usize { | |
435 | let pos = self.remap_pos(pos); | |
436 | match self.width_map.iter().find(|w| w.position == pos.0) { | |
437 | Some(w) => w.before, | |
438 | None => 1, | |
0731742a | 439 | } |
0731742a XL |
440 | } |
441 | ||
f2b60f7d FG |
442 | fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { |
443 | let start = self.to_span_index(start_pos); | |
444 | let end = self.to_span_index(end_pos); | |
445 | start.to(end) | |
446 | } | |
447 | ||
1a4d82fc JJ |
448 | /// Forces consumption of the specified character. If the character is not |
449 | /// found, an error is emitted. | |
8faf50e0 | 450 | fn must_consume(&mut self, c: char) -> Option<usize> { |
1a4d82fc | 451 | self.ws(); |
8faf50e0 | 452 | |
94b46f34 | 453 | if let Some(&(pos, maybe)) = self.cur.peek() { |
e9174d1e | 454 | if c == maybe { |
1a4d82fc | 455 | self.cur.next(); |
8faf50e0 | 456 | Some(pos) |
e9174d1e | 457 | } else { |
0731742a | 458 | let pos = self.to_span_index(pos); |
9c376795 | 459 | let description = format!("expected `'}}'`, found `{maybe:?}`"); |
0731742a XL |
460 | let label = "expected `}`".to_owned(); |
461 | let (note, secondary_label) = if c == '}' { | |
dfeec247 XL |
462 | ( |
463 | Some( | |
464 | "if you intended to print `{`, you can escape it using `{{`".to_owned(), | |
465 | ), | |
466 | self.last_opening_brace | |
467 | .map(|sp| ("because of this opening brace".to_owned(), sp)), | |
468 | ) | |
0731742a XL |
469 | } else { |
470 | (None, None) | |
471 | }; | |
472 | self.errors.push(ParseError { | |
473 | description, | |
474 | note, | |
475 | label, | |
dc9dc135 | 476 | span: pos.to(pos), |
0731742a | 477 | secondary_label, |
064997fb | 478 | should_be_replaced_with_positional_argument: false, |
0731742a | 479 | }); |
8faf50e0 | 480 | None |
1a4d82fc | 481 | } |
e9174d1e | 482 | } else { |
9c376795 | 483 | let description = format!("expected `{c:?}` but string was terminated"); |
0731742a XL |
484 | // point at closing `"` |
485 | let pos = self.input.len() - if self.append_newline { 1 } else { 0 }; | |
486 | let pos = self.to_span_index(pos); | |
476ff2be | 487 | if c == '}' { |
9c376795 | 488 | let label = format!("expected `{c:?}`"); |
0731742a | 489 | let (note, secondary_label) = if c == '}' { |
dfeec247 XL |
490 | ( |
491 | Some( | |
492 | "if you intended to print `{`, you can escape it using `{{`".to_owned(), | |
493 | ), | |
494 | self.last_opening_brace | |
495 | .map(|sp| ("because of this opening brace".to_owned(), sp)), | |
496 | ) | |
0731742a XL |
497 | } else { |
498 | (None, None) | |
499 | }; | |
500 | self.errors.push(ParseError { | |
501 | description, | |
502 | note, | |
503 | label, | |
dc9dc135 | 504 | span: pos.to(pos), |
0731742a | 505 | secondary_label, |
064997fb | 506 | should_be_replaced_with_positional_argument: false, |
0731742a | 507 | }); |
476ff2be | 508 | } else { |
9c376795 | 509 | self.err(description, format!("expected `{c:?}`"), pos.to(pos)); |
476ff2be | 510 | } |
8faf50e0 | 511 | None |
1a4d82fc JJ |
512 | } |
513 | } | |
514 | ||
0731742a | 515 | /// Consumes all whitespace characters until the first non-whitespace character |
1a4d82fc | 516 | fn ws(&mut self) { |
e9174d1e | 517 | while let Some(&(_, c)) = self.cur.peek() { |
b039eaaf SL |
518 | if c.is_whitespace() { |
519 | self.cur.next(); | |
520 | } else { | |
92a42be0 | 521 | break; |
b039eaaf | 522 | } |
1a4d82fc JJ |
523 | } |
524 | } | |
525 | ||
526 | /// Parses all of a string which is to be considered a "raw literal" in a | |
527 | /// format string. This is everything outside of the braces. | |
c34b1796 | 528 | fn string(&mut self, start: usize) -> &'a str { |
e9174d1e SL |
529 | // we may not consume the character, peek the iterator |
530 | while let Some(&(pos, c)) = self.cur.peek() { | |
531 | match c { | |
b039eaaf SL |
532 | '{' | '}' => { |
533 | return &self.input[start..pos]; | |
534 | } | |
f9f354fc | 535 | '\n' if self.is_literal => { |
f2b60f7d | 536 | self.line_spans.push(self.span(self.cur_line_start, pos)); |
f9f354fc XL |
537 | self.cur_line_start = pos + 1; |
538 | self.cur.next(); | |
539 | } | |
b039eaaf | 540 | _ => { |
f9f354fc XL |
541 | if self.is_literal && pos == self.cur_line_start && c.is_whitespace() { |
542 | self.cur_line_start = pos + c.len_utf8(); | |
543 | } | |
b039eaaf SL |
544 | self.cur.next(); |
545 | } | |
1a4d82fc JJ |
546 | } |
547 | } | |
e9174d1e | 548 | &self.input[start..self.input.len()] |
1a4d82fc JJ |
549 | } |
550 | ||
e74abb32 | 551 | /// Parses an `Argument` structure, or what's contained within braces inside the format string. |
064997fb | 552 | fn argument(&mut self, start: InnerOffset) -> Argument<'a> { |
5bcae85e | 553 | let pos = self.position(); |
064997fb FG |
554 | |
555 | let end = self | |
556 | .cur | |
557 | .clone() | |
558 | .find(|(_, ch)| !ch.is_whitespace()) | |
559 | .map_or(start, |(end, _)| self.to_span_index(end)); | |
560 | let position_span = start.to(end); | |
561 | ||
f9f354fc XL |
562 | let format = match self.mode { |
563 | ParseMode::Format => self.format(), | |
564 | ParseMode::InlineAsm => self.inline_asm(), | |
565 | }; | |
5bcae85e SL |
566 | |
567 | // Resolve position after parsing format spec. | |
568 | let pos = match pos { | |
569 | Some(position) => position, | |
570 | None => { | |
571 | let i = self.curarg; | |
572 | self.curarg += 1; | |
abe05a73 | 573 | ArgumentImplicitlyIs(i) |
5bcae85e SL |
574 | } |
575 | }; | |
576 | ||
064997fb | 577 | Argument { position: pos, position_span, format } |
1a4d82fc JJ |
578 | } |
579 | ||
580 | /// Parses a positional argument for a format. This could either be an | |
581 | /// integer index of an argument, a named argument, or a blank string. | |
5bcae85e SL |
582 | /// Returns `Some(parsed_position)` if the position is not implicitly |
583 | /// consuming a macro argument, `None` if it's the case. | |
04454e1e | 584 | fn position(&mut self) -> Option<Position<'a>> { |
e9174d1e | 585 | if let Some(i) = self.integer() { |
5bcae85e | 586 | Some(ArgumentIs(i)) |
e9174d1e SL |
587 | } else { |
588 | match self.cur.peek() { | |
064997fb | 589 | Some(&(_, c)) if rustc_lexer::is_id_start(c) => Some(ArgumentNamed(self.word())), |
5bcae85e SL |
590 | |
591 | // This is an `ArgumentNext`. | |
592 | // Record the fact and do the resolution after parsing the | |
593 | // format spec, to make things like `{:.*}` work. | |
594 | _ => None, | |
1a4d82fc JJ |
595 | } |
596 | } | |
597 | } | |
598 | ||
f2b60f7d FG |
599 | fn current_pos(&mut self) -> usize { |
600 | if let Some(&(pos, _)) = self.cur.peek() { pos } else { self.input.len() } | |
601 | } | |
602 | ||
1a4d82fc | 603 | /// Parses a format specifier at the current position, returning all of the |
e74abb32 | 604 | /// relevant information in the `FormatSpec` struct. |
1a4d82fc JJ |
605 | fn format(&mut self) -> FormatSpec<'a> { |
606 | let mut spec = FormatSpec { | |
607 | fill: None, | |
608 | align: AlignUnknown, | |
9ffffee4 FG |
609 | sign: None, |
610 | alternate: false, | |
611 | zero_pad: false, | |
612 | debug_hex: None, | |
1a4d82fc | 613 | precision: CountImplied, |
416331ca | 614 | precision_span: None, |
1a4d82fc | 615 | width: CountImplied, |
416331ca | 616 | width_span: None, |
85aaf69f | 617 | ty: &self.input[..0], |
60c5eb7d | 618 | ty_span: None, |
1a4d82fc | 619 | }; |
b039eaaf | 620 | if !self.consume(':') { |
92a42be0 | 621 | return spec; |
b039eaaf | 622 | } |
1a4d82fc JJ |
623 | |
624 | // fill character | |
e9174d1e | 625 | if let Some(&(_, c)) = self.cur.peek() { |
1b1a35ee XL |
626 | if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) { |
627 | spec.fill = Some(c); | |
628 | self.cur.next(); | |
1a4d82fc | 629 | } |
1a4d82fc JJ |
630 | } |
631 | // Alignment | |
632 | if self.consume('<') { | |
633 | spec.align = AlignLeft; | |
634 | } else if self.consume('>') { | |
635 | spec.align = AlignRight; | |
636 | } else if self.consume('^') { | |
637 | spec.align = AlignCenter; | |
638 | } | |
639 | // Sign flags | |
640 | if self.consume('+') { | |
9ffffee4 | 641 | spec.sign = Some(Sign::Plus); |
1a4d82fc | 642 | } else if self.consume('-') { |
9ffffee4 | 643 | spec.sign = Some(Sign::Minus); |
1a4d82fc JJ |
644 | } |
645 | // Alternate marker | |
646 | if self.consume('#') { | |
9ffffee4 | 647 | spec.alternate = true; |
1a4d82fc JJ |
648 | } |
649 | // Width and precision | |
650 | let mut havewidth = false; | |
416331ca | 651 | |
1a4d82fc JJ |
652 | if self.consume('0') { |
653 | // small ambiguity with '0$' as a format string. In theory this is a | |
654 | // '0' flag and then an ill-formatted format string with just a '$' | |
655 | // and no count, but this is better if we instead interpret this as | |
656 | // no '0' flag and '0$' as the width instead. | |
064997fb | 657 | if let Some(end) = self.consume_pos('$') { |
1a4d82fc | 658 | spec.width = CountIsParam(0); |
f2b60f7d | 659 | spec.width_span = Some(self.span(end - 1, end + 1)); |
1a4d82fc JJ |
660 | havewidth = true; |
661 | } else { | |
9ffffee4 | 662 | spec.zero_pad = true; |
1a4d82fc JJ |
663 | } |
664 | } | |
f2b60f7d | 665 | |
1a4d82fc | 666 | if !havewidth { |
f2b60f7d FG |
667 | let start = self.current_pos(); |
668 | spec.width = self.count(start); | |
669 | if spec.width != CountImplied { | |
670 | let end = self.current_pos(); | |
671 | spec.width_span = Some(self.span(start, end)); | |
672 | } | |
1a4d82fc | 673 | } |
064997fb | 674 | |
416331ca | 675 | if let Some(start) = self.consume_pos('.') { |
f2b60f7d | 676 | if self.consume('*') { |
5bcae85e SL |
677 | // Resolve `CountIsNextParam`. |
678 | // We can do this immediately as `position` is resolved later. | |
679 | let i = self.curarg; | |
680 | self.curarg += 1; | |
f2b60f7d | 681 | spec.precision = CountIsStar(i); |
1a4d82fc | 682 | } else { |
f2b60f7d | 683 | spec.precision = self.count(start + 1); |
1a4d82fc | 684 | } |
f2b60f7d FG |
685 | let end = self.current_pos(); |
686 | spec.precision_span = Some(self.span(start, end)); | |
1a4d82fc | 687 | } |
f2b60f7d FG |
688 | |
689 | let ty_span_start = self.current_pos(); | |
0531ce1d XL |
690 | // Optional radix followed by the actual format specifier |
691 | if self.consume('x') { | |
692 | if self.consume('?') { | |
9ffffee4 | 693 | spec.debug_hex = Some(DebugHex::Lower); |
0531ce1d XL |
694 | spec.ty = "?"; |
695 | } else { | |
696 | spec.ty = "x"; | |
697 | } | |
698 | } else if self.consume('X') { | |
699 | if self.consume('?') { | |
9ffffee4 | 700 | spec.debug_hex = Some(DebugHex::Upper); |
0531ce1d XL |
701 | spec.ty = "?"; |
702 | } else { | |
703 | spec.ty = "X"; | |
704 | } | |
705 | } else if self.consume('?') { | |
1a4d82fc JJ |
706 | spec.ty = "?"; |
707 | } else { | |
708 | spec.ty = self.word(); | |
60c5eb7d | 709 | if !spec.ty.is_empty() { |
f2b60f7d FG |
710 | let ty_span_end = self.current_pos(); |
711 | spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); | |
60c5eb7d | 712 | } |
1a4d82fc | 713 | } |
e9174d1e | 714 | spec |
1a4d82fc JJ |
715 | } |
716 | ||
f9f354fc XL |
717 | /// Parses an inline assembly template modifier at the current position, returning the modifier |
718 | /// in the `ty` field of the `FormatSpec` struct. | |
719 | fn inline_asm(&mut self) -> FormatSpec<'a> { | |
720 | let mut spec = FormatSpec { | |
721 | fill: None, | |
722 | align: AlignUnknown, | |
9ffffee4 FG |
723 | sign: None, |
724 | alternate: false, | |
725 | zero_pad: false, | |
726 | debug_hex: None, | |
f9f354fc XL |
727 | precision: CountImplied, |
728 | precision_span: None, | |
729 | width: CountImplied, | |
730 | width_span: None, | |
731 | ty: &self.input[..0], | |
732 | ty_span: None, | |
733 | }; | |
734 | if !self.consume(':') { | |
735 | return spec; | |
736 | } | |
737 | ||
f2b60f7d | 738 | let ty_span_start = self.current_pos(); |
f9f354fc | 739 | spec.ty = self.word(); |
f9f354fc | 740 | if !spec.ty.is_empty() { |
f2b60f7d FG |
741 | let ty_span_end = self.current_pos(); |
742 | spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); | |
f9f354fc XL |
743 | } |
744 | ||
745 | spec | |
746 | } | |
747 | ||
e74abb32 | 748 | /// Parses a `Count` parameter at the current position. This does not check |
1a4d82fc JJ |
749 | /// for 'CountIsNextParam' because that is only used in precision, not |
750 | /// width. | |
f2b60f7d | 751 | fn count(&mut self, start: usize) -> Count<'a> { |
e9174d1e | 752 | if let Some(i) = self.integer() { |
f2b60f7d | 753 | if self.consume('$') { CountIsParam(i) } else { CountIs(i) } |
e9174d1e SL |
754 | } else { |
755 | let tmp = self.cur.clone(); | |
756 | let word = self.word(); | |
757 | if word.is_empty() { | |
758 | self.cur = tmp; | |
f2b60f7d | 759 | CountImplied |
5099ac24 | 760 | } else if let Some(end) = self.consume_pos('$') { |
f2b60f7d FG |
761 | let name_span = self.span(start, end); |
762 | CountIsName(word, name_span) | |
e9174d1e | 763 | } else { |
b7449926 | 764 | self.cur = tmp; |
f2b60f7d | 765 | CountImplied |
1a4d82fc JJ |
766 | } |
767 | } | |
768 | } | |
769 | ||
e1599b0c XL |
770 | /// Parses a word starting at the current position. A word is the same as |
771 | /// Rust identifier, except that it can't start with `_` character. | |
1a4d82fc | 772 | fn word(&mut self) -> &'a str { |
e9174d1e | 773 | let start = match self.cur.peek() { |
60c5eb7d | 774 | Some(&(pos, c)) if rustc_lexer::is_id_start(c) => { |
b039eaaf SL |
775 | self.cur.next(); |
776 | pos | |
777 | } | |
778 | _ => { | |
60c5eb7d | 779 | return ""; |
b039eaaf | 780 | } |
1a4d82fc | 781 | }; |
60c5eb7d | 782 | let mut end = None; |
e9174d1e | 783 | while let Some(&(pos, c)) = self.cur.peek() { |
e1599b0c | 784 | if rustc_lexer::is_id_continue(c) { |
e9174d1e SL |
785 | self.cur.next(); |
786 | } else { | |
60c5eb7d XL |
787 | end = Some(pos); |
788 | break; | |
1a4d82fc JJ |
789 | } |
790 | } | |
60c5eb7d XL |
791 | let end = end.unwrap_or(self.input.len()); |
792 | let word = &self.input[start..end]; | |
793 | if word == "_" { | |
794 | self.err_with_note( | |
795 | "invalid argument name `_`", | |
796 | "invalid argument name", | |
797 | "argument name cannot be a single underscore", | |
f2b60f7d | 798 | self.span(start, end), |
60c5eb7d XL |
799 | ); |
800 | } | |
801 | word | |
1a4d82fc JJ |
802 | } |
803 | ||
c34b1796 | 804 | fn integer(&mut self) -> Option<usize> { |
2b03887a | 805 | let mut cur: usize = 0; |
1a4d82fc | 806 | let mut found = false; |
2b03887a FG |
807 | let mut overflow = false; |
808 | let start = self.current_pos(); | |
e9174d1e SL |
809 | while let Some(&(_, c)) = self.cur.peek() { |
810 | if let Some(i) = c.to_digit(10) { | |
2b03887a FG |
811 | let (tmp, mul_overflow) = cur.overflowing_mul(10); |
812 | let (tmp, add_overflow) = tmp.overflowing_add(i as usize); | |
813 | if mul_overflow || add_overflow { | |
814 | overflow = true; | |
815 | } | |
816 | cur = tmp; | |
e9174d1e SL |
817 | found = true; |
818 | self.cur.next(); | |
819 | } else { | |
92a42be0 | 820 | break; |
1a4d82fc JJ |
821 | } |
822 | } | |
2b03887a FG |
823 | |
824 | if overflow { | |
825 | let end = self.current_pos(); | |
826 | let overflowed_int = &self.input[start..end]; | |
827 | self.err( | |
828 | format!( | |
829 | "integer `{}` does not fit into the type `usize` whose range is `0..={}`", | |
830 | overflowed_int, | |
831 | usize::MAX | |
832 | ), | |
833 | "integer out of range for `usize`", | |
834 | self.span(start, end), | |
835 | ); | |
836 | } | |
837 | ||
9ffffee4 FG |
838 | found.then_some(cur) |
839 | } | |
840 | ||
841 | fn suggest_format(&mut self) { | |
842 | if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) { | |
843 | let word = self.word(); | |
844 | let _end = self.current_pos(); | |
845 | let pos = self.to_span_index(pos); | |
846 | self.errors.insert( | |
847 | 0, | |
848 | ParseError { | |
849 | description: "expected format parameter to occur after `:`".to_owned(), | |
850 | note: Some(format!("`?` comes after `:`, try `{}:{}` instead", word, "?")), | |
851 | label: "expected `?` to occur after `:`".to_owned(), | |
852 | span: pos.to(pos), | |
853 | secondary_label: None, | |
854 | should_be_replaced_with_positional_argument: false, | |
855 | }, | |
856 | ); | |
857 | } | |
1a4d82fc | 858 | } |
064997fb FG |
859 | |
860 | fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) { | |
861 | if let Some(end) = self.consume_pos('.') { | |
862 | let byte_pos = self.to_span_index(end); | |
863 | let start = InnerOffset(byte_pos.0 + 1); | |
864 | let field = self.argument(start); | |
865 | // We can only parse `foo.bar` field access, any deeper nesting, | |
866 | // or another type of expression, like method calls, are not supported | |
867 | if !self.consume('}') { | |
868 | return; | |
869 | } | |
870 | if let ArgumentNamed(_) = arg.position { | |
871 | if let ArgumentNamed(_) = field.position { | |
872 | self.errors.insert( | |
873 | 0, | |
874 | ParseError { | |
875 | description: "field access isn't supported".to_string(), | |
876 | note: None, | |
877 | label: "not supported".to_string(), | |
878 | span: InnerSpan::new(arg.position_span.start, field.position_span.end), | |
879 | secondary_label: None, | |
880 | should_be_replaced_with_positional_argument: true, | |
881 | }, | |
882 | ); | |
883 | } | |
884 | } | |
885 | } | |
886 | } | |
1a4d82fc JJ |
887 | } |
888 | ||
f9f354fc XL |
889 | /// Finds the indices of all characters that have been processed and differ between the actual |
890 | /// written code (code snippet) and the `InternedString` that gets processed in the `Parser` | |
5e7ed085 | 891 | /// in order to properly synthesise the intra-string `Span`s for error diagnostics. |
9c376795 | 892 | fn find_width_map_from_snippet( |
f9f354fc XL |
893 | snippet: Option<string::String>, |
894 | str_style: Option<usize>, | |
9c376795 | 895 | ) -> InputStringKind { |
f9f354fc | 896 | let snippet = match snippet { |
6a06907d | 897 | Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s, |
9c376795 | 898 | _ => return InputStringKind::NotALiteral, |
f9f354fc XL |
899 | }; |
900 | ||
487cf647 | 901 | if str_style.is_some() { |
9c376795 | 902 | return InputStringKind::Literal { width_mappings: Vec::new() }; |
487cf647 | 903 | } |
cdc7bbd5 | 904 | |
487cf647 FG |
905 | let snippet = &snippet[1..snippet.len() - 1]; |
906 | ||
907 | let mut s = snippet.char_indices(); | |
9c376795 | 908 | let mut width_mappings = vec![]; |
487cf647 FG |
909 | while let Some((pos, c)) = s.next() { |
910 | match (c, s.clone().next()) { | |
911 | // skip whitespace and empty lines ending in '\\' | |
9c376795 | 912 | ('\\', Some((_, '\n'))) => { |
487cf647 | 913 | let _ = s.next(); |
9c376795 | 914 | let mut width = 2; |
487cf647 | 915 | |
9c376795 | 916 | while let Some((_, c)) = s.clone().next() { |
487cf647 | 917 | if matches!(c, ' ' | '\n' | '\t') { |
9c376795 | 918 | width += 1; |
487cf647 FG |
919 | let _ = s.next(); |
920 | } else { | |
921 | break; | |
f9f354fc XL |
922 | } |
923 | } | |
9c376795 FG |
924 | |
925 | width_mappings.push(InnerWidthMapping::new(pos, width, 0)); | |
487cf647 | 926 | } |
9c376795 FG |
927 | ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { |
928 | width_mappings.push(InnerWidthMapping::new(pos, 2, 1)); | |
487cf647 FG |
929 | let _ = s.next(); |
930 | } | |
931 | ('\\', Some((_, 'x'))) => { | |
9c376795 FG |
932 | // consume `\xAB` literal |
933 | s.nth(2); | |
934 | width_mappings.push(InnerWidthMapping::new(pos, 4, 1)); | |
487cf647 FG |
935 | } |
936 | ('\\', Some((_, 'u'))) => { | |
9c376795 FG |
937 | let mut width = 2; |
938 | let _ = s.next(); | |
939 | ||
940 | if let Some((_, next_c)) = s.next() { | |
487cf647 FG |
941 | if next_c == '{' { |
942 | // consume up to 6 hexanumeric chars | |
943 | let digits_len = | |
944 | s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count(); | |
945 | ||
946 | let len_utf8 = s | |
947 | .as_str() | |
948 | .get(..digits_len) | |
949 | .and_then(|digits| u32::from_str_radix(digits, 16).ok()) | |
950 | .and_then(char::from_u32) | |
951 | .map_or(1, char::len_utf8); | |
952 | ||
953 | // Skip the digits, for chars that encode to more than 1 utf-8 byte | |
954 | // exclude as many digits as it is greater than 1 byte | |
955 | // | |
956 | // So for a 3 byte character, exclude 2 digits | |
957 | let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1)); | |
958 | ||
959 | // skip '{' and '}' also | |
9c376795 | 960 | width += required_skips + 2; |
487cf647 FG |
961 | |
962 | s.nth(digits_len); | |
963 | } else if next_c.is_digit(16) { | |
9c376795 FG |
964 | width += 1; |
965 | ||
487cf647 FG |
966 | // We suggest adding `{` and `}` when appropriate, accept it here as if |
967 | // it were correct | |
968 | let mut i = 0; // consume up to 6 hexanumeric chars | |
9c376795 | 969 | while let (Some((_, c)), _) = (s.next(), i < 6) { |
487cf647 | 970 | if c.is_digit(16) { |
9c376795 | 971 | width += 1; |
487cf647 FG |
972 | } else { |
973 | break; | |
f9f354fc | 974 | } |
487cf647 | 975 | i += 1; |
f9f354fc XL |
976 | } |
977 | } | |
978 | } | |
9c376795 FG |
979 | |
980 | width_mappings.push(InnerWidthMapping::new(pos, width, 1)); | |
f9f354fc | 981 | } |
487cf647 | 982 | _ => {} |
f9f354fc | 983 | } |
f9f354fc | 984 | } |
9c376795 FG |
985 | |
986 | InputStringKind::Literal { width_mappings } | |
f9f354fc XL |
987 | } |
988 | ||
// Assert a reasonable size for `Piece`: it is expected to occupy 16 bytes. The check is only
// compiled on x86_64 targets with 64-bit pointers, as selected by the `cfg` below.
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
rustc_data_structures::static_assert_size!(Piece<'_>, 16);
992 | ||
1a4d82fc | 993 | #[cfg(test)] |
dc9dc135 | 994 | mod tests; |