]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | //! Macro support for format strings |
2 | //! | |
3 | //! These structures are used when parsing format strings for the compiler. | |
4 | //! Parsing does not happen at runtime: structures of `std::fmt::rt` are | |
5 | //! generated instead. | |
6 | ||
dfeec247 | 7 | #![doc( |
1b1a35ee | 8 | html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/", |
dfeec247 XL |
9 | html_playground_url = "https://play.rust-lang.org/", |
10 | test(attr(deny(warnings))) | |
11 | )] | |
f2b60f7d FG |
12 | #![deny(rustc::untranslatable_diagnostic)] |
13 | #![deny(rustc::diagnostic_outside_of_impl)] | |
04454e1e FG |
14 | // We want to be able to build this crate with a stable compiler, so no |
15 | // `#![feature]` attributes should be added. | |
b7449926 | 16 | |
9fa01778 | 17 | pub use Alignment::*; |
9fa01778 | 18 | pub use Count::*; |
dfeec247 XL |
19 | pub use Flag::*; |
20 | pub use Piece::*; | |
21 | pub use Position::*; | |
1a4d82fc | 22 | |
dfeec247 | 23 | use std::iter; |
1a4d82fc JJ |
24 | use std::str; |
25 | use std::string; | |
26 | ||
04454e1e FG |
// Note: copied from rustc_span
/// A `start`/`end` pair of relative offsets inside a `Span`. Diagnostics use
/// it when only positions relative to the string are available.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InnerSpan {
    /// Offset at which the range begins.
    pub start: usize,
    /// Offset at which the range ends.
    pub end: usize,
}

impl InnerSpan {
    /// Builds an `InnerSpan` from its `start` and `end` offsets.
    pub fn new(start: usize, end: usize) -> InnerSpan {
        Self { start, end }
    }
}
dc9dc135 | 40 | |
f9f354fc XL |
/// Selects which flavour of format string the parser is reading.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseMode {
    /// An ordinary format string, as accepted by `format_args!`.
    Format,
    /// A template string for inline assembly (`asm!`).
    InlineAsm,
}
49 | ||
dc9dc135 XL |
50 | #[derive(Copy, Clone)] |
51 | struct InnerOffset(usize); | |
52 | ||
53 | impl InnerOffset { | |
54 | fn to(self, end: InnerOffset) -> InnerSpan { | |
55 | InnerSpan::new(self.0, end.0) | |
56 | } | |
57 | } | |
58 | ||
1a4d82fc JJ |
59 | /// A piece is a portion of the format string which represents the next part |
60 | /// to emit. These are emitted as a stream by the `Parser` class. | |
60c5eb7d | 61 | #[derive(Copy, Clone, Debug, PartialEq)] |
1a4d82fc JJ |
62 | pub enum Piece<'a> { |
63 | /// A literal string which should directly be emitted | |
64 | String(&'a str), | |
65 | /// This describes that formatting should process the next argument (as | |
66 | /// specified inside) for emission. | |
67 | NextArgument(Argument<'a>), | |
68 | } | |
69 | ||
70 | /// Representation of an argument specification. | |
60c5eb7d | 71 | #[derive(Copy, Clone, Debug, PartialEq)] |
1a4d82fc JJ |
72 | pub struct Argument<'a> { |
73 | /// Where to find this argument | |
04454e1e | 74 | pub position: Position<'a>, |
064997fb FG |
75 | /// The span of the position indicator. Includes any whitespace in implicit |
76 | /// positions (`{ }`). | |
77 | pub position_span: InnerSpan, | |
1a4d82fc JJ |
78 | /// How to format the argument |
79 | pub format: FormatSpec<'a>, | |
80 | } | |
81 | ||
82 | /// Specification for the formatting of an argument in the format string. | |
60c5eb7d | 83 | #[derive(Copy, Clone, Debug, PartialEq)] |
1a4d82fc | 84 | pub struct FormatSpec<'a> { |
416331ca | 85 | /// Optionally specified character to fill alignment with. |
1a4d82fc | 86 | pub fill: Option<char>, |
416331ca | 87 | /// Optionally specified alignment. |
1a4d82fc | 88 | pub align: Alignment, |
416331ca | 89 | /// Packed version of various flags provided. |
c34b1796 | 90 | pub flags: u32, |
416331ca | 91 | /// The integer precision to use. |
04454e1e | 92 | pub precision: Count<'a>, |
416331ca XL |
93 | /// The span of the precision formatting flag (for diagnostics). |
94 | pub precision_span: Option<InnerSpan>, | |
95 | /// The string width requested for the resulting format. | |
04454e1e | 96 | pub width: Count<'a>, |
416331ca XL |
97 | /// The span of the width formatting flag (for diagnostics). |
98 | pub width_span: Option<InnerSpan>, | |
1a4d82fc JJ |
99 | /// The descriptor string representing the name of the format desired for |
100 | /// this argument, this can be empty or any number of characters, although | |
101 | /// it is required to be one word. | |
b039eaaf | 102 | pub ty: &'a str, |
60c5eb7d XL |
103 | /// The span of the descriptor string (for diagnostics). |
104 | pub ty_span: Option<InnerSpan>, | |
1a4d82fc JJ |
105 | } |
106 | ||
/// Enum describing where an argument for a format can be located.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Position<'a> {
    /// The argument is implied to be located at an index.
    ArgumentImplicitlyIs(usize),
    /// The argument is located at a specific index given in the format.
    ArgumentIs(usize),
    /// The argument has a name.
    ArgumentNamed(&'a str),
}

impl Position<'_> {
    /// Returns the numeric index of this position.
    ///
    /// Yields `Some(i)` for both explicit (`ArgumentIs`) and implicit
    /// (`ArgumentImplicitlyIs`) indices, and `None` for named arguments.
    pub fn index(&self) -> Option<usize> {
        // The match is exhaustive on purpose: adding a variant to `Position`
        // must force a decision here instead of silently yielding `None`.
        match *self {
            Position::ArgumentIs(i) | Position::ArgumentImplicitlyIs(i) => Some(i),
            Position::ArgumentNamed(_) => None,
        }
    }
}
126 | ||
/// The alignments a format specification can request.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Alignment {
    /// Align the value to the left.
    AlignLeft,
    /// Align the value to the right.
    AlignRight,
    /// Center the value.
    AlignCenter,
    /// No alignment was given; the value takes on a default alignment.
    AlignUnknown,
}
139 | ||
/// Flags that may appear in a format specification. Their meaning is defined
/// by the formatters themselves.
///
/// The parser stores a flag in `FormatSpec::flags` as bit
/// `1 << (flag as u32)`, so the declaration order of the variants matters.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Flag {
    /// `+`: positive numbers are marked with a `+`.
    FlagSignPlus,
    /// `-`: negative numbers are marked with a `-`. This is the default.
    FlagSignMinus,
    /// `#`: use the alternate form of the value. For numbers this means the
    /// number is prefixed with the supplied string.
    FlagAlternate,
    /// `0`: for numbers, pad with zeroes, placing the sign (`+` or `-`) in
    /// front of them.
    FlagSignAwareZeroPad,
    /// For Debug / `?`, format integers in lower-case hexadecimal.
    FlagDebugLowerHex,
    /// For Debug / `?`, format integers in upper-case hexadecimal.
    FlagDebugUpperHex,
}
159 | ||
160 | /// A count is used for the precision and width parameters of an integer, and | |
161 | /// can reference either an argument or a literal integer. | |
60c5eb7d | 162 | #[derive(Copy, Clone, Debug, PartialEq)] |
04454e1e | 163 | pub enum Count<'a> { |
1a4d82fc | 164 | /// The count is specified explicitly. |
c34b1796 | 165 | CountIs(usize), |
1a4d82fc | 166 | /// The count is specified by the argument with the given name. |
04454e1e | 167 | CountIsName(&'a str, InnerSpan), |
1a4d82fc | 168 | /// The count is specified by the argument at the given index. |
c34b1796 | 169 | CountIsParam(usize), |
f2b60f7d FG |
170 | /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index. |
171 | CountIsStar(usize), | |
1a4d82fc JJ |
172 | /// The count is implied and cannot be explicitly specified. |
173 | CountImplied, | |
174 | } | |
175 | ||
94b46f34 XL |
176 | pub struct ParseError { |
177 | pub description: string::String, | |
178 | pub note: Option<string::String>, | |
179 | pub label: string::String, | |
dc9dc135 XL |
180 | pub span: InnerSpan, |
181 | pub secondary_label: Option<(string::String, InnerSpan)>, | |
064997fb | 182 | pub should_be_replaced_with_positional_argument: bool, |
94b46f34 XL |
183 | } |
184 | ||
1a4d82fc | 185 | /// The parser structure for interpreting the input format string. This is |
b039eaaf | 186 | /// modeled as an iterator over `Piece` structures to form a stream of tokens |
1a4d82fc JJ |
187 | /// being output. |
188 | /// | |
189 | /// This is a recursive-descent parser for the sake of simplicity, and if | |
190 | /// necessary there's probably lots of room for improvement performance-wise. | |
191 | pub struct Parser<'a> { | |
f9f354fc | 192 | mode: ParseMode, |
1a4d82fc | 193 | input: &'a str, |
e9174d1e | 194 | cur: iter::Peekable<str::CharIndices<'a>>, |
1a4d82fc | 195 | /// Error messages accumulated during parsing |
94b46f34 | 196 | pub errors: Vec<ParseError>, |
5bcae85e | 197 | /// Current position of implicit positional argument pointer |
f035d41b | 198 | pub curarg: usize, |
8faf50e0 XL |
199 | /// `Some(raw count)` when the string is "raw", used to position spans correctly |
200 | style: Option<usize>, | |
b7449926 | 201 | /// Start and end byte offset of every successfully parsed argument |
dc9dc135 | 202 | pub arg_places: Vec<InnerSpan>, |
0731742a XL |
203 | /// Characters that need to be shifted |
204 | skips: Vec<usize>, | |
dc9dc135 XL |
205 | /// Span of the last opening brace seen, used for error reporting |
206 | last_opening_brace: Option<InnerSpan>, | |
74b04a01 | 207 | /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` |
0731742a | 208 | append_newline: bool, |
f9f354fc | 209 | /// Whether this formatting string is a literal or it comes from a macro. |
f035d41b | 210 | pub is_literal: bool, |
f9f354fc XL |
211 | /// Start position of the current line. |
212 | cur_line_start: usize, | |
213 | /// Start and end byte offset of every line of the format string. Excludes | |
214 | /// newline characters and leading whitespace. | |
215 | pub line_spans: Vec<InnerSpan>, | |
0731742a XL |
216 | } |
217 | ||
1a4d82fc JJ |
218 | impl<'a> Iterator for Parser<'a> { |
219 | type Item = Piece<'a>; | |
220 | ||
221 | fn next(&mut self) -> Option<Piece<'a>> { | |
e9174d1e SL |
222 | if let Some(&(pos, c)) = self.cur.peek() { |
223 | match c { | |
224 | '{' => { | |
dc9dc135 XL |
225 | let curr_last_brace = self.last_opening_brace; |
226 | let byte_pos = self.to_span_index(pos); | |
2b03887a | 227 | let lbrace_end = self.to_span_index(pos + 1); |
064997fb | 228 | self.last_opening_brace = Some(byte_pos.to(lbrace_end)); |
e9174d1e SL |
229 | self.cur.next(); |
230 | if self.consume('{') { | |
dc9dc135 | 231 | self.last_opening_brace = curr_last_brace; |
0731742a | 232 | |
e9174d1e SL |
233 | Some(String(self.string(pos + 1))) |
234 | } else { | |
064997fb | 235 | let arg = self.argument(lbrace_end); |
cdc7bbd5 XL |
236 | if let Some(rbrace_byte_idx) = self.must_consume('}') { |
237 | let lbrace_inner_offset = self.to_span_index(pos); | |
238 | let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx); | |
f9f354fc | 239 | if self.is_literal { |
cdc7bbd5 XL |
240 | self.arg_places.push( |
241 | lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)), | |
242 | ); | |
f9f354fc | 243 | } |
064997fb FG |
244 | } else { |
245 | self.suggest_positional_arg_instead_of_captured_arg(arg); | |
8faf50e0 XL |
246 | } |
247 | Some(NextArgument(arg)) | |
e9174d1e | 248 | } |
1a4d82fc | 249 | } |
e9174d1e SL |
250 | '}' => { |
251 | self.cur.next(); | |
252 | if self.consume('}') { | |
8faf50e0 | 253 | Some(String(self.string(pos + 1))) |
e9174d1e | 254 | } else { |
0731742a | 255 | let err_pos = self.to_span_index(pos); |
94b46f34 XL |
256 | self.err_with_note( |
257 | "unmatched `}` found", | |
258 | "unmatched `}`", | |
259 | "if you intended to print `}`, you can escape it using `}}`", | |
dc9dc135 | 260 | err_pos.to(err_pos), |
94b46f34 | 261 | ); |
e9174d1e SL |
262 | None |
263 | } | |
1a4d82fc | 264 | } |
e9174d1e | 265 | _ => Some(String(self.string(pos))), |
1a4d82fc | 266 | } |
e9174d1e | 267 | } else { |
f035d41b | 268 | if self.is_literal { |
f2b60f7d | 269 | let span = self.span(self.cur_line_start, self.input.len()); |
f035d41b XL |
270 | if self.line_spans.last() != Some(&span) { |
271 | self.line_spans.push(span); | |
272 | } | |
f9f354fc | 273 | } |
e9174d1e | 274 | None |
1a4d82fc JJ |
275 | } |
276 | } | |
277 | } | |
278 | ||
279 | impl<'a> Parser<'a> { | |
280 | /// Creates a new parser for the given format string | |
0731742a XL |
281 | pub fn new( |
282 | s: &'a str, | |
283 | style: Option<usize>, | |
f9f354fc | 284 | snippet: Option<string::String>, |
0731742a | 285 | append_newline: bool, |
f9f354fc | 286 | mode: ParseMode, |
0731742a | 287 | ) -> Parser<'a> { |
f9f354fc | 288 | let (skips, is_literal) = find_skips_from_snippet(snippet, style); |
1a4d82fc | 289 | Parser { |
f9f354fc | 290 | mode, |
1a4d82fc | 291 | input: s, |
e9174d1e | 292 | cur: s.char_indices().peekable(), |
92a42be0 | 293 | errors: vec![], |
5bcae85e | 294 | curarg: 0, |
8faf50e0 | 295 | style, |
8faf50e0 | 296 | arg_places: vec![], |
0731742a | 297 | skips, |
dc9dc135 | 298 | last_opening_brace: None, |
0731742a | 299 | append_newline, |
f9f354fc XL |
300 | is_literal, |
301 | cur_line_start: 0, | |
302 | line_spans: vec![], | |
1a4d82fc JJ |
303 | } |
304 | } | |
305 | ||
306 | /// Notifies of an error. The message doesn't actually need to be of type | |
307 | /// String, but I think it does when this eventually uses conditions so it | |
308 | /// might as well start using it now. | |
94b46f34 XL |
309 | fn err<S1: Into<string::String>, S2: Into<string::String>>( |
310 | &mut self, | |
311 | description: S1, | |
312 | label: S2, | |
dc9dc135 | 313 | span: InnerSpan, |
94b46f34 XL |
314 | ) { |
315 | self.errors.push(ParseError { | |
316 | description: description.into(), | |
317 | note: None, | |
318 | label: label.into(), | |
dc9dc135 | 319 | span, |
0731742a | 320 | secondary_label: None, |
064997fb | 321 | should_be_replaced_with_positional_argument: false, |
94b46f34 | 322 | }); |
476ff2be SL |
323 | } |
324 | ||
325 | /// Notifies of an error. The message doesn't actually need to be of type | |
326 | /// String, but I think it does when this eventually uses conditions so it | |
327 | /// might as well start using it now. | |
dfeec247 XL |
328 | fn err_with_note< |
329 | S1: Into<string::String>, | |
330 | S2: Into<string::String>, | |
331 | S3: Into<string::String>, | |
332 | >( | |
94b46f34 XL |
333 | &mut self, |
334 | description: S1, | |
335 | label: S2, | |
336 | note: S3, | |
dc9dc135 | 337 | span: InnerSpan, |
94b46f34 XL |
338 | ) { |
339 | self.errors.push(ParseError { | |
340 | description: description.into(), | |
341 | note: Some(note.into()), | |
342 | label: label.into(), | |
dc9dc135 | 343 | span, |
0731742a | 344 | secondary_label: None, |
064997fb | 345 | should_be_replaced_with_positional_argument: false, |
94b46f34 | 346 | }); |
1a4d82fc JJ |
347 | } |
348 | ||
349 | /// Optionally consumes the specified character. If the character is not at | |
416331ca XL |
350 | /// the current position, then the current iterator isn't moved and `false` is |
351 | /// returned, otherwise the character is consumed and `true` is returned. | |
1a4d82fc | 352 | fn consume(&mut self, c: char) -> bool { |
416331ca XL |
353 | self.consume_pos(c).is_some() |
354 | } | |
355 | ||
356 | /// Optionally consumes the specified character. If the character is not at | |
357 | /// the current position, then the current iterator isn't moved and `None` is | |
358 | /// returned, otherwise the character is consumed and the current position is | |
359 | /// returned. | |
360 | fn consume_pos(&mut self, c: char) -> Option<usize> { | |
361 | if let Some(&(pos, maybe)) = self.cur.peek() { | |
b039eaaf SL |
362 | if c == maybe { |
363 | self.cur.next(); | |
416331ca | 364 | return Some(pos); |
b039eaaf | 365 | } |
1a4d82fc | 366 | } |
416331ca | 367 | None |
1a4d82fc JJ |
368 | } |
369 | ||
dc9dc135 | 370 | fn to_span_index(&self, pos: usize) -> InnerOffset { |
0731742a | 371 | let mut pos = pos; |
dc9dc135 XL |
372 | // This handles the raw string case, the raw argument is the number of # |
373 | // in r###"..."### (we need to add one because of the `r`). | |
5869c6ff | 374 | let raw = self.style.map_or(0, |raw| raw + 1); |
0731742a XL |
375 | for skip in &self.skips { |
376 | if pos > *skip { | |
377 | pos += 1; | |
dc9dc135 | 378 | } else if pos == *skip && raw == 0 { |
0731742a XL |
379 | pos += 1; |
380 | } else { | |
381 | break; | |
382 | } | |
383 | } | |
dc9dc135 | 384 | InnerOffset(raw + pos + 1) |
0731742a XL |
385 | } |
386 | ||
f2b60f7d FG |
387 | fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { |
388 | let start = self.to_span_index(start_pos); | |
389 | let end = self.to_span_index(end_pos); | |
390 | start.to(end) | |
391 | } | |
392 | ||
1a4d82fc JJ |
393 | /// Forces consumption of the specified character. If the character is not |
394 | /// found, an error is emitted. | |
8faf50e0 | 395 | fn must_consume(&mut self, c: char) -> Option<usize> { |
1a4d82fc | 396 | self.ws(); |
8faf50e0 | 397 | |
94b46f34 | 398 | if let Some(&(pos, maybe)) = self.cur.peek() { |
e9174d1e | 399 | if c == maybe { |
1a4d82fc | 400 | self.cur.next(); |
8faf50e0 | 401 | Some(pos) |
e9174d1e | 402 | } else { |
0731742a XL |
403 | let pos = self.to_span_index(pos); |
404 | let description = format!("expected `'}}'`, found `{:?}`", maybe); | |
405 | let label = "expected `}`".to_owned(); | |
406 | let (note, secondary_label) = if c == '}' { | |
dfeec247 XL |
407 | ( |
408 | Some( | |
409 | "if you intended to print `{`, you can escape it using `{{`".to_owned(), | |
410 | ), | |
411 | self.last_opening_brace | |
412 | .map(|sp| ("because of this opening brace".to_owned(), sp)), | |
413 | ) | |
0731742a XL |
414 | } else { |
415 | (None, None) | |
416 | }; | |
417 | self.errors.push(ParseError { | |
418 | description, | |
419 | note, | |
420 | label, | |
dc9dc135 | 421 | span: pos.to(pos), |
0731742a | 422 | secondary_label, |
064997fb | 423 | should_be_replaced_with_positional_argument: false, |
0731742a | 424 | }); |
8faf50e0 | 425 | None |
1a4d82fc | 426 | } |
e9174d1e | 427 | } else { |
0731742a XL |
428 | let description = format!("expected `{:?}` but string was terminated", c); |
429 | // point at closing `"` | |
430 | let pos = self.input.len() - if self.append_newline { 1 } else { 0 }; | |
431 | let pos = self.to_span_index(pos); | |
476ff2be | 432 | if c == '}' { |
0731742a XL |
433 | let label = format!("expected `{:?}`", c); |
434 | let (note, secondary_label) = if c == '}' { | |
dfeec247 XL |
435 | ( |
436 | Some( | |
437 | "if you intended to print `{`, you can escape it using `{{`".to_owned(), | |
438 | ), | |
439 | self.last_opening_brace | |
440 | .map(|sp| ("because of this opening brace".to_owned(), sp)), | |
441 | ) | |
0731742a XL |
442 | } else { |
443 | (None, None) | |
444 | }; | |
445 | self.errors.push(ParseError { | |
446 | description, | |
447 | note, | |
448 | label, | |
dc9dc135 | 449 | span: pos.to(pos), |
0731742a | 450 | secondary_label, |
064997fb | 451 | should_be_replaced_with_positional_argument: false, |
0731742a | 452 | }); |
476ff2be | 453 | } else { |
dc9dc135 | 454 | self.err(description, format!("expected `{:?}`", c), pos.to(pos)); |
476ff2be | 455 | } |
8faf50e0 | 456 | None |
1a4d82fc JJ |
457 | } |
458 | } | |
459 | ||
0731742a | 460 | /// Consumes all whitespace characters until the first non-whitespace character |
1a4d82fc | 461 | fn ws(&mut self) { |
e9174d1e | 462 | while let Some(&(_, c)) = self.cur.peek() { |
b039eaaf SL |
463 | if c.is_whitespace() { |
464 | self.cur.next(); | |
465 | } else { | |
92a42be0 | 466 | break; |
b039eaaf | 467 | } |
1a4d82fc JJ |
468 | } |
469 | } | |
470 | ||
471 | /// Parses all of a string which is to be considered a "raw literal" in a | |
472 | /// format string. This is everything outside of the braces. | |
c34b1796 | 473 | fn string(&mut self, start: usize) -> &'a str { |
e9174d1e SL |
474 | // we may not consume the character, peek the iterator |
475 | while let Some(&(pos, c)) = self.cur.peek() { | |
476 | match c { | |
b039eaaf SL |
477 | '{' | '}' => { |
478 | return &self.input[start..pos]; | |
479 | } | |
f9f354fc | 480 | '\n' if self.is_literal => { |
f2b60f7d | 481 | self.line_spans.push(self.span(self.cur_line_start, pos)); |
f9f354fc XL |
482 | self.cur_line_start = pos + 1; |
483 | self.cur.next(); | |
484 | } | |
b039eaaf | 485 | _ => { |
f9f354fc XL |
486 | if self.is_literal && pos == self.cur_line_start && c.is_whitespace() { |
487 | self.cur_line_start = pos + c.len_utf8(); | |
488 | } | |
b039eaaf SL |
489 | self.cur.next(); |
490 | } | |
1a4d82fc JJ |
491 | } |
492 | } | |
e9174d1e | 493 | &self.input[start..self.input.len()] |
1a4d82fc JJ |
494 | } |
495 | ||
e74abb32 | 496 | /// Parses an `Argument` structure, or what's contained within braces inside the format string. |
064997fb | 497 | fn argument(&mut self, start: InnerOffset) -> Argument<'a> { |
5bcae85e | 498 | let pos = self.position(); |
064997fb FG |
499 | |
500 | let end = self | |
501 | .cur | |
502 | .clone() | |
503 | .find(|(_, ch)| !ch.is_whitespace()) | |
504 | .map_or(start, |(end, _)| self.to_span_index(end)); | |
505 | let position_span = start.to(end); | |
506 | ||
f9f354fc XL |
507 | let format = match self.mode { |
508 | ParseMode::Format => self.format(), | |
509 | ParseMode::InlineAsm => self.inline_asm(), | |
510 | }; | |
5bcae85e SL |
511 | |
512 | // Resolve position after parsing format spec. | |
513 | let pos = match pos { | |
514 | Some(position) => position, | |
515 | None => { | |
516 | let i = self.curarg; | |
517 | self.curarg += 1; | |
abe05a73 | 518 | ArgumentImplicitlyIs(i) |
5bcae85e SL |
519 | } |
520 | }; | |
521 | ||
064997fb | 522 | Argument { position: pos, position_span, format } |
1a4d82fc JJ |
523 | } |
524 | ||
525 | /// Parses a positional argument for a format. This could either be an | |
526 | /// integer index of an argument, a named argument, or a blank string. | |
5bcae85e SL |
527 | /// Returns `Some(parsed_position)` if the position is not implicitly |
528 | /// consuming a macro argument, `None` if it's the case. | |
04454e1e | 529 | fn position(&mut self) -> Option<Position<'a>> { |
e9174d1e | 530 | if let Some(i) = self.integer() { |
5bcae85e | 531 | Some(ArgumentIs(i)) |
e9174d1e SL |
532 | } else { |
533 | match self.cur.peek() { | |
064997fb | 534 | Some(&(_, c)) if rustc_lexer::is_id_start(c) => Some(ArgumentNamed(self.word())), |
5bcae85e SL |
535 | |
536 | // This is an `ArgumentNext`. | |
537 | // Record the fact and do the resolution after parsing the | |
538 | // format spec, to make things like `{:.*}` work. | |
539 | _ => None, | |
1a4d82fc JJ |
540 | } |
541 | } | |
542 | } | |
543 | ||
f2b60f7d FG |
544 | fn current_pos(&mut self) -> usize { |
545 | if let Some(&(pos, _)) = self.cur.peek() { pos } else { self.input.len() } | |
546 | } | |
547 | ||
1a4d82fc | 548 | /// Parses a format specifier at the current position, returning all of the |
e74abb32 | 549 | /// relevant information in the `FormatSpec` struct. |
1a4d82fc JJ |
550 | fn format(&mut self) -> FormatSpec<'a> { |
551 | let mut spec = FormatSpec { | |
552 | fill: None, | |
553 | align: AlignUnknown, | |
554 | flags: 0, | |
555 | precision: CountImplied, | |
416331ca | 556 | precision_span: None, |
1a4d82fc | 557 | width: CountImplied, |
416331ca | 558 | width_span: None, |
85aaf69f | 559 | ty: &self.input[..0], |
60c5eb7d | 560 | ty_span: None, |
1a4d82fc | 561 | }; |
b039eaaf | 562 | if !self.consume(':') { |
92a42be0 | 563 | return spec; |
b039eaaf | 564 | } |
1a4d82fc JJ |
565 | |
566 | // fill character | |
e9174d1e | 567 | if let Some(&(_, c)) = self.cur.peek() { |
1b1a35ee XL |
568 | if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) { |
569 | spec.fill = Some(c); | |
570 | self.cur.next(); | |
1a4d82fc | 571 | } |
1a4d82fc JJ |
572 | } |
573 | // Alignment | |
574 | if self.consume('<') { | |
575 | spec.align = AlignLeft; | |
576 | } else if self.consume('>') { | |
577 | spec.align = AlignRight; | |
578 | } else if self.consume('^') { | |
579 | spec.align = AlignCenter; | |
580 | } | |
581 | // Sign flags | |
582 | if self.consume('+') { | |
c34b1796 | 583 | spec.flags |= 1 << (FlagSignPlus as u32); |
1a4d82fc | 584 | } else if self.consume('-') { |
c34b1796 | 585 | spec.flags |= 1 << (FlagSignMinus as u32); |
1a4d82fc JJ |
586 | } |
587 | // Alternate marker | |
588 | if self.consume('#') { | |
c34b1796 | 589 | spec.flags |= 1 << (FlagAlternate as u32); |
1a4d82fc JJ |
590 | } |
591 | // Width and precision | |
592 | let mut havewidth = false; | |
416331ca | 593 | |
1a4d82fc JJ |
594 | if self.consume('0') { |
595 | // small ambiguity with '0$' as a format string. In theory this is a | |
596 | // '0' flag and then an ill-formatted format string with just a '$' | |
597 | // and no count, but this is better if we instead interpret this as | |
598 | // no '0' flag and '0$' as the width instead. | |
064997fb | 599 | if let Some(end) = self.consume_pos('$') { |
1a4d82fc | 600 | spec.width = CountIsParam(0); |
f2b60f7d | 601 | spec.width_span = Some(self.span(end - 1, end + 1)); |
1a4d82fc JJ |
602 | havewidth = true; |
603 | } else { | |
c34b1796 | 604 | spec.flags |= 1 << (FlagSignAwareZeroPad as u32); |
1a4d82fc JJ |
605 | } |
606 | } | |
f2b60f7d | 607 | |
1a4d82fc | 608 | if !havewidth { |
f2b60f7d FG |
609 | let start = self.current_pos(); |
610 | spec.width = self.count(start); | |
611 | if spec.width != CountImplied { | |
612 | let end = self.current_pos(); | |
613 | spec.width_span = Some(self.span(start, end)); | |
614 | } | |
1a4d82fc | 615 | } |
064997fb | 616 | |
416331ca | 617 | if let Some(start) = self.consume_pos('.') { |
f2b60f7d | 618 | if self.consume('*') { |
5bcae85e SL |
619 | // Resolve `CountIsNextParam`. |
620 | // We can do this immediately as `position` is resolved later. | |
621 | let i = self.curarg; | |
622 | self.curarg += 1; | |
f2b60f7d | 623 | spec.precision = CountIsStar(i); |
1a4d82fc | 624 | } else { |
f2b60f7d | 625 | spec.precision = self.count(start + 1); |
1a4d82fc | 626 | } |
f2b60f7d FG |
627 | let end = self.current_pos(); |
628 | spec.precision_span = Some(self.span(start, end)); | |
1a4d82fc | 629 | } |
f2b60f7d FG |
630 | |
631 | let ty_span_start = self.current_pos(); | |
0531ce1d XL |
632 | // Optional radix followed by the actual format specifier |
633 | if self.consume('x') { | |
634 | if self.consume('?') { | |
635 | spec.flags |= 1 << (FlagDebugLowerHex as u32); | |
636 | spec.ty = "?"; | |
637 | } else { | |
638 | spec.ty = "x"; | |
639 | } | |
640 | } else if self.consume('X') { | |
641 | if self.consume('?') { | |
642 | spec.flags |= 1 << (FlagDebugUpperHex as u32); | |
643 | spec.ty = "?"; | |
644 | } else { | |
645 | spec.ty = "X"; | |
646 | } | |
647 | } else if self.consume('?') { | |
1a4d82fc JJ |
648 | spec.ty = "?"; |
649 | } else { | |
650 | spec.ty = self.word(); | |
60c5eb7d | 651 | if !spec.ty.is_empty() { |
f2b60f7d FG |
652 | let ty_span_end = self.current_pos(); |
653 | spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); | |
60c5eb7d | 654 | } |
1a4d82fc | 655 | } |
e9174d1e | 656 | spec |
1a4d82fc JJ |
657 | } |
658 | ||
f9f354fc XL |
659 | /// Parses an inline assembly template modifier at the current position, returning the modifier |
660 | /// in the `ty` field of the `FormatSpec` struct. | |
661 | fn inline_asm(&mut self) -> FormatSpec<'a> { | |
662 | let mut spec = FormatSpec { | |
663 | fill: None, | |
664 | align: AlignUnknown, | |
665 | flags: 0, | |
666 | precision: CountImplied, | |
667 | precision_span: None, | |
668 | width: CountImplied, | |
669 | width_span: None, | |
670 | ty: &self.input[..0], | |
671 | ty_span: None, | |
672 | }; | |
673 | if !self.consume(':') { | |
674 | return spec; | |
675 | } | |
676 | ||
f2b60f7d | 677 | let ty_span_start = self.current_pos(); |
f9f354fc | 678 | spec.ty = self.word(); |
f9f354fc | 679 | if !spec.ty.is_empty() { |
f2b60f7d FG |
680 | let ty_span_end = self.current_pos(); |
681 | spec.ty_span = Some(self.span(ty_span_start, ty_span_end)); | |
f9f354fc XL |
682 | } |
683 | ||
684 | spec | |
685 | } | |
686 | ||
e74abb32 | 687 | /// Parses a `Count` parameter at the current position. This does not check |
1a4d82fc JJ |
688 | /// for 'CountIsNextParam' because that is only used in precision, not |
689 | /// width. | |
f2b60f7d | 690 | fn count(&mut self, start: usize) -> Count<'a> { |
e9174d1e | 691 | if let Some(i) = self.integer() { |
f2b60f7d | 692 | if self.consume('$') { CountIsParam(i) } else { CountIs(i) } |
e9174d1e SL |
693 | } else { |
694 | let tmp = self.cur.clone(); | |
695 | let word = self.word(); | |
696 | if word.is_empty() { | |
697 | self.cur = tmp; | |
f2b60f7d | 698 | CountImplied |
5099ac24 | 699 | } else if let Some(end) = self.consume_pos('$') { |
f2b60f7d FG |
700 | let name_span = self.span(start, end); |
701 | CountIsName(word, name_span) | |
e9174d1e | 702 | } else { |
b7449926 | 703 | self.cur = tmp; |
f2b60f7d | 704 | CountImplied |
1a4d82fc JJ |
705 | } |
706 | } | |
707 | } | |
708 | ||
e1599b0c XL |
709 | /// Parses a word starting at the current position. A word is the same as |
710 | /// Rust identifier, except that it can't start with `_` character. | |
1a4d82fc | 711 | fn word(&mut self) -> &'a str { |
e9174d1e | 712 | let start = match self.cur.peek() { |
60c5eb7d | 713 | Some(&(pos, c)) if rustc_lexer::is_id_start(c) => { |
b039eaaf SL |
714 | self.cur.next(); |
715 | pos | |
716 | } | |
717 | _ => { | |
60c5eb7d | 718 | return ""; |
b039eaaf | 719 | } |
1a4d82fc | 720 | }; |
60c5eb7d | 721 | let mut end = None; |
e9174d1e | 722 | while let Some(&(pos, c)) = self.cur.peek() { |
e1599b0c | 723 | if rustc_lexer::is_id_continue(c) { |
e9174d1e SL |
724 | self.cur.next(); |
725 | } else { | |
60c5eb7d XL |
726 | end = Some(pos); |
727 | break; | |
1a4d82fc JJ |
728 | } |
729 | } | |
60c5eb7d XL |
730 | let end = end.unwrap_or(self.input.len()); |
731 | let word = &self.input[start..end]; | |
732 | if word == "_" { | |
733 | self.err_with_note( | |
734 | "invalid argument name `_`", | |
735 | "invalid argument name", | |
736 | "argument name cannot be a single underscore", | |
f2b60f7d | 737 | self.span(start, end), |
60c5eb7d XL |
738 | ); |
739 | } | |
740 | word | |
1a4d82fc JJ |
741 | } |
742 | ||
c34b1796 | 743 | fn integer(&mut self) -> Option<usize> { |
2b03887a | 744 | let mut cur: usize = 0; |
1a4d82fc | 745 | let mut found = false; |
2b03887a FG |
746 | let mut overflow = false; |
747 | let start = self.current_pos(); | |
e9174d1e SL |
748 | while let Some(&(_, c)) = self.cur.peek() { |
749 | if let Some(i) = c.to_digit(10) { | |
2b03887a FG |
750 | let (tmp, mul_overflow) = cur.overflowing_mul(10); |
751 | let (tmp, add_overflow) = tmp.overflowing_add(i as usize); | |
752 | if mul_overflow || add_overflow { | |
753 | overflow = true; | |
754 | } | |
755 | cur = tmp; | |
e9174d1e SL |
756 | found = true; |
757 | self.cur.next(); | |
758 | } else { | |
92a42be0 | 759 | break; |
1a4d82fc JJ |
760 | } |
761 | } | |
2b03887a FG |
762 | |
763 | if overflow { | |
764 | let end = self.current_pos(); | |
765 | let overflowed_int = &self.input[start..end]; | |
766 | self.err( | |
767 | format!( | |
768 | "integer `{}` does not fit into the type `usize` whose range is `0..={}`", | |
769 | overflowed_int, | |
770 | usize::MAX | |
771 | ), | |
772 | "integer out of range for `usize`", | |
773 | self.span(start, end), | |
774 | ); | |
775 | } | |
776 | ||
04454e1e | 777 | if found { Some(cur) } else { None } |
1a4d82fc | 778 | } |
064997fb FG |
779 | |
780 | fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) { | |
781 | if let Some(end) = self.consume_pos('.') { | |
782 | let byte_pos = self.to_span_index(end); | |
783 | let start = InnerOffset(byte_pos.0 + 1); | |
784 | let field = self.argument(start); | |
785 | // We can only parse `foo.bar` field access, any deeper nesting, | |
786 | // or another type of expression, like method calls, are not supported | |
787 | if !self.consume('}') { | |
788 | return; | |
789 | } | |
790 | if let ArgumentNamed(_) = arg.position { | |
791 | if let ArgumentNamed(_) = field.position { | |
792 | self.errors.insert( | |
793 | 0, | |
794 | ParseError { | |
795 | description: "field access isn't supported".to_string(), | |
796 | note: None, | |
797 | label: "not supported".to_string(), | |
798 | span: InnerSpan::new(arg.position_span.start, field.position_span.end), | |
799 | secondary_label: None, | |
800 | should_be_replaced_with_positional_argument: true, | |
801 | }, | |
802 | ); | |
803 | } | |
804 | } | |
805 | } | |
806 | } | |
1a4d82fc JJ |
807 | } |
808 | ||
f9f354fc XL |
/// Finds the indices of all characters that have been processed and differ between the actual
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
///
/// `snippet` is the source text of the format string, including its quotes, and `str_style`
/// is `Some(_)` for raw strings.
///
/// Returns the skipped byte positions (relative to the text between the quotes) together with
/// a flag telling whether the snippet looked like a string literal at all. The flag is `false`
/// (with no skips) when there is no snippet, when it does not start like a string literal, or
/// when the surrounding quotes cannot be stripped. Raw strings contain no escapes, so they
/// yield no skips.
fn find_skips_from_snippet(
    snippet: Option<string::String>,
    str_style: Option<usize>,
) -> (Vec<usize>, bool) {
    let snippet = match snippet {
        Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
        _ => return (vec![], false),
    };

    if str_style.is_some() {
        return (vec![], true);
    }

    // Strip the surrounding quotes. A degenerate snippet (a lone `"`, or one whose final byte
    // is part of a multi-byte character) cannot be a well-formed literal: treat it as "not a
    // literal" instead of panicking on the slice.
    let snippet = match snippet.get(1..snippet.len() - 1) {
        Some(inner) => inner,
        None => return (vec![], false),
    };

    let mut s = snippet.char_indices();
    let mut skips = vec![];
    while let Some((pos, c)) = s.next() {
        match (c, s.clone().next()) {
            // skip whitespace and empty lines ending in '\\'
            ('\\', Some((next_pos, '\n'))) => {
                skips.push(pos);
                skips.push(next_pos);
                let _ = s.next();

                while let Some((ws_pos, ' ' | '\n' | '\t')) = s.clone().next() {
                    skips.push(ws_pos);
                    let _ = s.next();
                }
            }
            ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
                skips.push(next_pos);
                let _ = s.next();
            }
            ('\\', Some((_, 'x'))) => {
                for _ in 0..3 {
                    // consume `\xAB` literal
                    if let Some((pos, _)) = s.next() {
                        skips.push(pos);
                    } else {
                        break;
                    }
                }
            }
            ('\\', Some((_, 'u'))) => {
                // The `u` itself.
                if let Some((pos, _)) = s.next() {
                    skips.push(pos);
                }
                // Only peek at what follows: a character that is not part of this escape must
                // stay available to the outer loop, as it may start the next escape.
                match s.clone().next() {
                    Some((brace_pos, '{')) => {
                        let _ = s.next();

                        // consume up to 6 hexadecimal chars
                        let digits_len =
                            s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();

                        let len_utf8 = s
                            .as_str()
                            .get(..digits_len)
                            .and_then(|digits| u32::from_str_radix(digits, 16).ok())
                            .and_then(char::from_u32)
                            .map_or(1, char::len_utf8);

                        // Skip the digits, for chars that encode to more than 1 utf-8 byte
                        // exclude as many digits as it is greater than 1 byte
                        //
                        // So for a 3 byte character, exclude 2 digits
                        let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));

                        // skip '{' and '}' also
                        for pos in (brace_pos..).take(required_skips + 2) {
                            skips.push(pos);
                        }

                        // Step over the digits and the closing `}`.
                        let _ = s.nth(digits_len);
                    }
                    Some((digit_pos, first)) if first.is_digit(16) => {
                        // We suggest adding `{` and `}` when appropriate, accept it here as if
                        // it were correct
                        let _ = s.next();
                        skips.push(digit_pos);

                        // One digit has been consumed already; accept at most five more so
                        // that no more than 6 hexadecimal chars are consumed in total.
                        for _ in 1..6 {
                            match s.clone().next() {
                                Some((hex_pos, c)) if c.is_digit(16) => {
                                    skips.push(hex_pos);
                                    let _ = s.next();
                                }
                                _ => break,
                            }
                        }
                    }
                    _ => {}
                }
            }
            _ => {}
        }
    }
    (skips, true)
}
910 | ||
1a4d82fc | 911 | #[cfg(test)] |
dc9dc135 | 912 | mod tests; |