]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_parse_format/src/lib.rs
New upstream version 1.69.0+dfsg1
[rustc.git] / compiler / rustc_parse_format / src / lib.rs
CommitLineData
1a4d82fc
JJ
1//! Macro support for format strings
2//!
3//! These structures are used when parsing format strings for the compiler.
4//! Parsing does not happen at runtime: structures of `std::fmt::rt` are
5//! generated instead.
6
dfeec247 7#![doc(
1b1a35ee 8 html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/",
dfeec247
XL
9 html_playground_url = "https://play.rust-lang.org/",
10 test(attr(deny(warnings)))
11)]
f2b60f7d
FG
12#![deny(rustc::untranslatable_diagnostic)]
13#![deny(rustc::diagnostic_outside_of_impl)]
04454e1e
FG
14// We want to be able to build this crate with a stable compiler, so no
15// `#![feature]` attributes should be added.
b7449926 16
9fa01778 17pub use Alignment::*;
9fa01778 18pub use Count::*;
dfeec247
XL
19pub use Piece::*;
20pub use Position::*;
1a4d82fc 21
dfeec247 22use std::iter;
1a4d82fc
JJ
23use std::str;
24use std::string;
25
04454e1e
FG
// Note: copied from rustc_span
/// Range inside of a `Span` used for diagnostics when we only have access to relative positions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct InnerSpan {
    /// Offset at which the range starts.
    pub start: usize,
    /// Offset at which the range ends.
    pub end: usize,
}

impl InnerSpan {
    /// Builds a span from its `start` and `end` offsets, stored exactly as given.
    pub fn new(start: usize, end: usize) -> InnerSpan {
        InnerSpan { start, end }
    }
}
dc9dc135 39
9c376795
FG
/// The location and before/after width of a character whose width has changed from its source code
/// representation
#[derive(Copy, Clone, PartialEq, Eq)]
pub struct InnerWidthMapping {
    /// Index of the character in the source
    pub position: usize,
    /// The inner width in characters
    pub before: usize,
    /// The transformed width in characters
    pub after: usize,
}

impl InnerWidthMapping {
    /// Builds a mapping from the source `position` and the `before`/`after` widths, stored as
    /// given.
    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
        InnerWidthMapping { position, before, after }
    }
}
57
58/// Whether the input string is a literal. If yes, it contains the inner width mappings.
59#[derive(Clone, PartialEq, Eq)]
60enum InputStringKind {
61 NotALiteral,
62 Literal { width_mappings: Vec<InnerWidthMapping> },
63}
64
f9f354fc
XL
/// The type of format string that we are parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseMode {
    /// A normal format string as per `format_args!`.
    Format,
    /// An inline assembly template string for `asm!`.
    InlineAsm,
}
73
dc9dc135
XL
74#[derive(Copy, Clone)]
75struct InnerOffset(usize);
76
77impl InnerOffset {
78 fn to(self, end: InnerOffset) -> InnerSpan {
79 InnerSpan::new(self.0, end.0)
80 }
81}
82
1a4d82fc
JJ
83/// A piece is a portion of the format string which represents the next part
84/// to emit. These are emitted as a stream by the `Parser` class.
9c376795 85#[derive(Clone, Debug, PartialEq)]
1a4d82fc
JJ
86pub enum Piece<'a> {
87 /// A literal string which should directly be emitted
88 String(&'a str),
89 /// This describes that formatting should process the next argument (as
90 /// specified inside) for emission.
9c376795 91 NextArgument(Box<Argument<'a>>),
1a4d82fc
JJ
92}
93
94/// Representation of an argument specification.
60c5eb7d 95#[derive(Copy, Clone, Debug, PartialEq)]
1a4d82fc
JJ
96pub struct Argument<'a> {
97 /// Where to find this argument
04454e1e 98 pub position: Position<'a>,
064997fb
FG
99 /// The span of the position indicator. Includes any whitespace in implicit
100 /// positions (`{ }`).
101 pub position_span: InnerSpan,
1a4d82fc
JJ
102 /// How to format the argument
103 pub format: FormatSpec<'a>,
104}
105
106/// Specification for the formatting of an argument in the format string.
60c5eb7d 107#[derive(Copy, Clone, Debug, PartialEq)]
1a4d82fc 108pub struct FormatSpec<'a> {
416331ca 109 /// Optionally specified character to fill alignment with.
1a4d82fc 110 pub fill: Option<char>,
416331ca 111 /// Optionally specified alignment.
1a4d82fc 112 pub align: Alignment,
9ffffee4
FG
113 /// The `+` or `-` flag.
114 pub sign: Option<Sign>,
115 /// The `#` flag.
116 pub alternate: bool,
117 /// The `0` flag.
118 pub zero_pad: bool,
119 /// The `x` or `X` flag. (Only for `Debug`.)
120 pub debug_hex: Option<DebugHex>,
416331ca 121 /// The integer precision to use.
04454e1e 122 pub precision: Count<'a>,
416331ca
XL
123 /// The span of the precision formatting flag (for diagnostics).
124 pub precision_span: Option<InnerSpan>,
125 /// The string width requested for the resulting format.
04454e1e 126 pub width: Count<'a>,
416331ca
XL
127 /// The span of the width formatting flag (for diagnostics).
128 pub width_span: Option<InnerSpan>,
1a4d82fc
JJ
129 /// The descriptor string representing the name of the format desired for
130 /// this argument, this can be empty or any number of characters, although
131 /// it is required to be one word.
b039eaaf 132 pub ty: &'a str,
60c5eb7d
XL
133 /// The span of the descriptor string (for diagnostics).
134 pub ty_span: Option<InnerSpan>,
1a4d82fc
JJ
135}
136
/// Enum describing where an argument for a format can be located.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Position<'a> {
    /// The argument is implied to be located at an index
    ArgumentImplicitlyIs(usize),
    /// The argument is located at a specific index given in the format,
    ArgumentIs(usize),
    /// The argument has a name.
    ArgumentNamed(&'a str),
}

impl Position<'_> {
    /// Returns the argument index for the two index-based variants, and `None` for a named
    /// argument.
    pub fn index(&self) -> Option<usize> {
        // Exhaustive on purpose: adding a variant should force a decision here.
        match self {
            Self::ArgumentIs(i) | Self::ArgumentImplicitlyIs(i) => Some(*i),
            Self::ArgumentNamed(_) => None,
        }
    }
}
156
/// Enum of alignments which are supported.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Alignment {
    /// The value will be aligned to the left.
    AlignLeft,
    /// The value will be aligned to the right.
    AlignRight,
    /// The value will be aligned in the center.
    AlignCenter,
    /// The value will take on a default alignment.
    AlignUnknown,
}
169
/// Enum for the sign flags.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Sign {
    /// The `+` flag.
    Plus,
    /// The `-` flag.
    Minus,
}
178
/// Enum for the debug hex flags.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum DebugHex {
    /// The `x` flag in `{:x?}`.
    Lower,
    /// The `X` flag in `{:X?}`.
    Upper,
}
187
188/// A count is used for the precision and width parameters of an integer, and
189/// can reference either an argument or a literal integer.
60c5eb7d 190#[derive(Copy, Clone, Debug, PartialEq)]
04454e1e 191pub enum Count<'a> {
1a4d82fc 192 /// The count is specified explicitly.
c34b1796 193 CountIs(usize),
1a4d82fc 194 /// The count is specified by the argument with the given name.
04454e1e 195 CountIsName(&'a str, InnerSpan),
1a4d82fc 196 /// The count is specified by the argument at the given index.
c34b1796 197 CountIsParam(usize),
f2b60f7d
FG
198 /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
199 CountIsStar(usize),
1a4d82fc
JJ
200 /// The count is implied and cannot be explicitly specified.
201 CountImplied,
202}
203
94b46f34
XL
204pub struct ParseError {
205 pub description: string::String,
206 pub note: Option<string::String>,
207 pub label: string::String,
dc9dc135
XL
208 pub span: InnerSpan,
209 pub secondary_label: Option<(string::String, InnerSpan)>,
064997fb 210 pub should_be_replaced_with_positional_argument: bool,
94b46f34
XL
211}
212
1a4d82fc 213/// The parser structure for interpreting the input format string. This is
b039eaaf 214/// modeled as an iterator over `Piece` structures to form a stream of tokens
1a4d82fc
JJ
215/// being output.
216///
217/// This is a recursive-descent parser for the sake of simplicity, and if
218/// necessary there's probably lots of room for improvement performance-wise.
219pub struct Parser<'a> {
f9f354fc 220 mode: ParseMode,
1a4d82fc 221 input: &'a str,
e9174d1e 222 cur: iter::Peekable<str::CharIndices<'a>>,
1a4d82fc 223 /// Error messages accumulated during parsing
94b46f34 224 pub errors: Vec<ParseError>,
5bcae85e 225 /// Current position of implicit positional argument pointer
f035d41b 226 pub curarg: usize,
8faf50e0
XL
227 /// `Some(raw count)` when the string is "raw", used to position spans correctly
228 style: Option<usize>,
b7449926 229 /// Start and end byte offset of every successfully parsed argument
dc9dc135 230 pub arg_places: Vec<InnerSpan>,
9c376795
FG
231 /// Characters whose length has been changed from their in-code representation
232 width_map: Vec<InnerWidthMapping>,
dc9dc135
XL
233 /// Span of the last opening brace seen, used for error reporting
234 last_opening_brace: Option<InnerSpan>,
74b04a01 235 /// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
0731742a 236 append_newline: bool,
f9f354fc 237 /// Whether this formatting string is a literal or it comes from a macro.
f035d41b 238 pub is_literal: bool,
f9f354fc
XL
239 /// Start position of the current line.
240 cur_line_start: usize,
241 /// Start and end byte offset of every line of the format string. Excludes
242 /// newline characters and leading whitespace.
243 pub line_spans: Vec<InnerSpan>,
0731742a
XL
244}
245
1a4d82fc
JJ
246impl<'a> Iterator for Parser<'a> {
247 type Item = Piece<'a>;
248
249 fn next(&mut self) -> Option<Piece<'a>> {
e9174d1e
SL
250 if let Some(&(pos, c)) = self.cur.peek() {
251 match c {
252 '{' => {
dc9dc135
XL
253 let curr_last_brace = self.last_opening_brace;
254 let byte_pos = self.to_span_index(pos);
9c376795 255 let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
064997fb 256 self.last_opening_brace = Some(byte_pos.to(lbrace_end));
e9174d1e
SL
257 self.cur.next();
258 if self.consume('{') {
dc9dc135 259 self.last_opening_brace = curr_last_brace;
0731742a 260
e9174d1e
SL
261 Some(String(self.string(pos + 1)))
262 } else {
064997fb 263 let arg = self.argument(lbrace_end);
9c376795 264 if let Some(rbrace_pos) = self.must_consume('}') {
f9f354fc 265 if self.is_literal {
9c376795
FG
266 let lbrace_byte_pos = self.to_span_index(pos);
267 let rbrace_byte_pos = self.to_span_index(rbrace_pos);
268
269 let width = self.to_span_width(rbrace_pos);
270
cdc7bbd5 271 self.arg_places.push(
9c376795 272 lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
cdc7bbd5 273 );
f9f354fc 274 }
064997fb 275 } else {
9ffffee4
FG
276 if let Some(&(_, maybe)) = self.cur.peek() {
277 if maybe == '?' {
278 self.suggest_format();
279 } else {
280 self.suggest_positional_arg_instead_of_captured_arg(arg);
281 }
282 }
8faf50e0 283 }
9c376795 284 Some(NextArgument(Box::new(arg)))
e9174d1e 285 }
1a4d82fc 286 }
e9174d1e
SL
287 '}' => {
288 self.cur.next();
289 if self.consume('}') {
8faf50e0 290 Some(String(self.string(pos + 1)))
e9174d1e 291 } else {
0731742a 292 let err_pos = self.to_span_index(pos);
94b46f34
XL
293 self.err_with_note(
294 "unmatched `}` found",
295 "unmatched `}`",
296 "if you intended to print `}`, you can escape it using `}}`",
dc9dc135 297 err_pos.to(err_pos),
94b46f34 298 );
e9174d1e
SL
299 None
300 }
1a4d82fc 301 }
e9174d1e 302 _ => Some(String(self.string(pos))),
1a4d82fc 303 }
e9174d1e 304 } else {
f035d41b 305 if self.is_literal {
f2b60f7d 306 let span = self.span(self.cur_line_start, self.input.len());
f035d41b
XL
307 if self.line_spans.last() != Some(&span) {
308 self.line_spans.push(span);
309 }
f9f354fc 310 }
e9174d1e 311 None
1a4d82fc
JJ
312 }
313 }
314}
315
316impl<'a> Parser<'a> {
317 /// Creates a new parser for the given format string
0731742a
XL
318 pub fn new(
319 s: &'a str,
320 style: Option<usize>,
f9f354fc 321 snippet: Option<string::String>,
0731742a 322 append_newline: bool,
f9f354fc 323 mode: ParseMode,
0731742a 324 ) -> Parser<'a> {
9c376795
FG
325 let input_string_kind = find_width_map_from_snippet(snippet, style);
326 let (width_map, is_literal) = match input_string_kind {
327 InputStringKind::Literal { width_mappings } => (width_mappings, true),
328 InputStringKind::NotALiteral => (Vec::new(), false),
329 };
330
1a4d82fc 331 Parser {
f9f354fc 332 mode,
1a4d82fc 333 input: s,
e9174d1e 334 cur: s.char_indices().peekable(),
92a42be0 335 errors: vec![],
5bcae85e 336 curarg: 0,
8faf50e0 337 style,
8faf50e0 338 arg_places: vec![],
9c376795 339 width_map,
dc9dc135 340 last_opening_brace: None,
0731742a 341 append_newline,
f9f354fc
XL
342 is_literal,
343 cur_line_start: 0,
344 line_spans: vec![],
1a4d82fc
JJ
345 }
346 }
347
348 /// Notifies of an error. The message doesn't actually need to be of type
349 /// String, but I think it does when this eventually uses conditions so it
350 /// might as well start using it now.
94b46f34
XL
351 fn err<S1: Into<string::String>, S2: Into<string::String>>(
352 &mut self,
353 description: S1,
354 label: S2,
dc9dc135 355 span: InnerSpan,
94b46f34
XL
356 ) {
357 self.errors.push(ParseError {
358 description: description.into(),
359 note: None,
360 label: label.into(),
dc9dc135 361 span,
0731742a 362 secondary_label: None,
064997fb 363 should_be_replaced_with_positional_argument: false,
94b46f34 364 });
476ff2be
SL
365 }
366
367 /// Notifies of an error. The message doesn't actually need to be of type
368 /// String, but I think it does when this eventually uses conditions so it
369 /// might as well start using it now.
dfeec247
XL
370 fn err_with_note<
371 S1: Into<string::String>,
372 S2: Into<string::String>,
373 S3: Into<string::String>,
374 >(
94b46f34
XL
375 &mut self,
376 description: S1,
377 label: S2,
378 note: S3,
dc9dc135 379 span: InnerSpan,
94b46f34
XL
380 ) {
381 self.errors.push(ParseError {
382 description: description.into(),
383 note: Some(note.into()),
384 label: label.into(),
dc9dc135 385 span,
0731742a 386 secondary_label: None,
064997fb 387 should_be_replaced_with_positional_argument: false,
94b46f34 388 });
1a4d82fc
JJ
389 }
390
391 /// Optionally consumes the specified character. If the character is not at
416331ca
XL
392 /// the current position, then the current iterator isn't moved and `false` is
393 /// returned, otherwise the character is consumed and `true` is returned.
1a4d82fc 394 fn consume(&mut self, c: char) -> bool {
416331ca
XL
395 self.consume_pos(c).is_some()
396 }
397
398 /// Optionally consumes the specified character. If the character is not at
399 /// the current position, then the current iterator isn't moved and `None` is
400 /// returned, otherwise the character is consumed and the current position is
401 /// returned.
402 fn consume_pos(&mut self, c: char) -> Option<usize> {
403 if let Some(&(pos, maybe)) = self.cur.peek() {
b039eaaf
SL
404 if c == maybe {
405 self.cur.next();
416331ca 406 return Some(pos);
b039eaaf 407 }
1a4d82fc 408 }
416331ca 409 None
1a4d82fc
JJ
410 }
411
9c376795
FG
412 fn remap_pos(&self, mut pos: usize) -> InnerOffset {
413 for width in &self.width_map {
414 if pos > width.position {
415 pos += width.before - width.after;
416 } else if pos == width.position && width.after == 0 {
417 pos += width.before;
418 } else {
419 break;
420 }
421 }
422
423 InnerOffset(pos)
424 }
425
dc9dc135 426 fn to_span_index(&self, pos: usize) -> InnerOffset {
dc9dc135
XL
427 // This handles the raw string case, the raw argument is the number of #
428 // in r###"..."### (we need to add one because of the `r`).
5869c6ff 429 let raw = self.style.map_or(0, |raw| raw + 1);
9c376795
FG
430 let pos = self.remap_pos(pos);
431 InnerOffset(raw + pos.0 + 1)
432 }
433
434 fn to_span_width(&self, pos: usize) -> usize {
435 let pos = self.remap_pos(pos);
436 match self.width_map.iter().find(|w| w.position == pos.0) {
437 Some(w) => w.before,
438 None => 1,
0731742a 439 }
0731742a
XL
440 }
441
f2b60f7d
FG
442 fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
443 let start = self.to_span_index(start_pos);
444 let end = self.to_span_index(end_pos);
445 start.to(end)
446 }
447
1a4d82fc
JJ
448 /// Forces consumption of the specified character. If the character is not
449 /// found, an error is emitted.
8faf50e0 450 fn must_consume(&mut self, c: char) -> Option<usize> {
1a4d82fc 451 self.ws();
8faf50e0 452
94b46f34 453 if let Some(&(pos, maybe)) = self.cur.peek() {
e9174d1e 454 if c == maybe {
1a4d82fc 455 self.cur.next();
8faf50e0 456 Some(pos)
e9174d1e 457 } else {
0731742a 458 let pos = self.to_span_index(pos);
9c376795 459 let description = format!("expected `'}}'`, found `{maybe:?}`");
0731742a
XL
460 let label = "expected `}`".to_owned();
461 let (note, secondary_label) = if c == '}' {
dfeec247
XL
462 (
463 Some(
464 "if you intended to print `{`, you can escape it using `{{`".to_owned(),
465 ),
466 self.last_opening_brace
467 .map(|sp| ("because of this opening brace".to_owned(), sp)),
468 )
0731742a
XL
469 } else {
470 (None, None)
471 };
472 self.errors.push(ParseError {
473 description,
474 note,
475 label,
dc9dc135 476 span: pos.to(pos),
0731742a 477 secondary_label,
064997fb 478 should_be_replaced_with_positional_argument: false,
0731742a 479 });
8faf50e0 480 None
1a4d82fc 481 }
e9174d1e 482 } else {
9c376795 483 let description = format!("expected `{c:?}` but string was terminated");
0731742a
XL
484 // point at closing `"`
485 let pos = self.input.len() - if self.append_newline { 1 } else { 0 };
486 let pos = self.to_span_index(pos);
476ff2be 487 if c == '}' {
9c376795 488 let label = format!("expected `{c:?}`");
0731742a 489 let (note, secondary_label) = if c == '}' {
dfeec247
XL
490 (
491 Some(
492 "if you intended to print `{`, you can escape it using `{{`".to_owned(),
493 ),
494 self.last_opening_brace
495 .map(|sp| ("because of this opening brace".to_owned(), sp)),
496 )
0731742a
XL
497 } else {
498 (None, None)
499 };
500 self.errors.push(ParseError {
501 description,
502 note,
503 label,
dc9dc135 504 span: pos.to(pos),
0731742a 505 secondary_label,
064997fb 506 should_be_replaced_with_positional_argument: false,
0731742a 507 });
476ff2be 508 } else {
9c376795 509 self.err(description, format!("expected `{c:?}`"), pos.to(pos));
476ff2be 510 }
8faf50e0 511 None
1a4d82fc
JJ
512 }
513 }
514
0731742a 515 /// Consumes all whitespace characters until the first non-whitespace character
1a4d82fc 516 fn ws(&mut self) {
e9174d1e 517 while let Some(&(_, c)) = self.cur.peek() {
b039eaaf
SL
518 if c.is_whitespace() {
519 self.cur.next();
520 } else {
92a42be0 521 break;
b039eaaf 522 }
1a4d82fc
JJ
523 }
524 }
525
526 /// Parses all of a string which is to be considered a "raw literal" in a
527 /// format string. This is everything outside of the braces.
c34b1796 528 fn string(&mut self, start: usize) -> &'a str {
e9174d1e
SL
529 // we may not consume the character, peek the iterator
530 while let Some(&(pos, c)) = self.cur.peek() {
531 match c {
b039eaaf
SL
532 '{' | '}' => {
533 return &self.input[start..pos];
534 }
f9f354fc 535 '\n' if self.is_literal => {
f2b60f7d 536 self.line_spans.push(self.span(self.cur_line_start, pos));
f9f354fc
XL
537 self.cur_line_start = pos + 1;
538 self.cur.next();
539 }
b039eaaf 540 _ => {
f9f354fc
XL
541 if self.is_literal && pos == self.cur_line_start && c.is_whitespace() {
542 self.cur_line_start = pos + c.len_utf8();
543 }
b039eaaf
SL
544 self.cur.next();
545 }
1a4d82fc
JJ
546 }
547 }
e9174d1e 548 &self.input[start..self.input.len()]
1a4d82fc
JJ
549 }
550
e74abb32 551 /// Parses an `Argument` structure, or what's contained within braces inside the format string.
064997fb 552 fn argument(&mut self, start: InnerOffset) -> Argument<'a> {
5bcae85e 553 let pos = self.position();
064997fb
FG
554
555 let end = self
556 .cur
557 .clone()
558 .find(|(_, ch)| !ch.is_whitespace())
559 .map_or(start, |(end, _)| self.to_span_index(end));
560 let position_span = start.to(end);
561
f9f354fc
XL
562 let format = match self.mode {
563 ParseMode::Format => self.format(),
564 ParseMode::InlineAsm => self.inline_asm(),
565 };
5bcae85e
SL
566
567 // Resolve position after parsing format spec.
568 let pos = match pos {
569 Some(position) => position,
570 None => {
571 let i = self.curarg;
572 self.curarg += 1;
abe05a73 573 ArgumentImplicitlyIs(i)
5bcae85e
SL
574 }
575 };
576
064997fb 577 Argument { position: pos, position_span, format }
1a4d82fc
JJ
578 }
579
580 /// Parses a positional argument for a format. This could either be an
581 /// integer index of an argument, a named argument, or a blank string.
5bcae85e
SL
582 /// Returns `Some(parsed_position)` if the position is not implicitly
583 /// consuming a macro argument, `None` if it's the case.
04454e1e 584 fn position(&mut self) -> Option<Position<'a>> {
e9174d1e 585 if let Some(i) = self.integer() {
5bcae85e 586 Some(ArgumentIs(i))
e9174d1e
SL
587 } else {
588 match self.cur.peek() {
064997fb 589 Some(&(_, c)) if rustc_lexer::is_id_start(c) => Some(ArgumentNamed(self.word())),
5bcae85e
SL
590
591 // This is an `ArgumentNext`.
592 // Record the fact and do the resolution after parsing the
593 // format spec, to make things like `{:.*}` work.
594 _ => None,
1a4d82fc
JJ
595 }
596 }
597 }
598
f2b60f7d
FG
599 fn current_pos(&mut self) -> usize {
600 if let Some(&(pos, _)) = self.cur.peek() { pos } else { self.input.len() }
601 }
602
1a4d82fc 603 /// Parses a format specifier at the current position, returning all of the
e74abb32 604 /// relevant information in the `FormatSpec` struct.
1a4d82fc
JJ
605 fn format(&mut self) -> FormatSpec<'a> {
606 let mut spec = FormatSpec {
607 fill: None,
608 align: AlignUnknown,
9ffffee4
FG
609 sign: None,
610 alternate: false,
611 zero_pad: false,
612 debug_hex: None,
1a4d82fc 613 precision: CountImplied,
416331ca 614 precision_span: None,
1a4d82fc 615 width: CountImplied,
416331ca 616 width_span: None,
85aaf69f 617 ty: &self.input[..0],
60c5eb7d 618 ty_span: None,
1a4d82fc 619 };
b039eaaf 620 if !self.consume(':') {
92a42be0 621 return spec;
b039eaaf 622 }
1a4d82fc
JJ
623
624 // fill character
e9174d1e 625 if let Some(&(_, c)) = self.cur.peek() {
1b1a35ee
XL
626 if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) {
627 spec.fill = Some(c);
628 self.cur.next();
1a4d82fc 629 }
1a4d82fc
JJ
630 }
631 // Alignment
632 if self.consume('<') {
633 spec.align = AlignLeft;
634 } else if self.consume('>') {
635 spec.align = AlignRight;
636 } else if self.consume('^') {
637 spec.align = AlignCenter;
638 }
639 // Sign flags
640 if self.consume('+') {
9ffffee4 641 spec.sign = Some(Sign::Plus);
1a4d82fc 642 } else if self.consume('-') {
9ffffee4 643 spec.sign = Some(Sign::Minus);
1a4d82fc
JJ
644 }
645 // Alternate marker
646 if self.consume('#') {
9ffffee4 647 spec.alternate = true;
1a4d82fc
JJ
648 }
649 // Width and precision
650 let mut havewidth = false;
416331ca 651
1a4d82fc
JJ
652 if self.consume('0') {
653 // small ambiguity with '0$' as a format string. In theory this is a
654 // '0' flag and then an ill-formatted format string with just a '$'
655 // and no count, but this is better if we instead interpret this as
656 // no '0' flag and '0$' as the width instead.
064997fb 657 if let Some(end) = self.consume_pos('$') {
1a4d82fc 658 spec.width = CountIsParam(0);
f2b60f7d 659 spec.width_span = Some(self.span(end - 1, end + 1));
1a4d82fc
JJ
660 havewidth = true;
661 } else {
9ffffee4 662 spec.zero_pad = true;
1a4d82fc
JJ
663 }
664 }
f2b60f7d 665
1a4d82fc 666 if !havewidth {
f2b60f7d
FG
667 let start = self.current_pos();
668 spec.width = self.count(start);
669 if spec.width != CountImplied {
670 let end = self.current_pos();
671 spec.width_span = Some(self.span(start, end));
672 }
1a4d82fc 673 }
064997fb 674
416331ca 675 if let Some(start) = self.consume_pos('.') {
f2b60f7d 676 if self.consume('*') {
5bcae85e
SL
677 // Resolve `CountIsNextParam`.
678 // We can do this immediately as `position` is resolved later.
679 let i = self.curarg;
680 self.curarg += 1;
f2b60f7d 681 spec.precision = CountIsStar(i);
1a4d82fc 682 } else {
f2b60f7d 683 spec.precision = self.count(start + 1);
1a4d82fc 684 }
f2b60f7d
FG
685 let end = self.current_pos();
686 spec.precision_span = Some(self.span(start, end));
1a4d82fc 687 }
f2b60f7d
FG
688
689 let ty_span_start = self.current_pos();
0531ce1d
XL
690 // Optional radix followed by the actual format specifier
691 if self.consume('x') {
692 if self.consume('?') {
9ffffee4 693 spec.debug_hex = Some(DebugHex::Lower);
0531ce1d
XL
694 spec.ty = "?";
695 } else {
696 spec.ty = "x";
697 }
698 } else if self.consume('X') {
699 if self.consume('?') {
9ffffee4 700 spec.debug_hex = Some(DebugHex::Upper);
0531ce1d
XL
701 spec.ty = "?";
702 } else {
703 spec.ty = "X";
704 }
705 } else if self.consume('?') {
1a4d82fc
JJ
706 spec.ty = "?";
707 } else {
708 spec.ty = self.word();
60c5eb7d 709 if !spec.ty.is_empty() {
f2b60f7d
FG
710 let ty_span_end = self.current_pos();
711 spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
60c5eb7d 712 }
1a4d82fc 713 }
e9174d1e 714 spec
1a4d82fc
JJ
715 }
716
f9f354fc
XL
717 /// Parses an inline assembly template modifier at the current position, returning the modifier
718 /// in the `ty` field of the `FormatSpec` struct.
719 fn inline_asm(&mut self) -> FormatSpec<'a> {
720 let mut spec = FormatSpec {
721 fill: None,
722 align: AlignUnknown,
9ffffee4
FG
723 sign: None,
724 alternate: false,
725 zero_pad: false,
726 debug_hex: None,
f9f354fc
XL
727 precision: CountImplied,
728 precision_span: None,
729 width: CountImplied,
730 width_span: None,
731 ty: &self.input[..0],
732 ty_span: None,
733 };
734 if !self.consume(':') {
735 return spec;
736 }
737
f2b60f7d 738 let ty_span_start = self.current_pos();
f9f354fc 739 spec.ty = self.word();
f9f354fc 740 if !spec.ty.is_empty() {
f2b60f7d
FG
741 let ty_span_end = self.current_pos();
742 spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
f9f354fc
XL
743 }
744
745 spec
746 }
747
e74abb32 748 /// Parses a `Count` parameter at the current position. This does not check
1a4d82fc
JJ
749 /// for 'CountIsNextParam' because that is only used in precision, not
750 /// width.
f2b60f7d 751 fn count(&mut self, start: usize) -> Count<'a> {
e9174d1e 752 if let Some(i) = self.integer() {
f2b60f7d 753 if self.consume('$') { CountIsParam(i) } else { CountIs(i) }
e9174d1e
SL
754 } else {
755 let tmp = self.cur.clone();
756 let word = self.word();
757 if word.is_empty() {
758 self.cur = tmp;
f2b60f7d 759 CountImplied
5099ac24 760 } else if let Some(end) = self.consume_pos('$') {
f2b60f7d
FG
761 let name_span = self.span(start, end);
762 CountIsName(word, name_span)
e9174d1e 763 } else {
b7449926 764 self.cur = tmp;
f2b60f7d 765 CountImplied
1a4d82fc
JJ
766 }
767 }
768 }
769
e1599b0c
XL
770 /// Parses a word starting at the current position. A word is the same as
771 /// Rust identifier, except that it can't start with `_` character.
1a4d82fc 772 fn word(&mut self) -> &'a str {
e9174d1e 773 let start = match self.cur.peek() {
60c5eb7d 774 Some(&(pos, c)) if rustc_lexer::is_id_start(c) => {
b039eaaf
SL
775 self.cur.next();
776 pos
777 }
778 _ => {
60c5eb7d 779 return "";
b039eaaf 780 }
1a4d82fc 781 };
60c5eb7d 782 let mut end = None;
e9174d1e 783 while let Some(&(pos, c)) = self.cur.peek() {
e1599b0c 784 if rustc_lexer::is_id_continue(c) {
e9174d1e
SL
785 self.cur.next();
786 } else {
60c5eb7d
XL
787 end = Some(pos);
788 break;
1a4d82fc
JJ
789 }
790 }
60c5eb7d
XL
791 let end = end.unwrap_or(self.input.len());
792 let word = &self.input[start..end];
793 if word == "_" {
794 self.err_with_note(
795 "invalid argument name `_`",
796 "invalid argument name",
797 "argument name cannot be a single underscore",
f2b60f7d 798 self.span(start, end),
60c5eb7d
XL
799 );
800 }
801 word
1a4d82fc
JJ
802 }
803
c34b1796 804 fn integer(&mut self) -> Option<usize> {
2b03887a 805 let mut cur: usize = 0;
1a4d82fc 806 let mut found = false;
2b03887a
FG
807 let mut overflow = false;
808 let start = self.current_pos();
e9174d1e
SL
809 while let Some(&(_, c)) = self.cur.peek() {
810 if let Some(i) = c.to_digit(10) {
2b03887a
FG
811 let (tmp, mul_overflow) = cur.overflowing_mul(10);
812 let (tmp, add_overflow) = tmp.overflowing_add(i as usize);
813 if mul_overflow || add_overflow {
814 overflow = true;
815 }
816 cur = tmp;
e9174d1e
SL
817 found = true;
818 self.cur.next();
819 } else {
92a42be0 820 break;
1a4d82fc
JJ
821 }
822 }
2b03887a
FG
823
824 if overflow {
825 let end = self.current_pos();
826 let overflowed_int = &self.input[start..end];
827 self.err(
828 format!(
829 "integer `{}` does not fit into the type `usize` whose range is `0..={}`",
830 overflowed_int,
831 usize::MAX
832 ),
833 "integer out of range for `usize`",
834 self.span(start, end),
835 );
836 }
837
9ffffee4
FG
838 found.then_some(cur)
839 }
840
841 fn suggest_format(&mut self) {
842 if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) {
843 let word = self.word();
844 let _end = self.current_pos();
845 let pos = self.to_span_index(pos);
846 self.errors.insert(
847 0,
848 ParseError {
849 description: "expected format parameter to occur after `:`".to_owned(),
850 note: Some(format!("`?` comes after `:`, try `{}:{}` instead", word, "?")),
851 label: "expected `?` to occur after `:`".to_owned(),
852 span: pos.to(pos),
853 secondary_label: None,
854 should_be_replaced_with_positional_argument: false,
855 },
856 );
857 }
1a4d82fc 858 }
064997fb
FG
859
860 fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) {
861 if let Some(end) = self.consume_pos('.') {
862 let byte_pos = self.to_span_index(end);
863 let start = InnerOffset(byte_pos.0 + 1);
864 let field = self.argument(start);
865 // We can only parse `foo.bar` field access, any deeper nesting,
866 // or another type of expression, like method calls, are not supported
867 if !self.consume('}') {
868 return;
869 }
870 if let ArgumentNamed(_) = arg.position {
871 if let ArgumentNamed(_) = field.position {
872 self.errors.insert(
873 0,
874 ParseError {
875 description: "field access isn't supported".to_string(),
876 note: None,
877 label: "not supported".to_string(),
878 span: InnerSpan::new(arg.position_span.start, field.position_span.end),
879 secondary_label: None,
880 should_be_replaced_with_positional_argument: true,
881 },
882 );
883 }
884 }
885 }
886 }
1a4d82fc
JJ
887}
888
f9f354fc
XL
889/// Finds the indices of all characters that have been processed and differ between the actual
890/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
5e7ed085 891/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
9c376795 892fn find_width_map_from_snippet(
f9f354fc
XL
893 snippet: Option<string::String>,
894 str_style: Option<usize>,
9c376795 895) -> InputStringKind {
f9f354fc 896 let snippet = match snippet {
6a06907d 897 Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
9c376795 898 _ => return InputStringKind::NotALiteral,
f9f354fc
XL
899 };
900
487cf647 901 if str_style.is_some() {
9c376795 902 return InputStringKind::Literal { width_mappings: Vec::new() };
487cf647 903 }
cdc7bbd5 904
487cf647
FG
905 let snippet = &snippet[1..snippet.len() - 1];
906
907 let mut s = snippet.char_indices();
9c376795 908 let mut width_mappings = vec![];
487cf647
FG
909 while let Some((pos, c)) = s.next() {
910 match (c, s.clone().next()) {
911 // skip whitespace and empty lines ending in '\\'
9c376795 912 ('\\', Some((_, '\n'))) => {
487cf647 913 let _ = s.next();
9c376795 914 let mut width = 2;
487cf647 915
9c376795 916 while let Some((_, c)) = s.clone().next() {
487cf647 917 if matches!(c, ' ' | '\n' | '\t') {
9c376795 918 width += 1;
487cf647
FG
919 let _ = s.next();
920 } else {
921 break;
f9f354fc
XL
922 }
923 }
9c376795
FG
924
925 width_mappings.push(InnerWidthMapping::new(pos, width, 0));
487cf647 926 }
9c376795
FG
927 ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
928 width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
487cf647
FG
929 let _ = s.next();
930 }
931 ('\\', Some((_, 'x'))) => {
9c376795
FG
932 // consume `\xAB` literal
933 s.nth(2);
934 width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
487cf647
FG
935 }
936 ('\\', Some((_, 'u'))) => {
9c376795
FG
937 let mut width = 2;
938 let _ = s.next();
939
940 if let Some((_, next_c)) = s.next() {
487cf647
FG
941 if next_c == '{' {
942 // consume up to 6 hexanumeric chars
943 let digits_len =
944 s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();
945
946 let len_utf8 = s
947 .as_str()
948 .get(..digits_len)
949 .and_then(|digits| u32::from_str_radix(digits, 16).ok())
950 .and_then(char::from_u32)
951 .map_or(1, char::len_utf8);
952
953 // Skip the digits, for chars that encode to more than 1 utf-8 byte
954 // exclude as many digits as it is greater than 1 byte
955 //
956 // So for a 3 byte character, exclude 2 digits
957 let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
958
959 // skip '{' and '}' also
9c376795 960 width += required_skips + 2;
487cf647
FG
961
962 s.nth(digits_len);
963 } else if next_c.is_digit(16) {
9c376795
FG
964 width += 1;
965
487cf647
FG
966 // We suggest adding `{` and `}` when appropriate, accept it here as if
967 // it were correct
968 let mut i = 0; // consume up to 6 hexanumeric chars
9c376795 969 while let (Some((_, c)), _) = (s.next(), i < 6) {
487cf647 970 if c.is_digit(16) {
9c376795 971 width += 1;
487cf647
FG
972 } else {
973 break;
f9f354fc 974 }
487cf647 975 i += 1;
f9f354fc
XL
976 }
977 }
978 }
9c376795
FG
979
980 width_mappings.push(InnerWidthMapping::new(pos, width, 1));
f9f354fc 981 }
487cf647 982 _ => {}
f9f354fc 983 }
f9f354fc 984 }
9c376795
FG
985
986 InputStringKind::Literal { width_mappings }
f9f354fc
XL
987}
988
9c376795
FG
989// Assert a reasonable size for `Piece`
990#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
991rustc_data_structures::static_assert_size!(Piece<'_>, 16);
992
1a4d82fc 993#[cfg(test)]
dc9dc135 994mod tests;