]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_parse_format/src/lib.rs
New upstream version 1.75.0+dfsg1
[rustc.git] / compiler / rustc_parse_format / src / lib.rs
CommitLineData
1a4d82fc
JJ
1//! Macro support for format strings
2//!
3//! These structures are used when parsing format strings for the compiler.
4//! Parsing does not happen at runtime: structures of `std::fmt::rt` are
5//! generated instead.
6
dfeec247 7#![doc(
1b1a35ee 8 html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/",
dfeec247
XL
9 html_playground_url = "https://play.rust-lang.org/",
10 test(attr(deny(warnings)))
11)]
ed00b5ec
FG
12#![cfg_attr(not(bootstrap), doc(rust_logo))]
13#![cfg_attr(not(bootstrap), allow(internal_features))]
14#![cfg_attr(not(bootstrap), feature(rustdoc_internals))]
f2b60f7d
FG
15#![deny(rustc::untranslatable_diagnostic)]
16#![deny(rustc::diagnostic_outside_of_impl)]
04454e1e
FG
17// We want to be able to build this crate with a stable compiler, so no
18// `#![feature]` attributes should be added.
b7449926 19
353b0b11 20use rustc_lexer::unescape;
9fa01778 21pub use Alignment::*;
9fa01778 22pub use Count::*;
dfeec247
XL
23pub use Piece::*;
24pub use Position::*;
1a4d82fc 25
dfeec247 26use std::iter;
1a4d82fc
JJ
27use std::str;
28use std::string;
29
04454e1e
FG
// Note: copied from rustc_span
/// A pair of offsets describing a range inside of a `Span`. Diagnostics use it when only
/// positions relative to the enclosing span are available.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InnerSpan {
    /// Offset at which the range starts.
    pub start: usize,
    /// Offset at which the range ends.
    pub end: usize,
}

impl InnerSpan {
    /// Builds an `InnerSpan` from its two endpoints, stored exactly as given.
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}
dc9dc135 43
9c376795
FG
/// The location and before/after width of a character whose width has changed from its source
/// code representation.
///
/// `Debug` is derived (like the sibling `InnerSpan`) so that values can be printed in
/// diagnostics and compared with `assert_eq!`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct InnerWidthMapping {
    /// Index of the character in the source
    pub position: usize,
    /// The inner width in characters
    pub before: usize,
    /// The transformed width in characters
    pub after: usize,
}

impl InnerWidthMapping {
    /// Creates a mapping for the character at `position` whose width changes from `before`
    /// to `after`. The values are stored as given, without validation.
    pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
        InnerWidthMapping { position, before, after }
    }
}
61
62/// Whether the input string is a literal. If yes, it contains the inner width mappings.
63#[derive(Clone, PartialEq, Eq)]
64enum InputStringKind {
65 NotALiteral,
66 Literal { width_mappings: Vec<InnerWidthMapping> },
67}
68
f9f354fc
XL
/// Selects which kind of format string the parser is working on.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ParseMode {
    /// An ordinary format string, as accepted by `format_args!`.
    Format,
    /// A template string for inline assembly (`asm!`).
    InlineAsm,
}
77
dc9dc135
XL
78#[derive(Copy, Clone)]
79struct InnerOffset(usize);
80
81impl InnerOffset {
82 fn to(self, end: InnerOffset) -> InnerSpan {
83 InnerSpan::new(self.0, end.0)
84 }
85}
86
1a4d82fc
JJ
87/// A piece is a portion of the format string which represents the next part
88/// to emit. These are emitted as a stream by the `Parser` class.
9c376795 89#[derive(Clone, Debug, PartialEq)]
1a4d82fc
JJ
90pub enum Piece<'a> {
91 /// A literal string which should directly be emitted
92 String(&'a str),
93 /// This describes that formatting should process the next argument (as
94 /// specified inside) for emission.
9c376795 95 NextArgument(Box<Argument<'a>>),
1a4d82fc
JJ
96}
97
98/// Representation of an argument specification.
60c5eb7d 99#[derive(Copy, Clone, Debug, PartialEq)]
1a4d82fc
JJ
100pub struct Argument<'a> {
101 /// Where to find this argument
04454e1e 102 pub position: Position<'a>,
064997fb
FG
103 /// The span of the position indicator. Includes any whitespace in implicit
104 /// positions (`{ }`).
105 pub position_span: InnerSpan,
1a4d82fc
JJ
106 /// How to format the argument
107 pub format: FormatSpec<'a>,
108}
109
110/// Specification for the formatting of an argument in the format string.
60c5eb7d 111#[derive(Copy, Clone, Debug, PartialEq)]
1a4d82fc 112pub struct FormatSpec<'a> {
416331ca 113 /// Optionally specified character to fill alignment with.
1a4d82fc 114 pub fill: Option<char>,
add651ee
FG
115 /// Span of the optionally specified fill character.
116 pub fill_span: Option<InnerSpan>,
416331ca 117 /// Optionally specified alignment.
1a4d82fc 118 pub align: Alignment,
9ffffee4
FG
119 /// The `+` or `-` flag.
120 pub sign: Option<Sign>,
121 /// The `#` flag.
122 pub alternate: bool,
123 /// The `0` flag.
124 pub zero_pad: bool,
125 /// The `x` or `X` flag. (Only for `Debug`.)
126 pub debug_hex: Option<DebugHex>,
416331ca 127 /// The integer precision to use.
04454e1e 128 pub precision: Count<'a>,
416331ca
XL
129 /// The span of the precision formatting flag (for diagnostics).
130 pub precision_span: Option<InnerSpan>,
131 /// The string width requested for the resulting format.
04454e1e 132 pub width: Count<'a>,
416331ca
XL
133 /// The span of the width formatting flag (for diagnostics).
134 pub width_span: Option<InnerSpan>,
1a4d82fc
JJ
135 /// The descriptor string representing the name of the format desired for
136 /// this argument, this can be empty or any number of characters, although
137 /// it is required to be one word.
b039eaaf 138 pub ty: &'a str,
60c5eb7d
XL
139 /// The span of the descriptor string (for diagnostics).
140 pub ty_span: Option<InnerSpan>,
1a4d82fc
JJ
141}
142
/// Describes where the argument for a placeholder is to be found.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Position<'a> {
    /// No position was written; the argument is implied to be at this index.
    ArgumentImplicitlyIs(usize),
    /// The format string names this specific index.
    ArgumentIs(usize),
    /// The argument is referred to by name.
    ArgumentNamed(&'a str),
}

impl Position<'_> {
    /// Returns the index for both index-based variants, and `None` for a named argument.
    pub fn index(&self) -> Option<usize> {
        match *self {
            Position::ArgumentImplicitlyIs(idx) | Position::ArgumentIs(idx) => Some(idx),
            Position::ArgumentNamed(_) => None,
        }
    }
}
162
/// The supported alignments.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Alignment {
    /// Align the value to the left.
    AlignLeft,
    /// Align the value to the right.
    AlignRight,
    /// Center the value.
    AlignCenter,
    /// No alignment was requested; the value takes on a default alignment.
    AlignUnknown,
}
175
/// The sign flags.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum Sign {
    /// `+` was given.
    Plus,
    /// `-` was given.
    Minus,
}
184
/// The debug hex flags.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum DebugHex {
    /// Lowercase: the `x` in `{:x?}`.
    Lower,
    /// Uppercase: the `X` in `{:X?}`.
    Upper,
}
193
194/// A count is used for the precision and width parameters of an integer, and
195/// can reference either an argument or a literal integer.
60c5eb7d 196#[derive(Copy, Clone, Debug, PartialEq)]
04454e1e 197pub enum Count<'a> {
1a4d82fc 198 /// The count is specified explicitly.
c34b1796 199 CountIs(usize),
1a4d82fc 200 /// The count is specified by the argument with the given name.
04454e1e 201 CountIsName(&'a str, InnerSpan),
1a4d82fc 202 /// The count is specified by the argument at the given index.
c34b1796 203 CountIsParam(usize),
f2b60f7d
FG
204 /// The count is specified by a star (like in `{:.*}`) that refers to the argument at the given index.
205 CountIsStar(usize),
1a4d82fc
JJ
206 /// The count is implied and cannot be explicitly specified.
207 CountImplied,
208}
209
94b46f34
XL
210pub struct ParseError {
211 pub description: string::String,
212 pub note: Option<string::String>,
213 pub label: string::String,
dc9dc135
XL
214 pub span: InnerSpan,
215 pub secondary_label: Option<(string::String, InnerSpan)>,
781aab86
FG
216 pub suggestion: Suggestion,
217}
218
219pub enum Suggestion {
220 None,
221 /// Replace inline argument with positional argument:
222 /// `format!("{foo.bar}")` -> `format!("{}", foo.bar)`
223 UsePositional,
224 /// Remove `r#` from identifier:
225 /// `format!("{r#foo}")` -> `format!("{foo}")`
226 RemoveRawIdent(InnerSpan),
94b46f34
XL
227}
228
1a4d82fc 229/// The parser structure for interpreting the input format string. This is
b039eaaf 230/// modeled as an iterator over `Piece` structures to form a stream of tokens
1a4d82fc
JJ
231/// being output.
232///
233/// This is a recursive-descent parser for the sake of simplicity, and if
234/// necessary there's probably lots of room for improvement performance-wise.
235pub struct Parser<'a> {
f9f354fc 236 mode: ParseMode,
1a4d82fc 237 input: &'a str,
e9174d1e 238 cur: iter::Peekable<str::CharIndices<'a>>,
1a4d82fc 239 /// Error messages accumulated during parsing
94b46f34 240 pub errors: Vec<ParseError>,
5bcae85e 241 /// Current position of implicit positional argument pointer
f035d41b 242 pub curarg: usize,
8faf50e0
XL
243 /// `Some(raw count)` when the string is "raw", used to position spans correctly
244 style: Option<usize>,
b7449926 245 /// Start and end byte offset of every successfully parsed argument
dc9dc135 246 pub arg_places: Vec<InnerSpan>,
9c376795
FG
247 /// Characters whose length has been changed from their in-code representation
248 width_map: Vec<InnerWidthMapping>,
dc9dc135
XL
249 /// Span of the last opening brace seen, used for error reporting
250 last_opening_brace: Option<InnerSpan>,
74b04a01 251 /// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
0731742a 252 append_newline: bool,
353b0b11
FG
253 /// Whether this formatting string was written directly in the source. This controls whether we
254 /// can use spans to refer into it and give better error messages.
255 /// N.B: This does _not_ control whether implicit argument captures can be used.
256 pub is_source_literal: bool,
f9f354fc
XL
257 /// Start position of the current line.
258 cur_line_start: usize,
259 /// Start and end byte offset of every line of the format string. Excludes
260 /// newline characters and leading whitespace.
261 pub line_spans: Vec<InnerSpan>,
0731742a
XL
262}
263
1a4d82fc
JJ
264impl<'a> Iterator for Parser<'a> {
265 type Item = Piece<'a>;
266
267 fn next(&mut self) -> Option<Piece<'a>> {
e9174d1e
SL
268 if let Some(&(pos, c)) = self.cur.peek() {
269 match c {
270 '{' => {
dc9dc135
XL
271 let curr_last_brace = self.last_opening_brace;
272 let byte_pos = self.to_span_index(pos);
9c376795 273 let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
064997fb 274 self.last_opening_brace = Some(byte_pos.to(lbrace_end));
e9174d1e
SL
275 self.cur.next();
276 if self.consume('{') {
dc9dc135 277 self.last_opening_brace = curr_last_brace;
0731742a 278
e9174d1e
SL
279 Some(String(self.string(pos + 1)))
280 } else {
064997fb 281 let arg = self.argument(lbrace_end);
add651ee 282 if let Some(rbrace_pos) = self.consume_closing_brace(&arg) {
353b0b11 283 if self.is_source_literal {
9c376795
FG
284 let lbrace_byte_pos = self.to_span_index(pos);
285 let rbrace_byte_pos = self.to_span_index(rbrace_pos);
286
287 let width = self.to_span_width(rbrace_pos);
288
cdc7bbd5 289 self.arg_places.push(
9c376795 290 lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
cdc7bbd5 291 );
f9f354fc 292 }
064997fb 293 } else {
9ffffee4
FG
294 if let Some(&(_, maybe)) = self.cur.peek() {
295 if maybe == '?' {
296 self.suggest_format();
297 } else {
298 self.suggest_positional_arg_instead_of_captured_arg(arg);
299 }
300 }
8faf50e0 301 }
9c376795 302 Some(NextArgument(Box::new(arg)))
e9174d1e 303 }
1a4d82fc 304 }
e9174d1e
SL
305 '}' => {
306 self.cur.next();
307 if self.consume('}') {
8faf50e0 308 Some(String(self.string(pos + 1)))
e9174d1e 309 } else {
0731742a 310 let err_pos = self.to_span_index(pos);
94b46f34
XL
311 self.err_with_note(
312 "unmatched `}` found",
313 "unmatched `}`",
314 "if you intended to print `}`, you can escape it using `}}`",
dc9dc135 315 err_pos.to(err_pos),
94b46f34 316 );
e9174d1e
SL
317 None
318 }
1a4d82fc 319 }
e9174d1e 320 _ => Some(String(self.string(pos))),
1a4d82fc 321 }
e9174d1e 322 } else {
353b0b11 323 if self.is_source_literal {
f2b60f7d 324 let span = self.span(self.cur_line_start, self.input.len());
f035d41b
XL
325 if self.line_spans.last() != Some(&span) {
326 self.line_spans.push(span);
327 }
f9f354fc 328 }
e9174d1e 329 None
1a4d82fc
JJ
330 }
331 }
332}
333
334impl<'a> Parser<'a> {
335 /// Creates a new parser for the given format string
0731742a
XL
336 pub fn new(
337 s: &'a str,
338 style: Option<usize>,
f9f354fc 339 snippet: Option<string::String>,
0731742a 340 append_newline: bool,
f9f354fc 341 mode: ParseMode,
0731742a 342 ) -> Parser<'a> {
353b0b11
FG
343 let input_string_kind = find_width_map_from_snippet(s, snippet, style);
344 let (width_map, is_source_literal) = match input_string_kind {
9c376795
FG
345 InputStringKind::Literal { width_mappings } => (width_mappings, true),
346 InputStringKind::NotALiteral => (Vec::new(), false),
347 };
348
1a4d82fc 349 Parser {
f9f354fc 350 mode,
1a4d82fc 351 input: s,
e9174d1e 352 cur: s.char_indices().peekable(),
92a42be0 353 errors: vec![],
5bcae85e 354 curarg: 0,
8faf50e0 355 style,
8faf50e0 356 arg_places: vec![],
9c376795 357 width_map,
dc9dc135 358 last_opening_brace: None,
0731742a 359 append_newline,
353b0b11 360 is_source_literal,
f9f354fc
XL
361 cur_line_start: 0,
362 line_spans: vec![],
1a4d82fc
JJ
363 }
364 }
365
366 /// Notifies of an error. The message doesn't actually need to be of type
367 /// String, but I think it does when this eventually uses conditions so it
368 /// might as well start using it now.
94b46f34
XL
369 fn err<S1: Into<string::String>, S2: Into<string::String>>(
370 &mut self,
371 description: S1,
372 label: S2,
dc9dc135 373 span: InnerSpan,
94b46f34
XL
374 ) {
375 self.errors.push(ParseError {
376 description: description.into(),
377 note: None,
378 label: label.into(),
dc9dc135 379 span,
0731742a 380 secondary_label: None,
781aab86 381 suggestion: Suggestion::None,
94b46f34 382 });
476ff2be
SL
383 }
384
385 /// Notifies of an error. The message doesn't actually need to be of type
386 /// String, but I think it does when this eventually uses conditions so it
387 /// might as well start using it now.
dfeec247
XL
388 fn err_with_note<
389 S1: Into<string::String>,
390 S2: Into<string::String>,
391 S3: Into<string::String>,
392 >(
94b46f34
XL
393 &mut self,
394 description: S1,
395 label: S2,
396 note: S3,
dc9dc135 397 span: InnerSpan,
94b46f34
XL
398 ) {
399 self.errors.push(ParseError {
400 description: description.into(),
401 note: Some(note.into()),
402 label: label.into(),
dc9dc135 403 span,
0731742a 404 secondary_label: None,
781aab86 405 suggestion: Suggestion::None,
94b46f34 406 });
1a4d82fc
JJ
407 }
408
409 /// Optionally consumes the specified character. If the character is not at
416331ca
XL
410 /// the current position, then the current iterator isn't moved and `false` is
411 /// returned, otherwise the character is consumed and `true` is returned.
1a4d82fc 412 fn consume(&mut self, c: char) -> bool {
416331ca
XL
413 self.consume_pos(c).is_some()
414 }
415
416 /// Optionally consumes the specified character. If the character is not at
417 /// the current position, then the current iterator isn't moved and `None` is
418 /// returned, otherwise the character is consumed and the current position is
419 /// returned.
420 fn consume_pos(&mut self, c: char) -> Option<usize> {
421 if let Some(&(pos, maybe)) = self.cur.peek() {
b039eaaf
SL
422 if c == maybe {
423 self.cur.next();
416331ca 424 return Some(pos);
b039eaaf 425 }
1a4d82fc 426 }
416331ca 427 None
1a4d82fc
JJ
428 }
429
9c376795
FG
430 fn remap_pos(&self, mut pos: usize) -> InnerOffset {
431 for width in &self.width_map {
432 if pos > width.position {
433 pos += width.before - width.after;
434 } else if pos == width.position && width.after == 0 {
435 pos += width.before;
436 } else {
437 break;
438 }
439 }
440
441 InnerOffset(pos)
442 }
443
dc9dc135 444 fn to_span_index(&self, pos: usize) -> InnerOffset {
dc9dc135
XL
445 // This handles the raw string case, the raw argument is the number of #
446 // in r###"..."### (we need to add one because of the `r`).
5869c6ff 447 let raw = self.style.map_or(0, |raw| raw + 1);
9c376795
FG
448 let pos = self.remap_pos(pos);
449 InnerOffset(raw + pos.0 + 1)
450 }
451
452 fn to_span_width(&self, pos: usize) -> usize {
453 let pos = self.remap_pos(pos);
454 match self.width_map.iter().find(|w| w.position == pos.0) {
455 Some(w) => w.before,
456 None => 1,
0731742a 457 }
0731742a
XL
458 }
459
f2b60f7d
FG
460 fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
461 let start = self.to_span_index(start_pos);
462 let end = self.to_span_index(end_pos);
463 start.to(end)
464 }
465
1a4d82fc
JJ
466 /// Forces consumption of the specified character. If the character is not
467 /// found, an error is emitted.
add651ee 468 fn consume_closing_brace(&mut self, arg: &Argument<'_>) -> Option<usize> {
1a4d82fc 469 self.ws();
8faf50e0 470
add651ee
FG
471 let pos;
472 let description;
473
474 if let Some(&(peek_pos, maybe)) = self.cur.peek() {
475 if maybe == '}' {
1a4d82fc 476 self.cur.next();
add651ee 477 return Some(peek_pos);
1a4d82fc 478 }
add651ee
FG
479
480 pos = peek_pos;
481 description = format!("expected `'}}'`, found `{maybe:?}`");
e9174d1e 482 } else {
add651ee 483 description = "expected `'}'` but string was terminated".to_owned();
0731742a 484 // point at closing `"`
add651ee 485 pos = self.input.len() - if self.append_newline { 1 } else { 0 };
1a4d82fc 486 }
add651ee
FG
487
488 let pos = self.to_span_index(pos);
489
490 let label = "expected `'}'`".to_owned();
491 let (note, secondary_label) = if arg.format.fill == Some('}') {
492 (
493 Some("the character `'}'` is interpreted as a fill character because of the `:` that precedes it".to_owned()),
494 arg.format.fill_span.map(|sp| ("this is not interpreted as a formatting closing brace".to_owned(), sp)),
495 )
496 } else {
497 (
498 Some("if you intended to print `{`, you can escape it using `{{`".to_owned()),
499 self.last_opening_brace.map(|sp| ("because of this opening brace".to_owned(), sp)),
500 )
501 };
502
503 self.errors.push(ParseError {
504 description,
505 note,
506 label,
507 span: pos.to(pos),
508 secondary_label,
781aab86 509 suggestion: Suggestion::None,
add651ee
FG
510 });
511
512 None
1a4d82fc
JJ
513 }
514
0731742a 515 /// Consumes all whitespace characters until the first non-whitespace character
1a4d82fc 516 fn ws(&mut self) {
e9174d1e 517 while let Some(&(_, c)) = self.cur.peek() {
b039eaaf
SL
518 if c.is_whitespace() {
519 self.cur.next();
520 } else {
92a42be0 521 break;
b039eaaf 522 }
1a4d82fc
JJ
523 }
524 }
525
526 /// Parses all of a string which is to be considered a "raw literal" in a
527 /// format string. This is everything outside of the braces.
c34b1796 528 fn string(&mut self, start: usize) -> &'a str {
e9174d1e
SL
529 // we may not consume the character, peek the iterator
530 while let Some(&(pos, c)) = self.cur.peek() {
531 match c {
b039eaaf
SL
532 '{' | '}' => {
533 return &self.input[start..pos];
534 }
353b0b11 535 '\n' if self.is_source_literal => {
f2b60f7d 536 self.line_spans.push(self.span(self.cur_line_start, pos));
f9f354fc
XL
537 self.cur_line_start = pos + 1;
538 self.cur.next();
539 }
b039eaaf 540 _ => {
353b0b11 541 if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() {
f9f354fc
XL
542 self.cur_line_start = pos + c.len_utf8();
543 }
b039eaaf
SL
544 self.cur.next();
545 }
1a4d82fc
JJ
546 }
547 }
e9174d1e 548 &self.input[start..self.input.len()]
1a4d82fc
JJ
549 }
550
e74abb32 551 /// Parses an `Argument` structure, or what's contained within braces inside the format string.
064997fb 552 fn argument(&mut self, start: InnerOffset) -> Argument<'a> {
5bcae85e 553 let pos = self.position();
064997fb
FG
554
555 let end = self
556 .cur
557 .clone()
558 .find(|(_, ch)| !ch.is_whitespace())
559 .map_or(start, |(end, _)| self.to_span_index(end));
560 let position_span = start.to(end);
561
f9f354fc
XL
562 let format = match self.mode {
563 ParseMode::Format => self.format(),
564 ParseMode::InlineAsm => self.inline_asm(),
565 };
5bcae85e
SL
566
567 // Resolve position after parsing format spec.
568 let pos = match pos {
569 Some(position) => position,
570 None => {
571 let i = self.curarg;
572 self.curarg += 1;
abe05a73 573 ArgumentImplicitlyIs(i)
5bcae85e
SL
574 }
575 };
576
064997fb 577 Argument { position: pos, position_span, format }
1a4d82fc
JJ
578 }
579
580 /// Parses a positional argument for a format. This could either be an
581 /// integer index of an argument, a named argument, or a blank string.
5bcae85e
SL
582 /// Returns `Some(parsed_position)` if the position is not implicitly
583 /// consuming a macro argument, `None` if it's the case.
04454e1e 584 fn position(&mut self) -> Option<Position<'a>> {
e9174d1e 585 if let Some(i) = self.integer() {
5bcae85e 586 Some(ArgumentIs(i))
e9174d1e
SL
587 } else {
588 match self.cur.peek() {
781aab86
FG
589 Some(&(lo, c)) if rustc_lexer::is_id_start(c) => {
590 let word = self.word();
591
592 // Recover from `r#ident` in format strings.
593 // FIXME: use a let chain
594 if word == "r" {
595 if let Some((pos, '#')) = self.cur.peek() {
596 if self.input[pos + 1..]
597 .chars()
598 .next()
599 .is_some_and(rustc_lexer::is_id_start)
600 {
601 self.cur.next();
602 let word = self.word();
603 let prefix_span = self.span(lo, lo + 2);
604 let full_span = self.span(lo, lo + 2 + word.len());
605 self.errors.insert(0, ParseError {
606 description: "raw identifiers are not supported".to_owned(),
607 note: Some("identifiers in format strings can be keywords and don't need to be prefixed with `r#`".to_string()),
608 label: "raw identifier used here".to_owned(),
609 span: full_span,
610 secondary_label: None,
611 suggestion: Suggestion::RemoveRawIdent(prefix_span),
612 });
613 return Some(ArgumentNamed(word));
614 }
615 }
616 }
617
618 Some(ArgumentNamed(word))
619 }
5bcae85e
SL
620
621 // This is an `ArgumentNext`.
622 // Record the fact and do the resolution after parsing the
623 // format spec, to make things like `{:.*}` work.
624 _ => None,
1a4d82fc
JJ
625 }
626 }
627 }
628
f2b60f7d
FG
629 fn current_pos(&mut self) -> usize {
630 if let Some(&(pos, _)) = self.cur.peek() { pos } else { self.input.len() }
631 }
632
1a4d82fc 633 /// Parses a format specifier at the current position, returning all of the
e74abb32 634 /// relevant information in the `FormatSpec` struct.
1a4d82fc
JJ
635 fn format(&mut self) -> FormatSpec<'a> {
636 let mut spec = FormatSpec {
637 fill: None,
add651ee 638 fill_span: None,
1a4d82fc 639 align: AlignUnknown,
9ffffee4
FG
640 sign: None,
641 alternate: false,
642 zero_pad: false,
643 debug_hex: None,
1a4d82fc 644 precision: CountImplied,
416331ca 645 precision_span: None,
1a4d82fc 646 width: CountImplied,
416331ca 647 width_span: None,
85aaf69f 648 ty: &self.input[..0],
60c5eb7d 649 ty_span: None,
1a4d82fc 650 };
b039eaaf 651 if !self.consume(':') {
92a42be0 652 return spec;
b039eaaf 653 }
1a4d82fc
JJ
654
655 // fill character
add651ee 656 if let Some(&(idx, c)) = self.cur.peek() {
1b1a35ee
XL
657 if let Some((_, '>' | '<' | '^')) = self.cur.clone().nth(1) {
658 spec.fill = Some(c);
add651ee 659 spec.fill_span = Some(self.span(idx, idx + 1));
1b1a35ee 660 self.cur.next();
1a4d82fc 661 }
1a4d82fc
JJ
662 }
663 // Alignment
664 if self.consume('<') {
665 spec.align = AlignLeft;
666 } else if self.consume('>') {
667 spec.align = AlignRight;
668 } else if self.consume('^') {
669 spec.align = AlignCenter;
670 }
671 // Sign flags
672 if self.consume('+') {
9ffffee4 673 spec.sign = Some(Sign::Plus);
1a4d82fc 674 } else if self.consume('-') {
9ffffee4 675 spec.sign = Some(Sign::Minus);
1a4d82fc
JJ
676 }
677 // Alternate marker
678 if self.consume('#') {
9ffffee4 679 spec.alternate = true;
1a4d82fc
JJ
680 }
681 // Width and precision
682 let mut havewidth = false;
416331ca 683
1a4d82fc
JJ
684 if self.consume('0') {
685 // small ambiguity with '0$' as a format string. In theory this is a
686 // '0' flag and then an ill-formatted format string with just a '$'
687 // and no count, but this is better if we instead interpret this as
688 // no '0' flag and '0$' as the width instead.
064997fb 689 if let Some(end) = self.consume_pos('$') {
1a4d82fc 690 spec.width = CountIsParam(0);
f2b60f7d 691 spec.width_span = Some(self.span(end - 1, end + 1));
1a4d82fc
JJ
692 havewidth = true;
693 } else {
9ffffee4 694 spec.zero_pad = true;
1a4d82fc
JJ
695 }
696 }
f2b60f7d 697
1a4d82fc 698 if !havewidth {
f2b60f7d
FG
699 let start = self.current_pos();
700 spec.width = self.count(start);
701 if spec.width != CountImplied {
702 let end = self.current_pos();
703 spec.width_span = Some(self.span(start, end));
704 }
1a4d82fc 705 }
064997fb 706
416331ca 707 if let Some(start) = self.consume_pos('.') {
f2b60f7d 708 if self.consume('*') {
5bcae85e
SL
709 // Resolve `CountIsNextParam`.
710 // We can do this immediately as `position` is resolved later.
711 let i = self.curarg;
712 self.curarg += 1;
f2b60f7d 713 spec.precision = CountIsStar(i);
1a4d82fc 714 } else {
f2b60f7d 715 spec.precision = self.count(start + 1);
1a4d82fc 716 }
f2b60f7d
FG
717 let end = self.current_pos();
718 spec.precision_span = Some(self.span(start, end));
1a4d82fc 719 }
f2b60f7d
FG
720
721 let ty_span_start = self.current_pos();
0531ce1d
XL
722 // Optional radix followed by the actual format specifier
723 if self.consume('x') {
724 if self.consume('?') {
9ffffee4 725 spec.debug_hex = Some(DebugHex::Lower);
0531ce1d
XL
726 spec.ty = "?";
727 } else {
728 spec.ty = "x";
729 }
730 } else if self.consume('X') {
731 if self.consume('?') {
9ffffee4 732 spec.debug_hex = Some(DebugHex::Upper);
0531ce1d
XL
733 spec.ty = "?";
734 } else {
735 spec.ty = "X";
736 }
737 } else if self.consume('?') {
1a4d82fc
JJ
738 spec.ty = "?";
739 } else {
740 spec.ty = self.word();
60c5eb7d 741 if !spec.ty.is_empty() {
f2b60f7d
FG
742 let ty_span_end = self.current_pos();
743 spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
60c5eb7d 744 }
1a4d82fc 745 }
e9174d1e 746 spec
1a4d82fc
JJ
747 }
748
f9f354fc
XL
749 /// Parses an inline assembly template modifier at the current position, returning the modifier
750 /// in the `ty` field of the `FormatSpec` struct.
751 fn inline_asm(&mut self) -> FormatSpec<'a> {
752 let mut spec = FormatSpec {
753 fill: None,
add651ee 754 fill_span: None,
f9f354fc 755 align: AlignUnknown,
9ffffee4
FG
756 sign: None,
757 alternate: false,
758 zero_pad: false,
759 debug_hex: None,
f9f354fc
XL
760 precision: CountImplied,
761 precision_span: None,
762 width: CountImplied,
763 width_span: None,
764 ty: &self.input[..0],
765 ty_span: None,
766 };
767 if !self.consume(':') {
768 return spec;
769 }
770
f2b60f7d 771 let ty_span_start = self.current_pos();
f9f354fc 772 spec.ty = self.word();
f9f354fc 773 if !spec.ty.is_empty() {
f2b60f7d
FG
774 let ty_span_end = self.current_pos();
775 spec.ty_span = Some(self.span(ty_span_start, ty_span_end));
f9f354fc
XL
776 }
777
778 spec
779 }
780
e74abb32 781 /// Parses a `Count` parameter at the current position. This does not check
1a4d82fc
JJ
782 /// for 'CountIsNextParam' because that is only used in precision, not
783 /// width.
f2b60f7d 784 fn count(&mut self, start: usize) -> Count<'a> {
e9174d1e 785 if let Some(i) = self.integer() {
f2b60f7d 786 if self.consume('$') { CountIsParam(i) } else { CountIs(i) }
e9174d1e
SL
787 } else {
788 let tmp = self.cur.clone();
789 let word = self.word();
790 if word.is_empty() {
791 self.cur = tmp;
f2b60f7d 792 CountImplied
5099ac24 793 } else if let Some(end) = self.consume_pos('$') {
f2b60f7d
FG
794 let name_span = self.span(start, end);
795 CountIsName(word, name_span)
e9174d1e 796 } else {
b7449926 797 self.cur = tmp;
f2b60f7d 798 CountImplied
1a4d82fc
JJ
799 }
800 }
801 }
802
e1599b0c
XL
803 /// Parses a word starting at the current position. A word is the same as
804 /// Rust identifier, except that it can't start with `_` character.
1a4d82fc 805 fn word(&mut self) -> &'a str {
e9174d1e 806 let start = match self.cur.peek() {
60c5eb7d 807 Some(&(pos, c)) if rustc_lexer::is_id_start(c) => {
b039eaaf
SL
808 self.cur.next();
809 pos
810 }
811 _ => {
60c5eb7d 812 return "";
b039eaaf 813 }
1a4d82fc 814 };
60c5eb7d 815 let mut end = None;
e9174d1e 816 while let Some(&(pos, c)) = self.cur.peek() {
e1599b0c 817 if rustc_lexer::is_id_continue(c) {
e9174d1e
SL
818 self.cur.next();
819 } else {
60c5eb7d
XL
820 end = Some(pos);
821 break;
1a4d82fc
JJ
822 }
823 }
60c5eb7d
XL
824 let end = end.unwrap_or(self.input.len());
825 let word = &self.input[start..end];
826 if word == "_" {
827 self.err_with_note(
828 "invalid argument name `_`",
829 "invalid argument name",
830 "argument name cannot be a single underscore",
f2b60f7d 831 self.span(start, end),
60c5eb7d
XL
832 );
833 }
834 word
1a4d82fc
JJ
835 }
836
c34b1796 837 fn integer(&mut self) -> Option<usize> {
2b03887a 838 let mut cur: usize = 0;
1a4d82fc 839 let mut found = false;
2b03887a
FG
840 let mut overflow = false;
841 let start = self.current_pos();
e9174d1e
SL
842 while let Some(&(_, c)) = self.cur.peek() {
843 if let Some(i) = c.to_digit(10) {
2b03887a
FG
844 let (tmp, mul_overflow) = cur.overflowing_mul(10);
845 let (tmp, add_overflow) = tmp.overflowing_add(i as usize);
846 if mul_overflow || add_overflow {
847 overflow = true;
848 }
849 cur = tmp;
e9174d1e
SL
850 found = true;
851 self.cur.next();
852 } else {
92a42be0 853 break;
1a4d82fc
JJ
854 }
855 }
2b03887a
FG
856
857 if overflow {
858 let end = self.current_pos();
859 let overflowed_int = &self.input[start..end];
860 self.err(
861 format!(
862 "integer `{}` does not fit into the type `usize` whose range is `0..={}`",
863 overflowed_int,
864 usize::MAX
865 ),
866 "integer out of range for `usize`",
867 self.span(start, end),
868 );
869 }
870
9ffffee4
FG
871 found.then_some(cur)
872 }
873
874 fn suggest_format(&mut self) {
875 if let (Some(pos), Some(_)) = (self.consume_pos('?'), self.consume_pos(':')) {
876 let word = self.word();
877 let _end = self.current_pos();
878 let pos = self.to_span_index(pos);
879 self.errors.insert(
880 0,
881 ParseError {
882 description: "expected format parameter to occur after `:`".to_owned(),
883 note: Some(format!("`?` comes after `:`, try `{}:{}` instead", word, "?")),
884 label: "expected `?` to occur after `:`".to_owned(),
885 span: pos.to(pos),
886 secondary_label: None,
781aab86 887 suggestion: Suggestion::None,
9ffffee4
FG
888 },
889 );
890 }
1a4d82fc 891 }
064997fb
FG
892
893 fn suggest_positional_arg_instead_of_captured_arg(&mut self, arg: Argument<'a>) {
894 if let Some(end) = self.consume_pos('.') {
895 let byte_pos = self.to_span_index(end);
896 let start = InnerOffset(byte_pos.0 + 1);
897 let field = self.argument(start);
898 // We can only parse `foo.bar` field access, any deeper nesting,
899 // or another type of expression, like method calls, are not supported
900 if !self.consume('}') {
901 return;
902 }
903 if let ArgumentNamed(_) = arg.position {
904 if let ArgumentNamed(_) = field.position {
905 self.errors.insert(
906 0,
907 ParseError {
908 description: "field access isn't supported".to_string(),
909 note: None,
910 label: "not supported".to_string(),
911 span: InnerSpan::new(arg.position_span.start, field.position_span.end),
912 secondary_label: None,
781aab86 913 suggestion: Suggestion::UsePositional,
064997fb
FG
914 },
915 );
916 }
917 }
918 }
919 }
1a4d82fc
JJ
920}
921
f9f354fc
XL
922/// Finds the indices of all characters that have been processed and differ between the actual
923/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
5e7ed085 924/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
9c376795 925fn find_width_map_from_snippet(
353b0b11 926 input: &str,
f9f354fc
XL
927 snippet: Option<string::String>,
928 str_style: Option<usize>,
9c376795 929) -> InputStringKind {
f9f354fc 930 let snippet = match snippet {
6a06907d 931 Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
9c376795 932 _ => return InputStringKind::NotALiteral,
f9f354fc
XL
933 };
934
487cf647 935 if str_style.is_some() {
9c376795 936 return InputStringKind::Literal { width_mappings: Vec::new() };
487cf647 937 }
cdc7bbd5 938
353b0b11 939 // Strip quotes.
487cf647
FG
940 let snippet = &snippet[1..snippet.len() - 1];
941
353b0b11
FG
942 // Macros like `println` add a newline at the end. That technically doesn't make them "literals" anymore, but it's fine
943 // since we will never need to point our spans there, so we lie about it here by ignoring it.
944 // Since there might actually be newlines in the source code, we need to normalize away all trailing newlines.
945 // If we only trimmed it off the input, `format!("\n")` would cause a mismatch as here we they actually match up.
946 // Alternatively, we could just count the trailing newlines and only trim one from the input if they don't match up.
947 let input_no_nl = input.trim_end_matches('\n');
948 let Some(unescaped) = unescape_string(snippet) else {
949 return InputStringKind::NotALiteral;
950 };
951
952 let unescaped_no_nl = unescaped.trim_end_matches('\n');
953
954 if unescaped_no_nl != input_no_nl {
955 // The source string that we're pointing at isn't our input, so spans pointing at it will be incorrect.
956 // This can for example happen with proc macros that respan generated literals.
957 return InputStringKind::NotALiteral;
958 }
959
487cf647 960 let mut s = snippet.char_indices();
9c376795 961 let mut width_mappings = vec![];
487cf647
FG
962 while let Some((pos, c)) = s.next() {
963 match (c, s.clone().next()) {
964 // skip whitespace and empty lines ending in '\\'
9c376795 965 ('\\', Some((_, '\n'))) => {
487cf647 966 let _ = s.next();
9c376795 967 let mut width = 2;
487cf647 968
9c376795 969 while let Some((_, c)) = s.clone().next() {
487cf647 970 if matches!(c, ' ' | '\n' | '\t') {
9c376795 971 width += 1;
487cf647
FG
972 let _ = s.next();
973 } else {
974 break;
f9f354fc
XL
975 }
976 }
9c376795
FG
977
978 width_mappings.push(InnerWidthMapping::new(pos, width, 0));
487cf647 979 }
9c376795
FG
980 ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
981 width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
487cf647
FG
982 let _ = s.next();
983 }
984 ('\\', Some((_, 'x'))) => {
9c376795
FG
985 // consume `\xAB` literal
986 s.nth(2);
987 width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
487cf647
FG
988 }
989 ('\\', Some((_, 'u'))) => {
9c376795
FG
990 let mut width = 2;
991 let _ = s.next();
992
993 if let Some((_, next_c)) = s.next() {
487cf647
FG
994 if next_c == '{' {
995 // consume up to 6 hexanumeric chars
996 let digits_len =
997 s.clone().take(6).take_while(|(_, c)| c.is_digit(16)).count();
998
999 let len_utf8 = s
1000 .as_str()
1001 .get(..digits_len)
1002 .and_then(|digits| u32::from_str_radix(digits, 16).ok())
1003 .and_then(char::from_u32)
1004 .map_or(1, char::len_utf8);
1005
1006 // Skip the digits, for chars that encode to more than 1 utf-8 byte
1007 // exclude as many digits as it is greater than 1 byte
1008 //
1009 // So for a 3 byte character, exclude 2 digits
1010 let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
1011
1012 // skip '{' and '}' also
9c376795 1013 width += required_skips + 2;
487cf647
FG
1014
1015 s.nth(digits_len);
1016 } else if next_c.is_digit(16) {
9c376795
FG
1017 width += 1;
1018
487cf647
FG
1019 // We suggest adding `{` and `}` when appropriate, accept it here as if
1020 // it were correct
1021 let mut i = 0; // consume up to 6 hexanumeric chars
9c376795 1022 while let (Some((_, c)), _) = (s.next(), i < 6) {
487cf647 1023 if c.is_digit(16) {
9c376795 1024 width += 1;
487cf647
FG
1025 } else {
1026 break;
f9f354fc 1027 }
487cf647 1028 i += 1;
f9f354fc
XL
1029 }
1030 }
1031 }
9c376795
FG
1032
1033 width_mappings.push(InnerWidthMapping::new(pos, width, 1));
f9f354fc 1034 }
487cf647 1035 _ => {}
f9f354fc 1036 }
f9f354fc 1037 }
9c376795
FG
1038
1039 InputStringKind::Literal { width_mappings }
f9f354fc
XL
1040}
1041
353b0b11
FG
1042fn unescape_string(string: &str) -> Option<string::String> {
1043 let mut buf = string::String::new();
1044 let mut ok = true;
1045 unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
1046 match unescaped_char {
1047 Ok(c) => buf.push(c),
1048 Err(_) => ok = false,
1049 }
1050 });
1051
1052 ok.then_some(buf)
1053}
1054
9c376795
FG
1055// Assert a reasonable size for `Piece`
1056#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
781aab86 1057rustc_index::static_assert_size!(Piece<'_>, 16);
9c376795 1058
1a4d82fc 1059#[cfg(test)]
dc9dc135 1060mod tests;