]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_builtin_macros/src/format_foreign.rs
New upstream version 1.48.0~beta.8+dfsg1
[rustc.git] / compiler / rustc_builtin_macros / src / format_foreign.rs
1 pub mod printf {
2 use super::strcursor::StrCursor as Cur;
3 use rustc_span::InnerSpan;
4
5 /// Represents a single `printf`-style substitution.
6 #[derive(Clone, PartialEq, Debug)]
7 pub enum Substitution<'a> {
8 /// A formatted output substitution with its internal byte offset.
9 Format(Format<'a>),
10 /// A literal `%%` escape.
11 Escape,
12 }
13
14 impl<'a> Substitution<'a> {
15 pub fn as_str(&self) -> &str {
16 match *self {
17 Substitution::Format(ref fmt) => fmt.span,
18 Substitution::Escape => "%%",
19 }
20 }
21
22 pub fn position(&self) -> Option<InnerSpan> {
23 match *self {
24 Substitution::Format(ref fmt) => Some(fmt.position),
25 _ => None,
26 }
27 }
28
29 pub fn set_position(&mut self, start: usize, end: usize) {
30 if let Substitution::Format(ref mut fmt) = self {
31 fmt.position = InnerSpan::new(start, end);
32 }
33 }
34
35 /// Translate this substitution into an equivalent Rust formatting directive.
36 ///
37 /// This ignores cases where the substitution does not have an exact equivalent, or where
38 /// the substitution would be unnecessary.
39 pub fn translate(&self) -> Option<String> {
40 match *self {
41 Substitution::Format(ref fmt) => fmt.translate(),
42 Substitution::Escape => None,
43 }
44 }
45 }
46
47 #[derive(Clone, PartialEq, Debug)]
48 /// A single `printf`-style formatting directive.
49 pub struct Format<'a> {
50 /// The entire original formatting directive.
51 pub span: &'a str,
52 /// The (1-based) parameter to be converted.
53 pub parameter: Option<u16>,
54 /// Formatting flags.
55 pub flags: &'a str,
56 /// Minimum width of the output.
57 pub width: Option<Num>,
58 /// Precision of the conversion.
59 pub precision: Option<Num>,
60 /// Length modifier for the conversion.
61 pub length: Option<&'a str>,
62 /// Type of parameter being converted.
63 pub type_: &'a str,
64 /// Byte offset for the start and end of this formatting directive.
65 pub position: InnerSpan,
66 }
67
68 impl Format<'_> {
69 /// Translate this directive into an equivalent Rust formatting directive.
70 ///
71 /// Returns `None` in cases where the `printf` directive does not have an exact Rust
72 /// equivalent, rather than guessing.
73 pub fn translate(&self) -> Option<String> {
74 use std::fmt::Write;
75
76 let (c_alt, c_zero, c_left, c_plus) = {
77 let mut c_alt = false;
78 let mut c_zero = false;
79 let mut c_left = false;
80 let mut c_plus = false;
81 for c in self.flags.chars() {
82 match c {
83 '#' => c_alt = true,
84 '0' => c_zero = true,
85 '-' => c_left = true,
86 '+' => c_plus = true,
87 _ => return None,
88 }
89 }
90 (c_alt, c_zero, c_left, c_plus)
91 };
92
93 // Has a special form in Rust for numbers.
94 let fill = c_zero.then_some("0");
95
96 let align = c_left.then_some("<");
97
98 // Rust doesn't have an equivalent to the `' '` flag.
99 let sign = c_plus.then_some("+");
100
101 // Not *quite* the same, depending on the type...
102 let alt = c_alt;
103
104 let width = match self.width {
105 Some(Num::Next) => {
106 // NOTE: Rust doesn't support this.
107 return None;
108 }
109 w @ Some(Num::Arg(_)) => w,
110 w @ Some(Num::Num(_)) => w,
111 None => None,
112 };
113
114 let precision = self.precision;
115
116 // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
117 // we just ignore it.
118
119 let (type_, use_zero_fill, is_int) = match self.type_ {
120 "d" | "i" | "u" => (None, true, true),
121 "f" | "F" => (None, false, false),
122 "s" | "c" => (None, false, false),
123 "e" | "E" => (Some(self.type_), true, false),
124 "x" | "X" | "o" => (Some(self.type_), true, true),
125 "p" => (Some(self.type_), false, true),
126 "g" => (Some("e"), true, false),
127 "G" => (Some("E"), true, false),
128 _ => return None,
129 };
130
131 let (fill, width, precision) = match (is_int, width, precision) {
132 (true, Some(_), Some(_)) => {
133 // Rust can't duplicate this insanity.
134 return None;
135 }
136 (true, None, Some(p)) => (Some("0"), Some(p), None),
137 (true, w, None) => (fill, w, None),
138 (false, w, p) => (fill, w, p),
139 };
140
141 let align = match (self.type_, width.is_some(), align.is_some()) {
142 ("s", true, false) => Some(">"),
143 _ => align,
144 };
145
146 let (fill, zero_fill) = match (fill, use_zero_fill) {
147 (Some("0"), true) => (None, true),
148 (fill, _) => (fill, false),
149 };
150
151 let alt = match type_ {
152 Some("x" | "X") => alt,
153 _ => false,
154 };
155
156 let has_options = fill.is_some()
157 || align.is_some()
158 || sign.is_some()
159 || alt
160 || zero_fill
161 || width.is_some()
162 || precision.is_some()
163 || type_.is_some();
164
165 // Initialise with a rough guess.
166 let cap = self.span.len() + if has_options { 2 } else { 0 };
167 let mut s = String::with_capacity(cap);
168
169 s.push('{');
170
171 if let Some(arg) = self.parameter {
172 write!(s, "{}", arg.checked_sub(1)?).ok()?;
173 }
174
175 if has_options {
176 s.push(':');
177
178 let align = if let Some(fill) = fill {
179 s.push_str(fill);
180 align.or(Some(">"))
181 } else {
182 align
183 };
184
185 if let Some(align) = align {
186 s.push_str(align);
187 }
188
189 if let Some(sign) = sign {
190 s.push_str(sign);
191 }
192
193 if alt {
194 s.push('#');
195 }
196
197 if zero_fill {
198 s.push('0');
199 }
200
201 if let Some(width) = width {
202 width.translate(&mut s).ok()?;
203 }
204
205 if let Some(precision) = precision {
206 s.push('.');
207 precision.translate(&mut s).ok()?;
208 }
209
210 if let Some(type_) = type_ {
211 s.push_str(type_);
212 }
213 }
214
215 s.push('}');
216 Some(s)
217 }
218 }
219
/// A general number used in a `printf` formatting directive.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Num {
    // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
    // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
    // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
    // with more precision, than tens of thousands of positions, which is so wide it couldn't
    // possibly fit on a screen.
    /// A specific, fixed value.
    Num(u16),
    /// The value is derived from a positional argument.
    Arg(u16),
    /// The value is derived from the "next" unconverted argument.
    Next,
}

impl Num {
    /// Builds a `Num` from the text of a directive.
    ///
    /// When `arg` is given it is parsed as a positional argument index; otherwise `s` is either
    /// `"*"` (the "next" argument) or a literal number.
    ///
    /// # Panics
    ///
    /// Panics if the text that has to be parsed is not a valid `u16`.
    fn from_str(s: &str, arg: Option<&str>) -> Self {
        match arg {
            Some(arg) => match arg.parse() {
                Ok(index) => Num::Arg(index),
                Err(_) => panic!("invalid format arg `{:?}`", arg),
            },
            None if s == "*" => Num::Next,
            None => match s.parse() {
                Ok(value) => Num::Num(value),
                Err(_) => panic!("invalid format num `{:?}`", s),
            },
        }
    }

    /// Appends the Rust spelling of this number to `s`.
    ///
    /// Positional arguments are converted from 1-based to 0-based; an `Arg(0)` therefore
    /// yields an error.
    fn translate(&self, s: &mut String) -> std::fmt::Result {
        use std::fmt::Write;
        match *self {
            Num::Num(value) => write!(s, "{}", value),
            Num::Arg(index) => match index.checked_sub(1) {
                Some(zero_based) => write!(s, "{}$", zero_based),
                None => Err(std::fmt::Error),
            },
            Num::Next => {
                s.push('*');
                Ok(())
            }
        }
    }
}
259
260 /// Returns an iterator over all substitutions in a given string.
261 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
262 Substitutions { s, pos: start_pos }
263 }
264
265 /// Iterator over substitutions in a string.
266 pub struct Substitutions<'a> {
267 s: &'a str,
268 pos: usize,
269 }
270
271 impl<'a> Iterator for Substitutions<'a> {
272 type Item = Substitution<'a>;
273 fn next(&mut self) -> Option<Self::Item> {
274 let (mut sub, tail) = parse_next_substitution(self.s)?;
275 self.s = tail;
276 match sub {
277 Substitution::Format(_) => {
278 if let Some(inner_span) = sub.position() {
279 sub.set_position(inner_span.start + self.pos, inner_span.end + self.pos);
280 self.pos += inner_span.end;
281 }
282 }
283 Substitution::Escape => self.pos += 2,
284 }
285 Some(sub)
286 }
287
288 fn size_hint(&self) -> (usize, Option<usize>) {
289 // Substitutions are at least 2 characters long.
290 (0, Some(self.s.len() / 2))
291 }
292 }
293
294 enum State {
295 Start,
296 Flags,
297 Width,
298 WidthArg,
299 Prec,
300 PrecInner,
301 Length,
302 Type,
303 }
304
305 /// Parse the next substitution from the input string.
306 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
307 use self::State::*;
308
309 let at = {
310 let start = s.find('%')?;
311 if let '%' = s[start + 1..].chars().next()? {
312 return Some((Substitution::Escape, &s[start + 2..]));
313 }
314
315 Cur::new_at(&s[..], start)
316 };
317
318 // This is meant to be a translation of the following regex:
319 //
320 // ```regex
321 // (?x)
322 // ^ %
323 // (?: (?P<parameter> \d+) \$ )?
324 // (?P<flags> [-+ 0\#']* )
325 // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
326 // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
327 // (?P<length>
328 // # Standard
329 // hh | h | ll | l | L | z | j | t
330 //
331 // # Other
332 // | I32 | I64 | I | q
333 // )?
334 // (?P<type> . )
335 // ```
336
337 // Used to establish the full span at the end.
338 let start = at;
339 // The current position within the string.
340 let mut at = at.at_next_cp()?;
341 // `c` is the next codepoint, `next` is a cursor after it.
342 let (mut c, mut next) = at.next_cp()?;
343
344 // Update `at`, `c`, and `next`, exiting if we're out of input.
345 macro_rules! move_to {
346 ($cur:expr) => {{
347 at = $cur;
348 let (c_, next_) = at.next_cp()?;
349 c = c_;
350 next = next_;
351 }};
352 }
353
354 // Constructs a result when parsing fails.
355 //
356 // Note: `move` used to capture copies of the cursors as they are *now*.
357 let fallback = move || {
358 Some((
359 Substitution::Format(Format {
360 span: start.slice_between(next).unwrap(),
361 parameter: None,
362 flags: "",
363 width: None,
364 precision: None,
365 length: None,
366 type_: at.slice_between(next).unwrap(),
367 position: InnerSpan::new(start.at, next.at),
368 }),
369 next.slice_after(),
370 ))
371 };
372
373 // Next parsing state.
374 let mut state = Start;
375
376 // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
377 let mut parameter: Option<u16> = None;
378 let mut flags: &str = "";
379 let mut width: Option<Num> = None;
380 let mut precision: Option<Num> = None;
381 let mut length: Option<&str> = None;
382 let mut type_: &str = "";
383 let end: Cur<'_>;
384
385 if let Start = state {
386 match c {
387 '1'..='9' => {
388 let end = at_next_cp_while(next, is_digit);
389 match end.next_cp() {
390 // Yes, this *is* the parameter.
391 Some(('$', end2)) => {
392 state = Flags;
393 parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
394 move_to!(end2);
395 }
396 // Wait, no, actually, it's the width.
397 Some(_) => {
398 state = Prec;
399 parameter = None;
400 flags = "";
401 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
402 move_to!(end);
403 }
404 // It's invalid, is what it is.
405 None => return fallback(),
406 }
407 }
408 _ => {
409 state = Flags;
410 parameter = None;
411 move_to!(at);
412 }
413 }
414 }
415
416 if let Flags = state {
417 let end = at_next_cp_while(at, is_flag);
418 state = Width;
419 flags = at.slice_between(end).unwrap();
420 move_to!(end);
421 }
422
423 if let Width = state {
424 match c {
425 '*' => {
426 state = WidthArg;
427 move_to!(next);
428 }
429 '1'..='9' => {
430 let end = at_next_cp_while(next, is_digit);
431 state = Prec;
432 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
433 move_to!(end);
434 }
435 _ => {
436 state = Prec;
437 width = None;
438 move_to!(at);
439 }
440 }
441 }
442
443 if let WidthArg = state {
444 let end = at_next_cp_while(at, is_digit);
445 match end.next_cp() {
446 Some(('$', end2)) => {
447 state = Prec;
448 width = Some(Num::from_str("", Some(at.slice_between(end).unwrap())));
449 move_to!(end2);
450 }
451 _ => {
452 state = Prec;
453 width = Some(Num::Next);
454 move_to!(end);
455 }
456 }
457 }
458
459 if let Prec = state {
460 match c {
461 '.' => {
462 state = PrecInner;
463 move_to!(next);
464 }
465 _ => {
466 state = Length;
467 precision = None;
468 move_to!(at);
469 }
470 }
471 }
472
473 if let PrecInner = state {
474 match c {
475 '*' => {
476 let end = at_next_cp_while(next, is_digit);
477 match end.next_cp() {
478 Some(('$', end2)) => {
479 state = Length;
480 precision = Some(Num::from_str("*", next.slice_between(end)));
481 move_to!(end2);
482 }
483 _ => {
484 state = Length;
485 precision = Some(Num::Next);
486 move_to!(end);
487 }
488 }
489 }
490 '0'..='9' => {
491 let end = at_next_cp_while(next, is_digit);
492 state = Length;
493 precision = Some(Num::from_str(at.slice_between(end).unwrap(), None));
494 move_to!(end);
495 }
496 _ => return fallback(),
497 }
498 }
499
500 if let Length = state {
501 let c1_next1 = next.next_cp();
502 match (c, c1_next1) {
503 ('h', Some(('h', next1))) | ('l', Some(('l', next1))) => {
504 state = Type;
505 length = Some(at.slice_between(next1).unwrap());
506 move_to!(next1);
507 }
508
509 ('h' | 'l' | 'L' | 'z' | 'j' | 't' | 'q', _) => {
510 state = Type;
511 length = Some(at.slice_between(next).unwrap());
512 move_to!(next);
513 }
514
515 ('I', _) => {
516 let end = next
517 .at_next_cp()
518 .and_then(|end| end.at_next_cp())
519 .map(|end| (next.slice_between(end).unwrap(), end));
520 let end = match end {
521 Some(("32" | "64", end)) => end,
522 _ => next,
523 };
524 state = Type;
525 length = Some(at.slice_between(end).unwrap());
526 move_to!(end);
527 }
528
529 _ => {
530 state = Type;
531 length = None;
532 move_to!(at);
533 }
534 }
535 }
536
537 if let Type = state {
538 drop(c);
539 type_ = at.slice_between(next).unwrap();
540
541 // Don't use `move_to!` here, as we *can* be at the end of the input.
542 at = next;
543 }
544
545 drop(c);
546 drop(next);
547
548 end = at;
549 let position = InnerSpan::new(start.at, end.at);
550
551 let f = Format {
552 span: start.slice_between(end).unwrap(),
553 parameter,
554 flags,
555 width,
556 precision,
557 length,
558 type_,
559 position,
560 };
561 Some((Substitution::Format(f), end.slice_after()))
562 }
563
564 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
565 where
566 F: FnMut(char) -> bool,
567 {
568 loop {
569 match cur.next_cp() {
570 Some((c, next)) => {
571 if pred(c) {
572 cur = next;
573 } else {
574 return cur;
575 }
576 }
577 None => return cur,
578 }
579 }
580 }
581
/// Returns `true` for the ASCII digits `0` through `9`.
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
588
/// Returns `true` for the characters accepted as `printf` flags: `0`, `-`, `+`, space, `#`
/// and `'`.
fn is_flag(c: char) -> bool {
    matches!(c, '0' | '-' | '+' | ' ' | '#' | '\'')
}
595
596 #[cfg(test)]
597 mod tests;
598 }
599
600 pub mod shell {
601 use super::strcursor::StrCursor as Cur;
602 use rustc_span::InnerSpan;
603
604 #[derive(Clone, PartialEq, Debug)]
605 pub enum Substitution<'a> {
606 Ordinal(u8, (usize, usize)),
607 Name(&'a str, (usize, usize)),
608 Escape((usize, usize)),
609 }
610
611 impl Substitution<'_> {
612 pub fn as_str(&self) -> String {
613 match self {
614 Substitution::Ordinal(n, _) => format!("${}", n),
615 Substitution::Name(n, _) => format!("${}", n),
616 Substitution::Escape(_) => "$$".into(),
617 }
618 }
619
620 pub fn position(&self) -> Option<InnerSpan> {
621 match self {
622 Substitution::Ordinal(_, pos)
623 | Substitution::Name(_, pos)
624 | Substitution::Escape(pos) => Some(InnerSpan::new(pos.0, pos.1)),
625 }
626 }
627
628 pub fn set_position(&mut self, start: usize, end: usize) {
629 match self {
630 Substitution::Ordinal(_, ref mut pos)
631 | Substitution::Name(_, ref mut pos)
632 | Substitution::Escape(ref mut pos) => *pos = (start, end),
633 }
634 }
635
636 pub fn translate(&self) -> Option<String> {
637 match *self {
638 Substitution::Ordinal(n, _) => Some(format!("{{{}}}", n)),
639 Substitution::Name(n, _) => Some(format!("{{{}}}", n)),
640 Substitution::Escape(_) => None,
641 }
642 }
643 }
644
645 /// Returns an iterator over all substitutions in a given string.
646 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
647 Substitutions { s, pos: start_pos }
648 }
649
650 /// Iterator over substitutions in a string.
651 pub struct Substitutions<'a> {
652 s: &'a str,
653 pos: usize,
654 }
655
656 impl<'a> Iterator for Substitutions<'a> {
657 type Item = Substitution<'a>;
658 fn next(&mut self) -> Option<Self::Item> {
659 match parse_next_substitution(self.s) {
660 Some((mut sub, tail)) => {
661 self.s = tail;
662 if let Some(InnerSpan { start, end }) = sub.position() {
663 sub.set_position(start + self.pos, end + self.pos);
664 self.pos += end;
665 }
666 Some(sub)
667 }
668 None => None,
669 }
670 }
671
672 fn size_hint(&self) -> (usize, Option<usize>) {
673 (0, Some(self.s.len()))
674 }
675 }
676
677 /// Parse the next substitution from the input string.
678 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
679 let at = {
680 let start = s.find('$')?;
681 match s[start + 1..].chars().next()? {
682 '$' => return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])),
683 c @ '0'..='9' => {
684 let n = (c as u8) - b'0';
685 return Some((Substitution::Ordinal(n, (start, start + 2)), &s[start + 2..]));
686 }
687 _ => { /* fall-through */ }
688 }
689
690 Cur::new_at(&s[..], start)
691 };
692
693 let at = at.at_next_cp()?;
694 let (c, inner) = at.next_cp()?;
695
696 if !is_ident_head(c) {
697 None
698 } else {
699 let end = at_next_cp_while(inner, is_ident_tail);
700 let slice = at.slice_between(end).unwrap();
701 let start = at.at - 1;
702 let end_pos = at.at + slice.len();
703 Some((Substitution::Name(slice, (start, end_pos)), end.slice_after()))
704 }
705 }
706
707 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
708 where
709 F: FnMut(char) -> bool,
710 {
711 loop {
712 match cur.next_cp() {
713 Some((c, next)) => {
714 if pred(c) {
715 cur = next;
716 } else {
717 return cur;
718 }
719 }
720 None => return cur,
721 }
722 }
723 }
724
/// Returns `true` if `c` may start an identifier: an ASCII letter or `_`.
fn is_ident_head(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
731
732 fn is_ident_tail(c: char) -> bool {
733 match c {
734 '0'..='9' => true,
735 c => is_ident_head(c),
736 }
737 }
738
739 #[cfg(test)]
740 mod tests;
741 }
742
mod strcursor {
    /// A byte position inside a string slice, advanced one codepoint at a time.
    #[derive(Clone, Copy)]
    pub struct StrCursor<'a> {
        /// The string being walked.
        s: &'a str,
        /// Byte offset of the cursor within `s`.
        pub at: usize,
    }

    impl<'a> StrCursor<'a> {
        /// Creates a cursor over `s` positioned at byte offset `at`.
        pub fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
            StrCursor { s, at }
        }

        /// Returns a cursor moved past the next codepoint, or `None` at the end of the string.
        pub fn at_next_cp(self) -> Option<StrCursor<'a>> {
            self.next_cp().map(|(_, advanced)| advanced)
        }

        /// Returns the next codepoint together with a cursor positioned after it, or `None`
        /// at the end of the string.
        pub fn next_cp(self) -> Option<(char, StrCursor<'a>)> {
            let cp = self.slice_after().chars().next()?;
            let advanced = StrCursor { s: self.s, at: self.at + cp.len_utf8() };
            Some((cp, advanced))
        }

        /// Returns everything from the cursor to the end of the string.
        pub fn slice_after(&self) -> &'a str {
            &self.s[self.at..]
        }

        /// Returns the text between two cursors, in either order.
        ///
        /// Yields `None` when the cursors do not refer to the very same string slice.
        pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
            if !str_eq_literal(self.s, until.s) {
                return None;
            }
            let (lo, hi) =
                if self.at <= until.at { (self.at, until.at) } else { (until.at, self.at) };
            Some(&self.s[lo..hi])
        }
    }

    impl std::fmt::Debug for StrCursor<'_> {
        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // Show the text on either side of the cursor.
            let before = &self.s[..self.at];
            let after = self.slice_after();
            write!(fmt, "StrCursor({:?} | {:?})", before, after)
        }
    }

    /// Returns `true` when both slices start at the same address and have the same length,
    /// i.e. they are literally the same slice rather than merely equal in content.
    fn str_eq_literal(a: &str, b: &str) -> bool {
        a.len() == b.len() && a.as_ptr() == b.as_ptr()
    }
}