git.proxmox.com Git - rustc.git/blob - src/libsyntax_ext/format_foreign.rs
New upstream version 1.41.1+dfsg1
[rustc.git] / src / libsyntax_ext / format_foreign.rs
1 pub mod printf {
2 use super::strcursor::StrCursor as Cur;
3 use syntax_pos::InnerSpan;
4
5 /// Represents a single `printf`-style substitution.
6 #[derive(Clone, PartialEq, Debug)]
7 pub enum Substitution<'a> {
8 /// A formatted output substitution with its internal byte offset.
9 Format(Format<'a>),
10 /// A literal `%%` escape.
11 Escape,
12 }
13
14 impl<'a> Substitution<'a> {
15 pub fn as_str(&self) -> &str {
16 match *self {
17 Substitution::Format(ref fmt) => fmt.span,
18 Substitution::Escape => "%%",
19 }
20 }
21
22 pub fn position(&self) -> Option<InnerSpan> {
23 match *self {
24 Substitution::Format(ref fmt) => Some(fmt.position),
25 _ => None,
26 }
27 }
28
29 pub fn set_position(&mut self, start: usize, end: usize) {
30 match self {
31 Substitution::Format(ref mut fmt) => {
32 fmt.position = InnerSpan::new(start, end);
33 }
34 _ => {}
35 }
36 }
37
38
39 /// Translate this substitution into an equivalent Rust formatting directive.
40 ///
41 /// This ignores cases where the substitution does not have an exact equivalent, or where
42 /// the substitution would be unnecessary.
43 pub fn translate(&self) -> Option<String> {
44 match *self {
45 Substitution::Format(ref fmt) => fmt.translate(),
46 Substitution::Escape => None,
47 }
48 }
49 }
50
51 #[derive(Clone, PartialEq, Debug)]
52 /// A single `printf`-style formatting directive.
53 pub struct Format<'a> {
54 /// The entire original formatting directive.
55 pub span: &'a str,
56 /// The (1-based) parameter to be converted.
57 pub parameter: Option<u16>,
58 /// Formatting flags.
59 pub flags: &'a str,
60 /// Minimum width of the output.
61 pub width: Option<Num>,
62 /// Precision of the conversion.
63 pub precision: Option<Num>,
64 /// Length modifier for the conversion.
65 pub length: Option<&'a str>,
66 /// Type of parameter being converted.
67 pub type_: &'a str,
68 /// Byte offset for the start and end of this formatting directive.
69 pub position: InnerSpan,
70 }
71
72 impl Format<'_> {
73 /// Translate this directive into an equivalent Rust formatting directive.
74 ///
75 /// Returns `None` in cases where the `printf` directive does not have an exact Rust
76 /// equivalent, rather than guessing.
77 pub fn translate(&self) -> Option<String> {
78 use std::fmt::Write;
79
80 let (c_alt, c_zero, c_left, c_plus) = {
81 let mut c_alt = false;
82 let mut c_zero = false;
83 let mut c_left = false;
84 let mut c_plus = false;
85 for c in self.flags.chars() {
86 match c {
87 '#' => c_alt = true,
88 '0' => c_zero = true,
89 '-' => c_left = true,
90 '+' => c_plus = true,
91 _ => return None
92 }
93 }
94 (c_alt, c_zero, c_left, c_plus)
95 };
96
97 // Has a special form in Rust for numbers.
98 let fill = c_zero.then_some("0");
99
100 let align = c_left.then_some("<");
101
102 // Rust doesn't have an equivalent to the `' '` flag.
103 let sign = c_plus.then_some("+");
104
105 // Not *quite* the same, depending on the type...
106 let alt = c_alt;
107
108 let width = match self.width {
109 Some(Num::Next) => {
110 // NOTE: Rust doesn't support this.
111 return None;
112 }
113 w @ Some(Num::Arg(_)) => w,
114 w @ Some(Num::Num(_)) => w,
115 None => None,
116 };
117
118 let precision = self.precision;
119
120 // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
121 // we just ignore it.
122
123 let (type_, use_zero_fill, is_int) = match self.type_ {
124 "d" | "i" | "u" => (None, true, true),
125 "f" | "F" => (None, false, false),
126 "s" | "c" => (None, false, false),
127 "e" | "E" => (Some(self.type_), true, false),
128 "x" | "X" | "o" => (Some(self.type_), true, true),
129 "p" => (Some(self.type_), false, true),
130 "g" => (Some("e"), true, false),
131 "G" => (Some("E"), true, false),
132 _ => return None,
133 };
134
135 let (fill, width, precision) = match (is_int, width, precision) {
136 (true, Some(_), Some(_)) => {
137 // Rust can't duplicate this insanity.
138 return None;
139 },
140 (true, None, Some(p)) => (Some("0"), Some(p), None),
141 (true, w, None) => (fill, w, None),
142 (false, w, p) => (fill, w, p),
143 };
144
145 let align = match (self.type_, width.is_some(), align.is_some()) {
146 ("s", true, false) => Some(">"),
147 _ => align,
148 };
149
150 let (fill, zero_fill) = match (fill, use_zero_fill) {
151 (Some("0"), true) => (None, true),
152 (fill, _) => (fill, false),
153 };
154
155 let alt = match type_ {
156 Some("x") | Some("X") => alt,
157 _ => false,
158 };
159
160 let has_options = fill.is_some()
161 || align.is_some()
162 || sign.is_some()
163 || alt
164 || zero_fill
165 || width.is_some()
166 || precision.is_some()
167 || type_.is_some()
168 ;
169
170 // Initialise with a rough guess.
171 let cap = self.span.len() + if has_options { 2 } else { 0 };
172 let mut s = String::with_capacity(cap);
173
174 s.push_str("{");
175
176 if let Some(arg) = self.parameter {
177 write!(s, "{}", arg.checked_sub(1)?).ok()?;
178 }
179
180 if has_options {
181 s.push_str(":");
182
183 let align = if let Some(fill) = fill {
184 s.push_str(fill);
185 align.or(Some(">"))
186 } else {
187 align
188 };
189
190 if let Some(align) = align {
191 s.push_str(align);
192 }
193
194 if let Some(sign) = sign {
195 s.push_str(sign);
196 }
197
198 if alt {
199 s.push_str("#");
200 }
201
202 if zero_fill {
203 s.push_str("0");
204 }
205
206 if let Some(width) = width {
207 width.translate(&mut s).ok()?;
208 }
209
210 if let Some(precision) = precision {
211 s.push_str(".");
212 precision.translate(&mut s).ok()?;
213 }
214
215 if let Some(type_) = type_ {
216 s.push_str(type_);
217 }
218 }
219
220 s.push_str("}");
221 Some(s)
222 }
223 }
224
/// A numeric component (width or precision) of a `printf` formatting directive.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Num {
    // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
    // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
    // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
    // with more precision, than a `u16` can express, which is already far too wide to fit on
    // a screen.

    /// A literal value written in the directive.
    Num(u16),
    /// The value is taken from the positional argument with this (1-based) index.
    Arg(u16),
    /// The value is taken from the "next" unconverted argument.
    Next,
}

impl Num {
    /// Builds a `Num` from directive text.
    ///
    /// When `arg` is present it is parsed as a positional index; otherwise `s` is either
    /// `"*"` or a literal number. Panics if the text to parse is not a valid `u16`.
    fn from_str(s: &str, arg: Option<&str>) -> Self {
        match arg {
            Some(arg) => match arg.parse() {
                Ok(index) => Num::Arg(index),
                Err(_) => panic!("invalid format arg `{:?}`", arg),
            },
            None if s == "*" => Num::Next,
            None => match s.parse() {
                Ok(value) => Num::Num(value),
                Err(_) => panic!("invalid format num `{:?}`", s),
            },
        }
    }

    /// Appends the Rust spelling of this number to `s`.
    ///
    /// Fails, without writing anything, for a positional index of zero, since `printf`
    /// indices are 1-based and Rust's are 0-based.
    fn translate(&self, s: &mut String) -> std::fmt::Result {
        use std::fmt::Write;
        match *self {
            Num::Next => s.write_str("*"),
            Num::Num(value) => write!(s, "{}", value),
            Num::Arg(index) => match index.checked_sub(1) {
                Some(zero_based) => write!(s, "{}$", zero_based),
                None => Err(std::fmt::Error),
            },
        }
    }
}
265
266 /// Returns an iterator over all substitutions in a given string.
267 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
268 Substitutions {
269 s,
270 pos: start_pos,
271 }
272 }
273
274 /// Iterator over substitutions in a string.
275 pub struct Substitutions<'a> {
276 s: &'a str,
277 pos: usize,
278 }
279
280 impl<'a> Iterator for Substitutions<'a> {
281 type Item = Substitution<'a>;
282 fn next(&mut self) -> Option<Self::Item> {
283 let (mut sub, tail) = parse_next_substitution(self.s)?;
284 self.s = tail;
285 match sub {
286 Substitution::Format(_) => if let Some(inner_span) = sub.position() {
287 sub.set_position(inner_span.start + self.pos, inner_span.end + self.pos);
288 self.pos += inner_span.end;
289 }
290 Substitution::Escape => self.pos += 2,
291 }
292 Some(sub)
293 }
294
295 fn size_hint(&self) -> (usize, Option<usize>) {
296 // Substitutions are at least 2 characters long.
297 (0, Some(self.s.len() / 2))
298 }
299 }
300
301 enum State {
302 Start,
303 Flags,
304 Width,
305 WidthArg,
306 Prec,
307 PrecInner,
308 Length,
309 Type,
310 }
311
312 /// Parse the next substitution from the input string.
313 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
314 use self::State::*;
315
316 let at = {
317 let start = s.find('%')?;
318 match s[start+1..].chars().next()? {
319 '%' => return Some((Substitution::Escape, &s[start+2..])),
320 _ => {/* fall-through */},
321 }
322
323 Cur::new_at(&s[..], start)
324 };
325
326 // This is meant to be a translation of the following regex:
327 //
328 // ```regex
329 // (?x)
330 // ^ %
331 // (?: (?P<parameter> \d+) \$ )?
332 // (?P<flags> [-+ 0\#']* )
333 // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
334 // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
335 // (?P<length>
336 // # Standard
337 // hh | h | ll | l | L | z | j | t
338 //
339 // # Other
340 // | I32 | I64 | I | q
341 // )?
342 // (?P<type> . )
343 // ```
344
345 // Used to establish the full span at the end.
346 let start = at;
347 // The current position within the string.
348 let mut at = at.at_next_cp()?;
349 // `c` is the next codepoint, `next` is a cursor after it.
350 let (mut c, mut next) = at.next_cp()?;
351
352 // Update `at`, `c`, and `next`, exiting if we're out of input.
353 macro_rules! move_to {
354 ($cur:expr) => {
355 {
356 at = $cur;
357 let (c_, next_) = at.next_cp()?;
358 c = c_;
359 next = next_;
360 }
361 };
362 }
363
364 // Constructs a result when parsing fails.
365 //
366 // Note: `move` used to capture copies of the cursors as they are *now*.
367 let fallback = move || {
368 return Some((
369 Substitution::Format(Format {
370 span: start.slice_between(next).unwrap(),
371 parameter: None,
372 flags: "",
373 width: None,
374 precision: None,
375 length: None,
376 type_: at.slice_between(next).unwrap(),
377 position: InnerSpan::new(start.at, next.at),
378 }),
379 next.slice_after()
380 ));
381 };
382
383 // Next parsing state.
384 let mut state = Start;
385
386 // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
387 let mut parameter: Option<u16> = None;
388 let mut flags: &str = "";
389 let mut width: Option<Num> = None;
390 let mut precision: Option<Num> = None;
391 let mut length: Option<&str> = None;
392 let mut type_: &str = "";
393 let end: Cur<'_>;
394
395 if let Start = state {
396 match c {
397 '1'..='9' => {
398 let end = at_next_cp_while(next, is_digit);
399 match end.next_cp() {
400 // Yes, this *is* the parameter.
401 Some(('$', end2)) => {
402 state = Flags;
403 parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
404 move_to!(end2);
405 },
406 // Wait, no, actually, it's the width.
407 Some(_) => {
408 state = Prec;
409 parameter = None;
410 flags = "";
411 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
412 move_to!(end);
413 },
414 // It's invalid, is what it is.
415 None => return fallback(),
416 }
417 },
418 _ => {
419 state = Flags;
420 parameter = None;
421 move_to!(at);
422 }
423 }
424 }
425
426 if let Flags = state {
427 let end = at_next_cp_while(at, is_flag);
428 state = Width;
429 flags = at.slice_between(end).unwrap();
430 move_to!(end);
431 }
432
433 if let Width = state {
434 match c {
435 '*' => {
436 state = WidthArg;
437 move_to!(next);
438 },
439 '1' ..= '9' => {
440 let end = at_next_cp_while(next, is_digit);
441 state = Prec;
442 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
443 move_to!(end);
444 },
445 _ => {
446 state = Prec;
447 width = None;
448 move_to!(at);
449 }
450 }
451 }
452
453 if let WidthArg = state {
454 let end = at_next_cp_while(at, is_digit);
455 match end.next_cp() {
456 Some(('$', end2)) => {
457 state = Prec;
458 width = Some(Num::from_str("", Some(at.slice_between(end).unwrap())));
459 move_to!(end2);
460 },
461 _ => {
462 state = Prec;
463 width = Some(Num::Next);
464 move_to!(end);
465 }
466 }
467 }
468
469 if let Prec = state {
470 match c {
471 '.' => {
472 state = PrecInner;
473 move_to!(next);
474 },
475 _ => {
476 state = Length;
477 precision = None;
478 move_to!(at);
479 }
480 }
481 }
482
483 if let PrecInner = state {
484 match c {
485 '*' => {
486 let end = at_next_cp_while(next, is_digit);
487 match end.next_cp() {
488 Some(('$', end2)) => {
489 state = Length;
490 precision = Some(Num::from_str("*", next.slice_between(end)));
491 move_to!(end2);
492 },
493 _ => {
494 state = Length;
495 precision = Some(Num::Next);
496 move_to!(end);
497 }
498 }
499 },
500 '0' ..= '9' => {
501 let end = at_next_cp_while(next, is_digit);
502 state = Length;
503 precision = Some(Num::from_str(at.slice_between(end).unwrap(), None));
504 move_to!(end);
505 },
506 _ => return fallback(),
507 }
508 }
509
510 if let Length = state {
511 let c1_next1 = next.next_cp();
512 match (c, c1_next1) {
513 ('h', Some(('h', next1)))
514 | ('l', Some(('l', next1)))
515 => {
516 state = Type;
517 length = Some(at.slice_between(next1).unwrap());
518 move_to!(next1);
519 },
520
521 ('h', _) | ('l', _) | ('L', _)
522 | ('z', _) | ('j', _) | ('t', _)
523 | ('q', _)
524 => {
525 state = Type;
526 length = Some(at.slice_between(next).unwrap());
527 move_to!(next);
528 },
529
530 ('I', _) => {
531 let end = next.at_next_cp()
532 .and_then(|end| end.at_next_cp())
533 .map(|end| (next.slice_between(end).unwrap(), end));
534 let end = match end {
535 Some(("32", end)) => end,
536 Some(("64", end)) => end,
537 _ => next
538 };
539 state = Type;
540 length = Some(at.slice_between(end).unwrap());
541 move_to!(end);
542 },
543
544 _ => {
545 state = Type;
546 length = None;
547 move_to!(at);
548 }
549 }
550 }
551
552 if let Type = state {
553 drop(c);
554 type_ = at.slice_between(next).unwrap();
555
556 // Don't use `move_to!` here, as we *can* be at the end of the input.
557 at = next;
558 }
559
560 drop(c);
561 drop(next);
562
563 end = at;
564 let position = InnerSpan::new(start.at, end.at);
565
566 let f = Format {
567 span: start.slice_between(end).unwrap(),
568 parameter,
569 flags,
570 width,
571 precision,
572 length,
573 type_,
574 position,
575 };
576 Some((Substitution::Format(f), end.slice_after()))
577 }
578
579 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
580 where F: FnMut(char) -> bool {
581 loop {
582 match cur.next_cp() {
583 Some((c, next)) => if pred(c) {
584 cur = next;
585 } else {
586 return cur;
587 },
588 None => return cur,
589 }
590 }
591 }
592
/// Whether `c` is one of the ASCII digits `0` through `9`.
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
599
/// Whether `c` is one of the flag characters recognised in a `printf` directive.
fn is_flag(c: char) -> bool {
    const FLAG_CHARS: &str = "0-+ #'";
    FLAG_CHARS.contains(c)
}
606
607 #[cfg(test)]
608 mod tests;
609 }
610
611 pub mod shell {
612 use super::strcursor::StrCursor as Cur;
613 use syntax_pos::InnerSpan;
614
615 #[derive(Clone, PartialEq, Debug)]
616 pub enum Substitution<'a> {
617 Ordinal(u8, (usize, usize)),
618 Name(&'a str, (usize, usize)),
619 Escape((usize, usize)),
620 }
621
622 impl Substitution<'_> {
623 pub fn as_str(&self) -> String {
624 match self {
625 Substitution::Ordinal(n, _) => format!("${}", n),
626 Substitution::Name(n, _) => format!("${}", n),
627 Substitution::Escape(_) => "$$".into(),
628 }
629 }
630
631 pub fn position(&self) -> Option<InnerSpan> {
632 match self {
633 Substitution::Ordinal(_, pos) |
634 Substitution::Name(_, pos) |
635 Substitution::Escape(pos) => Some(InnerSpan::new(pos.0, pos.1)),
636 }
637 }
638
639 pub fn set_position(&mut self, start: usize, end: usize) {
640 match self {
641 Substitution::Ordinal(_, ref mut pos) |
642 Substitution::Name(_, ref mut pos) |
643 Substitution::Escape(ref mut pos) => *pos = (start, end),
644 }
645 }
646
647 pub fn translate(&self) -> Option<String> {
648 match *self {
649 Substitution::Ordinal(n, _) => Some(format!("{{{}}}", n)),
650 Substitution::Name(n, _) => Some(format!("{{{}}}", n)),
651 Substitution::Escape(_) => None,
652 }
653 }
654 }
655
656 /// Returns an iterator over all substitutions in a given string.
657 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
658 Substitutions {
659 s,
660 pos: start_pos,
661 }
662 }
663
664 /// Iterator over substitutions in a string.
665 pub struct Substitutions<'a> {
666 s: &'a str,
667 pos: usize,
668 }
669
670 impl<'a> Iterator for Substitutions<'a> {
671 type Item = Substitution<'a>;
672 fn next(&mut self) -> Option<Self::Item> {
673 match parse_next_substitution(self.s) {
674 Some((mut sub, tail)) => {
675 self.s = tail;
676 if let Some(InnerSpan { start, end }) = sub.position() {
677 sub.set_position(start + self.pos, end + self.pos);
678 self.pos += end;
679 }
680 Some(sub)
681 },
682 None => None,
683 }
684 }
685
686 fn size_hint(&self) -> (usize, Option<usize>) {
687 (0, Some(self.s.len()))
688 }
689 }
690
691 /// Parse the next substitution from the input string.
692 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
693 let at = {
694 let start = s.find('$')?;
695 match s[start+1..].chars().next()? {
696 '$' => return Some((Substitution::Escape((start, start+2)), &s[start+2..])),
697 c @ '0' ..= '9' => {
698 let n = (c as u8) - b'0';
699 return Some((Substitution::Ordinal(n, (start, start+2)), &s[start+2..]));
700 },
701 _ => {/* fall-through */},
702 }
703
704 Cur::new_at(&s[..], start)
705 };
706
707 let at = at.at_next_cp()?;
708 let (c, inner) = at.next_cp()?;
709
710 if !is_ident_head(c) {
711 None
712 } else {
713 let end = at_next_cp_while(inner, is_ident_tail);
714 let slice = at.slice_between(end).unwrap();
715 let start = at.at - 1;
716 let end_pos = at.at + slice.len();
717 Some((Substitution::Name(slice, (start, end_pos)), end.slice_after()))
718 }
719 }
720
721 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
722 where F: FnMut(char) -> bool {
723 loop {
724 match cur.next_cp() {
725 Some((c, next)) => if pred(c) {
726 cur = next;
727 } else {
728 return cur;
729 },
730 None => return cur,
731 }
732 }
733 }
734
/// Whether `c` may start an identifier: an ASCII letter or an underscore.
fn is_ident_head(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}
741
/// Whether `c` may continue an identifier: an ASCII letter, an ASCII digit or an underscore.
fn is_ident_tail(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
748
749 #[cfg(test)]
750 mod tests;
751 }
752
mod strcursor {
    /// A byte position inside a string slice that can be stepped one codepoint at a time.
    #[derive(Clone, Copy)]
    pub struct StrCursor<'a> {
        s: &'a str,
        pub at: usize,
    }

    impl<'a> StrCursor<'a> {
        /// Creates a cursor over `s` positioned at byte offset `at`.
        pub fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
            StrCursor { s, at }
        }

        /// The cursor one codepoint further on, or `None` at the end of the string.
        pub fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
            if self.try_seek_right_cp() {
                Some(self)
            } else {
                None
            }
        }

        /// The codepoint under the cursor together with the cursor just past it, or `None`
        /// at the end of the string.
        pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
            let cp = self.cp_after()?;
            self.seek_right(cp.len_utf8());
            Some((cp, self))
        }

        /// Everything before the cursor.
        fn slice_before(&self) -> &'a str {
            &self.s[..self.at]
        }

        /// Everything from the cursor onwards.
        pub fn slice_after(&self) -> &'a str {
            &self.s[self.at..]
        }

        /// The text between the two cursors, in either order; `None` if they do not refer
        /// to the very same string slice.
        pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
            if !str_eq_literal(self.s, until.s) {
                return None;
            }
            let (beg, end) = if self.at <= until.at {
                (self.at, until.at)
            } else {
                (until.at, self.at)
            };
            Some(&self.s[beg..end])
        }

        /// The codepoint under the cursor, if any.
        fn cp_after(&self) -> Option<char> {
            self.slice_after().chars().next()
        }

        /// Steps over one codepoint, reporting whether there was one to step over.
        fn try_seek_right_cp(&mut self) -> bool {
            if let Some(cp) = self.cp_after() {
                self.seek_right(cp.len_utf8());
                true
            } else {
                false
            }
        }

        /// Moves the cursor forward by `bytes` bytes.
        fn seek_right(&mut self, bytes: usize) {
            self.at += bytes;
        }
    }

    impl std::fmt::Debug for StrCursor<'_> {
        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let (before, after) = (self.slice_before(), self.slice_after());
            write!(fmt, "StrCursor({:?} | {:?})", before, after)
        }
    }

    /// Whether `a` and `b` are the same slice: same start address and same length.
    fn str_eq_literal(a: &str, b: &str) -> bool {
        a.len() == b.len() && a.as_ptr() == b.as_ptr()
    }
}