]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_builtin_macros/src/format_foreign.rs
New upstream version 1.59.0+dfsg1
[rustc.git] / compiler / rustc_builtin_macros / src / format_foreign.rs
CommitLineData
c295e0f8 1pub(crate) mod printf {
476ff2be 2 use super::strcursor::StrCursor as Cur;
dfeec247 3 use rustc_span::InnerSpan;
476ff2be
SL
4
5 /// Represents a single `printf`-style substitution.
8faf50e0 6 #[derive(Clone, PartialEq, Debug)]
476ff2be 7 pub enum Substitution<'a> {
8faf50e0 8 /// A formatted output substitution with its internal byte offset.
476ff2be 9 Format(Format<'a>),
a2a8927a
XL
10 /// A literal `%%` escape, with its start and end indices.
11 Escape((usize, usize)),
476ff2be
SL
12 }
13
14 impl<'a> Substitution<'a> {
15 pub fn as_str(&self) -> &str {
16 match *self {
17 Substitution::Format(ref fmt) => fmt.span,
a2a8927a 18 Substitution::Escape(_) => "%%",
476ff2be
SL
19 }
20 }
21
dc9dc135 22 pub fn position(&self) -> Option<InnerSpan> {
8faf50e0
XL
23 match *self {
24 Substitution::Format(ref fmt) => Some(fmt.position),
a2a8927a 25 Substitution::Escape((start, end)) => Some(InnerSpan::new(start, end)),
8faf50e0
XL
26 }
27 }
28
29 pub fn set_position(&mut self, start: usize, end: usize) {
a2a8927a
XL
30 match self {
31 Substitution::Format(ref mut fmt) => fmt.position = InnerSpan::new(start, end),
32 Substitution::Escape(ref mut pos) => *pos = (start, end),
8faf50e0
XL
33 }
34 }
35
476ff2be
SL
36 /// Translate this substitution into an equivalent Rust formatting directive.
37 ///
38 /// This ignores cases where the substitution does not have an exact equivalent, or where
39 /// the substitution would be unnecessary.
c295e0f8 40 pub fn translate(&self) -> Result<String, Option<String>> {
476ff2be
SL
41 match *self {
42 Substitution::Format(ref fmt) => fmt.translate(),
a2a8927a 43 Substitution::Escape(_) => Err(None),
476ff2be
SL
44 }
45 }
46 }
47
8faf50e0 48 #[derive(Clone, PartialEq, Debug)]
476ff2be
SL
49 /// A single `printf`-style formatting directive.
50 pub struct Format<'a> {
51 /// The entire original formatting directive.
52 pub span: &'a str,
53 /// The (1-based) parameter to be converted.
54 pub parameter: Option<u16>,
55 /// Formatting flags.
56 pub flags: &'a str,
57 /// Minimum width of the output.
58 pub width: Option<Num>,
59 /// Precision of the conversion.
60 pub precision: Option<Num>,
61 /// Length modifier for the conversion.
62 pub length: Option<&'a str>,
63 /// Type of parameter being converted.
64 pub type_: &'a str,
8faf50e0 65 /// Byte offset for the start and end of this formatting directive.
dc9dc135 66 pub position: InnerSpan,
476ff2be
SL
67 }
68
9fa01778 69 impl Format<'_> {
476ff2be
SL
70 /// Translate this directive into an equivalent Rust formatting directive.
71 ///
c295e0f8 72 /// Returns `Err` in cases where the `printf` directive does not have an exact Rust
476ff2be 73 /// equivalent, rather than guessing.
c295e0f8 74 pub fn translate(&self) -> Result<String, Option<String>> {
476ff2be
SL
75 use std::fmt::Write;
76
77 let (c_alt, c_zero, c_left, c_plus) = {
78 let mut c_alt = false;
79 let mut c_zero = false;
80 let mut c_left = false;
81 let mut c_plus = false;
82 for c in self.flags.chars() {
83 match c {
84 '#' => c_alt = true,
85 '0' => c_zero = true,
86 '-' => c_left = true,
87 '+' => c_plus = true,
c295e0f8
XL
88 _ => {
89 return Err(Some(format!(
90 "the flag `{}` is unknown or unsupported",
91 c
92 )));
93 }
476ff2be
SL
94 }
95 }
96 (c_alt, c_zero, c_left, c_plus)
97 };
98
99 // Has a special form in Rust for numbers.
60c5eb7d 100 let fill = c_zero.then_some("0");
476ff2be 101
60c5eb7d 102 let align = c_left.then_some("<");
476ff2be
SL
103
104 // Rust doesn't have an equivalent to the `' '` flag.
60c5eb7d 105 let sign = c_plus.then_some("+");
476ff2be
SL
106
107 // Not *quite* the same, depending on the type...
108 let alt = c_alt;
109
110 let width = match self.width {
111 Some(Num::Next) => {
112 // NOTE: Rust doesn't support this.
c295e0f8
XL
113 return Err(Some(
114 "you have to use a positional or named parameter for the width".to_string(),
115 ));
476ff2be
SL
116 }
117 w @ Some(Num::Arg(_)) => w,
118 w @ Some(Num::Num(_)) => w,
119 None => None,
120 };
121
122 let precision = self.precision;
123
124 // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so
125 // we just ignore it.
126
127 let (type_, use_zero_fill, is_int) = match self.type_ {
128 "d" | "i" | "u" => (None, true, true),
129 "f" | "F" => (None, false, false),
130 "s" | "c" => (None, false, false),
131 "e" | "E" => (Some(self.type_), true, false),
132 "x" | "X" | "o" => (Some(self.type_), true, true),
133 "p" => (Some(self.type_), false, true),
134 "g" => (Some("e"), true, false),
135 "G" => (Some("E"), true, false),
c295e0f8
XL
136 _ => {
137 return Err(Some(format!(
138 "the conversion specifier `{}` is unknown or unsupported",
139 self.type_
140 )));
141 }
476ff2be
SL
142 };
143
144 let (fill, width, precision) = match (is_int, width, precision) {
145 (true, Some(_), Some(_)) => {
146 // Rust can't duplicate this insanity.
c295e0f8
XL
147 return Err(Some(
148 "width and precision cannot both be specified for integer conversions"
149 .to_string(),
150 ));
dfeec247 151 }
476ff2be
SL
152 (true, None, Some(p)) => (Some("0"), Some(p), None),
153 (true, w, None) => (fill, w, None),
154 (false, w, p) => (fill, w, p),
155 };
156
157 let align = match (self.type_, width.is_some(), align.is_some()) {
158 ("s", true, false) => Some(">"),
159 _ => align,
160 };
161
162 let (fill, zero_fill) = match (fill, use_zero_fill) {
163 (Some("0"), true) => (None, true),
164 (fill, _) => (fill, false),
165 };
166
167 let alt = match type_ {
ba9703b0 168 Some("x" | "X") => alt,
476ff2be
SL
169 _ => false,
170 };
171
172 let has_options = fill.is_some()
173 || align.is_some()
174 || sign.is_some()
175 || alt
176 || zero_fill
177 || width.is_some()
178 || precision.is_some()
dfeec247 179 || type_.is_some();
476ff2be
SL
180
181 // Initialise with a rough guess.
182 let cap = self.span.len() + if has_options { 2 } else { 0 };
183 let mut s = String::with_capacity(cap);
184
1b1a35ee 185 s.push('{');
476ff2be
SL
186
187 if let Some(arg) = self.parameter {
c295e0f8
XL
188 match write!(
189 s,
190 "{}",
191 match arg.checked_sub(1) {
192 Some(a) => a,
193 None => return Err(None),
194 }
195 ) {
196 Err(_) => return Err(None),
197 _ => {}
198 }
476ff2be
SL
199 }
200
201 if has_options {
1b1a35ee 202 s.push(':');
476ff2be
SL
203
204 let align = if let Some(fill) = fill {
205 s.push_str(fill);
206 align.or(Some(">"))
207 } else {
208 align
209 };
210
211 if let Some(align) = align {
212 s.push_str(align);
213 }
214
215 if let Some(sign) = sign {
216 s.push_str(sign);
217 }
218
219 if alt {
1b1a35ee 220 s.push('#');
476ff2be
SL
221 }
222
223 if zero_fill {
1b1a35ee 224 s.push('0');
476ff2be
SL
225 }
226
227 if let Some(width) = width {
c295e0f8
XL
228 match width.translate(&mut s) {
229 Err(_) => return Err(None),
230 _ => {}
231 }
476ff2be
SL
232 }
233
234 if let Some(precision) = precision {
1b1a35ee 235 s.push('.');
c295e0f8
XL
236 match precision.translate(&mut s) {
237 Err(_) => return Err(None),
238 _ => {}
239 }
476ff2be
SL
240 }
241
242 if let Some(type_) = type_ {
243 s.push_str(type_);
244 }
245 }
246
1b1a35ee 247 s.push('}');
c295e0f8 248 Ok(s)
476ff2be
SL
249 }
250 }
251
/// A number appearing in a `printf` formatting directive (a width or a precision).
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Num {
    // The range of these values is technically bounded by `NL_ARGMAX`, although GNU libc
    // apparently has no real fixed limit. A `u16` is used on the basis that nobody is
    // realistically going to format something wider, or with more precision, than that.
    /// A literal, fixed value.
    Num(u16),
    /// The value is taken from the positional argument with this number.
    Arg(u16),
    /// The value is taken from the "next" unconverted argument.
    Next,
}
267
268 impl Num {
269 fn from_str(s: &str, arg: Option<&str>) -> Self {
270 if let Some(arg) = arg {
8faf50e0 271 Num::Arg(arg.parse().unwrap_or_else(|_| panic!("invalid format arg `{:?}`", arg)))
476ff2be
SL
272 } else if s == "*" {
273 Num::Next
274 } else {
8faf50e0 275 Num::Num(s.parse().unwrap_or_else(|_| panic!("invalid format num `{:?}`", s)))
476ff2be
SL
276 }
277 }
278
9fa01778 279 fn translate(&self, s: &mut String) -> std::fmt::Result {
476ff2be
SL
280 use std::fmt::Write;
281 match *self {
282 Num::Num(n) => write!(s, "{}", n),
283 Num::Arg(n) => {
9fa01778 284 let n = n.checked_sub(1).ok_or(std::fmt::Error)?;
476ff2be 285 write!(s, "{}$", n)
dfeec247 286 }
476ff2be
SL
287 Num::Next => write!(s, "*"),
288 }
289 }
290 }
291
292 /// Returns an iterator over all substitutions in a given string.
dc9dc135 293 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
dfeec247 294 Substitutions { s, pos: start_pos }
476ff2be
SL
295 }
296
/// Iterator over the substitutions in a string.
pub struct Substitutions<'a> {
    /// The part of the input that has not been parsed yet.
    s: &'a str,
    /// Offset added to the positions reported by the parser for `s`.
    pos: usize,
}
302
303 impl<'a> Iterator for Substitutions<'a> {
304 type Item = Substitution<'a>;
305 fn next(&mut self) -> Option<Self::Item> {
8faf50e0 306 let (mut sub, tail) = parse_next_substitution(self.s)?;
ff7c6d11 307 self.s = tail;
a2a8927a
XL
308 if let Some(InnerSpan { start, end }) = sub.position() {
309 sub.set_position(start + self.pos, end + self.pos);
310 self.pos += end;
8faf50e0 311 }
ff7c6d11 312 Some(sub)
476ff2be 313 }
0531ce1d
XL
314
315 fn size_hint(&self) -> (usize, Option<usize>) {
316 // Substitutions are at least 2 characters long.
317 (0, Some(self.s.len() / 2))
318 }
476ff2be
SL
319 }
320
/// The stages `parse_next_substitution` moves through while reading one directive.
enum State {
    /// Just after the `%`: leading digits may be a parameter number or a width.
    Start,
    /// Reading the flag characters.
    Flags,
    /// Reading a width (`*` or digits).
    Width,
    /// After a `*` width: reading an optional `N$` argument reference.
    WidthArg,
    /// Looking for the `.` that introduces a precision.
    Prec,
    /// After the `.`: reading the precision itself.
    PrecInner,
    /// Reading an optional length modifier.
    Length,
    /// Reading the conversion type.
    Type,
}
331
332 /// Parse the next substitution from the input string.
9fa01778 333 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
476ff2be
SL
334 use self::State::*;
335
336 let at = {
ff7c6d11 337 let start = s.find('%')?;
ba9703b0 338 if let '%' = s[start + 1..].chars().next()? {
a2a8927a 339 return Some((Substitution::Escape((start, start + 2)), &s[start + 2..]));
476ff2be
SL
340 }
341
6a06907d 342 Cur::new_at(s, start)
476ff2be
SL
343 };
344
345 // This is meant to be a translation of the following regex:
346 //
347 // ```regex
348 // (?x)
349 // ^ %
350 // (?: (?P<parameter> \d+) \$ )?
351 // (?P<flags> [-+ 0\#']* )
352 // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )?
353 // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )?
354 // (?P<length>
355 // # Standard
356 // hh | h | ll | l | L | z | j | t
357 //
358 // # Other
359 // | I32 | I64 | I | q
360 // )?
361 // (?P<type> . )
362 // ```
363
364 // Used to establish the full span at the end.
365 let start = at;
366 // The current position within the string.
ff7c6d11 367 let mut at = at.at_next_cp()?;
476ff2be 368 // `c` is the next codepoint, `next` is a cursor after it.
ff7c6d11 369 let (mut c, mut next) = at.next_cp()?;
476ff2be
SL
370
371 // Update `at`, `c`, and `next`, exiting if we're out of input.
372 macro_rules! move_to {
dfeec247
XL
373 ($cur:expr) => {{
374 at = $cur;
375 let (c_, next_) = at.next_cp()?;
376 c = c_;
377 next = next_;
378 }};
476ff2be
SL
379 }
380
381 // Constructs a result when parsing fails.
382 //
383 // Note: `move` used to capture copies of the cursors as they are *now*.
384 let fallback = move || {
ba9703b0 385 Some((
476ff2be
SL
386 Substitution::Format(Format {
387 span: start.slice_between(next).unwrap(),
388 parameter: None,
389 flags: "",
390 width: None,
391 precision: None,
392 length: None,
393 type_: at.slice_between(next).unwrap(),
dc9dc135 394 position: InnerSpan::new(start.at, next.at),
476ff2be 395 }),
dfeec247 396 next.slice_after(),
ba9703b0 397 ))
476ff2be
SL
398 };
399
400 // Next parsing state.
401 let mut state = Start;
402
403 // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end.
404 let mut parameter: Option<u16> = None;
405 let mut flags: &str = "";
406 let mut width: Option<Num> = None;
407 let mut precision: Option<Num> = None;
408 let mut length: Option<&str> = None;
409 let mut type_: &str = "";
9fa01778 410 let end: Cur<'_>;
476ff2be
SL
411
412 if let Start = state {
413 match c {
8faf50e0 414 '1'..='9' => {
29967ef6 415 let end = at_next_cp_while(next, char::is_ascii_digit);
476ff2be
SL
416 match end.next_cp() {
417 // Yes, this *is* the parameter.
418 Some(('$', end2)) => {
419 state = Flags;
420 parameter = Some(at.slice_between(end).unwrap().parse().unwrap());
421 move_to!(end2);
dfeec247 422 }
476ff2be
SL
423 // Wait, no, actually, it's the width.
424 Some(_) => {
425 state = Prec;
426 parameter = None;
427 flags = "";
428 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
429 move_to!(end);
dfeec247 430 }
476ff2be
SL
431 // It's invalid, is what it is.
432 None => return fallback(),
433 }
dfeec247 434 }
476ff2be
SL
435 _ => {
436 state = Flags;
437 parameter = None;
438 move_to!(at);
439 }
440 }
441 }
442
443 if let Flags = state {
444 let end = at_next_cp_while(at, is_flag);
445 state = Width;
446 flags = at.slice_between(end).unwrap();
447 move_to!(end);
448 }
449
450 if let Width = state {
451 match c {
452 '*' => {
453 state = WidthArg;
454 move_to!(next);
dfeec247
XL
455 }
456 '1'..='9' => {
29967ef6 457 let end = at_next_cp_while(next, char::is_ascii_digit);
476ff2be
SL
458 state = Prec;
459 width = Some(Num::from_str(at.slice_between(end).unwrap(), None));
460 move_to!(end);
dfeec247 461 }
476ff2be
SL
462 _ => {
463 state = Prec;
464 width = None;
465 move_to!(at);
466 }
467 }
468 }
469
470 if let WidthArg = state {
29967ef6 471 let end = at_next_cp_while(at, char::is_ascii_digit);
476ff2be
SL
472 match end.next_cp() {
473 Some(('$', end2)) => {
474 state = Prec;
475 width = Some(Num::from_str("", Some(at.slice_between(end).unwrap())));
476 move_to!(end2);
dfeec247 477 }
476ff2be
SL
478 _ => {
479 state = Prec;
480 width = Some(Num::Next);
481 move_to!(end);
482 }
483 }
484 }
485
486 if let Prec = state {
487 match c {
488 '.' => {
489 state = PrecInner;
490 move_to!(next);
dfeec247 491 }
476ff2be
SL
492 _ => {
493 state = Length;
494 precision = None;
495 move_to!(at);
496 }
497 }
498 }
499
500 if let PrecInner = state {
501 match c {
502 '*' => {
29967ef6 503 let end = at_next_cp_while(next, char::is_ascii_digit);
476ff2be
SL
504 match end.next_cp() {
505 Some(('$', end2)) => {
506 state = Length;
507 precision = Some(Num::from_str("*", next.slice_between(end)));
508 move_to!(end2);
dfeec247 509 }
476ff2be
SL
510 _ => {
511 state = Length;
512 precision = Some(Num::Next);
513 move_to!(end);
514 }
515 }
dfeec247
XL
516 }
517 '0'..='9' => {
29967ef6 518 let end = at_next_cp_while(next, char::is_ascii_digit);
476ff2be
SL
519 state = Length;
520 precision = Some(Num::from_str(at.slice_between(end).unwrap(), None));
521 move_to!(end);
dfeec247 522 }
476ff2be
SL
523 _ => return fallback(),
524 }
525 }
526
527 if let Length = state {
528 let c1_next1 = next.next_cp();
529 match (c, c1_next1) {
dfeec247 530 ('h', Some(('h', next1))) | ('l', Some(('l', next1))) => {
476ff2be
SL
531 state = Type;
532 length = Some(at.slice_between(next1).unwrap());
533 move_to!(next1);
dfeec247 534 }
476ff2be 535
ba9703b0 536 ('h' | 'l' | 'L' | 'z' | 'j' | 't' | 'q', _) => {
476ff2be
SL
537 state = Type;
538 length = Some(at.slice_between(next).unwrap());
539 move_to!(next);
dfeec247 540 }
476ff2be
SL
541
542 ('I', _) => {
dfeec247
XL
543 let end = next
544 .at_next_cp()
476ff2be
SL
545 .and_then(|end| end.at_next_cp())
546 .map(|end| (next.slice_between(end).unwrap(), end));
547 let end = match end {
1b1a35ee 548 Some(("32" | "64", end)) => end,
dfeec247 549 _ => next,
476ff2be
SL
550 };
551 state = Type;
552 length = Some(at.slice_between(end).unwrap());
553 move_to!(end);
dfeec247 554 }
476ff2be
SL
555
556 _ => {
557 state = Type;
558 length = None;
559 move_to!(at);
560 }
561 }
562 }
563
564 if let Type = state {
565 drop(c);
566 type_ = at.slice_between(next).unwrap();
567
568 // Don't use `move_to!` here, as we *can* be at the end of the input.
569 at = next;
570 }
571
572 drop(c);
573 drop(next);
574
575 end = at;
dc9dc135 576 let position = InnerSpan::new(start.at, end.at);
476ff2be
SL
577
578 let f = Format {
579 span: start.slice_between(end).unwrap(),
3b2f2976
XL
580 parameter,
581 flags,
582 width,
583 precision,
584 length,
585 type_,
8faf50e0 586 position,
476ff2be
SL
587 };
588 Some((Substitution::Format(f), end.slice_after()))
589 }
590
9fa01778 591 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
dfeec247 592 where
29967ef6 593 F: FnMut(&char) -> bool,
dfeec247 594 {
476ff2be
SL
595 loop {
596 match cur.next_cp() {
dfeec247 597 Some((c, next)) => {
29967ef6 598 if pred(&c) {
dfeec247
XL
599 cur = next;
600 } else {
601 return cur;
602 }
603 }
476ff2be
SL
604 None => return cur,
605 }
606 }
607 }
608
/// Returns `true` if `c` is one of the flag characters the parser recognises:
/// `0`, `-`, `+`, space, `#` or `'`.
fn is_flag(c: &char) -> bool {
    match *c {
        '0' | '-' | '+' | ' ' | '#' | '\'' => true,
        _ => false,
    }
}
612
613 #[cfg(test)]
dc9dc135 614 mod tests;
476ff2be
SL
615}
616
617pub mod shell {
618 use super::strcursor::StrCursor as Cur;
dfeec247 619 use rustc_span::InnerSpan;
476ff2be 620
/// One substitution found in a shell-style format string.
///
/// Every variant stores its `(start, end)` position as the last field.
#[derive(Clone, PartialEq, Debug)]
pub enum Substitution<'a> {
    /// A single-digit positional substitution such as `$1`.
    Ordinal(u8, (usize, usize)),
    /// A named substitution such as `$name`.
    Name(&'a str, (usize, usize)),
    /// The literal `$$` escape.
    Escape((usize, usize)),
}
627
9fa01778 628 impl Substitution<'_> {
476ff2be 629 pub fn as_str(&self) -> String {
b7449926
XL
630 match self {
631 Substitution::Ordinal(n, _) => format!("${}", n),
632 Substitution::Name(n, _) => format!("${}", n),
633 Substitution::Escape(_) => "$$".into(),
476ff2be
SL
634 }
635 }
636
dc9dc135 637 pub fn position(&self) -> Option<InnerSpan> {
b7449926 638 match self {
dfeec247
XL
639 Substitution::Ordinal(_, pos)
640 | Substitution::Name(_, pos)
641 | Substitution::Escape(pos) => Some(InnerSpan::new(pos.0, pos.1)),
b7449926
XL
642 }
643 }
644
645 pub fn set_position(&mut self, start: usize, end: usize) {
646 match self {
dfeec247
XL
647 Substitution::Ordinal(_, ref mut pos)
648 | Substitution::Name(_, ref mut pos)
649 | Substitution::Escape(ref mut pos) => *pos = (start, end),
8faf50e0
XL
650 }
651 }
652
c295e0f8 653 pub fn translate(&self) -> Result<String, Option<String>> {
476ff2be 654 match *self {
c295e0f8
XL
655 Substitution::Ordinal(n, _) => Ok(format!("{{{}}}", n)),
656 Substitution::Name(n, _) => Ok(format!("{{{}}}", n)),
657 Substitution::Escape(_) => Err(None),
476ff2be
SL
658 }
659 }
660 }
661
662 /// Returns an iterator over all substitutions in a given string.
dc9dc135 663 pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> {
dfeec247 664 Substitutions { s, pos: start_pos }
476ff2be
SL
665 }
666
/// Iterator over the substitutions in a string.
pub struct Substitutions<'a> {
    /// The part of the input that has not been parsed yet.
    s: &'a str,
    /// Offset added to the positions reported by the parser for `s`.
    pos: usize,
}
672
673 impl<'a> Iterator for Substitutions<'a> {
674 type Item = Substitution<'a>;
675 fn next(&mut self) -> Option<Self::Item> {
fc512014
XL
676 let (mut sub, tail) = parse_next_substitution(self.s)?;
677 self.s = tail;
678 if let Some(InnerSpan { start, end }) = sub.position() {
679 sub.set_position(start + self.pos, end + self.pos);
680 self.pos += end;
476ff2be 681 }
fc512014 682 Some(sub)
476ff2be 683 }
0531ce1d
XL
684
685 fn size_hint(&self) -> (usize, Option<usize>) {
686 (0, Some(self.s.len()))
687 }
476ff2be
SL
688 }
689
690 /// Parse the next substitution from the input string.
9fa01778 691 pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> {
476ff2be 692 let at = {
ff7c6d11 693 let start = s.find('$')?;
dfeec247
XL
694 match s[start + 1..].chars().next()? {
695 '$' => return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])),
696 c @ '0'..='9' => {
476ff2be 697 let n = (c as u8) - b'0';
dfeec247
XL
698 return Some((Substitution::Ordinal(n, (start, start + 2)), &s[start + 2..]));
699 }
700 _ => { /* fall-through */ }
476ff2be
SL
701 }
702
6a06907d 703 Cur::new_at(s, start)
476ff2be
SL
704 };
705
ff7c6d11
XL
706 let at = at.at_next_cp()?;
707 let (c, inner) = at.next_cp()?;
708
709 if !is_ident_head(c) {
710 None
711 } else {
712 let end = at_next_cp_while(inner, is_ident_tail);
b7449926
XL
713 let slice = at.slice_between(end).unwrap();
714 let start = at.at - 1;
715 let end_pos = at.at + slice.len();
716 Some((Substitution::Name(slice, (start, end_pos)), end.slice_after()))
476ff2be
SL
717 }
718 }
719
9fa01778 720 fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_>
dfeec247
XL
721 where
722 F: FnMut(char) -> bool,
723 {
476ff2be
SL
724 loop {
725 match cur.next_cp() {
dfeec247
XL
726 Some((c, next)) => {
727 if pred(c) {
728 cur = next;
729 } else {
730 return cur;
731 }
732 }
476ff2be
SL
733 None => return cur,
734 }
735 }
736 }
737
/// Returns `true` if `c` may start a name: an ASCII letter or `_`.
fn is_ident_head(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
741
/// Returns `true` if `c` may continue a name: an ASCII letter, an ASCII digit or `_`.
fn is_ident_tail(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
745
746 #[cfg(test)]
dc9dc135 747 mod tests;
476ff2be
SL
748}
749
mod strcursor {
    /// A copyable position inside a string slice.
    #[derive(Clone, Copy)]
    pub struct StrCursor<'a> {
        /// The string the cursor points into.
        s: &'a str,
        /// Byte offset of the cursor within `s`.
        pub at: usize,
    }

    impl<'a> StrCursor<'a> {
        /// Creates a cursor into `s` at byte offset `at`.
        pub fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
            StrCursor { s, at }
        }

        /// Returns the cursor moved past the next codepoint, or `None` at the end of the
        /// string.
        pub fn at_next_cp(self) -> Option<StrCursor<'a>> {
            self.next_cp().map(|(_, after)| after)
        }

        /// Returns the codepoint following the cursor together with a cursor positioned after
        /// it, or `None` at the end of the string.
        pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
            let cp = self.cp_after()?;
            self.at += cp.len_utf8();
            Some((cp, self))
        }

        /// Everything in the string before the cursor.
        fn slice_before(&self) -> &'a str {
            &self.s[..self.at]
        }

        /// Everything in the string from the cursor onwards.
        pub fn slice_after(&self) -> &'a str {
            &self.s[self.at..]
        }

        /// Returns the text between this cursor and `until`, whichever order they are in.
        ///
        /// Yields `None` when the two cursors do not refer to the very same string slice.
        pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
            if !str_eq_literal(self.s, until.s) {
                return None;
            }
            let (lo, hi) =
                if self.at <= until.at { (self.at, until.at) } else { (until.at, self.at) };
            Some(&self.s[lo..hi])
        }

        /// The codepoint immediately after the cursor, if any.
        fn cp_after(&self) -> Option<char> {
            self.slice_after().chars().next()
        }
    }

    impl std::fmt::Debug for StrCursor<'_> {
        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let (before, after) = (self.slice_before(), self.slice_after());
            write!(fmt, "StrCursor({:?} | {:?})", before, after)
        }
    }

    /// Returns `true` when `a` and `b` are the same slice: same start address and same length.
    fn str_eq_literal(a: &str, b: &str) -> bool {
        a.len() == b.len() && a.as_bytes().as_ptr() == b.as_bytes().as_ptr()
    }
}