]>
Commit | Line | Data |
---|---|---|
c295e0f8 | 1 | pub(crate) mod printf { |
476ff2be | 2 | use super::strcursor::StrCursor as Cur; |
dfeec247 | 3 | use rustc_span::InnerSpan; |
476ff2be SL |
4 | |
5 | /// Represents a single `printf`-style substitution. | |
8faf50e0 | 6 | #[derive(Clone, PartialEq, Debug)] |
476ff2be | 7 | pub enum Substitution<'a> { |
8faf50e0 | 8 | /// A formatted output substitution with its internal byte offset. |
476ff2be | 9 | Format(Format<'a>), |
a2a8927a XL |
10 | /// A literal `%%` escape, with its start and end indices. |
11 | Escape((usize, usize)), | |
476ff2be SL |
12 | } |
13 | ||
14 | impl<'a> Substitution<'a> { | |
15 | pub fn as_str(&self) -> &str { | |
16 | match *self { | |
17 | Substitution::Format(ref fmt) => fmt.span, | |
a2a8927a | 18 | Substitution::Escape(_) => "%%", |
476ff2be SL |
19 | } |
20 | } | |
21 | ||
dc9dc135 | 22 | pub fn position(&self) -> Option<InnerSpan> { |
8faf50e0 XL |
23 | match *self { |
24 | Substitution::Format(ref fmt) => Some(fmt.position), | |
a2a8927a | 25 | Substitution::Escape((start, end)) => Some(InnerSpan::new(start, end)), |
8faf50e0 XL |
26 | } |
27 | } | |
28 | ||
29 | pub fn set_position(&mut self, start: usize, end: usize) { | |
a2a8927a XL |
30 | match self { |
31 | Substitution::Format(ref mut fmt) => fmt.position = InnerSpan::new(start, end), | |
32 | Substitution::Escape(ref mut pos) => *pos = (start, end), | |
8faf50e0 XL |
33 | } |
34 | } | |
35 | ||
476ff2be SL |
36 | /// Translate this substitution into an equivalent Rust formatting directive. |
37 | /// | |
38 | /// This ignores cases where the substitution does not have an exact equivalent, or where | |
39 | /// the substitution would be unnecessary. | |
c295e0f8 | 40 | pub fn translate(&self) -> Result<String, Option<String>> { |
476ff2be SL |
41 | match *self { |
42 | Substitution::Format(ref fmt) => fmt.translate(), | |
a2a8927a | 43 | Substitution::Escape(_) => Err(None), |
476ff2be SL |
44 | } |
45 | } | |
46 | } | |
47 | ||
8faf50e0 | 48 | #[derive(Clone, PartialEq, Debug)] |
476ff2be SL |
49 | /// A single `printf`-style formatting directive. |
50 | pub struct Format<'a> { | |
51 | /// The entire original formatting directive. | |
52 | pub span: &'a str, | |
53 | /// The (1-based) parameter to be converted. | |
54 | pub parameter: Option<u16>, | |
55 | /// Formatting flags. | |
56 | pub flags: &'a str, | |
57 | /// Minimum width of the output. | |
58 | pub width: Option<Num>, | |
59 | /// Precision of the conversion. | |
60 | pub precision: Option<Num>, | |
61 | /// Length modifier for the conversion. | |
62 | pub length: Option<&'a str>, | |
63 | /// Type of parameter being converted. | |
64 | pub type_: &'a str, | |
8faf50e0 | 65 | /// Byte offset for the start and end of this formatting directive. |
dc9dc135 | 66 | pub position: InnerSpan, |
476ff2be SL |
67 | } |
68 | ||
9fa01778 | 69 | impl Format<'_> { |
476ff2be SL |
70 | /// Translate this directive into an equivalent Rust formatting directive. |
71 | /// | |
c295e0f8 | 72 | /// Returns `Err` in cases where the `printf` directive does not have an exact Rust |
476ff2be | 73 | /// equivalent, rather than guessing. |
c295e0f8 | 74 | pub fn translate(&self) -> Result<String, Option<String>> { |
476ff2be SL |
75 | use std::fmt::Write; |
76 | ||
77 | let (c_alt, c_zero, c_left, c_plus) = { | |
78 | let mut c_alt = false; | |
79 | let mut c_zero = false; | |
80 | let mut c_left = false; | |
81 | let mut c_plus = false; | |
82 | for c in self.flags.chars() { | |
83 | match c { | |
84 | '#' => c_alt = true, | |
85 | '0' => c_zero = true, | |
86 | '-' => c_left = true, | |
87 | '+' => c_plus = true, | |
c295e0f8 XL |
88 | _ => { |
89 | return Err(Some(format!( | |
90 | "the flag `{}` is unknown or unsupported", | |
91 | c | |
92 | ))); | |
93 | } | |
476ff2be SL |
94 | } |
95 | } | |
96 | (c_alt, c_zero, c_left, c_plus) | |
97 | }; | |
98 | ||
99 | // Has a special form in Rust for numbers. | |
60c5eb7d | 100 | let fill = c_zero.then_some("0"); |
476ff2be | 101 | |
60c5eb7d | 102 | let align = c_left.then_some("<"); |
476ff2be SL |
103 | |
104 | // Rust doesn't have an equivalent to the `' '` flag. | |
60c5eb7d | 105 | let sign = c_plus.then_some("+"); |
476ff2be SL |
106 | |
107 | // Not *quite* the same, depending on the type... | |
108 | let alt = c_alt; | |
109 | ||
110 | let width = match self.width { | |
111 | Some(Num::Next) => { | |
112 | // NOTE: Rust doesn't support this. | |
c295e0f8 XL |
113 | return Err(Some( |
114 | "you have to use a positional or named parameter for the width".to_string(), | |
115 | )); | |
476ff2be SL |
116 | } |
117 | w @ Some(Num::Arg(_)) => w, | |
118 | w @ Some(Num::Num(_)) => w, | |
119 | None => None, | |
120 | }; | |
121 | ||
122 | let precision = self.precision; | |
123 | ||
124 | // NOTE: although length *can* have an effect, we can't duplicate the effect in Rust, so | |
125 | // we just ignore it. | |
126 | ||
127 | let (type_, use_zero_fill, is_int) = match self.type_ { | |
128 | "d" | "i" | "u" => (None, true, true), | |
129 | "f" | "F" => (None, false, false), | |
130 | "s" | "c" => (None, false, false), | |
131 | "e" | "E" => (Some(self.type_), true, false), | |
132 | "x" | "X" | "o" => (Some(self.type_), true, true), | |
133 | "p" => (Some(self.type_), false, true), | |
134 | "g" => (Some("e"), true, false), | |
135 | "G" => (Some("E"), true, false), | |
c295e0f8 XL |
136 | _ => { |
137 | return Err(Some(format!( | |
138 | "the conversion specifier `{}` is unknown or unsupported", | |
139 | self.type_ | |
140 | ))); | |
141 | } | |
476ff2be SL |
142 | }; |
143 | ||
144 | let (fill, width, precision) = match (is_int, width, precision) { | |
145 | (true, Some(_), Some(_)) => { | |
146 | // Rust can't duplicate this insanity. | |
c295e0f8 XL |
147 | return Err(Some( |
148 | "width and precision cannot both be specified for integer conversions" | |
149 | .to_string(), | |
150 | )); | |
dfeec247 | 151 | } |
476ff2be SL |
152 | (true, None, Some(p)) => (Some("0"), Some(p), None), |
153 | (true, w, None) => (fill, w, None), | |
154 | (false, w, p) => (fill, w, p), | |
155 | }; | |
156 | ||
157 | let align = match (self.type_, width.is_some(), align.is_some()) { | |
158 | ("s", true, false) => Some(">"), | |
159 | _ => align, | |
160 | }; | |
161 | ||
162 | let (fill, zero_fill) = match (fill, use_zero_fill) { | |
163 | (Some("0"), true) => (None, true), | |
164 | (fill, _) => (fill, false), | |
165 | }; | |
166 | ||
167 | let alt = match type_ { | |
ba9703b0 | 168 | Some("x" | "X") => alt, |
476ff2be SL |
169 | _ => false, |
170 | }; | |
171 | ||
172 | let has_options = fill.is_some() | |
173 | || align.is_some() | |
174 | || sign.is_some() | |
175 | || alt | |
176 | || zero_fill | |
177 | || width.is_some() | |
178 | || precision.is_some() | |
dfeec247 | 179 | || type_.is_some(); |
476ff2be SL |
180 | |
181 | // Initialise with a rough guess. | |
182 | let cap = self.span.len() + if has_options { 2 } else { 0 }; | |
183 | let mut s = String::with_capacity(cap); | |
184 | ||
1b1a35ee | 185 | s.push('{'); |
476ff2be SL |
186 | |
187 | if let Some(arg) = self.parameter { | |
c295e0f8 XL |
188 | match write!( |
189 | s, | |
190 | "{}", | |
191 | match arg.checked_sub(1) { | |
192 | Some(a) => a, | |
193 | None => return Err(None), | |
194 | } | |
195 | ) { | |
196 | Err(_) => return Err(None), | |
197 | _ => {} | |
198 | } | |
476ff2be SL |
199 | } |
200 | ||
201 | if has_options { | |
1b1a35ee | 202 | s.push(':'); |
476ff2be SL |
203 | |
204 | let align = if let Some(fill) = fill { | |
205 | s.push_str(fill); | |
206 | align.or(Some(">")) | |
207 | } else { | |
208 | align | |
209 | }; | |
210 | ||
211 | if let Some(align) = align { | |
212 | s.push_str(align); | |
213 | } | |
214 | ||
215 | if let Some(sign) = sign { | |
216 | s.push_str(sign); | |
217 | } | |
218 | ||
219 | if alt { | |
1b1a35ee | 220 | s.push('#'); |
476ff2be SL |
221 | } |
222 | ||
223 | if zero_fill { | |
1b1a35ee | 224 | s.push('0'); |
476ff2be SL |
225 | } |
226 | ||
227 | if let Some(width) = width { | |
c295e0f8 XL |
228 | match width.translate(&mut s) { |
229 | Err(_) => return Err(None), | |
230 | _ => {} | |
231 | } | |
476ff2be SL |
232 | } |
233 | ||
234 | if let Some(precision) = precision { | |
1b1a35ee | 235 | s.push('.'); |
c295e0f8 XL |
236 | match precision.translate(&mut s) { |
237 | Err(_) => return Err(None), | |
238 | _ => {} | |
239 | } | |
476ff2be SL |
240 | } |
241 | ||
242 | if let Some(type_) = type_ { | |
243 | s.push_str(type_); | |
244 | } | |
245 | } | |
246 | ||
1b1a35ee | 247 | s.push('}'); |
c295e0f8 | 248 | Ok(s) |
476ff2be SL |
249 | } |
250 | } | |
251 | ||
/// A general number used in a `printf` formatting directive.
#[derive(Copy, Clone, PartialEq, Debug)]
pub enum Num {
    // The range of these values is technically bounded by `NL_ARGMAX`... but, at least for GNU
    // libc, it apparently has no real fixed limit. A `u16` is used here on the basis that it
    // is *vanishingly* unlikely that *anyone* is going to try formatting something wider, or
    // with more precision, than a `u16` can express; that would be so wide it couldn't
    // possibly fit on a screen.
    /// A specific, fixed value.
    Num(u16),
    /// The value is derived from a positional argument.
    Arg(u16),
    /// The value is derived from the "next" unconverted argument.
    Next,
}

impl Num {
    /// Builds a `Num` from directive text.
    ///
    /// When `arg` is given it is parsed as a positional argument index and `s` is ignored;
    /// otherwise `s` is either `*` (the "next" argument) or a literal number.
    ///
    /// # Panics
    ///
    /// Panics if the text to be parsed is not a valid `u16`.
    fn from_str(s: &str, arg: Option<&str>) -> Self {
        match arg {
            Some(arg) => match arg.parse() {
                Ok(index) => Num::Arg(index),
                Err(_) => panic!("invalid format arg `{:?}`", arg),
            },
            None if s == "*" => Num::Next,
            None => match s.parse() {
                Ok(value) => Num::Num(value),
                Err(_) => panic!("invalid format num `{:?}`", s),
            },
        }
    }

    /// Appends the Rust spelling of this number to `s`.
    ///
    /// Positional arguments are converted from 1-based to 0-based; an index of zero is
    /// reported as an error and nothing is written.
    fn translate(&self, s: &mut String) -> std::fmt::Result {
        use std::fmt::Write;
        match *self {
            Num::Num(value) => write!(s, "{}", value),
            Num::Arg(index) => match index.checked_sub(1) {
                Some(zero_based) => write!(s, "{}$", zero_based),
                None => Err(std::fmt::Error),
            },
            Num::Next => write!(s, "*"),
        }
    }
}
291 | ||
292 | /// Returns an iterator over all substitutions in a given string. | |
dc9dc135 | 293 | pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> { |
dfeec247 | 294 | Substitutions { s, pos: start_pos } |
476ff2be SL |
295 | } |
296 | ||
/// Iterator over the substitutions in a string.
pub struct Substitutions<'a> {
    /// The part of the input that has not been parsed yet.
    s: &'a str,
    /// Offset added to the positions of the substitutions found in `s`.
    pos: usize,
}
302 | ||
303 | impl<'a> Iterator for Substitutions<'a> { | |
304 | type Item = Substitution<'a>; | |
305 | fn next(&mut self) -> Option<Self::Item> { | |
8faf50e0 | 306 | let (mut sub, tail) = parse_next_substitution(self.s)?; |
ff7c6d11 | 307 | self.s = tail; |
a2a8927a XL |
308 | if let Some(InnerSpan { start, end }) = sub.position() { |
309 | sub.set_position(start + self.pos, end + self.pos); | |
310 | self.pos += end; | |
8faf50e0 | 311 | } |
ff7c6d11 | 312 | Some(sub) |
476ff2be | 313 | } |
0531ce1d XL |
314 | |
315 | fn size_hint(&self) -> (usize, Option<usize>) { | |
316 | // Substitutions are at least 2 characters long. | |
317 | (0, Some(self.s.len() / 2)) | |
318 | } | |
476ff2be SL |
319 | } |
320 | ||
/// The stages `parse_next_substitution` moves through while reading one directive.
enum State {
    /// Just after the `%`: a leading number may be the parameter or the width.
    Start,
    /// Reading the flag characters.
    Flags,
    /// Reading the width: `*`, a number, or nothing.
    Width,
    /// After a `*` width: an optional `N$` argument reference.
    WidthArg,
    /// Looking for the `.` that introduces a precision.
    Prec,
    /// After the `.`: `*`, `*N$` or a number.
    PrecInner,
    /// Reading the optional length modifier.
    Length,
    /// Reading the conversion specifier.
    Type,
}
331 | ||
332 | /// Parse the next substitution from the input string. | |
9fa01778 | 333 | pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> { |
476ff2be SL |
334 | use self::State::*; |
335 | ||
336 | let at = { | |
ff7c6d11 | 337 | let start = s.find('%')?; |
ba9703b0 | 338 | if let '%' = s[start + 1..].chars().next()? { |
a2a8927a | 339 | return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])); |
476ff2be SL |
340 | } |
341 | ||
6a06907d | 342 | Cur::new_at(s, start) |
476ff2be SL |
343 | }; |
344 | ||
345 | // This is meant to be a translation of the following regex: | |
346 | // | |
347 | // ```regex | |
348 | // (?x) | |
349 | // ^ % | |
350 | // (?: (?P<parameter> \d+) \$ )? | |
351 | // (?P<flags> [-+ 0\#']* ) | |
352 | // (?P<width> \d+ | \* (?: (?P<widtha> \d+) \$ )? )? | |
353 | // (?: \. (?P<precision> \d+ | \* (?: (?P<precisiona> \d+) \$ )? ) )? | |
354 | // (?P<length> | |
355 | // # Standard | |
356 | // hh | h | ll | l | L | z | j | t | |
357 | // | |
358 | // # Other | |
359 | // | I32 | I64 | I | q | |
360 | // )? | |
361 | // (?P<type> . ) | |
362 | // ``` | |
363 | ||
364 | // Used to establish the full span at the end. | |
365 | let start = at; | |
366 | // The current position within the string. | |
ff7c6d11 | 367 | let mut at = at.at_next_cp()?; |
476ff2be | 368 | // `c` is the next codepoint, `next` is a cursor after it. |
ff7c6d11 | 369 | let (mut c, mut next) = at.next_cp()?; |
476ff2be SL |
370 | |
371 | // Update `at`, `c`, and `next`, exiting if we're out of input. | |
372 | macro_rules! move_to { | |
dfeec247 XL |
373 | ($cur:expr) => {{ |
374 | at = $cur; | |
375 | let (c_, next_) = at.next_cp()?; | |
376 | c = c_; | |
377 | next = next_; | |
378 | }}; | |
476ff2be SL |
379 | } |
380 | ||
381 | // Constructs a result when parsing fails. | |
382 | // | |
383 | // Note: `move` used to capture copies of the cursors as they are *now*. | |
384 | let fallback = move || { | |
ba9703b0 | 385 | Some(( |
476ff2be SL |
386 | Substitution::Format(Format { |
387 | span: start.slice_between(next).unwrap(), | |
388 | parameter: None, | |
389 | flags: "", | |
390 | width: None, | |
391 | precision: None, | |
392 | length: None, | |
393 | type_: at.slice_between(next).unwrap(), | |
dc9dc135 | 394 | position: InnerSpan::new(start.at, next.at), |
476ff2be | 395 | }), |
dfeec247 | 396 | next.slice_after(), |
ba9703b0 | 397 | )) |
476ff2be SL |
398 | }; |
399 | ||
400 | // Next parsing state. | |
401 | let mut state = Start; | |
402 | ||
403 | // Sadly, Rust isn't *quite* smart enough to know these *must* be initialised by the end. | |
404 | let mut parameter: Option<u16> = None; | |
405 | let mut flags: &str = ""; | |
406 | let mut width: Option<Num> = None; | |
407 | let mut precision: Option<Num> = None; | |
408 | let mut length: Option<&str> = None; | |
409 | let mut type_: &str = ""; | |
9fa01778 | 410 | let end: Cur<'_>; |
476ff2be SL |
411 | |
412 | if let Start = state { | |
413 | match c { | |
8faf50e0 | 414 | '1'..='9' => { |
29967ef6 | 415 | let end = at_next_cp_while(next, char::is_ascii_digit); |
476ff2be SL |
416 | match end.next_cp() { |
417 | // Yes, this *is* the parameter. | |
418 | Some(('$', end2)) => { | |
419 | state = Flags; | |
420 | parameter = Some(at.slice_between(end).unwrap().parse().unwrap()); | |
421 | move_to!(end2); | |
dfeec247 | 422 | } |
476ff2be SL |
423 | // Wait, no, actually, it's the width. |
424 | Some(_) => { | |
425 | state = Prec; | |
426 | parameter = None; | |
427 | flags = ""; | |
428 | width = Some(Num::from_str(at.slice_between(end).unwrap(), None)); | |
429 | move_to!(end); | |
dfeec247 | 430 | } |
476ff2be SL |
431 | // It's invalid, is what it is. |
432 | None => return fallback(), | |
433 | } | |
dfeec247 | 434 | } |
476ff2be SL |
435 | _ => { |
436 | state = Flags; | |
437 | parameter = None; | |
438 | move_to!(at); | |
439 | } | |
440 | } | |
441 | } | |
442 | ||
443 | if let Flags = state { | |
444 | let end = at_next_cp_while(at, is_flag); | |
445 | state = Width; | |
446 | flags = at.slice_between(end).unwrap(); | |
447 | move_to!(end); | |
448 | } | |
449 | ||
450 | if let Width = state { | |
451 | match c { | |
452 | '*' => { | |
453 | state = WidthArg; | |
454 | move_to!(next); | |
dfeec247 XL |
455 | } |
456 | '1'..='9' => { | |
29967ef6 | 457 | let end = at_next_cp_while(next, char::is_ascii_digit); |
476ff2be SL |
458 | state = Prec; |
459 | width = Some(Num::from_str(at.slice_between(end).unwrap(), None)); | |
460 | move_to!(end); | |
dfeec247 | 461 | } |
476ff2be SL |
462 | _ => { |
463 | state = Prec; | |
464 | width = None; | |
465 | move_to!(at); | |
466 | } | |
467 | } | |
468 | } | |
469 | ||
470 | if let WidthArg = state { | |
29967ef6 | 471 | let end = at_next_cp_while(at, char::is_ascii_digit); |
476ff2be SL |
472 | match end.next_cp() { |
473 | Some(('$', end2)) => { | |
474 | state = Prec; | |
475 | width = Some(Num::from_str("", Some(at.slice_between(end).unwrap()))); | |
476 | move_to!(end2); | |
dfeec247 | 477 | } |
476ff2be SL |
478 | _ => { |
479 | state = Prec; | |
480 | width = Some(Num::Next); | |
481 | move_to!(end); | |
482 | } | |
483 | } | |
484 | } | |
485 | ||
486 | if let Prec = state { | |
487 | match c { | |
488 | '.' => { | |
489 | state = PrecInner; | |
490 | move_to!(next); | |
dfeec247 | 491 | } |
476ff2be SL |
492 | _ => { |
493 | state = Length; | |
494 | precision = None; | |
495 | move_to!(at); | |
496 | } | |
497 | } | |
498 | } | |
499 | ||
500 | if let PrecInner = state { | |
501 | match c { | |
502 | '*' => { | |
29967ef6 | 503 | let end = at_next_cp_while(next, char::is_ascii_digit); |
476ff2be SL |
504 | match end.next_cp() { |
505 | Some(('$', end2)) => { | |
506 | state = Length; | |
507 | precision = Some(Num::from_str("*", next.slice_between(end))); | |
508 | move_to!(end2); | |
dfeec247 | 509 | } |
476ff2be SL |
510 | _ => { |
511 | state = Length; | |
512 | precision = Some(Num::Next); | |
513 | move_to!(end); | |
514 | } | |
515 | } | |
dfeec247 XL |
516 | } |
517 | '0'..='9' => { | |
29967ef6 | 518 | let end = at_next_cp_while(next, char::is_ascii_digit); |
476ff2be SL |
519 | state = Length; |
520 | precision = Some(Num::from_str(at.slice_between(end).unwrap(), None)); | |
521 | move_to!(end); | |
dfeec247 | 522 | } |
476ff2be SL |
523 | _ => return fallback(), |
524 | } | |
525 | } | |
526 | ||
527 | if let Length = state { | |
528 | let c1_next1 = next.next_cp(); | |
529 | match (c, c1_next1) { | |
dfeec247 | 530 | ('h', Some(('h', next1))) | ('l', Some(('l', next1))) => { |
476ff2be SL |
531 | state = Type; |
532 | length = Some(at.slice_between(next1).unwrap()); | |
533 | move_to!(next1); | |
dfeec247 | 534 | } |
476ff2be | 535 | |
ba9703b0 | 536 | ('h' | 'l' | 'L' | 'z' | 'j' | 't' | 'q', _) => { |
476ff2be SL |
537 | state = Type; |
538 | length = Some(at.slice_between(next).unwrap()); | |
539 | move_to!(next); | |
dfeec247 | 540 | } |
476ff2be SL |
541 | |
542 | ('I', _) => { | |
dfeec247 XL |
543 | let end = next |
544 | .at_next_cp() | |
476ff2be SL |
545 | .and_then(|end| end.at_next_cp()) |
546 | .map(|end| (next.slice_between(end).unwrap(), end)); | |
547 | let end = match end { | |
1b1a35ee | 548 | Some(("32" | "64", end)) => end, |
dfeec247 | 549 | _ => next, |
476ff2be SL |
550 | }; |
551 | state = Type; | |
552 | length = Some(at.slice_between(end).unwrap()); | |
553 | move_to!(end); | |
dfeec247 | 554 | } |
476ff2be SL |
555 | |
556 | _ => { | |
557 | state = Type; | |
558 | length = None; | |
559 | move_to!(at); | |
560 | } | |
561 | } | |
562 | } | |
563 | ||
564 | if let Type = state { | |
565 | drop(c); | |
566 | type_ = at.slice_between(next).unwrap(); | |
567 | ||
568 | // Don't use `move_to!` here, as we *can* be at the end of the input. | |
569 | at = next; | |
570 | } | |
571 | ||
572 | drop(c); | |
573 | drop(next); | |
574 | ||
575 | end = at; | |
dc9dc135 | 576 | let position = InnerSpan::new(start.at, end.at); |
476ff2be SL |
577 | |
578 | let f = Format { | |
579 | span: start.slice_between(end).unwrap(), | |
3b2f2976 XL |
580 | parameter, |
581 | flags, | |
582 | width, | |
583 | precision, | |
584 | length, | |
585 | type_, | |
8faf50e0 | 586 | position, |
476ff2be SL |
587 | }; |
588 | Some((Substitution::Format(f), end.slice_after())) | |
589 | } | |
590 | ||
9fa01778 | 591 | fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_> |
dfeec247 | 592 | where |
29967ef6 | 593 | F: FnMut(&char) -> bool, |
dfeec247 | 594 | { |
476ff2be SL |
595 | loop { |
596 | match cur.next_cp() { | |
dfeec247 | 597 | Some((c, next)) => { |
29967ef6 | 598 | if pred(&c) { |
dfeec247 XL |
599 | cur = next; |
600 | } else { | |
601 | return cur; | |
602 | } | |
603 | } | |
476ff2be SL |
604 | None => return cur, |
605 | } | |
606 | } | |
607 | } | |
608 | ||
/// Returns whether `c` is one of the `printf` flag characters the parser accepts:
/// `0`, `-`, `+`, space, `#` or `'`.
fn is_flag(c: &char) -> bool {
    "0-+ #'".contains(*c)
}
612 | ||
613 | #[cfg(test)] | |
dc9dc135 | 614 | mod tests; |
476ff2be SL |
615 | } |
616 | ||
617 | pub mod shell { | |
618 | use super::strcursor::StrCursor as Cur; | |
dfeec247 | 619 | use rustc_span::InnerSpan; |
476ff2be | 620 | |
/// One substitution found in a shell-style format string.
///
/// Every variant carries the `(start, end)` indices of the substitution.
#[derive(Clone, PartialEq, Debug)]
pub enum Substitution<'a> {
    /// A `$` followed by a single decimal digit.
    Ordinal(u8, (usize, usize)),
    /// A `$` followed by an identifier.
    Name(&'a str, (usize, usize)),
    /// The literal `$$` escape.
    Escape((usize, usize)),
}
627 | ||
9fa01778 | 628 | impl Substitution<'_> { |
476ff2be | 629 | pub fn as_str(&self) -> String { |
b7449926 XL |
630 | match self { |
631 | Substitution::Ordinal(n, _) => format!("${}", n), | |
632 | Substitution::Name(n, _) => format!("${}", n), | |
633 | Substitution::Escape(_) => "$$".into(), | |
476ff2be SL |
634 | } |
635 | } | |
636 | ||
dc9dc135 | 637 | pub fn position(&self) -> Option<InnerSpan> { |
b7449926 | 638 | match self { |
dfeec247 XL |
639 | Substitution::Ordinal(_, pos) |
640 | | Substitution::Name(_, pos) | |
641 | | Substitution::Escape(pos) => Some(InnerSpan::new(pos.0, pos.1)), | |
b7449926 XL |
642 | } |
643 | } | |
644 | ||
645 | pub fn set_position(&mut self, start: usize, end: usize) { | |
646 | match self { | |
dfeec247 XL |
647 | Substitution::Ordinal(_, ref mut pos) |
648 | | Substitution::Name(_, ref mut pos) | |
649 | | Substitution::Escape(ref mut pos) => *pos = (start, end), | |
8faf50e0 XL |
650 | } |
651 | } | |
652 | ||
c295e0f8 | 653 | pub fn translate(&self) -> Result<String, Option<String>> { |
476ff2be | 654 | match *self { |
c295e0f8 XL |
655 | Substitution::Ordinal(n, _) => Ok(format!("{{{}}}", n)), |
656 | Substitution::Name(n, _) => Ok(format!("{{{}}}", n)), | |
657 | Substitution::Escape(_) => Err(None), | |
476ff2be SL |
658 | } |
659 | } | |
660 | } | |
661 | ||
662 | /// Returns an iterator over all substitutions in a given string. | |
dc9dc135 | 663 | pub fn iter_subs(s: &str, start_pos: usize) -> Substitutions<'_> { |
dfeec247 | 664 | Substitutions { s, pos: start_pos } |
476ff2be SL |
665 | } |
666 | ||
/// Iterator over the substitutions in a string.
pub struct Substitutions<'a> {
    /// The part of the input that has not been parsed yet.
    s: &'a str,
    /// Offset added to the positions of the substitutions found in `s`.
    pos: usize,
}
672 | ||
673 | impl<'a> Iterator for Substitutions<'a> { | |
674 | type Item = Substitution<'a>; | |
675 | fn next(&mut self) -> Option<Self::Item> { | |
fc512014 XL |
676 | let (mut sub, tail) = parse_next_substitution(self.s)?; |
677 | self.s = tail; | |
678 | if let Some(InnerSpan { start, end }) = sub.position() { | |
679 | sub.set_position(start + self.pos, end + self.pos); | |
680 | self.pos += end; | |
476ff2be | 681 | } |
fc512014 | 682 | Some(sub) |
476ff2be | 683 | } |
0531ce1d XL |
684 | |
685 | fn size_hint(&self) -> (usize, Option<usize>) { | |
686 | (0, Some(self.s.len())) | |
687 | } | |
476ff2be SL |
688 | } |
689 | ||
690 | /// Parse the next substitution from the input string. | |
9fa01778 | 691 | pub fn parse_next_substitution(s: &str) -> Option<(Substitution<'_>, &str)> { |
476ff2be | 692 | let at = { |
ff7c6d11 | 693 | let start = s.find('$')?; |
dfeec247 XL |
694 | match s[start + 1..].chars().next()? { |
695 | '$' => return Some((Substitution::Escape((start, start + 2)), &s[start + 2..])), | |
696 | c @ '0'..='9' => { | |
476ff2be | 697 | let n = (c as u8) - b'0'; |
dfeec247 XL |
698 | return Some((Substitution::Ordinal(n, (start, start + 2)), &s[start + 2..])); |
699 | } | |
700 | _ => { /* fall-through */ } | |
476ff2be SL |
701 | } |
702 | ||
6a06907d | 703 | Cur::new_at(s, start) |
476ff2be SL |
704 | }; |
705 | ||
ff7c6d11 XL |
706 | let at = at.at_next_cp()?; |
707 | let (c, inner) = at.next_cp()?; | |
708 | ||
709 | if !is_ident_head(c) { | |
710 | None | |
711 | } else { | |
712 | let end = at_next_cp_while(inner, is_ident_tail); | |
b7449926 XL |
713 | let slice = at.slice_between(end).unwrap(); |
714 | let start = at.at - 1; | |
715 | let end_pos = at.at + slice.len(); | |
716 | Some((Substitution::Name(slice, (start, end_pos)), end.slice_after())) | |
476ff2be SL |
717 | } |
718 | } | |
719 | ||
9fa01778 | 720 | fn at_next_cp_while<F>(mut cur: Cur<'_>, mut pred: F) -> Cur<'_> |
dfeec247 XL |
721 | where |
722 | F: FnMut(char) -> bool, | |
723 | { | |
476ff2be SL |
724 | loop { |
725 | match cur.next_cp() { | |
dfeec247 XL |
726 | Some((c, next)) => { |
727 | if pred(c) { | |
728 | cur = next; | |
729 | } else { | |
730 | return cur; | |
731 | } | |
732 | } | |
476ff2be SL |
733 | None => return cur, |
734 | } | |
735 | } | |
736 | } | |
737 | ||
/// Returns whether `c` may start a name: an ASCII letter or `_`.
fn is_ident_head(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
741 | ||
/// Returns whether `c` may continue a name: an ASCII letter, an ASCII digit or `_`.
fn is_ident_tail(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
745 | ||
746 | #[cfg(test)] | |
dc9dc135 | 747 | mod tests; |
476ff2be SL |
748 | } |
749 | ||
mod strcursor {
    /// A position inside a string slice that can be moved forward one code point at a time.
    #[derive(Clone, Copy)]
    pub struct StrCursor<'a> {
        /// The whole string being walked.
        s: &'a str,
        /// Byte offset of the cursor within `s`.
        pub at: usize,
    }

    impl<'a> StrCursor<'a> {
        /// Creates a cursor into `s` at byte offset `at`.
        pub fn new_at(s: &'a str, at: usize) -> StrCursor<'a> {
            StrCursor { s, at }
        }

        /// Returns the cursor moved past the next code point, or `None` at the end of input.
        pub fn at_next_cp(mut self) -> Option<StrCursor<'a>> {
            if self.try_seek_right_cp() { Some(self) } else { None }
        }

        /// Returns the next code point together with a cursor placed after it, or `None` at
        /// the end of input.
        pub fn next_cp(mut self) -> Option<(char, StrCursor<'a>)> {
            let cp = self.cp_after()?;
            self.seek_right(cp.len_utf8());
            Some((cp, self))
        }

        /// Everything before the cursor.
        fn slice_before(&self) -> &'a str {
            &self.s[..self.at]
        }

        /// Everything from the cursor onwards.
        pub fn slice_after(&self) -> &'a str {
            &self.s[self.at..]
        }

        /// Returns the text between this cursor and `until`, in either order, or `None` when
        /// the two cursors do not refer to the very same string.
        pub fn slice_between(&self, until: StrCursor<'a>) -> Option<&'a str> {
            if !str_eq_literal(self.s, until.s) {
                return None;
            }
            let (beg, end) =
                if self.at <= until.at { (self.at, until.at) } else { (until.at, self.at) };
            Some(&self.s[beg..end])
        }

        /// The code point directly after the cursor, if any.
        fn cp_after(&self) -> Option<char> {
            self.slice_after().chars().next()
        }

        /// Moves past the next code point; returns `false` (without moving) at the end.
        fn try_seek_right_cp(&mut self) -> bool {
            if let Some(c) = self.slice_after().chars().next() {
                self.at += c.len_utf8();
                true
            } else {
                false
            }
        }

        /// Moves the cursor forward by `bytes` bytes.
        fn seek_right(&mut self, bytes: usize) {
            self.at += bytes;
        }
    }

    impl std::fmt::Debug for StrCursor<'_> {
        fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let (before, after) = (self.slice_before(), self.slice_after());
            write!(fmt, "StrCursor({:?} | {:?})", before, after)
        }
    }

    /// Returns whether `a` and `b` are the same slice: same starting address and same length.
    fn str_eq_literal(a: &str, b: &str) -> bool {
        a.len() == b.len() && a.as_ptr() == b.as_ptr()
    }
}