]>
Commit | Line | Data |
---|---|---|
9c376795 FG |
1 | //! Minimal, flexible command-line parser |
2 | //! | |
3 | //! As opposed to a declarative parser, this processes arguments as a stream of tokens. As lexing | |
4 | //! a command-line is not context-free, we rely on the caller to decide how to interpret the | |
5 | //! arguments. | |
6 | //! | |
7 | //! # Examples | |
8 | //! | |
9 | //! ```rust | |
353b0b11 FG |
10 | //! use std::path::PathBuf; |
11 | //! use std::ffi::OsStr; | |
12 | //! | |
13 | //! type BoxedError = Box<dyn std::error::Error + Send + Sync>; | |
14 | //! | |
9c376795 FG |
15 | //! #[derive(Debug)] |
16 | //! struct Args { | |
17 | //! paths: Vec<PathBuf>, | |
18 | //! color: Color, | |
19 | //! verbosity: usize, | |
20 | //! } | |
21 | //! | |
22 | //! #[derive(Debug)] | |
23 | //! enum Color { | |
24 | //! Always, | |
25 | //! Auto, | |
26 | //! Never, | |
27 | //! } | |
28 | //! | |
29 | //! impl Color { | |
353b0b11 | 30 | //! fn parse(s: Option<&OsStr>) -> Result<Self, BoxedError> { |
9c376795 FG |
31 | //! let s = s.map(|s| s.to_str().ok_or(s)); |
32 | //! match s { | |
33 | //! Some(Ok("always")) | Some(Ok("")) | None => { | |
34 | //! Ok(Color::Always) | |
35 | //! } | |
36 | //! Some(Ok("auto")) => { | |
37 | //! Ok(Color::Auto) | |
38 | //! } | |
39 | //! Some(Ok("never")) => { | |
40 | //! Ok(Color::Never) | |
41 | //! } | |
42 | //! Some(invalid) => { | |
fe692bf9 | 43 | //! Err(format!("Invalid value for `--color`, {invalid:?}").into()) |
9c376795 FG |
44 | //! } |
45 | //! } | |
46 | //! } | |
47 | //! } | |
48 | //! | |
49 | //! fn parse_args( | |
50 | //! raw: impl IntoIterator<Item=impl Into<std::ffi::OsString>> | |
51 | //! ) -> Result<Args, BoxedError> { | |
52 | //! let mut args = Args { | |
53 | //! paths: Vec::new(), | |
54 | //! color: Color::Auto, | |
55 | //! verbosity: 0, | |
56 | //! }; | |
57 | //! | |
58 | //! let raw = clap_lex::RawArgs::new(raw); | |
59 | //! let mut cursor = raw.cursor(); | |
60 | //! raw.next(&mut cursor); // Skip the bin | |
61 | //! while let Some(arg) = raw.next(&mut cursor) { | |
62 | //! if arg.is_escape() { | |
63 | //! args.paths.extend(raw.remaining(&mut cursor).map(PathBuf::from)); | |
64 | //! } else if arg.is_stdio() { | |
65 | //! args.paths.push(PathBuf::from("-")); | |
66 | //! } else if let Some((long, value)) = arg.to_long() { | |
67 | //! match long { | |
68 | //! Ok("verbose") => { | |
69 | //! if let Some(value) = value { | |
fe692bf9 | 70 | //! return Err(format!("`--verbose` does not take a value, got `{value:?}`").into()); |
9c376795 FG |
71 | //! } |
72 | //! args.verbosity += 1; | |
73 | //! } | |
74 | //! Ok("color") => { | |
75 | //! args.color = Color::parse(value)?; | |
76 | //! } | |
77 | //! _ => { | |
78 | //! return Err( | |
79 | //! format!("Unexpected flag: {}", arg.display()).into() | |
80 | //! ); | |
81 | //! } | |
82 | //! } | |
83 | //! } else if let Some(mut shorts) = arg.to_short() { | |
84 | //! while let Some(short) = shorts.next_flag() { | |
85 | //! match short { | |
86 | //! Ok('v') => { | |
87 | //! args.verbosity += 1; | |
88 | //! } | |
89 | //! Ok('c') => { | |
90 | //! let value = shorts.next_value_os(); | |
91 | //! args.color = Color::parse(value)?; | |
92 | //! } | |
93 | //! Ok(c) => { | |
fe692bf9 | 94 | //! return Err(format!("Unexpected flag: -{c}").into()); |
9c376795 FG |
95 | //! } |
96 | //! Err(e) => { | |
353b0b11 | 97 | //! return Err(format!("Unexpected flag: -{}", e.to_string_lossy()).into()); |
9c376795 FG |
98 | //! } |
99 | //! } | |
100 | //! } | |
101 | //! } else { | |
353b0b11 | 102 | //! args.paths.push(PathBuf::from(arg.to_value_os().to_owned())); |
9c376795 FG |
103 | //! } |
104 | //! } | |
105 | //! | |
106 | //! Ok(args) | |
107 | //! } | |
108 | //! | |
109 | //! let args = parse_args(["bin", "--hello", "world"]); | |
fe692bf9 | 110 | //! println!("{args:?}"); |
9c376795 FG |
111 | //! ``` |
112 | ||
353b0b11 FG |
113 | mod ext; |
114 | ||
9c376795 FG |
115 | use std::ffi::OsStr; |
116 | use std::ffi::OsString; | |
117 | ||
118 | pub use std::io::SeekFrom; | |
119 | ||
353b0b11 | 120 | pub use ext::OsStrExt; |
9c376795 FG |
121 | |
122 | /// Command-line arguments | |
123 | #[derive(Default, Clone, Debug, PartialEq, Eq)] | |
124 | pub struct RawArgs { | |
125 | items: Vec<OsString>, | |
126 | } | |
127 | ||
128 | impl RawArgs { | |
129 | //// Create an argument list to parse | |
130 | /// | |
131 | /// **NOTE:** The argument returned will be the current binary. | |
132 | /// | |
133 | /// # Example | |
134 | /// | |
135 | /// ```rust,no_run | |
136 | /// # use std::path::PathBuf; | |
137 | /// let raw = clap_lex::RawArgs::from_args(); | |
138 | /// let mut cursor = raw.cursor(); | |
139 | /// let _bin = raw.next_os(&mut cursor); | |
140 | /// | |
141 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); | |
fe692bf9 | 142 | /// println!("{paths:?}"); |
9c376795 FG |
143 | /// ``` |
144 | pub fn from_args() -> Self { | |
145 | Self::new(std::env::args_os()) | |
146 | } | |
147 | ||
148 | //// Create an argument list to parse | |
149 | /// | |
150 | /// # Example | |
151 | /// | |
152 | /// ```rust,no_run | |
153 | /// # use std::path::PathBuf; | |
154 | /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]); | |
155 | /// let mut cursor = raw.cursor(); | |
156 | /// let _bin = raw.next_os(&mut cursor); | |
157 | /// | |
158 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); | |
fe692bf9 | 159 | /// println!("{paths:?}"); |
9c376795 FG |
160 | /// ``` |
161 | pub fn new(iter: impl IntoIterator<Item = impl Into<std::ffi::OsString>>) -> Self { | |
162 | let iter = iter.into_iter(); | |
163 | Self::from(iter) | |
164 | } | |
165 | ||
166 | /// Create a cursor for walking the arguments | |
167 | /// | |
168 | /// # Example | |
169 | /// | |
170 | /// ```rust,no_run | |
171 | /// # use std::path::PathBuf; | |
172 | /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]); | |
173 | /// let mut cursor = raw.cursor(); | |
174 | /// let _bin = raw.next_os(&mut cursor); | |
175 | /// | |
176 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); | |
fe692bf9 | 177 | /// println!("{paths:?}"); |
9c376795 FG |
178 | /// ``` |
179 | pub fn cursor(&self) -> ArgCursor { | |
180 | ArgCursor::new() | |
181 | } | |
182 | ||
183 | /// Advance the cursor, returning the next [`ParsedArg`] | |
184 | pub fn next(&self, cursor: &mut ArgCursor) -> Option<ParsedArg<'_>> { | |
185 | self.next_os(cursor).map(ParsedArg::new) | |
186 | } | |
187 | ||
188 | /// Advance the cursor, returning a raw argument value. | |
189 | pub fn next_os(&self, cursor: &mut ArgCursor) -> Option<&OsStr> { | |
190 | let next = self.items.get(cursor.cursor).map(|s| s.as_os_str()); | |
191 | cursor.cursor = cursor.cursor.saturating_add(1); | |
192 | next | |
193 | } | |
194 | ||
195 | /// Return the next [`ParsedArg`] | |
196 | pub fn peek(&self, cursor: &ArgCursor) -> Option<ParsedArg<'_>> { | |
197 | self.peek_os(cursor).map(ParsedArg::new) | |
198 | } | |
199 | ||
200 | /// Return a raw argument value. | |
201 | pub fn peek_os(&self, cursor: &ArgCursor) -> Option<&OsStr> { | |
202 | self.items.get(cursor.cursor).map(|s| s.as_os_str()) | |
203 | } | |
204 | ||
205 | /// Return all remaining raw arguments, advancing the cursor to the end | |
206 | /// | |
207 | /// # Example | |
208 | /// | |
209 | /// ```rust,no_run | |
210 | /// # use std::path::PathBuf; | |
211 | /// let raw = clap_lex::RawArgs::new(["bin", "foo.txt"]); | |
212 | /// let mut cursor = raw.cursor(); | |
213 | /// let _bin = raw.next_os(&mut cursor); | |
214 | /// | |
215 | /// let mut paths = raw.remaining(&mut cursor).map(PathBuf::from).collect::<Vec<_>>(); | |
fe692bf9 | 216 | /// println!("{paths:?}"); |
9c376795 FG |
217 | /// ``` |
218 | pub fn remaining(&self, cursor: &mut ArgCursor) -> impl Iterator<Item = &OsStr> { | |
219 | let remaining = self.items[cursor.cursor..].iter().map(|s| s.as_os_str()); | |
220 | cursor.cursor = self.items.len(); | |
221 | remaining | |
222 | } | |
223 | ||
224 | /// Adjust the cursor's position | |
225 | pub fn seek(&self, cursor: &mut ArgCursor, pos: SeekFrom) { | |
226 | let pos = match pos { | |
227 | SeekFrom::Start(pos) => pos, | |
228 | SeekFrom::End(pos) => (self.items.len() as i64).saturating_add(pos).max(0) as u64, | |
229 | SeekFrom::Current(pos) => (cursor.cursor as i64).saturating_add(pos).max(0) as u64, | |
230 | }; | |
231 | let pos = (pos as usize).min(self.items.len()); | |
232 | cursor.cursor = pos; | |
233 | } | |
234 | ||
235 | /// Inject arguments before the [`RawArgs::next`] | |
236 | pub fn insert( | |
237 | &mut self, | |
238 | cursor: &ArgCursor, | |
239 | insert_items: impl IntoIterator<Item = impl Into<OsString>>, | |
240 | ) { | |
241 | self.items.splice( | |
242 | cursor.cursor..cursor.cursor, | |
243 | insert_items.into_iter().map(Into::into), | |
244 | ); | |
245 | } | |
246 | ||
247 | /// Any remaining args? | |
248 | pub fn is_end(&self, cursor: &ArgCursor) -> bool { | |
249 | self.peek_os(cursor).is_none() | |
250 | } | |
251 | } | |
252 | ||
253 | impl<I, T> From<I> for RawArgs | |
254 | where | |
255 | I: Iterator<Item = T>, | |
256 | T: Into<OsString>, | |
257 | { | |
258 | fn from(val: I) -> Self { | |
259 | Self { | |
260 | items: val.map(|x| x.into()).collect(), | |
261 | } | |
262 | } | |
263 | } | |
264 | ||
/// Position within [`RawArgs`]
///
/// Stores the index of the argument that will be read next.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArgCursor {
    cursor: usize,
}

impl ArgCursor {
    /// Create a cursor at index `0`.
    fn new() -> Self {
        let cursor = 0;
        Self { cursor }
    }
}
276 | ||
277 | /// Command-line Argument | |
278 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] | |
279 | pub struct ParsedArg<'s> { | |
353b0b11 | 280 | inner: &'s OsStr, |
9c376795 FG |
281 | } |
282 | ||
283 | impl<'s> ParsedArg<'s> { | |
284 | fn new(inner: &'s OsStr) -> Self { | |
353b0b11 | 285 | Self { inner } |
9c376795 FG |
286 | } |
287 | ||
288 | /// Argument is length of 0 | |
289 | pub fn is_empty(&self) -> bool { | |
353b0b11 | 290 | self.inner.is_empty() |
9c376795 FG |
291 | } |
292 | ||
293 | /// Does the argument look like a stdio argument (`-`) | |
294 | pub fn is_stdio(&self) -> bool { | |
353b0b11 | 295 | self.inner == "-" |
9c376795 FG |
296 | } |
297 | ||
298 | /// Does the argument look like an argument escape (`--`) | |
299 | pub fn is_escape(&self) -> bool { | |
353b0b11 | 300 | self.inner == "--" |
9c376795 FG |
301 | } |
302 | ||
ed00b5ec FG |
303 | /// Does the argument look like a negative number? |
304 | /// | |
305 | /// This won't parse the number in full but attempts to see if this looks | |
306 | /// like something along the lines of `-3`, `-0.3`, or `-33.03` | |
307 | pub fn is_negative_number(&self) -> bool { | |
9c376795 | 308 | self.to_value() |
ed00b5ec FG |
309 | .ok() |
310 | .and_then(|s| Some(is_number(s.strip_prefix('-')?))) | |
9c376795 FG |
311 | .unwrap_or_default() |
312 | } | |
313 | ||
314 | /// Treat as a long-flag | |
353b0b11 FG |
315 | pub fn to_long(&self) -> Option<(Result<&str, &OsStr>, Option<&OsStr>)> { |
316 | let raw = self.inner; | |
317 | let remainder = raw.strip_prefix("--")?; | |
318 | if remainder.is_empty() { | |
319 | debug_assert!(self.is_escape()); | |
320 | return None; | |
321 | } | |
9c376795 | 322 | |
353b0b11 FG |
323 | let (flag, value) = if let Some((p0, p1)) = remainder.split_once("=") { |
324 | (p0, Some(p1)) | |
9c376795 | 325 | } else { |
353b0b11 FG |
326 | (remainder, None) |
327 | }; | |
328 | let flag = flag.to_str().ok_or(flag); | |
329 | Some((flag, value)) | |
9c376795 FG |
330 | } |
331 | ||
332 | /// Can treat as a long-flag | |
333 | pub fn is_long(&self) -> bool { | |
353b0b11 | 334 | self.inner.starts_with("--") && !self.is_escape() |
9c376795 FG |
335 | } |
336 | ||
337 | /// Treat as a short-flag | |
338 | pub fn to_short(&self) -> Option<ShortFlags<'_>> { | |
353b0b11 FG |
339 | if let Some(remainder_os) = self.inner.strip_prefix("-") { |
340 | if remainder_os.starts_with("-") { | |
9c376795 FG |
341 | None |
342 | } else if remainder_os.is_empty() { | |
343 | debug_assert!(self.is_stdio()); | |
344 | None | |
345 | } else { | |
353b0b11 | 346 | Some(ShortFlags::new(remainder_os)) |
9c376795 FG |
347 | } |
348 | } else { | |
349 | None | |
350 | } | |
351 | } | |
352 | ||
353 | /// Can treat as a short-flag | |
354 | pub fn is_short(&self) -> bool { | |
353b0b11 | 355 | self.inner.starts_with("-") && !self.is_stdio() && !self.inner.starts_with("--") |
9c376795 FG |
356 | } |
357 | ||
358 | /// Treat as a value | |
359 | /// | |
360 | /// **NOTE:** May return a flag or an escape. | |
353b0b11 FG |
361 | pub fn to_value_os(&self) -> &OsStr { |
362 | self.inner | |
9c376795 FG |
363 | } |
364 | ||
365 | /// Treat as a value | |
366 | /// | |
367 | /// **NOTE:** May return a flag or an escape. | |
353b0b11 FG |
368 | pub fn to_value(&self) -> Result<&str, &OsStr> { |
369 | self.inner.to_str().ok_or(self.inner) | |
9c376795 FG |
370 | } |
371 | ||
372 | /// Safely print an argument that may contain non-UTF8 content | |
373 | /// | |
374 | /// This may perform lossy conversion, depending on the platform. If you would like an implementation which escapes the path please use Debug instead. | |
375 | pub fn display(&self) -> impl std::fmt::Display + '_ { | |
353b0b11 | 376 | self.inner.to_string_lossy() |
9c376795 FG |
377 | } |
378 | } | |
379 | ||
380 | /// Walk through short flags within a [`ParsedArg`] | |
381 | #[derive(Clone, Debug)] | |
382 | pub struct ShortFlags<'s> { | |
353b0b11 | 383 | inner: &'s OsStr, |
9c376795 | 384 | utf8_prefix: std::str::CharIndices<'s>, |
353b0b11 | 385 | invalid_suffix: Option<&'s OsStr>, |
9c376795 FG |
386 | } |
387 | ||
388 | impl<'s> ShortFlags<'s> { | |
353b0b11 FG |
389 | fn new(inner: &'s OsStr) -> Self { |
390 | let (utf8_prefix, invalid_suffix) = split_nonutf8_once(inner); | |
9c376795 FG |
391 | let utf8_prefix = utf8_prefix.char_indices(); |
392 | Self { | |
393 | inner, | |
394 | utf8_prefix, | |
395 | invalid_suffix, | |
396 | } | |
397 | } | |
398 | ||
399 | /// Move the iterator forward by `n` short flags | |
400 | pub fn advance_by(&mut self, n: usize) -> Result<(), usize> { | |
401 | for i in 0..n { | |
402 | self.next().ok_or(i)?.map_err(|_| i)?; | |
403 | } | |
404 | Ok(()) | |
405 | } | |
406 | ||
407 | /// No short flags left | |
408 | pub fn is_empty(&self) -> bool { | |
409 | self.invalid_suffix.is_none() && self.utf8_prefix.as_str().is_empty() | |
410 | } | |
411 | ||
412 | /// Does the short flag look like a number | |
413 | /// | |
414 | /// Ideally call this before doing any iterator | |
ed00b5ec FG |
415 | pub fn is_negative_number(&self) -> bool { |
416 | self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str()) | |
9c376795 FG |
417 | } |
418 | ||
419 | /// Advance the iterator, returning the next short flag on success | |
420 | /// | |
421 | /// On error, returns the invalid-UTF8 value | |
353b0b11 | 422 | pub fn next_flag(&mut self) -> Option<Result<char, &'s OsStr>> { |
9c376795 FG |
423 | if let Some((_, flag)) = self.utf8_prefix.next() { |
424 | return Some(Ok(flag)); | |
425 | } | |
426 | ||
427 | if let Some(suffix) = self.invalid_suffix { | |
428 | self.invalid_suffix = None; | |
429 | return Some(Err(suffix)); | |
430 | } | |
431 | ||
432 | None | |
433 | } | |
434 | ||
435 | /// Advance the iterator, returning everything left as a value | |
353b0b11 | 436 | pub fn next_value_os(&mut self) -> Option<&'s OsStr> { |
9c376795 FG |
437 | if let Some((index, _)) = self.utf8_prefix.next() { |
438 | self.utf8_prefix = "".char_indices(); | |
439 | self.invalid_suffix = None; | |
353b0b11 FG |
440 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
441 | let remainder = unsafe { ext::split_at(self.inner, index).1 }; | |
442 | return Some(remainder); | |
9c376795 FG |
443 | } |
444 | ||
445 | if let Some(suffix) = self.invalid_suffix { | |
446 | self.invalid_suffix = None; | |
447 | return Some(suffix); | |
448 | } | |
449 | ||
450 | None | |
451 | } | |
452 | } | |
453 | ||
454 | impl<'s> Iterator for ShortFlags<'s> { | |
353b0b11 | 455 | type Item = Result<char, &'s OsStr>; |
9c376795 FG |
456 | |
457 | fn next(&mut self) -> Option<Self::Item> { | |
458 | self.next_flag() | |
459 | } | |
460 | } | |
461 | ||
353b0b11 FG |
462 | fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) { |
463 | match b.try_str() { | |
9c376795 FG |
464 | Ok(s) => (s, None), |
465 | Err(err) => { | |
353b0b11 FG |
466 | // SAFETY: `char_indices` ensures `index` is at a valid UTF-8 boundary |
467 | let (valid, after_valid) = unsafe { ext::split_at(b, err.valid_up_to()) }; | |
468 | let valid = valid.try_str().unwrap(); | |
9c376795 FG |
469 | (valid, Some(after_valid)) |
470 | } | |
471 | } | |
472 | } | |
ed00b5ec FG |
473 | |
/// Check whether `arg` looks like an unsigned integer or float literal.
///
/// This is a heuristic, not a parser: it accepts ASCII digits with at most one `.` and at
/// most one exponent marker `e`, e.g. `1`, `1.`, `1.2`, `1.2e10`. Callers strip any leading
/// `-` before calling.
///
/// Returns `false` for the empty string, so a lone `-` is not reported as a negative number.
fn is_number(arg: &str) -> bool {
    // Without this check the loop below never runs and the empty string would be accepted.
    if arg.is_empty() {
        return false;
    }

    let mut seen_dot = false;
    let mut position_of_e = None;
    for (i, c) in arg.as_bytes().iter().enumerate() {
        match c {
            // Digits are always valid
            b'0'..=b'9' => {}

            // Allow a `.`, but only one, only if it comes before an
            // optional exponent, and only if it's not the first character.
            b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true,

            // Allow an exponent `e` but only at most one after the first
            // character.
            b'e' if position_of_e.is_none() && i > 0 => position_of_e = Some(i),

            _ => return false,
        }
    }

    // Disallow `-1e` which isn't a valid float since it doesn't actually have
    // an exponent.
    match position_of_e {
        Some(i) => i != arg.len() - 1,
        None => true,
    }
}