]> git.proxmox.com Git - rustc.git/blame - vendor/shlex/src/lib.rs
Merge tag 'debian/1.52.1+dfsg1-1_exp2' into proxmox/buster
[rustc.git] / vendor / shlex / src / lib.rs
CommitLineData
2c00a5a8
XL
// Copyright 2015 Nicholas Allegra (comex).
// Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
// the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
5
//! Same idea as (but implementation not directly based on) the Python shlex module. However, this
//! implementation does not support any of the Python module's customization because it makes
//! parsing slower and is fairly useless. You only get the default settings of shlex.split, which
//! mimic the POSIX shell:
//! <http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
//!
//! This implementation also deviates from the Python version in not treating \r specially, which I
//! believe is more compliant.
//!
//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
//! directly as a micro-optimization.
17
18use std::borrow::Cow;
19
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
pub struct Shlex<'a> {
    /// Remaining input, consumed one byte at a time.
    in_iter: std::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a tokenizer over `in_str`, positioned on line 1 with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been read.
    ///
    /// Returns `None` and sets `had_error` if the input ends inside a quotation or right after
    /// a backslash.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut result: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut result).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // \<newline> => nothing (line continuation)
                    Some(b'\n') => {}
                    // \x => x
                    Some(escaped) => result.push(escaped),
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                b' ' | b'\t' | b'\n' => break,
                _ => result.push(ch),
            }
            match self.next_char() {
                Some(next) => ch = next,
                None => break,
            }
        }
        // SAFETY: the input is a `&str`, hence valid UTF-8. The parser only ever drops, inserts
        // or splits at ASCII bytes, which never occur inside a multi-byte sequence, and copies
        // every other byte in order, so `result` is a sequence of complete UTF-8 characters.
        Some(unsafe { String::from_utf8_unchecked(result) })
    }

    /// Consumes the body of a double-quoted section (the opening quote has already been read),
    /// appending its contents to `result`. Returns `Err(())` if the input ends before the
    /// closing quote.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'\\' => {
                    let escaped = self.next_char().ok_or(())?;
                    match escaped {
                        // \$ => $
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x (the backslash is kept literally)
                        _ => {
                            result.push(b'\\');
                            result.push(escaped);
                        }
                    }
                }
                b'"' => return Ok(()),
                other => result.push(other),
            }
        }
    }

    /// Consumes the body of a single-quoted section (the opening quote has already been read),
    /// appending every byte verbatim to `result`. Returns `Err(())` if the input ends before the
    /// closing quote.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'\'' => return Ok(()),
                other => result.push(other),
            }
        }
    }

    /// Reads the next input byte, bumping `line_no` whenever that byte is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let res = self.in_iter.next();
        if res == Some(b'\n') {
            self.line_no += 1;
        }
        res
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Returns the next word, or `None` at end of input or on error (see `had_error`).
    fn next(&mut self) -> Option<String> {
        let mut ch = self.next_char()?;
        // Skip leading whitespace and `#` comments until the first byte of a word.
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // A comment runs up to and including the next newline (or end of input).
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}
139
140/// Convenience function that consumes the whole string at once. Returns None if the input was
141/// erroneous.
142pub fn split(in_str: &str) -> Option<Vec<String>> {
143 let mut shl = Shlex::new(in_str);
144 let res = shl.by_ref().collect();
145 if shl.had_error { None } else { Some(res) }
146}
147
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// The empty string becomes `""`. A word containing any shell-significant byte is wrapped in
/// double quotes, with `$`, `` ` ``, `"` and `\` backslash-escaped inside them. Any other word
/// is returned unchanged (borrowed).
pub fn quote(in_str: &str) -> Cow<str> {
    if in_str.is_empty() {
        return "\"\"".into();
    }

    let needs_quoting = in_str.bytes().any(|c| match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%' => true,
        // Braces trigger brace expansion (`{a,b}` would become two arguments).
        b'{' | b'}' => true,
        // Byte 0xA0 is treated as a word separator by some shells, so be conservative.
        0xa0 => true,
        _ => false,
    });
    if !needs_quoting {
        return in_str.into();
    }

    // Two bytes for the surrounding quotes; escapes may grow the buffer further.
    let mut out = String::with_capacity(in_str.len() + 2);
    out.push('"');
    for ch in in_str.chars() {
        match ch {
            // These keep their special meaning inside double quotes and must be escaped.
            '$' | '`' | '"' | '\\' => out.push('\\'),
            _ => {}
        }
        out.push(ch);
    }
    out.push('"');
    out.into()
}
172
6a06907d
XL
173/// Convenience function that consumes an iterable of words and turns it into a single string,
174/// quoting words when necessary. Consecutive words will be separated by a single space.
175pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
176 words.into_iter()
177 .map(quote)
178 .collect::<Vec<_>>()
179 .join(" ")
180}
181
2c00a5a8
XL
/// Table of `(input, expected)` pairs for `test_split`; `None` marks input that must be
/// rejected as erroneous.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
205
/// Checks `split` against every entry of `SPLIT_TEST_ITEMS`.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&w| w.to_owned()).collect());
        // Include the input so a failure identifies the offending table row.
        assert_eq!(split(input), expected, "input: {:?}", input);
    }
}
212
/// Checks that `line_no` has advanced past both newlines once the word `bar` is produced.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh` must stay accessible inside the loop body.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Guard against the check above being skipped entirely.
    assert!(saw_bar, "expected the word \"bar\" to be produced");
}
222
/// Checks `quote` on a plain word, a word with a space, a lone double quote and the empty string.
#[test]
fn test_quote() {
    assert_eq!(quote("foobar"), "foobar");
    assert_eq!(quote("foo bar"), "\"foo bar\"");
    assert_eq!(quote("\""), "\"\\\"\"");
    assert_eq!(quote(""), "\"\"");
}
6a06907d
XL
230
/// Checks `join` on an empty list, a single empty word, plain words and a word needing quotes.
#[test]
fn test_join() {
    assert_eq!(join(vec![]), "");
    assert_eq!(join(vec![""]), "\"\"");
    assert_eq!(join(vec!["a", "b"]), "a b");
    assert_eq!(join(vec!["foo bar", "baz"]), "\"foo bar\" baz");
}
237}