]>
Commit | Line | Data |
---|---|---|
2c00a5a8 XL |
1 | // Copyright 2015 Nicholas Allegra (comex). |
2 | // Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or | |
3 | // the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be | |
4 | // copied, modified, or distributed except according to those terms. | |
5 | ||
6 | //! Same idea as (but implementation not directly based on) the Python shlex module. However, this | |
7 | //! implementation does not support any of the Python module's customization because it makes | |
8 | //! parsing slower and is fairly useless. You only get the default settings of shlex.split, which | |
9 | //! mimic the POSIX shell: | |
10 | //! http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html | |
11 | //! | |
12 | //! This implementation also deviates from the Python version in not treating \r specially, which I | |
13 | //! believe is more compliant. | |
14 | //! | |
15 | //! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes | |
16 | //! directly as a micro-optimization. | |
17 | ||
18 | use std::borrow::Cow; | |
19 | ||
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
pub struct Shlex<'a> {
    /// Remaining, not yet consumed bytes of the input string.
    in_iter: std::str::Bytes<'a>,
    /// The number of newlines read so far, plus one.
    pub line_no: usize,
    /// An input string is erroneous if it ends while inside a quotation or right after an
    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
    /// true; best to check it after you're done iterating.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer positioned at the start of `in_str`, on line 1 and with no error recorded.
    pub fn new(in_str: &'a str) -> Self {
        Shlex {
            in_iter: in_str.bytes(),
            line_no: 1,
            had_error: false,
        }
    }

    /// Parses one word whose first byte, `ch`, has already been consumed.
    ///
    /// Returns `None` and sets `had_error` if the input ends inside a quotation or directly
    /// after a backslash. The whitespace byte that terminates the word is consumed.
    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
        let mut word: Vec<u8> = Vec::new();
        loop {
            match ch {
                b'"' => {
                    if self.parse_double(&mut word).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\'' => {
                    if self.parse_single(&mut word).is_err() {
                        self.had_error = true;
                        return None;
                    }
                }
                b'\\' => match self.next_char() {
                    // \<newline> => nothing (line continuation)
                    Some(b'\n') => {}
                    // \x => x
                    Some(escaped) => word.push(escaped),
                    // a trailing backslash is an error
                    None => {
                        self.had_error = true;
                        return None;
                    }
                },
                b' ' | b'\t' | b'\n' => break,
                plain => word.push(plain),
            }
            match self.next_char() {
                Some(following) => ch = following,
                None => break,
            }
        }
        // SAFETY: every byte in `word` was copied in order from a valid `&str`. The only bytes
        // ever dropped (quotes, backslashes, newlines, separators) or inserted (a backslash) are
        // ASCII, and all bytes of a multi-byte UTF-8 sequence are >= 0x80, so such sequences are
        // always copied intact.
        Some(unsafe { String::from_utf8_unchecked(word) })
    }

    /// Consumes the body of a double-quoted section (the opening quote is already consumed),
    /// appending the unescaped bytes to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing quote.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(ch) = self.next_char() {
            match ch {
                b'"' => return Ok(()),
                b'\\' => {
                    let escaped = self.next_char().ok_or(())?;
                    match escaped {
                        // \$ => $
                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                        // \<newline> => nothing
                        b'\n' => {}
                        // \x => \x
                        _ => {
                            result.push(b'\\');
                            result.push(escaped);
                        }
                    }
                }
                literal => result.push(literal),
            }
        }
        Err(())
    }

    /// Consumes the body of a single-quoted section (the opening quote is already consumed),
    /// appending every byte verbatim to `result`.
    ///
    /// Returns `Err(())` if the input ends before the closing quote.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        while let Some(ch) = self.next_char() {
            if ch == b'\'' {
                return Ok(());
            }
            result.push(ch);
        }
        Err(())
    }

    /// Pulls the next input byte, bumping `line_no` whenever a newline is read.
    fn next_char(&mut self) -> Option<u8> {
        let byte = self.in_iter.next();
        if let Some(b'\n') = byte {
            self.line_no += 1;
        }
        byte
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = String;

    /// Skips leading whitespace and `#` comments, then parses and returns the next word.
    fn next(&mut self) -> Option<String> {
        loop {
            let ch = self.next_char()?;
            match ch {
                // skip initial whitespace
                b' ' | b'\t' | b'\n' => {}
                // a '#' at the start of a word begins a comment running to the end of the line
                b'#' => {
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                _ => return self.parse_word(ch),
            }
        }
    }
}
139 | ||
140 | /// Convenience function that consumes the whole string at once. Returns None if the input was | |
141 | /// erroneous. | |
142 | pub fn split(in_str: &str) -> Option<Vec<String>> { | |
143 | let mut shl = Shlex::new(in_str); | |
144 | let res = shl.by_ref().collect(); | |
145 | if shl.had_error { None } else { Some(res) } | |
146 | } | |
147 | ||
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// * An empty word becomes `""`.
/// * A word containing any character that is (or may be) special to the shell is wrapped in
///   double quotes, with `$`, `` ` ``, `"` and `\` backslash-escaped inside the quotes.
/// * Any other word is returned unchanged, without allocating.
///
/// Besides the characters POSIX lists as needing quoting, `{` and `}` also trigger quoting so
/// that shells implementing brace expansion do not turn one word into several arguments.
pub fn quote(in_str: &str) -> Cow<str> {
    if in_str.is_empty() {
        return "\"\"".into();
    }

    let needs_quoting = in_str.bytes().any(|c| match c {
        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%'
        | b'{' | b'}' => true,
        _ => false,
    });
    if !needs_quoting {
        return in_str.into();
    }

    // Two extra bytes for the surrounding quotes; escapes may grow the buffer further.
    let mut out = String::with_capacity(in_str.len() + 2);
    out.push('"');
    for c in in_str.chars() {
        // These four characters keep a special meaning inside double quotes.
        match c {
            '$' | '`' | '"' | '\\' => out.push('\\'),
            _ => (),
        }
        out.push(c);
    }
    out.push('"');
    out.into()
}
172 | ||
6a06907d XL |
173 | /// Convenience function that consumes an iterable of words and turns it into a single string, |
174 | /// quoting words when necessary. Consecutive words will be separated by a single space. | |
175 | pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String { | |
176 | words.into_iter() | |
177 | .map(quote) | |
178 | .collect::<Vec<_>>() | |
179 | .join(" ") | |
180 | } | |
181 | ||
2c00a5a8 XL |
/// Table of `(input, expected)` pairs for `split`; `None` marks input that must be rejected.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    // plain words and quoting
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    (" foo \nbar", Some(&["foo", "bar"])),
    // line continuations
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    // backslashes are literal inside single quotes
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    // input ending after a backslash or inside a quotation
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    // comments
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
205 | ||
/// Runs `split` over every entry of `SPLIT_TEST_ITEMS` and compares against the expectation.
#[test]
fn test_split() {
    for case in SPLIT_TEST_ITEMS.iter() {
        let (input, expected) = *case;
        // Convert the borrowed expectation into the owned shape `split` returns.
        let expected_owned: Option<Vec<String>> =
            expected.map(|words| words.iter().map(|word| (*word).to_owned()).collect());
        assert_eq!(split(input), expected_owned);
    }
}
212 | ||
/// Checks that `line_no` tracks the newlines consumed while iterating.
///
/// Every expectation is asserted unconditionally, so the test cannot pass vacuously when a word
/// is missing from the output.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    assert_eq!(sh.line_no, 1);
    assert_eq!(sh.next(), Some("foo".to_owned()));
    assert_eq!(sh.next(), Some("bar".to_owned()));
    // Two newlines have been read by the time "bar" is produced.
    assert_eq!(sh.line_no, 3);
    assert_eq!(sh.next(), None);
    assert!(!sh.had_error);
}
222 | ||
/// Checks `quote` on an unquoted word, a word with a space, a lone double quote and the empty
/// string.
#[test]
fn test_quote() {
    let cases: [(&str, &str); 4] = [
        ("foobar", "foobar"),
        ("foo bar", "\"foo bar\""),
        ("\"", "\"\\\"\""),
        ("", "\"\""),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(quote(input), expected);
    }
}
6a06907d XL |
230 | |
/// Checks `join` on an empty list, a single empty word, plain words and a word needing quotes.
#[test]
fn test_join() {
    let cases: [(Vec<&str>, &str); 4] = [
        (vec![], ""),
        (vec![""], "\"\""),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "\"foo bar\" baz"),
    ];
    for (words, expected) in cases.iter() {
        assert_eq!(join(words.iter().cloned()), *expected);
    }
}
237 | } |