]> git.proxmox.com Git - rustc.git/blob - library/std/src/sys/windows/args.rs
New upstream version 1.68.2+dfsg1
[rustc.git] / library / std / src / sys / windows / args.rs
1 //! The Windows command line is just a string
2 //! <https://docs.microsoft.com/en-us/archive/blogs/larryosterman/the-windows-command-line-is-just-a-string>
3 //!
4 //! This module implements the parsing necessary to turn that string into a list of arguments.
5
6 #[cfg(test)]
7 mod tests;
8
9 use crate::ffi::OsString;
10 use crate::fmt;
11 use crate::io;
12 use crate::num::NonZeroU16;
13 use crate::os::windows::prelude::*;
14 use crate::path::PathBuf;
15 use crate::sys::c;
16 use crate::sys::process::ensure_no_nuls;
17 use crate::sys::windows::os::current_exe;
18 use crate::sys_common::wstr::WStrUnits;
19 use crate::vec;
20
21 use crate::iter;
22
/// Const-context stand-in for `NonZeroU16::new(n).unwrap()`.
///
/// Panics (at compile time when used to initialise a `const`) if `n` is zero.
///
/// FIXME: This can be removed once `Option::unwrap` is stably const.
/// See the `const_option` feature (#67441).
const fn non_zero_u16(n: u16) -> NonZeroU16 {
    if let Some(value) = NonZeroU16::new(n) {
        value
    } else {
        panic!("called `unwrap` on a `None` value")
    }
}
33
34 pub fn args() -> Args {
35 // SAFETY: `GetCommandLineW` returns a pointer to a null terminated UTF-16
36 // string so it's safe for `WStrUnits` to use.
37 unsafe {
38 let lp_cmd_line = c::GetCommandLineW();
39 let parsed_args_list = parse_lp_cmd_line(WStrUnits::new(lp_cmd_line), || {
40 current_exe().map(PathBuf::into_os_string).unwrap_or_else(|_| OsString::new())
41 });
42
43 Args { parsed_args_list: parsed_args_list.into_iter() }
44 }
45 }
46
47 /// Implements the Windows command-line argument parsing algorithm.
48 ///
49 /// Microsoft's documentation for the Windows CLI argument format can be found at
50 /// <https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments>
51 ///
52 /// A more in-depth explanation is here:
53 /// <https://daviddeley.com/autohotkey/parameters/parameters.htm#WIN>
54 ///
55 /// Windows includes a function to do command line parsing in shell32.dll.
56 /// However, this is not used for two reasons:
57 ///
58 /// 1. Linking with that DLL causes the process to be registered as a GUI application.
59 /// GUI applications add a bunch of overhead, even if no windows are drawn. See
60 /// <https://randomascii.wordpress.com/2018/12/03/a-not-called-function-can-cause-a-5x-slowdown/>.
61 ///
62 /// 2. It does not follow the modern C/C++ argv rules outlined in the first two links above.
63 ///
64 /// This function was tested for equivalence to the C/C++ parsing rules using an
65 /// extensive test suite available at
66 /// <https://github.com/ChrisDenton/winarg/tree/std>.
67 fn parse_lp_cmd_line<'a, F: Fn() -> OsString>(
68 lp_cmd_line: Option<WStrUnits<'a>>,
69 exe_name: F,
70 ) -> Vec<OsString> {
71 const BACKSLASH: NonZeroU16 = non_zero_u16(b'\\' as u16);
72 const QUOTE: NonZeroU16 = non_zero_u16(b'"' as u16);
73 const TAB: NonZeroU16 = non_zero_u16(b'\t' as u16);
74 const SPACE: NonZeroU16 = non_zero_u16(b' ' as u16);
75
76 let mut ret_val = Vec::new();
77 // If the cmd line pointer is null or it points to an empty string then
78 // return the name of the executable as argv[0].
79 if lp_cmd_line.as_ref().and_then(|cmd| cmd.peek()).is_none() {
80 ret_val.push(exe_name());
81 return ret_val;
82 }
83 let mut code_units = lp_cmd_line.unwrap();
84
85 // The executable name at the beginning is special.
86 let mut in_quotes = false;
87 let mut cur = Vec::new();
88 for w in &mut code_units {
89 match w {
90 // A quote mark always toggles `in_quotes` no matter what because
91 // there are no escape characters when parsing the executable name.
92 QUOTE => in_quotes = !in_quotes,
93 // If not `in_quotes` then whitespace ends argv[0].
94 SPACE | TAB if !in_quotes => break,
95 // In all other cases the code unit is taken literally.
96 _ => cur.push(w.get()),
97 }
98 }
99 // Skip whitespace.
100 code_units.advance_while(|w| w == SPACE || w == TAB);
101 ret_val.push(OsString::from_wide(&cur));
102
103 // Parse the arguments according to these rules:
104 // * All code units are taken literally except space, tab, quote and backslash.
105 // * When not `in_quotes`, space and tab separate arguments. Consecutive spaces and tabs are
106 // treated as a single separator.
107 // * A space or tab `in_quotes` is taken literally.
108 // * A quote toggles `in_quotes` mode unless it's escaped. An escaped quote is taken literally.
109 // * A quote can be escaped if preceded by an odd number of backslashes.
110 // * If any number of backslashes is immediately followed by a quote then the number of
111 // backslashes is halved (rounding down).
112 // * Backslashes not followed by a quote are all taken literally.
113 // * If `in_quotes` then a quote can also be escaped using another quote
114 // (i.e. two consecutive quotes become one literal quote).
115 let mut cur = Vec::new();
116 let mut in_quotes = false;
117 while let Some(w) = code_units.next() {
118 match w {
119 // If not `in_quotes`, a space or tab ends the argument.
120 SPACE | TAB if !in_quotes => {
121 ret_val.push(OsString::from_wide(&cur[..]));
122 cur.truncate(0);
123
124 // Skip whitespace.
125 code_units.advance_while(|w| w == SPACE || w == TAB);
126 }
127 // Backslashes can escape quotes or backslashes but only if consecutive backslashes are followed by a quote.
128 BACKSLASH => {
129 let backslash_count = code_units.advance_while(|w| w == BACKSLASH) + 1;
130 if code_units.peek() == Some(QUOTE) {
131 cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count / 2));
132 // The quote is escaped if there are an odd number of backslashes.
133 if backslash_count % 2 == 1 {
134 code_units.next();
135 cur.push(QUOTE.get());
136 }
137 } else {
138 // If there is no quote on the end then there is no escaping.
139 cur.extend(iter::repeat(BACKSLASH.get()).take(backslash_count));
140 }
141 }
142 // If `in_quotes` and not backslash escaped (see above) then a quote either
143 // unsets `in_quote` or is escaped by another quote.
144 QUOTE if in_quotes => match code_units.peek() {
145 // Two consecutive quotes when `in_quotes` produces one literal quote.
146 Some(QUOTE) => {
147 cur.push(QUOTE.get());
148 code_units.next();
149 }
150 // Otherwise set `in_quotes`.
151 Some(_) => in_quotes = false,
152 // The end of the command line.
153 // Push `cur` even if empty, which we do by breaking while `in_quotes` is still set.
154 None => break,
155 },
156 // If not `in_quotes` and not BACKSLASH escaped (see above) then a quote sets `in_quote`.
157 QUOTE => in_quotes = true,
158 // Everything else is always taken literally.
159 _ => cur.push(w.get()),
160 }
161 }
162 // Push the final argument, if any.
163 if !cur.is_empty() || in_quotes {
164 ret_val.push(OsString::from_wide(&cur[..]));
165 }
166 ret_val
167 }
168
/// Iterator over the parsed command-line arguments of the process.
///
/// Values of this type are produced by [`args`].
pub struct Args {
    /// Arguments that have been parsed but not yet yielded.
    parsed_args_list: vec::IntoIter<OsString>,
}

impl fmt::Debug for Args {
    /// Formats the not-yet-consumed arguments as a list.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_list().entries(self.parsed_args_list.as_slice()).finish()
    }
}

impl Iterator for Args {
    type Item = OsString;

    /// Yields the next argument from the front.
    fn next(&mut self) -> Option<Self::Item> {
        Iterator::next(&mut self.parsed_args_list)
    }

    /// Reports the number of remaining arguments, as given by the underlying iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.parsed_args_list)
    }
}

impl DoubleEndedIterator for Args {
    /// Yields the next argument from the back.
    fn next_back(&mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self.parsed_args_list)
    }
}

impl ExactSizeIterator for Args {
    /// Number of arguments not yet yielded.
    fn len(&self) -> usize {
        ExactSizeIterator::len(&self.parsed_args_list)
    }
}
200
/// A single argument to be appended to a command line by `append_arg`.
///
/// The variant selects whether `append_arg` may quote and escape the string.
#[derive(Debug)]
pub(crate) enum Arg {
    /// Add quotes (if needed)
    Regular(OsString),
    /// Append raw string without quoting
    Raw(OsString),
}
208
/// Quoting policy that `append_arg` derives from an `Arg` and its
/// `force_quotes` flag.
enum Quote {
    // Every arg is quoted (and escaped).
    Always,
    // Only args that are empty or contain a space or tab are quoted;
    // escaping is still applied.
    Auto,
    // Arg appended without any changes (#29494): no quoting, no escaping.
    Never,
}
217
218 pub(crate) fn append_arg(cmd: &mut Vec<u16>, arg: &Arg, force_quotes: bool) -> io::Result<()> {
219 let (arg, quote) = match arg {
220 Arg::Regular(arg) => (arg, if force_quotes { Quote::Always } else { Quote::Auto }),
221 Arg::Raw(arg) => (arg, Quote::Never),
222 };
223
224 // If an argument has 0 characters then we need to quote it to ensure
225 // that it actually gets passed through on the command line or otherwise
226 // it will be dropped entirely when parsed on the other end.
227 ensure_no_nuls(arg)?;
228 let arg_bytes = arg.bytes();
229 let (quote, escape) = match quote {
230 Quote::Always => (true, true),
231 Quote::Auto => {
232 (arg_bytes.iter().any(|c| *c == b' ' || *c == b'\t') || arg_bytes.is_empty(), true)
233 }
234 Quote::Never => (false, false),
235 };
236 if quote {
237 cmd.push('"' as u16);
238 }
239
240 let mut backslashes: usize = 0;
241 for x in arg.encode_wide() {
242 if escape {
243 if x == '\\' as u16 {
244 backslashes += 1;
245 } else {
246 if x == '"' as u16 {
247 // Add n+1 backslashes to total 2n+1 before internal '"'.
248 cmd.extend((0..=backslashes).map(|_| '\\' as u16));
249 }
250 backslashes = 0;
251 }
252 }
253 cmd.push(x);
254 }
255
256 if quote {
257 // Add n backslashes to total 2n before ending '"'.
258 cmd.extend((0..backslashes).map(|_| '\\' as u16));
259 cmd.push('"' as u16);
260 }
261 Ok(())
262 }
263
264 pub(crate) fn make_bat_command_line(
265 script: &[u16],
266 args: &[Arg],
267 force_quotes: bool,
268 ) -> io::Result<Vec<u16>> {
269 // Set the start of the command line to `cmd.exe /c "`
270 // It is necessary to surround the command in an extra pair of quotes,
271 // hence the trailing quote here. It will be closed after all arguments
272 // have been added.
273 let mut cmd: Vec<u16> = "cmd.exe /c \"".encode_utf16().collect();
274
275 // Push the script name surrounded by its quote pair.
276 cmd.push(b'"' as u16);
277 // Windows file names cannot contain a `"` character or end with `\\`.
278 // If the script name does then return an error.
279 if script.contains(&(b'"' as u16)) || script.last() == Some(&(b'\\' as u16)) {
280 return Err(io::const_io_error!(
281 io::ErrorKind::InvalidInput,
282 "Windows file names may not contain `\"` or end with `\\`"
283 ));
284 }
285 cmd.extend_from_slice(script.strip_suffix(&[0]).unwrap_or(script));
286 cmd.push(b'"' as u16);
287
288 // Append the arguments.
289 // FIXME: This needs tests to ensure that the arguments are properly
290 // reconstructed by the batch script by default.
291 for arg in args {
292 cmd.push(' ' as u16);
293 append_arg(&mut cmd, arg, force_quotes)?;
294 }
295
296 // Close the quote we left opened earlier.
297 cmd.push(b'"' as u16);
298
299 Ok(cmd)
300 }
301
302 /// Takes a path and tries to return a non-verbatim path.
303 ///
304 /// This is necessary because cmd.exe does not support verbatim paths.
305 pub(crate) fn to_user_path(mut path: Vec<u16>) -> io::Result<Vec<u16>> {
306 use crate::ptr;
307 use crate::sys::windows::fill_utf16_buf;
308
309 // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
310 // All of these are in the ASCII range so they can be cast directly to `u16`.
311 const SEP: u16 = b'\\' as _;
312 const QUERY: u16 = b'?' as _;
313 const COLON: u16 = b':' as _;
314 const U: u16 = b'U' as _;
315 const N: u16 = b'N' as _;
316 const C: u16 = b'C' as _;
317
318 // Early return if the path is too long to remove the verbatim prefix.
319 const LEGACY_MAX_PATH: usize = 260;
320 if path.len() > LEGACY_MAX_PATH {
321 return Ok(path);
322 }
323
324 match &path[..] {
325 // `\\?\C:\...` => `C:\...`
326 [SEP, SEP, QUERY, SEP, _, COLON, SEP, ..] => unsafe {
327 let lpfilename = path[4..].as_ptr();
328 fill_utf16_buf(
329 |buffer, size| c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()),
330 |full_path: &[u16]| {
331 if full_path == &path[4..path.len() - 1] { full_path.into() } else { path }
332 },
333 )
334 },
335 // `\\?\UNC\...` => `\\...`
336 [SEP, SEP, QUERY, SEP, U, N, C, SEP, ..] => unsafe {
337 // Change the `C` in `UNC\` to `\` so we can get a slice that starts with `\\`.
338 path[6] = b'\\' as u16;
339 let lpfilename = path[6..].as_ptr();
340 fill_utf16_buf(
341 |buffer, size| c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()),
342 |full_path: &[u16]| {
343 if full_path == &path[6..path.len() - 1] {
344 full_path.into()
345 } else {
346 // Restore the 'C' in "UNC".
347 path[6] = b'C' as u16;
348 path
349 }
350 },
351 )
352 },
353 // For everything else, leave the path unchanged.
354 _ => Ok(path),
355 }
356 }