]> git.proxmox.com Git - rustc.git/blame - library/std/src/sys/windows/path.rs
New upstream version 1.58.1+dfsg1
[rustc.git] / library / std / src / sys / windows / path.rs
CommitLineData
3c0e092e
XL
1use super::{c, fill_utf16_buf, to_u16s};
2use crate::ffi::{OsStr, OsString};
3use crate::io;
3dfed10e 4use crate::mem;
3c0e092e
XL
5use crate::path::{Path, PathBuf, Prefix};
6use crate::ptr;
3dfed10e
XL
7
8#[cfg(test)]
9mod tests;
10
/// The primary path separator on Windows, as a string slice.
pub const MAIN_SEP_STR: &str = "\\";
/// The primary path separator on Windows, as a single character.
pub const MAIN_SEP: char = '\\';
13
6a06907d
XL
/// Reinterprets a byte slice as an `OsStr` without copying or validating it.
///
/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
    // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8,
    // which is compatible with &[u8].
    mem::transmute(bytes)
}
23
/// Returns true if `b` is a path separator in a non-verbatim path,
/// i.e. either `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
28
/// Returns true if `b` is a path separator in a verbatim path.
///
/// Only `\` qualifies; `/` is not treated as a separator here.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    b == b'\\'
}
33
3c0e092e
XL
34/// Returns true if `path` looks like a lone filename.
35pub(crate) fn is_file_name(path: &OsStr) -> bool {
36 !path.bytes().iter().copied().any(is_sep_byte)
37}
38pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
39 let is_verbatim = path.bytes().starts_with(br"\\?\");
40 let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
41 if let Some(&c) = path.bytes().last() { is_separator(c) } else { false }
42}
43
/// Appends a suffix to a path.
///
/// Can be used to append an extension without removing an existing extension.
/// The suffix is pushed onto the underlying `OsString` verbatim: no separator
/// is inserted and nothing already in `path` is replaced.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut path = OsString::from(path);
    path.push(suffix);
    path.into()
}
52
3dfed10e
XL
53pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
54 use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
55
fc512014
XL
56 if let Some(path) = strip_prefix(path, r"\\") {
57 // \\
58 if let Some(path) = strip_prefix(path, r"?\") {
59 // \\?\
60 if let Some(path) = strip_prefix(path, r"UNC\") {
61 // \\?\UNC\server\share
62
63 let (server, path) = parse_next_component(path, true);
64 let (share, _) = parse_next_component(path, true);
65
66 Some(VerbatimUNC(server, share))
3dfed10e 67 } else {
fc512014
XL
68 let (prefix, _) = parse_next_component(path, true);
69
70 // in verbatim paths only recognize an exact drive prefix
71 if let Some(drive) = parse_drive_exact(prefix) {
72 // \\?\C:
73 Some(VerbatimDisk(drive))
74 } else {
75 // \\?\prefix
76 Some(Verbatim(prefix))
3dfed10e
XL
77 }
78 }
fc512014 79 } else if let Some(path) = strip_prefix(path, r".\") {
3dfed10e 80 // \\.\COM42
fc512014
XL
81 let (prefix, _) = parse_next_component(path, false);
82 Some(DeviceNS(prefix))
83 } else {
84 let (server, path) = parse_next_component(path, false);
85 let (share, _) = parse_next_component(path, false);
86
87 if !server.is_empty() && !share.is_empty() {
3dfed10e 88 // \\server\share
fc512014
XL
89 Some(UNC(server, share))
90 } else {
91 // no valid prefix beginning with "\\" recognized
92 None
3dfed10e 93 }
3dfed10e 94 }
fc512014 95 } else if let Some(drive) = parse_drive(path) {
3dfed10e 96 // C:
fc512014
XL
97 Some(Disk(drive))
98 } else {
99 // no prefix
100 None
3dfed10e 101 }
3dfed10e
XL
102}
103
fc512014
XL
104// Parses a drive prefix, e.g. "C:" and "C:\whatever"
105fn parse_drive(prefix: &OsStr) -> Option<u8> {
106 // In most DOS systems, it is not possible to have more than 26 drive letters.
107 // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
108 fn is_valid_drive_letter(drive: &u8) -> bool {
109 drive.is_ascii_alphabetic()
110 }
111
112 match prefix.bytes() {
113 [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
114 _ => None,
115 }
116}
117
118// Parses a drive prefix exactly, e.g. "C:"
119fn parse_drive_exact(prefix: &OsStr) -> Option<u8> {
120 // only parse two bytes: the drive letter and the drive separator
121 if prefix.len() == 2 { parse_drive(prefix) } else { None }
122}
123
124fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> {
125 // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]`
126 // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice.
127 match path.bytes().strip_prefix(prefix.as_bytes()) {
128 Some(path) => unsafe { Some(bytes_as_os_str(path)) },
129 None => None,
130 }
131}
132
133// Parse the next path component.
134//
135// Returns the next component and the rest of the path excluding the component and separator.
136// Does not recognize `/` as a separator character if `verbatim` is true.
137fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
138 let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
139
140 match path.bytes().iter().position(|&x| separator(x)) {
141 Some(separator_start) => {
142 let mut separator_end = separator_start + 1;
143
144 // a series of multiple separator characters is treated as a single separator,
145 // except in verbatim paths
146 while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end])
147 {
148 separator_end += 1;
149 }
150
151 let component = &path.bytes()[..separator_start];
152
153 // Panic safe
154 // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
155 let path = &path.bytes()[separator_end..];
156
6a06907d 157 // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
fc512014
XL
158 // is encoded in a single byte, therefore `bytes[separator_start]` and
159 // `bytes[separator_end]` must be code point boundaries and thus
160 // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
161 unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) }
162 }
163 None => (path, OsStr::new("")),
164 }
3dfed10e 165}
3c0e092e
XL
166
167/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
168///
169/// This path may or may not have a verbatim prefix.
170pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
171 // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
172 // However, for APIs such as CreateDirectory[1], the limit is 248.
173 //
174 // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
175 const LEGACY_MAX_PATH: usize = 248;
176 // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
177 // All of these are in the ASCII range so they can be cast directly to `u16`.
178 const SEP: u16 = b'\\' as _;
179 const ALT_SEP: u16 = b'/' as _;
180 const QUERY: u16 = b'?' as _;
181 const COLON: u16 = b':' as _;
182 const DOT: u16 = b'.' as _;
183 const U: u16 = b'U' as _;
184 const N: u16 = b'N' as _;
185 const C: u16 = b'C' as _;
186
187 // \\?\
188 const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
189 // \??\
190 const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
191 // \\?\UNC\
192 const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];
193
194 let mut path = to_u16s(path)?;
195 if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] {
196 // Early return for paths that are already verbatim or empty.
197 return Ok(path);
198 } else if path.len() < LEGACY_MAX_PATH {
199 // Early return if an absolute path is less < 260 UTF-16 code units.
200 // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily.
201 match path.as_slice() {
202 // Starts with `D:`, `D:\`, `D:/`, etc.
203 // Does not match if the path starts with a `\` or `/`.
204 [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..]
205 if *drive != SEP && *drive != ALT_SEP =>
206 {
207 return Ok(path);
208 }
209 // Starts with `\\`, `//`, etc
210 [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path),
211 _ => {}
212 }
213 }
214
215 // Firstly, get the absolute path using `GetFullPathNameW`.
216 // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
217 let lpfilename = path.as_ptr();
218 fill_utf16_buf(
219 // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
220 // `lpfilename` is a pointer to a null terminated string that is not
221 // invalidated until after `GetFullPathNameW` returns successfully.
222 |buffer, size| unsafe {
223 // While the docs for `GetFullPathNameW` have the standard note
224 // about needing a `\\?\` path for a long lpfilename, this does not
225 // appear to be true in practice.
226 // See:
227 // https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths
228 // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
229 c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut())
230 },
231 |mut absolute| {
232 path.clear();
233
234 // Secondly, add the verbatim prefix. This is easier here because we know the
235 // path is now absolute and fully normalized (e.g. `/` has been changed to `\`).
236 let prefix = match absolute {
237 // C:\ => \\?\C:\
238 [_, COLON, SEP, ..] => VERBATIM_PREFIX,
239 // \\.\ => \\?\
240 [SEP, SEP, DOT, SEP, ..] => {
241 absolute = &absolute[4..];
242 VERBATIM_PREFIX
243 }
244 // Leave \\?\ and \??\ as-is.
245 [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[],
246 // \\ => \\?\UNC\
247 [SEP, SEP, ..] => {
248 absolute = &absolute[2..];
249 UNC_PREFIX
250 }
251 // Anything else we leave alone.
252 _ => &[],
253 };
254
255 path.reserve_exact(prefix.len() + absolute.len() + 1);
256 path.extend_from_slice(prefix);
257 path.extend_from_slice(absolute);
258 path.push(0);
259 },
260 )?;
261 Ok(path)
262}