]> git.proxmox.com Git - rustc.git/blame - library/std/src/sys/windows/path.rs
New upstream version 1.74.1+dfsg1
[rustc.git] / library / std / src / sys / windows / path.rs
CommitLineData
3c0e092e
XL
1use super::{c, fill_utf16_buf, to_u16s};
2use crate::ffi::{OsStr, OsString};
3use crate::io;
3c0e092e
XL
4use crate::path::{Path, PathBuf, Prefix};
5use crate::ptr;
3dfed10e
XL
6
7#[cfg(test)]
8mod tests;
9
10pub const MAIN_SEP_STR: &str = "\\";
11pub const MAIN_SEP: char = '\\';
12
3dfed10e
XL
/// Returns `true` if `b` is one of the two separator bytes accepted in
/// ordinary (non-verbatim) Windows paths: `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
17
/// Returns `true` if `b` is a backslash, the only byte this module treats as
/// a separator inside verbatim paths.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
22
3c0e092e
XL
23/// Returns true if `path` looks like a lone filename.
24pub(crate) fn is_file_name(path: &OsStr) -> bool {
781aab86 25 !path.as_encoded_bytes().iter().copied().any(is_sep_byte)
3c0e092e
XL
26}
27pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
781aab86 28 let is_verbatim = path.as_encoded_bytes().starts_with(br"\\?\");
3c0e092e 29 let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
781aab86 30 if let Some(&c) = path.as_encoded_bytes().last() { is_separator(c) } else { false }
3c0e092e
XL
31}
32
/// Appends a suffix to a path.
///
/// The suffix is concatenated onto the raw path string, so it can be used to
/// add an extension without removing an existing extension.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut joined: OsString = path.into();
    joined.push(suffix);
    PathBuf::from(joined)
}
41
04454e1e
FG
42struct PrefixParser<'a, const LEN: usize> {
43 path: &'a OsStr,
44 prefix: [u8; LEN],
45}
46
47impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
48 #[inline]
49 fn get_prefix(path: &OsStr) -> [u8; LEN] {
50 let mut prefix = [0; LEN];
51 // SAFETY: Only ASCII characters are modified.
781aab86 52 for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {
04454e1e
FG
53 prefix[i] = if ch == b'/' { b'\\' } else { ch };
54 }
55 prefix
56 }
57
58 fn new(path: &'a OsStr) -> Self {
59 Self { path, prefix: Self::get_prefix(path) }
60 }
61
62 fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
63 PrefixParserSlice {
64 path: self.path,
65 prefix: &self.prefix[..LEN.min(self.path.len())],
66 index: 0,
67 }
68 }
69}
70
/// A cursor over a path: `prefix` is the separator-normalized leading bytes
/// of `path`, and `index` is how many of those bytes have been consumed.
struct PrefixParserSlice<'a, 'b> {
    path: &'a OsStr,
    prefix: &'b [u8],
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// If the unconsumed part of the normalized prefix starts with `prefix`,
    /// returns a new cursor advanced past it; otherwise returns `None`.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let remaining = &self.prefix[self.index..];
        if remaining.starts_with(prefix.as_bytes()) {
            Some(Self { path: self.path, prefix: self.prefix, index: self.index + prefix.len() })
        } else {
            None
        }
    }

    /// Returns the bytes of the *original* path consumed so far (separators
    /// are not normalized here).
    fn prefix_bytes(&self) -> &'a [u8] {
        let bytes = self.path.as_encoded_bytes();
        &bytes[..self.index]
    }

    /// Consumes the cursor and returns the part of the original path that
    /// follows everything consumed so far.
    fn finish(self) -> &'a OsStr {
        let rest = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(rest) }
    }
}
96
3dfed10e
XL
97pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
98 use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
99
04454e1e
FG
100 let parser = PrefixParser::<8>::new(path);
101 let parser = parser.as_slice();
102 if let Some(parser) = parser.strip_prefix(r"\\") {
fc512014 103 // \\
04454e1e
FG
104
105 // The meaning of verbatim paths can change when they use a different
106 // separator.
107 if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') {
fc512014 108 // \\?\
04454e1e 109 if let Some(parser) = parser.strip_prefix(r"UNC\") {
fc512014
XL
110 // \\?\UNC\server\share
111
04454e1e 112 let path = parser.finish();
fc512014
XL
113 let (server, path) = parse_next_component(path, true);
114 let (share, _) = parse_next_component(path, true);
115
116 Some(VerbatimUNC(server, share))
3dfed10e 117 } else {
04454e1e 118 let path = parser.finish();
fc512014
XL
119
120 // in verbatim paths only recognize an exact drive prefix
04454e1e 121 if let Some(drive) = parse_drive_exact(path) {
fc512014
XL
122 // \\?\C:
123 Some(VerbatimDisk(drive))
124 } else {
125 // \\?\prefix
04454e1e 126 let (prefix, _) = parse_next_component(path, true);
fc512014 127 Some(Verbatim(prefix))
3dfed10e
XL
128 }
129 }
04454e1e 130 } else if let Some(parser) = parser.strip_prefix(r".\") {
3dfed10e 131 // \\.\COM42
04454e1e 132 let path = parser.finish();
fc512014
XL
133 let (prefix, _) = parse_next_component(path, false);
134 Some(DeviceNS(prefix))
135 } else {
04454e1e 136 let path = parser.finish();
fc512014
XL
137 let (server, path) = parse_next_component(path, false);
138 let (share, _) = parse_next_component(path, false);
139
140 if !server.is_empty() && !share.is_empty() {
3dfed10e 141 // \\server\share
fc512014
XL
142 Some(UNC(server, share))
143 } else {
144 // no valid prefix beginning with "\\" recognized
145 None
3dfed10e 146 }
3dfed10e 147 }
fc512014 148 } else if let Some(drive) = parse_drive(path) {
3dfed10e 149 // C:
fc512014
XL
150 Some(Disk(drive))
151 } else {
152 // no prefix
153 None
3dfed10e 154 }
3dfed10e
XL
155}
156
// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// Only ASCII letters are accepted as drive letters, since DOS-style systems
// generally cannot have more than 26 of them.
// See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
//
// Returns the drive letter converted to upper case.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let &[letter, b':', ..] = path.as_encoded_bytes() else {
        return None;
    };
    letter.is_ascii_alphabetic().then(|| letter.to_ascii_uppercase())
}
170
171// Parses a drive prefix exactly, e.g. "C:"
04454e1e 172fn parse_drive_exact(path: &OsStr) -> Option<u8> {
fc512014 173 // only parse two bytes: the drive letter and the drive separator
781aab86 174 if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
04454e1e
FG
175 parse_drive(path)
176 } else {
177 None
fc512014
XL
178 }
179}
180
181// Parse the next path component.
182//
183// Returns the next component and the rest of the path excluding the component and separator.
184// Does not recognize `/` as a separator character if `verbatim` is true.
185fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
186 let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
187
781aab86 188 match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
fc512014 189 Some(separator_start) => {
064997fb 190 let separator_end = separator_start + 1;
fc512014 191
781aab86 192 let component = &path.as_encoded_bytes()[..separator_start];
fc512014
XL
193
194 // Panic safe
195 // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
781aab86 196 let path = &path.as_encoded_bytes()[separator_end..];
fc512014 197
6a06907d 198 // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
fc512014
XL
199 // is encoded in a single byte, therefore `bytes[separator_start]` and
200 // `bytes[separator_end]` must be code point boundaries and thus
201 // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
fe692bf9
FG
202 unsafe {
203 (
781aab86
FG
204 OsStr::from_encoded_bytes_unchecked(component),
205 OsStr::from_encoded_bytes_unchecked(path),
fe692bf9
FG
206 )
207 }
fc512014
XL
208 }
209 None => (path, OsStr::new("")),
210 }
3dfed10e 211}
3c0e092e
XL
212
213/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
214///
215/// This path may or may not have a verbatim prefix.
216pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
353b0b11
FG
217 let path = to_u16s(path)?;
218 get_long_path(path, true)
219}
220
221/// Get a normalized absolute path that can bypass path length limits.
222///
223/// Setting prefer_verbatim to true suggests a stronger preference for verbatim
224/// paths even when not strictly necessary. This allows the Windows API to avoid
225/// repeating our work. However, if the path may be given back to users or
226/// passed to other application then it's preferable to use non-verbatim paths
227/// when possible. Non-verbatim paths are better understood by users and handled
228/// by more software.
229pub(crate) fn get_long_path(mut path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> {
3c0e092e
XL
230 // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
231 // However, for APIs such as CreateDirectory[1], the limit is 248.
232 //
233 // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
234 const LEGACY_MAX_PATH: usize = 248;
235 // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
236 // All of these are in the ASCII range so they can be cast directly to `u16`.
237 const SEP: u16 = b'\\' as _;
238 const ALT_SEP: u16 = b'/' as _;
239 const QUERY: u16 = b'?' as _;
240 const COLON: u16 = b':' as _;
241 const DOT: u16 = b'.' as _;
242 const U: u16 = b'U' as _;
243 const N: u16 = b'N' as _;
244 const C: u16 = b'C' as _;
245
246 // \\?\
247 const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
248 // \??\
249 const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
250 // \\?\UNC\
251 const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];
252
3c0e092e
XL
253 if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] {
254 // Early return for paths that are already verbatim or empty.
255 return Ok(path);
256 } else if path.len() < LEGACY_MAX_PATH {
257 // Early return if an absolute path is less < 260 UTF-16 code units.
258 // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily.
259 match path.as_slice() {
260 // Starts with `D:`, `D:\`, `D:/`, etc.
261 // Does not match if the path starts with a `\` or `/`.
262 [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..]
263 if *drive != SEP && *drive != ALT_SEP =>
264 {
265 return Ok(path);
266 }
267 // Starts with `\\`, `//`, etc
268 [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path),
269 _ => {}
270 }
271 }
272
273 // Firstly, get the absolute path using `GetFullPathNameW`.
274 // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
275 let lpfilename = path.as_ptr();
276 fill_utf16_buf(
277 // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
278 // `lpfilename` is a pointer to a null terminated string that is not
279 // invalidated until after `GetFullPathNameW` returns successfully.
04454e1e 280 |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
3c0e092e
XL
281 |mut absolute| {
282 path.clear();
283
353b0b11
FG
284 // Only prepend the prefix if needed.
285 if prefer_verbatim || absolute.len() + 1 >= LEGACY_MAX_PATH {
286 // Secondly, add the verbatim prefix. This is easier here because we know the
287 // path is now absolute and fully normalized (e.g. `/` has been changed to `\`).
288 let prefix = match absolute {
289 // C:\ => \\?\C:\
290 [_, COLON, SEP, ..] => VERBATIM_PREFIX,
291 // \\.\ => \\?\
292 [SEP, SEP, DOT, SEP, ..] => {
293 absolute = &absolute[4..];
294 VERBATIM_PREFIX
295 }
296 // Leave \\?\ and \??\ as-is.
297 [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[],
298 // \\ => \\?\UNC\
299 [SEP, SEP, ..] => {
300 absolute = &absolute[2..];
301 UNC_PREFIX
302 }
303 // Anything else we leave alone.
304 _ => &[],
305 };
306
307 path.reserve_exact(prefix.len() + absolute.len() + 1);
308 path.extend_from_slice(prefix);
309 } else {
310 path.reserve_exact(absolute.len() + 1);
311 }
3c0e092e
XL
312 path.extend_from_slice(absolute);
313 path.push(0);
314 },
315 )?;
316 Ok(path)
317}
5099ac24
FG
318
319/// Make a Windows path absolute.
320pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
04454e1e
FG
321 let path = path.as_os_str();
322 let prefix = parse_prefix(path);
323 // Verbatim paths should not be modified.
324 if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
325 // NULs in verbatim paths are rejected for consistency.
781aab86 326 if path.as_encoded_bytes().contains(&0) {
04454e1e
FG
327 return Err(io::const_io_error!(
328 io::ErrorKind::InvalidInput,
329 "strings passed to WinAPI cannot contain NULs",
330 ));
331 }
332 return Ok(path.to_owned().into());
5099ac24 333 }
04454e1e 334
5099ac24
FG
335 let path = to_u16s(path)?;
336 let lpfilename = path.as_ptr();
337 fill_utf16_buf(
338 // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
339 // `lpfilename` is a pointer to a null terminated string that is not
340 // invalidated until after `GetFullPathNameW` returns successfully.
341 |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
342 super::os2path,
343 )
344}