]> git.proxmox.com Git - rustc.git/blame - library/std/src/sys/windows/path.rs
Update upstream source from tag 'upstream/1.70.0+dfsg1'
[rustc.git] / library / std / src / sys / windows / path.rs
CommitLineData
3c0e092e
XL
1use super::{c, fill_utf16_buf, to_u16s};
2use crate::ffi::{OsStr, OsString};
3use crate::io;
3dfed10e 4use crate::mem;
3c0e092e
XL
5use crate::path::{Path, PathBuf, Prefix};
6use crate::ptr;
3dfed10e
XL
7
8#[cfg(test)]
9mod tests;
10
/// The primary path separator on Windows, as a string slice.
pub const MAIN_SEP_STR: &str = "\\";
/// The primary path separator on Windows, as a single character.
pub const MAIN_SEP: char = '\\';
13
6a06907d
XL
/// Reinterprets a byte slice as an `OsStr` without copying or validating it.
///
/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
    // SAFETY: &OsStr is layout compatible with &Slice, which is compatible
    // with &Wtf8, which is compatible with &[u8]. The caller guarantees that
    // the contents are valid wtf8.
    unsafe { mem::transmute::<&[u8], &OsStr>(bytes) }
}
23
/// Returns true if `b` is one of the two separator bytes accepted in
/// non-verbatim paths: `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
28
/// Returns true if `b` is `\`, the only separator byte recognized in
/// verbatim paths.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
33
3c0e092e
XL
34/// Returns true if `path` looks like a lone filename.
35pub(crate) fn is_file_name(path: &OsStr) -> bool {
36 !path.bytes().iter().copied().any(is_sep_byte)
37}
38pub(crate) fn has_trailing_slash(path: &OsStr) -> bool {
39 let is_verbatim = path.bytes().starts_with(br"\\?\");
40 let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte };
41 if let Some(&c) = path.bytes().last() { is_separator(c) } else { false }
42}
43
/// Appends `suffix` verbatim to the end of `path`.
///
/// Nothing is inserted between the two and nothing is removed, so this can
/// be used to append an extension without removing an existing extension.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut buffer = path.into_os_string();
    buffer.push(suffix);
    PathBuf::from(buffer)
}
52
04454e1e
FG
53struct PrefixParser<'a, const LEN: usize> {
54 path: &'a OsStr,
55 prefix: [u8; LEN],
56}
57
58impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
59 #[inline]
60 fn get_prefix(path: &OsStr) -> [u8; LEN] {
61 let mut prefix = [0; LEN];
62 // SAFETY: Only ASCII characters are modified.
63 for (i, &ch) in path.bytes().iter().take(LEN).enumerate() {
64 prefix[i] = if ch == b'/' { b'\\' } else { ch };
65 }
66 prefix
67 }
68
69 fn new(path: &'a OsStr) -> Self {
70 Self { path, prefix: Self::get_prefix(path) }
71 }
72
73 fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
74 PrefixParserSlice {
75 path: self.path,
76 prefix: &self.prefix[..LEN.min(self.path.len())],
77 index: 0,
78 }
79 }
80}
81
82struct PrefixParserSlice<'a, 'b> {
83 path: &'a OsStr,
84 prefix: &'b [u8],
85 index: usize,
86}
87
88impl<'a> PrefixParserSlice<'a, '_> {
89 fn strip_prefix(&self, prefix: &str) -> Option<Self> {
90 self.prefix[self.index..]
91 .starts_with(prefix.as_bytes())
92 .then(|| Self { index: self.index + prefix.len(), ..*self })
93 }
94
95 fn prefix_bytes(&self) -> &'a [u8] {
96 &self.path.bytes()[..self.index]
97 }
98
99 fn finish(self) -> &'a OsStr {
100 // SAFETY: The unsafety here stems from converting between &OsStr and
101 // &[u8] and back. This is safe to do because (1) we only look at ASCII
102 // contents of the encoding and (2) new &OsStr values are produced only
103 // from ASCII-bounded slices of existing &OsStr values.
104 unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) }
105 }
106}
107
3dfed10e
XL
108pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
109 use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC};
110
04454e1e
FG
111 let parser = PrefixParser::<8>::new(path);
112 let parser = parser.as_slice();
113 if let Some(parser) = parser.strip_prefix(r"\\") {
fc512014 114 // \\
04454e1e
FG
115
116 // The meaning of verbatim paths can change when they use a different
117 // separator.
118 if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') {
fc512014 119 // \\?\
04454e1e 120 if let Some(parser) = parser.strip_prefix(r"UNC\") {
fc512014
XL
121 // \\?\UNC\server\share
122
04454e1e 123 let path = parser.finish();
fc512014
XL
124 let (server, path) = parse_next_component(path, true);
125 let (share, _) = parse_next_component(path, true);
126
127 Some(VerbatimUNC(server, share))
3dfed10e 128 } else {
04454e1e 129 let path = parser.finish();
fc512014
XL
130
131 // in verbatim paths only recognize an exact drive prefix
04454e1e 132 if let Some(drive) = parse_drive_exact(path) {
fc512014
XL
133 // \\?\C:
134 Some(VerbatimDisk(drive))
135 } else {
136 // \\?\prefix
04454e1e 137 let (prefix, _) = parse_next_component(path, true);
fc512014 138 Some(Verbatim(prefix))
3dfed10e
XL
139 }
140 }
04454e1e 141 } else if let Some(parser) = parser.strip_prefix(r".\") {
3dfed10e 142 // \\.\COM42
04454e1e 143 let path = parser.finish();
fc512014
XL
144 let (prefix, _) = parse_next_component(path, false);
145 Some(DeviceNS(prefix))
146 } else {
04454e1e 147 let path = parser.finish();
fc512014
XL
148 let (server, path) = parse_next_component(path, false);
149 let (share, _) = parse_next_component(path, false);
150
151 if !server.is_empty() && !share.is_empty() {
3dfed10e 152 // \\server\share
fc512014
XL
153 Some(UNC(server, share))
154 } else {
155 // no valid prefix beginning with "\\" recognized
156 None
3dfed10e 157 }
3dfed10e 158 }
fc512014 159 } else if let Some(drive) = parse_drive(path) {
3dfed10e 160 // C:
fc512014
XL
161 Some(Disk(drive))
162 } else {
163 // no prefix
164 None
3dfed10e 165 }
3dfed10e
XL
166}
167
fc512014 168// Parses a drive prefix, e.g. "C:" and "C:\whatever"
04454e1e 169fn parse_drive(path: &OsStr) -> Option<u8> {
fc512014
XL
170 // In most DOS systems, it is not possible to have more than 26 drive letters.
171 // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
172 fn is_valid_drive_letter(drive: &u8) -> bool {
173 drive.is_ascii_alphabetic()
174 }
175
04454e1e 176 match path.bytes() {
fc512014
XL
177 [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
178 _ => None,
179 }
180}
181
182// Parses a drive prefix exactly, e.g. "C:"
04454e1e 183fn parse_drive_exact(path: &OsStr) -> Option<u8> {
fc512014 184 // only parse two bytes: the drive letter and the drive separator
04454e1e
FG
185 if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
186 parse_drive(path)
187 } else {
188 None
fc512014
XL
189 }
190}
191
192// Parse the next path component.
193//
194// Returns the next component and the rest of the path excluding the component and separator.
195// Does not recognize `/` as a separator character if `verbatim` is true.
196fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
197 let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
198
199 match path.bytes().iter().position(|&x| separator(x)) {
200 Some(separator_start) => {
064997fb 201 let separator_end = separator_start + 1;
fc512014
XL
202
203 let component = &path.bytes()[..separator_start];
204
205 // Panic safe
206 // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
207 let path = &path.bytes()[separator_end..];
208
6a06907d 209 // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
fc512014
XL
210 // is encoded in a single byte, therefore `bytes[separator_start]` and
211 // `bytes[separator_end]` must be code point boundaries and thus
212 // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
213 unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) }
214 }
215 None => (path, OsStr::new("")),
216 }
3dfed10e 217}
3c0e092e
XL
218
219/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
220///
221/// This path may or may not have a verbatim prefix.
222pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
353b0b11
FG
223 let path = to_u16s(path)?;
224 get_long_path(path, true)
225}
226
227/// Get a normalized absolute path that can bypass path length limits.
228///
229/// Setting prefer_verbatim to true suggests a stronger preference for verbatim
230/// paths even when not strictly necessary. This allows the Windows API to avoid
231/// repeating our work. However, if the path may be given back to users or
232/// passed to other application then it's preferable to use non-verbatim paths
233/// when possible. Non-verbatim paths are better understood by users and handled
234/// by more software.
235pub(crate) fn get_long_path(mut path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> {
3c0e092e
XL
236 // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL).
237 // However, for APIs such as CreateDirectory[1], the limit is 248.
238 //
239 // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters
240 const LEGACY_MAX_PATH: usize = 248;
241 // UTF-16 encoded code points, used in parsing and building UTF-16 paths.
242 // All of these are in the ASCII range so they can be cast directly to `u16`.
243 const SEP: u16 = b'\\' as _;
244 const ALT_SEP: u16 = b'/' as _;
245 const QUERY: u16 = b'?' as _;
246 const COLON: u16 = b':' as _;
247 const DOT: u16 = b'.' as _;
248 const U: u16 = b'U' as _;
249 const N: u16 = b'N' as _;
250 const C: u16 = b'C' as _;
251
252 // \\?\
253 const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP];
254 // \??\
255 const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP];
256 // \\?\UNC\
257 const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP];
258
3c0e092e
XL
259 if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] {
260 // Early return for paths that are already verbatim or empty.
261 return Ok(path);
262 } else if path.len() < LEGACY_MAX_PATH {
263 // Early return if an absolute path is less < 260 UTF-16 code units.
264 // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily.
265 match path.as_slice() {
266 // Starts with `D:`, `D:\`, `D:/`, etc.
267 // Does not match if the path starts with a `\` or `/`.
268 [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..]
269 if *drive != SEP && *drive != ALT_SEP =>
270 {
271 return Ok(path);
272 }
273 // Starts with `\\`, `//`, etc
274 [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path),
275 _ => {}
276 }
277 }
278
279 // Firstly, get the absolute path using `GetFullPathNameW`.
280 // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
281 let lpfilename = path.as_ptr();
282 fill_utf16_buf(
283 // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
284 // `lpfilename` is a pointer to a null terminated string that is not
285 // invalidated until after `GetFullPathNameW` returns successfully.
04454e1e 286 |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
3c0e092e
XL
287 |mut absolute| {
288 path.clear();
289
353b0b11
FG
290 // Only prepend the prefix if needed.
291 if prefer_verbatim || absolute.len() + 1 >= LEGACY_MAX_PATH {
292 // Secondly, add the verbatim prefix. This is easier here because we know the
293 // path is now absolute and fully normalized (e.g. `/` has been changed to `\`).
294 let prefix = match absolute {
295 // C:\ => \\?\C:\
296 [_, COLON, SEP, ..] => VERBATIM_PREFIX,
297 // \\.\ => \\?\
298 [SEP, SEP, DOT, SEP, ..] => {
299 absolute = &absolute[4..];
300 VERBATIM_PREFIX
301 }
302 // Leave \\?\ and \??\ as-is.
303 [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[],
304 // \\ => \\?\UNC\
305 [SEP, SEP, ..] => {
306 absolute = &absolute[2..];
307 UNC_PREFIX
308 }
309 // Anything else we leave alone.
310 _ => &[],
311 };
312
313 path.reserve_exact(prefix.len() + absolute.len() + 1);
314 path.extend_from_slice(prefix);
315 } else {
316 path.reserve_exact(absolute.len() + 1);
317 }
3c0e092e
XL
318 path.extend_from_slice(absolute);
319 path.push(0);
320 },
321 )?;
322 Ok(path)
323}
5099ac24
FG
324
325/// Make a Windows path absolute.
326pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
04454e1e
FG
327 let path = path.as_os_str();
328 let prefix = parse_prefix(path);
329 // Verbatim paths should not be modified.
330 if prefix.map(|x| x.is_verbatim()).unwrap_or(false) {
331 // NULs in verbatim paths are rejected for consistency.
332 if path.bytes().contains(&0) {
333 return Err(io::const_io_error!(
334 io::ErrorKind::InvalidInput,
335 "strings passed to WinAPI cannot contain NULs",
336 ));
337 }
338 return Ok(path.to_owned().into());
5099ac24 339 }
04454e1e 340
5099ac24
FG
341 let path = to_u16s(path)?;
342 let lpfilename = path.as_ptr();
343 fill_utf16_buf(
344 // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid.
345 // `lpfilename` is a pointer to a null terminated string that is not
346 // invalidated until after `GetFullPathNameW` returns successfully.
347 |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) },
348 super::os2path,
349 )
350}