]>
Commit | Line | Data |
---|---|---|
3c0e092e XL |
1 | use super::{c, fill_utf16_buf, to_u16s}; |
2 | use crate::ffi::{OsStr, OsString}; | |
3 | use crate::io; | |
3dfed10e | 4 | use crate::mem; |
3c0e092e XL |
5 | use crate::path::{Path, PathBuf, Prefix}; |
6 | use crate::ptr; | |
3dfed10e XL |
7 | |
8 | #[cfg(test)] | |
9 | mod tests; | |
10 | ||
11 | pub const MAIN_SEP_STR: &str = "\\"; | |
12 | pub const MAIN_SEP: char = '\\'; | |
13 | ||
6a06907d XL |
/// Reinterprets a byte slice as an `OsStr` without copying or validating it.
///
/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
    // Sound only because of a chain of layout compatibility:
    // &OsStr <-> &Slice <-> &Wtf8 <-> &[u8].
    mem::transmute::<&[u8], &OsStr>(bytes)
}
23 | ||
/// Returns true if `b` is a path separator in a non-verbatim path,
/// i.e. either `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
28 | ||
/// Returns true if `b` is a path separator in a verbatim path.
///
/// Only `\` qualifies; `/` is not treated as a separator here.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
33 | ||
3c0e092e XL |
34 | /// Returns true if `path` looks like a lone filename. |
35 | pub(crate) fn is_file_name(path: &OsStr) -> bool { | |
36 | !path.bytes().iter().copied().any(is_sep_byte) | |
37 | } | |
38 | pub(crate) fn has_trailing_slash(path: &OsStr) -> bool { | |
39 | let is_verbatim = path.bytes().starts_with(br"\\?\"); | |
40 | let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte }; | |
41 | if let Some(&c) = path.bytes().last() { is_separator(c) } else { false } | |
42 | } | |
43 | ||
/// Appends a suffix to a path.
///
/// The suffix is pushed onto the raw path string, so nothing is replaced.
/// Can be used to append an extension without removing an existing extension.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut raw: OsString = path.into_os_string();
    raw.push(suffix);
    PathBuf::from(raw)
}
52 | ||
04454e1e FG |
53 | struct PrefixParser<'a, const LEN: usize> { |
54 | path: &'a OsStr, | |
55 | prefix: [u8; LEN], | |
56 | } | |
57 | ||
58 | impl<'a, const LEN: usize> PrefixParser<'a, LEN> { | |
59 | #[inline] | |
60 | fn get_prefix(path: &OsStr) -> [u8; LEN] { | |
61 | let mut prefix = [0; LEN]; | |
62 | // SAFETY: Only ASCII characters are modified. | |
63 | for (i, &ch) in path.bytes().iter().take(LEN).enumerate() { | |
64 | prefix[i] = if ch == b'/' { b'\\' } else { ch }; | |
65 | } | |
66 | prefix | |
67 | } | |
68 | ||
69 | fn new(path: &'a OsStr) -> Self { | |
70 | Self { path, prefix: Self::get_prefix(path) } | |
71 | } | |
72 | ||
73 | fn as_slice(&self) -> PrefixParserSlice<'a, '_> { | |
74 | PrefixParserSlice { | |
75 | path: self.path, | |
76 | prefix: &self.prefix[..LEN.min(self.path.len())], | |
77 | index: 0, | |
78 | } | |
79 | } | |
80 | } | |
81 | ||
82 | struct PrefixParserSlice<'a, 'b> { | |
83 | path: &'a OsStr, | |
84 | prefix: &'b [u8], | |
85 | index: usize, | |
86 | } | |
87 | ||
88 | impl<'a> PrefixParserSlice<'a, '_> { | |
89 | fn strip_prefix(&self, prefix: &str) -> Option<Self> { | |
90 | self.prefix[self.index..] | |
91 | .starts_with(prefix.as_bytes()) | |
92 | .then(|| Self { index: self.index + prefix.len(), ..*self }) | |
93 | } | |
94 | ||
95 | fn prefix_bytes(&self) -> &'a [u8] { | |
96 | &self.path.bytes()[..self.index] | |
97 | } | |
98 | ||
99 | fn finish(self) -> &'a OsStr { | |
100 | // SAFETY: The unsafety here stems from converting between &OsStr and | |
101 | // &[u8] and back. This is safe to do because (1) we only look at ASCII | |
102 | // contents of the encoding and (2) new &OsStr values are produced only | |
103 | // from ASCII-bounded slices of existing &OsStr values. | |
104 | unsafe { bytes_as_os_str(&self.path.bytes()[self.index..]) } | |
105 | } | |
106 | } | |
107 | ||
3dfed10e XL |
108 | pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { |
109 | use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; | |
110 | ||
04454e1e FG |
111 | let parser = PrefixParser::<8>::new(path); |
112 | let parser = parser.as_slice(); | |
113 | if let Some(parser) = parser.strip_prefix(r"\\") { | |
fc512014 | 114 | // \\ |
04454e1e FG |
115 | |
116 | // The meaning of verbatim paths can change when they use a different | |
117 | // separator. | |
118 | if let Some(parser) = parser.strip_prefix(r"?\") && !parser.prefix_bytes().iter().any(|&x| x == b'/') { | |
fc512014 | 119 | // \\?\ |
04454e1e | 120 | if let Some(parser) = parser.strip_prefix(r"UNC\") { |
fc512014 XL |
121 | // \\?\UNC\server\share |
122 | ||
04454e1e | 123 | let path = parser.finish(); |
fc512014 XL |
124 | let (server, path) = parse_next_component(path, true); |
125 | let (share, _) = parse_next_component(path, true); | |
126 | ||
127 | Some(VerbatimUNC(server, share)) | |
3dfed10e | 128 | } else { |
04454e1e | 129 | let path = parser.finish(); |
fc512014 XL |
130 | |
131 | // in verbatim paths only recognize an exact drive prefix | |
04454e1e | 132 | if let Some(drive) = parse_drive_exact(path) { |
fc512014 XL |
133 | // \\?\C: |
134 | Some(VerbatimDisk(drive)) | |
135 | } else { | |
136 | // \\?\prefix | |
04454e1e | 137 | let (prefix, _) = parse_next_component(path, true); |
fc512014 | 138 | Some(Verbatim(prefix)) |
3dfed10e XL |
139 | } |
140 | } | |
04454e1e | 141 | } else if let Some(parser) = parser.strip_prefix(r".\") { |
3dfed10e | 142 | // \\.\COM42 |
04454e1e | 143 | let path = parser.finish(); |
fc512014 XL |
144 | let (prefix, _) = parse_next_component(path, false); |
145 | Some(DeviceNS(prefix)) | |
146 | } else { | |
04454e1e | 147 | let path = parser.finish(); |
fc512014 XL |
148 | let (server, path) = parse_next_component(path, false); |
149 | let (share, _) = parse_next_component(path, false); | |
150 | ||
151 | if !server.is_empty() && !share.is_empty() { | |
3dfed10e | 152 | // \\server\share |
fc512014 XL |
153 | Some(UNC(server, share)) |
154 | } else { | |
155 | // no valid prefix beginning with "\\" recognized | |
156 | None | |
3dfed10e | 157 | } |
3dfed10e | 158 | } |
fc512014 | 159 | } else if let Some(drive) = parse_drive(path) { |
3dfed10e | 160 | // C: |
fc512014 XL |
161 | Some(Disk(drive)) |
162 | } else { | |
163 | // no prefix | |
164 | None | |
3dfed10e | 165 | } |
3dfed10e XL |
166 | } |
167 | ||
fc512014 | 168 | // Parses a drive prefix, e.g. "C:" and "C:\whatever" |
04454e1e | 169 | fn parse_drive(path: &OsStr) -> Option<u8> { |
fc512014 XL |
170 | // In most DOS systems, it is not possible to have more than 26 drive letters. |
171 | // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. | |
172 | fn is_valid_drive_letter(drive: &u8) -> bool { | |
173 | drive.is_ascii_alphabetic() | |
174 | } | |
175 | ||
04454e1e | 176 | match path.bytes() { |
fc512014 XL |
177 | [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), |
178 | _ => None, | |
179 | } | |
180 | } | |
181 | ||
182 | // Parses a drive prefix exactly, e.g. "C:" | |
04454e1e | 183 | fn parse_drive_exact(path: &OsStr) -> Option<u8> { |
fc512014 | 184 | // only parse two bytes: the drive letter and the drive separator |
04454e1e FG |
185 | if path.bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { |
186 | parse_drive(path) | |
187 | } else { | |
188 | None | |
fc512014 XL |
189 | } |
190 | } | |
191 | ||
192 | // Parse the next path component. | |
193 | // | |
194 | // Returns the next component and the rest of the path excluding the component and separator. | |
195 | // Does not recognize `/` as a separator character if `verbatim` is true. | |
196 | fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { | |
197 | let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; | |
198 | ||
199 | match path.bytes().iter().position(|&x| separator(x)) { | |
200 | Some(separator_start) => { | |
064997fb | 201 | let separator_end = separator_start + 1; |
fc512014 XL |
202 | |
203 | let component = &path.bytes()[..separator_start]; | |
204 | ||
205 | // Panic safe | |
206 | // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. | |
207 | let path = &path.bytes()[separator_end..]; | |
208 | ||
6a06907d | 209 | // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') |
fc512014 XL |
210 | // is encoded in a single byte, therefore `bytes[separator_start]` and |
211 | // `bytes[separator_end]` must be code point boundaries and thus | |
212 | // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. | |
213 | unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } | |
214 | } | |
215 | None => (path, OsStr::new("")), | |
216 | } | |
3dfed10e | 217 | } |
3c0e092e XL |
218 | |
219 | /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. | |
220 | /// | |
221 | /// This path may or may not have a verbatim prefix. | |
222 | pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> { | |
353b0b11 FG |
223 | let path = to_u16s(path)?; |
224 | get_long_path(path, true) | |
225 | } | |
226 | ||
227 | /// Get a normalized absolute path that can bypass path length limits. | |
228 | /// | |
229 | /// Setting prefer_verbatim to true suggests a stronger preference for verbatim | |
230 | /// paths even when not strictly necessary. This allows the Windows API to avoid | |
231 | /// repeating our work. However, if the path may be given back to users or | |
232 | /// passed to other application then it's preferable to use non-verbatim paths | |
233 | /// when possible. Non-verbatim paths are better understood by users and handled | |
234 | /// by more software. | |
235 | pub(crate) fn get_long_path(mut path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> { | |
3c0e092e XL |
236 | // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL). |
237 | // However, for APIs such as CreateDirectory[1], the limit is 248. | |
238 | // | |
239 | // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters | |
240 | const LEGACY_MAX_PATH: usize = 248; | |
241 | // UTF-16 encoded code points, used in parsing and building UTF-16 paths. | |
242 | // All of these are in the ASCII range so they can be cast directly to `u16`. | |
243 | const SEP: u16 = b'\\' as _; | |
244 | const ALT_SEP: u16 = b'/' as _; | |
245 | const QUERY: u16 = b'?' as _; | |
246 | const COLON: u16 = b':' as _; | |
247 | const DOT: u16 = b'.' as _; | |
248 | const U: u16 = b'U' as _; | |
249 | const N: u16 = b'N' as _; | |
250 | const C: u16 = b'C' as _; | |
251 | ||
252 | // \\?\ | |
253 | const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; | |
254 | // \??\ | |
255 | const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; | |
256 | // \\?\UNC\ | |
257 | const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; | |
258 | ||
3c0e092e XL |
259 | if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] { |
260 | // Early return for paths that are already verbatim or empty. | |
261 | return Ok(path); | |
262 | } else if path.len() < LEGACY_MAX_PATH { | |
263 | // Early return if an absolute path is less < 260 UTF-16 code units. | |
264 | // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily. | |
265 | match path.as_slice() { | |
266 | // Starts with `D:`, `D:\`, `D:/`, etc. | |
267 | // Does not match if the path starts with a `\` or `/`. | |
268 | [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..] | |
269 | if *drive != SEP && *drive != ALT_SEP => | |
270 | { | |
271 | return Ok(path); | |
272 | } | |
273 | // Starts with `\\`, `//`, etc | |
274 | [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path), | |
275 | _ => {} | |
276 | } | |
277 | } | |
278 | ||
279 | // Firstly, get the absolute path using `GetFullPathNameW`. | |
280 | // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew | |
281 | let lpfilename = path.as_ptr(); | |
282 | fill_utf16_buf( | |
283 | // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. | |
284 | // `lpfilename` is a pointer to a null terminated string that is not | |
285 | // invalidated until after `GetFullPathNameW` returns successfully. | |
04454e1e | 286 | |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) }, |
3c0e092e XL |
287 | |mut absolute| { |
288 | path.clear(); | |
289 | ||
353b0b11 FG |
290 | // Only prepend the prefix if needed. |
291 | if prefer_verbatim || absolute.len() + 1 >= LEGACY_MAX_PATH { | |
292 | // Secondly, add the verbatim prefix. This is easier here because we know the | |
293 | // path is now absolute and fully normalized (e.g. `/` has been changed to `\`). | |
294 | let prefix = match absolute { | |
295 | // C:\ => \\?\C:\ | |
296 | [_, COLON, SEP, ..] => VERBATIM_PREFIX, | |
297 | // \\.\ => \\?\ | |
298 | [SEP, SEP, DOT, SEP, ..] => { | |
299 | absolute = &absolute[4..]; | |
300 | VERBATIM_PREFIX | |
301 | } | |
302 | // Leave \\?\ and \??\ as-is. | |
303 | [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[], | |
304 | // \\ => \\?\UNC\ | |
305 | [SEP, SEP, ..] => { | |
306 | absolute = &absolute[2..]; | |
307 | UNC_PREFIX | |
308 | } | |
309 | // Anything else we leave alone. | |
310 | _ => &[], | |
311 | }; | |
312 | ||
313 | path.reserve_exact(prefix.len() + absolute.len() + 1); | |
314 | path.extend_from_slice(prefix); | |
315 | } else { | |
316 | path.reserve_exact(absolute.len() + 1); | |
317 | } | |
3c0e092e XL |
318 | path.extend_from_slice(absolute); |
319 | path.push(0); | |
320 | }, | |
321 | )?; | |
322 | Ok(path) | |
323 | } | |
5099ac24 FG |
324 | |
325 | /// Make a Windows path absolute. | |
326 | pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> { | |
04454e1e FG |
327 | let path = path.as_os_str(); |
328 | let prefix = parse_prefix(path); | |
329 | // Verbatim paths should not be modified. | |
330 | if prefix.map(|x| x.is_verbatim()).unwrap_or(false) { | |
331 | // NULs in verbatim paths are rejected for consistency. | |
332 | if path.bytes().contains(&0) { | |
333 | return Err(io::const_io_error!( | |
334 | io::ErrorKind::InvalidInput, | |
335 | "strings passed to WinAPI cannot contain NULs", | |
336 | )); | |
337 | } | |
338 | return Ok(path.to_owned().into()); | |
5099ac24 | 339 | } |
04454e1e | 340 | |
5099ac24 FG |
341 | let path = to_u16s(path)?; |
342 | let lpfilename = path.as_ptr(); | |
343 | fill_utf16_buf( | |
344 | // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. | |
345 | // `lpfilename` is a pointer to a null terminated string that is not | |
346 | // invalidated until after `GetFullPathNameW` returns successfully. | |
347 | |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) }, | |
348 | super::os2path, | |
349 | ) | |
350 | } |