]>
Commit | Line | Data |
---|---|---|
3c0e092e XL |
1 | use super::{c, fill_utf16_buf, to_u16s}; |
2 | use crate::ffi::{OsStr, OsString}; | |
3 | use crate::io; | |
3c0e092e XL |
4 | use crate::path::{Path, PathBuf, Prefix}; |
5 | use crate::ptr; | |
3dfed10e XL |
6 | |
7 | #[cfg(test)] | |
8 | mod tests; | |
9 | ||
/// The primary Windows path separator, as a string slice.
pub const MAIN_SEP_STR: &str = r"\";
/// The primary Windows path separator, as a single character.
pub const MAIN_SEP: char = '\\';
12 | ||
3dfed10e XL |
/// Reports whether `b` is a path separator byte, i.e. either `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
17 | ||
/// Reports whether `b` is the verbatim-path separator byte.
///
/// Only `\` qualifies; `/` is not treated as a separator here.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
22 | ||
3c0e092e XL |
23 | /// Returns true if `path` looks like a lone filename. |
24 | pub(crate) fn is_file_name(path: &OsStr) -> bool { | |
781aab86 | 25 | !path.as_encoded_bytes().iter().copied().any(is_sep_byte) |
3c0e092e XL |
26 | } |
27 | pub(crate) fn has_trailing_slash(path: &OsStr) -> bool { | |
781aab86 | 28 | let is_verbatim = path.as_encoded_bytes().starts_with(br"\\?\"); |
3c0e092e | 29 | let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte }; |
781aab86 | 30 | if let Some(&c) = path.as_encoded_bytes().last() { is_separator(c) } else { false } |
3c0e092e XL |
31 | } |
32 | ||
/// Appends a suffix to a path.
///
/// The suffix is concatenated verbatim onto the end of the path, so this can
/// be used to append an extension without removing an existing extension.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut raw = path.into_os_string();
    raw.push(suffix);
    PathBuf::from(raw)
}
41 | ||
04454e1e FG |
42 | struct PrefixParser<'a, const LEN: usize> { |
43 | path: &'a OsStr, | |
44 | prefix: [u8; LEN], | |
45 | } | |
46 | ||
47 | impl<'a, const LEN: usize> PrefixParser<'a, LEN> { | |
48 | #[inline] | |
49 | fn get_prefix(path: &OsStr) -> [u8; LEN] { | |
50 | let mut prefix = [0; LEN]; | |
51 | // SAFETY: Only ASCII characters are modified. | |
781aab86 | 52 | for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { |
04454e1e FG |
53 | prefix[i] = if ch == b'/' { b'\\' } else { ch }; |
54 | } | |
55 | prefix | |
56 | } | |
57 | ||
58 | fn new(path: &'a OsStr) -> Self { | |
59 | Self { path, prefix: Self::get_prefix(path) } | |
60 | } | |
61 | ||
62 | fn as_slice(&self) -> PrefixParserSlice<'a, '_> { | |
63 | PrefixParserSlice { | |
64 | path: self.path, | |
65 | prefix: &self.prefix[..LEN.min(self.path.len())], | |
66 | index: 0, | |
67 | } | |
68 | } | |
69 | } | |
70 | ||
/// A cursor over a normalized prefix, tracking how many bytes of the
/// original path have been consumed so far.
struct PrefixParserSlice<'a, 'b> {
    path: &'a OsStr,
    prefix: &'b [u8],
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    /// If the unconsumed part of the normalized prefix starts with `prefix`,
    /// returns a cursor advanced past it; otherwise returns `None`.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let expected = prefix.as_bytes();
        if self.prefix[self.index..].starts_with(expected) {
            Some(Self {
                path: self.path,
                prefix: self.prefix,
                index: self.index + expected.len(),
            })
        } else {
            None
        }
    }

    /// Returns the bytes of the original (non-normalized) path that have been
    /// consumed so far.
    fn prefix_bytes(&self) -> &'a [u8] {
        let raw = self.path.as_encoded_bytes();
        &raw[..self.index]
    }

    /// Returns the part of the original path that follows the consumed bytes.
    fn finish(self) -> &'a OsStr {
        let remainder = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(remainder) }
    }
}
96 | ||
3dfed10e XL |
97 | pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { |
98 | use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; | |
99 | ||
04454e1e FG |
100 | let parser = PrefixParser::<8>::new(path); |
101 | let parser = parser.as_slice(); | |
102 | if let Some(parser) = parser.strip_prefix(r"\\") { | |
fc512014 | 103 | // \\ |
04454e1e FG |
104 | |
105 | // The meaning of verbatim paths can change when they use a different | |
106 | // separator. | |
4b012472 FG |
107 | if let Some(parser) = parser.strip_prefix(r"?\") |
108 | && !parser.prefix_bytes().iter().any(|&x| x == b'/') | |
109 | { | |
fc512014 | 110 | // \\?\ |
04454e1e | 111 | if let Some(parser) = parser.strip_prefix(r"UNC\") { |
fc512014 XL |
112 | // \\?\UNC\server\share |
113 | ||
04454e1e | 114 | let path = parser.finish(); |
fc512014 XL |
115 | let (server, path) = parse_next_component(path, true); |
116 | let (share, _) = parse_next_component(path, true); | |
117 | ||
118 | Some(VerbatimUNC(server, share)) | |
3dfed10e | 119 | } else { |
04454e1e | 120 | let path = parser.finish(); |
fc512014 XL |
121 | |
122 | // in verbatim paths only recognize an exact drive prefix | |
04454e1e | 123 | if let Some(drive) = parse_drive_exact(path) { |
fc512014 XL |
124 | // \\?\C: |
125 | Some(VerbatimDisk(drive)) | |
126 | } else { | |
127 | // \\?\prefix | |
04454e1e | 128 | let (prefix, _) = parse_next_component(path, true); |
fc512014 | 129 | Some(Verbatim(prefix)) |
3dfed10e XL |
130 | } |
131 | } | |
04454e1e | 132 | } else if let Some(parser) = parser.strip_prefix(r".\") { |
3dfed10e | 133 | // \\.\COM42 |
04454e1e | 134 | let path = parser.finish(); |
fc512014 XL |
135 | let (prefix, _) = parse_next_component(path, false); |
136 | Some(DeviceNS(prefix)) | |
137 | } else { | |
04454e1e | 138 | let path = parser.finish(); |
fc512014 XL |
139 | let (server, path) = parse_next_component(path, false); |
140 | let (share, _) = parse_next_component(path, false); | |
141 | ||
142 | if !server.is_empty() && !share.is_empty() { | |
3dfed10e | 143 | // \\server\share |
fc512014 XL |
144 | Some(UNC(server, share)) |
145 | } else { | |
146 | // no valid prefix beginning with "\\" recognized | |
147 | None | |
3dfed10e | 148 | } |
3dfed10e | 149 | } |
fc512014 | 150 | } else { |
4b012472 FG |
151 | // If it has a drive like `C:` then it's a disk. |
152 | // Otherwise there is no prefix. | |
153 | parse_drive(path).map(Disk) | |
3dfed10e | 154 | } |
3dfed10e XL |
155 | } |
156 | ||
// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// On success the drive letter is returned as an uppercase ASCII byte.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let bytes = path.as_encoded_bytes();
    let letter = *bytes.first()?;
    let has_colon = bytes.get(1) == Some(&b':');

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    let is_valid_drive_letter = letter.is_ascii_alphabetic();

    (has_colon && is_valid_drive_letter).then(|| letter.to_ascii_uppercase())
}
170 | ||
171 | // Parses a drive prefix exactly, e.g. "C:" | |
04454e1e | 172 | fn parse_drive_exact(path: &OsStr) -> Option<u8> { |
fc512014 | 173 | // only parse two bytes: the drive letter and the drive separator |
781aab86 | 174 | if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { |
04454e1e FG |
175 | parse_drive(path) |
176 | } else { | |
177 | None | |
fc512014 XL |
178 | } |
179 | } | |
180 | ||
181 | // Parse the next path component. | |
182 | // | |
183 | // Returns the next component and the rest of the path excluding the component and separator. | |
184 | // Does not recognize `/` as a separator character if `verbatim` is true. | |
185 | fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { | |
186 | let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; | |
187 | ||
781aab86 | 188 | match path.as_encoded_bytes().iter().position(|&x| separator(x)) { |
fc512014 | 189 | Some(separator_start) => { |
064997fb | 190 | let separator_end = separator_start + 1; |
fc512014 | 191 | |
781aab86 | 192 | let component = &path.as_encoded_bytes()[..separator_start]; |
fc512014 XL |
193 | |
194 | // Panic safe | |
195 | // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. | |
781aab86 | 196 | let path = &path.as_encoded_bytes()[separator_end..]; |
fc512014 | 197 | |
6a06907d | 198 | // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') |
fc512014 XL |
199 | // is encoded in a single byte, therefore `bytes[separator_start]` and |
200 | // `bytes[separator_end]` must be code point boundaries and thus | |
201 | // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. | |
fe692bf9 FG |
202 | unsafe { |
203 | ( | |
781aab86 FG |
204 | OsStr::from_encoded_bytes_unchecked(component), |
205 | OsStr::from_encoded_bytes_unchecked(path), | |
fe692bf9 FG |
206 | ) |
207 | } | |
fc512014 XL |
208 | } |
209 | None => (path, OsStr::new("")), | |
210 | } | |
3dfed10e | 211 | } |
3c0e092e XL |
212 | |
213 | /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. | |
214 | /// | |
215 | /// This path may or may not have a verbatim prefix. | |
216 | pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> { | |
353b0b11 FG |
217 | let path = to_u16s(path)?; |
218 | get_long_path(path, true) | |
219 | } | |
220 | ||
221 | /// Get a normalized absolute path that can bypass path length limits. | |
222 | /// | |
223 | /// Setting prefer_verbatim to true suggests a stronger preference for verbatim | |
224 | /// paths even when not strictly necessary. This allows the Windows API to avoid | |
225 | /// repeating our work. However, if the path may be given back to users or | |
226 | /// passed to other application then it's preferable to use non-verbatim paths | |
227 | /// when possible. Non-verbatim paths are better understood by users and handled | |
228 | /// by more software. | |
229 | pub(crate) fn get_long_path(mut path: Vec<u16>, prefer_verbatim: bool) -> io::Result<Vec<u16>> { | |
3c0e092e XL |
230 | // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL). |
231 | // However, for APIs such as CreateDirectory[1], the limit is 248. | |
232 | // | |
233 | // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters | |
234 | const LEGACY_MAX_PATH: usize = 248; | |
235 | // UTF-16 encoded code points, used in parsing and building UTF-16 paths. | |
236 | // All of these are in the ASCII range so they can be cast directly to `u16`. | |
237 | const SEP: u16 = b'\\' as _; | |
238 | const ALT_SEP: u16 = b'/' as _; | |
239 | const QUERY: u16 = b'?' as _; | |
240 | const COLON: u16 = b':' as _; | |
241 | const DOT: u16 = b'.' as _; | |
242 | const U: u16 = b'U' as _; | |
243 | const N: u16 = b'N' as _; | |
244 | const C: u16 = b'C' as _; | |
245 | ||
246 | // \\?\ | |
247 | const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; | |
248 | // \??\ | |
249 | const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; | |
250 | // \\?\UNC\ | |
251 | const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; | |
252 | ||
4b012472 | 253 | if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == [0] { |
3c0e092e XL |
254 | // Early return for paths that are already verbatim or empty. |
255 | return Ok(path); | |
256 | } else if path.len() < LEGACY_MAX_PATH { | |
257 | // Early return if an absolute path is less < 260 UTF-16 code units. | |
258 | // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily. | |
259 | match path.as_slice() { | |
260 | // Starts with `D:`, `D:\`, `D:/`, etc. | |
261 | // Does not match if the path starts with a `\` or `/`. | |
262 | [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..] | |
263 | if *drive != SEP && *drive != ALT_SEP => | |
264 | { | |
265 | return Ok(path); | |
266 | } | |
267 | // Starts with `\\`, `//`, etc | |
268 | [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path), | |
269 | _ => {} | |
270 | } | |
271 | } | |
272 | ||
273 | // Firstly, get the absolute path using `GetFullPathNameW`. | |
274 | // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew | |
275 | let lpfilename = path.as_ptr(); | |
276 | fill_utf16_buf( | |
277 | // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. | |
278 | // `lpfilename` is a pointer to a null terminated string that is not | |
279 | // invalidated until after `GetFullPathNameW` returns successfully. | |
04454e1e | 280 | |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) }, |
3c0e092e XL |
281 | |mut absolute| { |
282 | path.clear(); | |
283 | ||
353b0b11 FG |
284 | // Only prepend the prefix if needed. |
285 | if prefer_verbatim || absolute.len() + 1 >= LEGACY_MAX_PATH { | |
286 | // Secondly, add the verbatim prefix. This is easier here because we know the | |
287 | // path is now absolute and fully normalized (e.g. `/` has been changed to `\`). | |
288 | let prefix = match absolute { | |
289 | // C:\ => \\?\C:\ | |
290 | [_, COLON, SEP, ..] => VERBATIM_PREFIX, | |
291 | // \\.\ => \\?\ | |
292 | [SEP, SEP, DOT, SEP, ..] => { | |
293 | absolute = &absolute[4..]; | |
294 | VERBATIM_PREFIX | |
295 | } | |
296 | // Leave \\?\ and \??\ as-is. | |
297 | [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[], | |
298 | // \\ => \\?\UNC\ | |
299 | [SEP, SEP, ..] => { | |
300 | absolute = &absolute[2..]; | |
301 | UNC_PREFIX | |
302 | } | |
303 | // Anything else we leave alone. | |
304 | _ => &[], | |
305 | }; | |
306 | ||
307 | path.reserve_exact(prefix.len() + absolute.len() + 1); | |
308 | path.extend_from_slice(prefix); | |
309 | } else { | |
310 | path.reserve_exact(absolute.len() + 1); | |
311 | } | |
3c0e092e XL |
312 | path.extend_from_slice(absolute); |
313 | path.push(0); | |
314 | }, | |
315 | )?; | |
316 | Ok(path) | |
317 | } | |
5099ac24 FG |
318 | |
319 | /// Make a Windows path absolute. | |
320 | pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> { | |
04454e1e FG |
321 | let path = path.as_os_str(); |
322 | let prefix = parse_prefix(path); | |
323 | // Verbatim paths should not be modified. | |
324 | if prefix.map(|x| x.is_verbatim()).unwrap_or(false) { | |
325 | // NULs in verbatim paths are rejected for consistency. | |
781aab86 | 326 | if path.as_encoded_bytes().contains(&0) { |
04454e1e FG |
327 | return Err(io::const_io_error!( |
328 | io::ErrorKind::InvalidInput, | |
329 | "strings passed to WinAPI cannot contain NULs", | |
330 | )); | |
331 | } | |
332 | return Ok(path.to_owned().into()); | |
5099ac24 | 333 | } |
04454e1e | 334 | |
5099ac24 FG |
335 | let path = to_u16s(path)?; |
336 | let lpfilename = path.as_ptr(); | |
337 | fill_utf16_buf( | |
338 | // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. | |
339 | // `lpfilename` is a pointer to a null terminated string that is not | |
340 | // invalidated until after `GetFullPathNameW` returns successfully. | |
341 | |buffer, size| unsafe { c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) }, | |
342 | super::os2path, | |
343 | ) | |
344 | } |