]>
Commit | Line | Data |
---|---|---|
3c0e092e XL |
1 | use super::{c, fill_utf16_buf, to_u16s}; |
2 | use crate::ffi::{OsStr, OsString}; | |
3 | use crate::io; | |
3dfed10e | 4 | use crate::mem; |
3c0e092e XL |
5 | use crate::path::{Path, PathBuf, Prefix}; |
6 | use crate::ptr; | |
3dfed10e XL |
7 | |
8 | #[cfg(test)] | |
9 | mod tests; | |
10 | ||
11 | pub const MAIN_SEP_STR: &str = "\\"; | |
12 | pub const MAIN_SEP: char = '\\'; | |
13 | ||
/// Reinterprets a byte slice as an [`OsStr`] without copying or validating it.
///
/// # Safety
///
/// `bytes` must be a valid wtf8 encoded slice
#[inline]
unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr {
    // The reinterpretation relies on a chain of layout compatibilities:
    // `&OsStr` matches `&Slice`, which matches `&Wtf8`, which matches `&[u8]`.
    mem::transmute::<&[u8], &OsStr>(bytes)
}
23 | ||
/// Returns `true` if `b` is one of the two separator bytes, `/` or `\`.
#[inline]
pub fn is_sep_byte(b: u8) -> bool {
    matches!(b, b'/' | b'\\')
}
28 | ||
/// Returns `true` only if `b` is a backslash; `/` is not accepted here.
#[inline]
pub fn is_verbatim_sep(b: u8) -> bool {
    matches!(b, b'\\')
}
33 | ||
3c0e092e XL |
34 | /// Returns true if `path` looks like a lone filename. |
35 | pub(crate) fn is_file_name(path: &OsStr) -> bool { | |
36 | !path.bytes().iter().copied().any(is_sep_byte) | |
37 | } | |
38 | pub(crate) fn has_trailing_slash(path: &OsStr) -> bool { | |
39 | let is_verbatim = path.bytes().starts_with(br"\\?\"); | |
40 | let is_separator = if is_verbatim { is_verbatim_sep } else { is_sep_byte }; | |
41 | if let Some(&c) = path.bytes().last() { is_separator(c) } else { false } | |
42 | } | |
43 | ||
/// Appends `suffix` verbatim to the end of `path`.
///
/// Nothing is stripped from `path` first, so this can add an extension while
/// keeping an extension that is already there.
pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
    let mut joined: OsString = path.into_os_string();
    joined.push(suffix);
    PathBuf::from(joined)
}
52 | ||
3dfed10e XL |
53 | pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { |
54 | use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; | |
55 | ||
fc512014 XL |
56 | if let Some(path) = strip_prefix(path, r"\\") { |
57 | // \\ | |
58 | if let Some(path) = strip_prefix(path, r"?\") { | |
59 | // \\?\ | |
60 | if let Some(path) = strip_prefix(path, r"UNC\") { | |
61 | // \\?\UNC\server\share | |
62 | ||
63 | let (server, path) = parse_next_component(path, true); | |
64 | let (share, _) = parse_next_component(path, true); | |
65 | ||
66 | Some(VerbatimUNC(server, share)) | |
3dfed10e | 67 | } else { |
fc512014 XL |
68 | let (prefix, _) = parse_next_component(path, true); |
69 | ||
70 | // in verbatim paths only recognize an exact drive prefix | |
71 | if let Some(drive) = parse_drive_exact(prefix) { | |
72 | // \\?\C: | |
73 | Some(VerbatimDisk(drive)) | |
74 | } else { | |
75 | // \\?\prefix | |
76 | Some(Verbatim(prefix)) | |
3dfed10e XL |
77 | } |
78 | } | |
fc512014 | 79 | } else if let Some(path) = strip_prefix(path, r".\") { |
3dfed10e | 80 | // \\.\COM42 |
fc512014 XL |
81 | let (prefix, _) = parse_next_component(path, false); |
82 | Some(DeviceNS(prefix)) | |
83 | } else { | |
84 | let (server, path) = parse_next_component(path, false); | |
85 | let (share, _) = parse_next_component(path, false); | |
86 | ||
87 | if !server.is_empty() && !share.is_empty() { | |
3dfed10e | 88 | // \\server\share |
fc512014 XL |
89 | Some(UNC(server, share)) |
90 | } else { | |
91 | // no valid prefix beginning with "\\" recognized | |
92 | None | |
3dfed10e | 93 | } |
3dfed10e | 94 | } |
fc512014 | 95 | } else if let Some(drive) = parse_drive(path) { |
3dfed10e | 96 | // C: |
fc512014 XL |
97 | Some(Disk(drive)) |
98 | } else { | |
99 | // no prefix | |
100 | None | |
3dfed10e | 101 | } |
3dfed10e XL |
102 | } |
103 | ||
fc512014 XL |
104 | // Parses a drive prefix, e.g. "C:" and "C:\whatever" |
105 | fn parse_drive(prefix: &OsStr) -> Option<u8> { | |
106 | // In most DOS systems, it is not possible to have more than 26 drive letters. | |
107 | // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>. | |
108 | fn is_valid_drive_letter(drive: &u8) -> bool { | |
109 | drive.is_ascii_alphabetic() | |
110 | } | |
111 | ||
112 | match prefix.bytes() { | |
113 | [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), | |
114 | _ => None, | |
115 | } | |
116 | } | |
117 | ||
118 | // Parses a drive prefix exactly, e.g. "C:" | |
119 | fn parse_drive_exact(prefix: &OsStr) -> Option<u8> { | |
120 | // only parse two bytes: the drive letter and the drive separator | |
121 | if prefix.len() == 2 { parse_drive(prefix) } else { None } | |
122 | } | |
123 | ||
124 | fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> { | |
125 | // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]` | |
126 | // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice. | |
127 | match path.bytes().strip_prefix(prefix.as_bytes()) { | |
128 | Some(path) => unsafe { Some(bytes_as_os_str(path)) }, | |
129 | None => None, | |
130 | } | |
131 | } | |
132 | ||
133 | // Parse the next path component. | |
134 | // | |
135 | // Returns the next component and the rest of the path excluding the component and separator. | |
136 | // Does not recognize `/` as a separator character if `verbatim` is true. | |
137 | fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { | |
138 | let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; | |
139 | ||
140 | match path.bytes().iter().position(|&x| separator(x)) { | |
141 | Some(separator_start) => { | |
142 | let mut separator_end = separator_start + 1; | |
143 | ||
144 | // a series of multiple separator characters is treated as a single separator, | |
145 | // except in verbatim paths | |
146 | while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end]) | |
147 | { | |
148 | separator_end += 1; | |
149 | } | |
150 | ||
151 | let component = &path.bytes()[..separator_start]; | |
152 | ||
153 | // Panic safe | |
154 | // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. | |
155 | let path = &path.bytes()[separator_end..]; | |
156 | ||
6a06907d | 157 | // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') |
fc512014 XL |
158 | // is encoded in a single byte, therefore `bytes[separator_start]` and |
159 | // `bytes[separator_end]` must be code point boundaries and thus | |
160 | // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. | |
161 | unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } | |
162 | } | |
163 | None => (path, OsStr::new("")), | |
164 | } | |
3dfed10e | 165 | } |
3c0e092e XL |
166 | |
167 | /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. | |
168 | /// | |
169 | /// This path may or may not have a verbatim prefix. | |
170 | pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> { | |
171 | // Normally the MAX_PATH is 260 UTF-16 code units (including the NULL). | |
172 | // However, for APIs such as CreateDirectory[1], the limit is 248. | |
173 | // | |
174 | // [1]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createdirectorya#parameters | |
175 | const LEGACY_MAX_PATH: usize = 248; | |
176 | // UTF-16 encoded code points, used in parsing and building UTF-16 paths. | |
177 | // All of these are in the ASCII range so they can be cast directly to `u16`. | |
178 | const SEP: u16 = b'\\' as _; | |
179 | const ALT_SEP: u16 = b'/' as _; | |
180 | const QUERY: u16 = b'?' as _; | |
181 | const COLON: u16 = b':' as _; | |
182 | const DOT: u16 = b'.' as _; | |
183 | const U: u16 = b'U' as _; | |
184 | const N: u16 = b'N' as _; | |
185 | const C: u16 = b'C' as _; | |
186 | ||
187 | // \\?\ | |
188 | const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; | |
189 | // \??\ | |
190 | const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; | |
191 | // \\?\UNC\ | |
192 | const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; | |
193 | ||
194 | let mut path = to_u16s(path)?; | |
195 | if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) || path == &[0] { | |
196 | // Early return for paths that are already verbatim or empty. | |
197 | return Ok(path); | |
198 | } else if path.len() < LEGACY_MAX_PATH { | |
199 | // Early return if an absolute path is less < 260 UTF-16 code units. | |
200 | // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily. | |
201 | match path.as_slice() { | |
202 | // Starts with `D:`, `D:\`, `D:/`, etc. | |
203 | // Does not match if the path starts with a `\` or `/`. | |
204 | [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..] | |
205 | if *drive != SEP && *drive != ALT_SEP => | |
206 | { | |
207 | return Ok(path); | |
208 | } | |
209 | // Starts with `\\`, `//`, etc | |
210 | [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path), | |
211 | _ => {} | |
212 | } | |
213 | } | |
214 | ||
215 | // Firstly, get the absolute path using `GetFullPathNameW`. | |
216 | // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew | |
217 | let lpfilename = path.as_ptr(); | |
218 | fill_utf16_buf( | |
219 | // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. | |
220 | // `lpfilename` is a pointer to a null terminated string that is not | |
221 | // invalidated until after `GetFullPathNameW` returns successfully. | |
222 | |buffer, size| unsafe { | |
223 | // While the docs for `GetFullPathNameW` have the standard note | |
224 | // about needing a `\\?\` path for a long lpfilename, this does not | |
225 | // appear to be true in practice. | |
226 | // See: | |
227 | // https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths | |
228 | // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html | |
229 | c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) | |
230 | }, | |
231 | |mut absolute| { | |
232 | path.clear(); | |
233 | ||
234 | // Secondly, add the verbatim prefix. This is easier here because we know the | |
235 | // path is now absolute and fully normalized (e.g. `/` has been changed to `\`). | |
236 | let prefix = match absolute { | |
237 | // C:\ => \\?\C:\ | |
238 | [_, COLON, SEP, ..] => VERBATIM_PREFIX, | |
239 | // \\.\ => \\?\ | |
240 | [SEP, SEP, DOT, SEP, ..] => { | |
241 | absolute = &absolute[4..]; | |
242 | VERBATIM_PREFIX | |
243 | } | |
244 | // Leave \\?\ and \??\ as-is. | |
245 | [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[], | |
246 | // \\ => \\?\UNC\ | |
247 | [SEP, SEP, ..] => { | |
248 | absolute = &absolute[2..]; | |
249 | UNC_PREFIX | |
250 | } | |
251 | // Anything else we leave alone. | |
252 | _ => &[], | |
253 | }; | |
254 | ||
255 | path.reserve_exact(prefix.len() + absolute.len() + 1); | |
256 | path.extend_from_slice(prefix); | |
257 | path.extend_from_slice(absolute); | |
258 | path.push(0); | |
259 | }, | |
260 | )?; | |
261 | Ok(path) | |
262 | } |