]>
Commit | Line | Data |
---|---|---|
04454e1e FG |
1 | //! This crate allows interacting with the data stored by [`OsStr`] and |
2 | //! [`OsString`], without resorting to panics or corruption for invalid UTF-8. | |
3 | //! Thus, methods can be used that are already defined on [`[u8]`][slice] and | |
4 | //! [`Vec<u8>`]. | |
5 | //! | |
6 | //! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`] | |
7 | //! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which | |
8 | //! requires the bytes to be valid in UTF-8. However, since this crate makes | |
9 | //! conversions directly between the platform encoding and raw bytes, even some | |
10 | //! strings invalid in UTF-8 can be converted. | |
11 | //! | |
12 | //! In most cases, [`RawOsStr`] and [`RawOsString`] should be used. | |
13 | //! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are | |
14 | //! easier to misuse. | |
15 | //! | |
16 | //! # Encoding | |
17 | //! | |
18 | //! The encoding of bytes returned or accepted by methods of this crate is | |
19 | //! intentionally left unspecified. It may vary for different platforms, so | |
20 | //! defining it would run contrary to the goal of generic string handling. | |
21 | //! However, the following invariants will always be upheld: | |
22 | //! | |
23 | //! - The encoding will be compatible with UTF-8. In particular, splitting an | |
24 | //! encoded byte sequence by a UTF-8–encoded character always produces other | |
25 | //! valid byte sequences. They can be re-encoded without error using | |
26 | //! [`OsStrBytes::from_raw_bytes`] and similar methods. | |
27 | //! | |
28 | //! - All characters valid in platform strings are representable. [`OsStr`] and | |
29 | //! [`OsString`] can always be losslessly reconstructed from extracted bytes. | |
30 | //! | |
31 | //! Note that the chosen encoding may not match how Rust stores these strings | |
32 | //! internally, which is undocumented. For instance, the result of calling | |
33 | //! [`OsStr::len`] will not necessarily match the number of bytes this crate | |
34 | //! uses to represent the same string. | |
35 | //! | |
36 | //! Additionally, concatenation may yield unexpected results without a UTF-8 | |
37 | //! separator. If two platform strings need to be concatenated, the only safe | |
38 | //! way to do so is using [`OsString::push`]. This limitation also makes it | |
39 | //! undesirable to use the bytes in interchange. | |
40 | //! | |
41 | //! Since this encoding can change between versions and platforms, it should | |
42 | //! not be used for storage. The standard library provides implementations of | |
43 | //! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be | |
44 | //! preferred for that use case. | |
45 | //! | |
46 | //! # User Input | |
47 | //! | |
48 | //! Traits in this crate should ideally not be used to convert byte sequences | |
49 | //! that did not originate from [`OsStr`] or a related struct. The encoding | |
50 | //! used by this crate is an implementation detail, so it does not make sense | |
51 | //! to expose it to users. | |
52 | //! | |
53 | //! Crate [bstr] offers some useful alternative methods, such as | |
54 | //! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant | |
55 | //! for user input. But, they reject some byte sequences used to represent | |
56 | //! valid platform strings, which would be undesirable for reliable path | |
57 | //! handling. They are best used only when accepting unknown input. | |
58 | //! | |
59 | //! This crate is meant to help when you already have an instance of [`OsStr`] | |
60 | //! and need to modify the data in a lossless way. | |
61 | //! | |
62 | //! # Features | |
63 | //! | |
64 | //! These features are optional and can be enabled or disabled in a | |
65 | //! "Cargo.toml" file. | |
66 | //! | |
67 | //! ### Default Features | |
68 | //! | |
69 | //! - **memchr** - | |
70 | //! Changes the implementation to use crate [memchr] for better performance. | |
71 | //! This feature is useless when "raw\_os\_str" is disabled. | |
72 | //! | |
73 | //! For more information, see [`RawOsStr`][memchr complexity]. | |
74 | //! | |
75 | //! - **raw\_os\_str** - | |
76 | //! Enables use of [`RawOsStr`] and [`RawOsString`]. | |
77 | //! | |
78 | //! ### Optional Features | |
79 | //! | |
80 | //! - **print\_bytes** - | |
81 | //! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and | |
82 | //! [`RawOsString`]. | |
83 | //! | |
84 | //! - **uniquote** - | |
85 | //! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and | |
86 | //! [`RawOsString`]. | |
87 | //! | |
88 | //! # Implementation | |
89 | //! | |
90 | //! Some methods return [`Cow`] to account for platform differences. However, | |
91 | //! no guarantee is made that the same variant of that enum will always be | |
92 | //! returned for the same platform. Whichever can be constructed most | |
93 | //! efficiently will be returned. | |
94 | //! | |
95 | //! All traits are [sealed], meaning that they can only be implemented by this | |
96 | //! crate. Otherwise, backward compatibility would be more difficult to | |
97 | //! maintain for new features. | |
98 | //! | |
99 | //! # Complexity | |
100 | //! | |
101 | //! The time complexities of trait methods will vary based on what | |
102 | //! functionality is available for the platform. At worst, they will all be | |
103 | //! linear, but some can take constant time. For example, | |
104 | //! [`OsStringBytes::from_raw_vec`] might be able to reuse the allocation for | |
105 | //! its argument. | |
106 | //! | |
107 | //! # Examples | |
108 | //! | |
109 | //! ``` | |
110 | //! # #[cfg(any())] | |
111 | //! use std::env; | |
112 | //! use std::fs; | |
113 | //! # use std::io; | |
114 | //! | |
115 | //! use os_str_bytes::OsStrBytes; | |
116 | //! | |
117 | //! # mod env { | |
118 | //! # use std::env; | |
119 | //! # use std::ffi::OsString; | |
120 | //! # | |
121 | //! # pub fn args_os() -> impl Iterator<Item = OsString> { | |
122 | //! # let mut file = env::temp_dir(); | |
123 | //! # file.push("os_str_bytes\u{E9}.txt"); | |
124 | //! # return vec![OsString::new(), file.into_os_string()].into_iter(); | |
125 | //! # } | |
126 | //! # } | |
127 | //! # | |
128 | //! for file in env::args_os().skip(1) { | |
129 | //! if file.to_raw_bytes().first() != Some(&b'-') { | |
130 | //! let string = "Hello, world!"; | |
131 | //! fs::write(&file, string)?; | |
132 | //! assert_eq!(string, fs::read_to_string(file)?); | |
133 | //! } | |
134 | //! } | |
135 | //! # | |
136 | //! # Ok::<_, io::Error>(()) | |
137 | //! ``` | |
138 | //! | |
139 | //! [bstr]: https://crates.io/crates/bstr | |
140 | //! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str | |
141 | //! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string | |
142 | //! [memchr complexity]: RawOsStr#complexity | |
143 | //! [memchr]: https://crates.io/crates/memchr | |
144 | //! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt | |
145 | //! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt | |
146 | //! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed | |
147 | //! [print\_bytes]: https://crates.io/crates/print_bytes | |
148 | ||
149 | // Only require a nightly compiler when building documentation for docs.rs. | |
150 | // This is a private option that should not be used. | |
151 | // https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407 | |
152 | // https://github.com/dylni/os_str_bytes/issues/2 | |
153 | #![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))] | |
154 | // Nightly is also currently required for the SGX platform. | |
155 | #![cfg_attr( | |
156 | all(target_vendor = "fortanix", target_env = "sgx"), | |
157 | feature(sgx_platform) | |
158 | )] | |
159 | #![forbid(unsafe_op_in_unsafe_fn)] | |
160 | #![warn(unused_results)] | |
161 | ||
162 | use std::borrow::Cow; | |
163 | use std::error::Error; | |
164 | use std::ffi::OsStr; | |
165 | use std::ffi::OsString; | |
166 | use std::fmt; | |
167 | use std::fmt::Display; | |
168 | use std::fmt::Formatter; | |
169 | use std::path::Path; | |
170 | use std::path::PathBuf; | |
171 | use std::result; | |
172 | ||
// Wraps one or more items so that each of them is only compiled when the
// "raw_os_str" feature is enabled.
macro_rules! if_raw_str {
    ($($definition:item)+) => (
        $(
            #[cfg(feature = "raw_os_str")]
            $definition
        )+
    );
}
181 | ||
182 | #[cfg_attr( | |
183 | all(target_arch = "wasm32", target_os = "unknown"), | |
184 | path = "wasm32/mod.rs" | |
185 | )] | |
186 | #[cfg_attr(windows, path = "windows/mod.rs")] | |
187 | #[cfg_attr( | |
188 | not(any(all(target_arch = "wasm32", target_os = "unknown"), windows)), | |
189 | path = "common/mod.rs" | |
190 | )] | |
191 | mod imp; | |
192 | ||
193 | mod util; | |
194 | ||
195 | if_raw_str! { | |
196 | pub mod iter; | |
197 | ||
198 | mod pattern; | |
199 | pub use pattern::Pattern; | |
200 | ||
201 | mod raw_str; | |
202 | pub use raw_str::RawOsStr; | |
203 | pub use raw_str::RawOsString; | |
204 | } | |
205 | ||
206 | /// The error that occurs when a byte sequence is not representable in the | |
207 | /// platform encoding. | |
208 | /// | |
209 | /// [`Result::unwrap`] should almost always be called on results containing | |
210 | /// this error. It should be known whether or not byte sequences are properly | |
211 | /// encoded for the platform, since [the module-level documentation][encoding] | |
212 | /// discourages using encoded bytes in interchange. Results are returned | |
213 | /// primarily to make panicking behavior explicit. | |
214 | /// | |
215 | /// On Unix, this error is never returned, but [`OsStrExt`] or [`OsStringExt`] | |
216 | /// should be used instead if that needs to be guaranteed. | |
217 | /// | |
218 | /// [encoding]: self#encoding | |
219 | /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt | |
220 | /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt | |
221 | /// [`Result::unwrap`]: ::std::result::Result::unwrap | |
222 | #[derive(Debug, Eq, PartialEq)] | |
223 | pub struct EncodingError(imp::EncodingError); | |
224 | ||
225 | impl Display for EncodingError { | |
226 | #[inline] | |
227 | fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result { | |
228 | self.0.fmt(formatter) | |
229 | } | |
230 | } | |
231 | ||
232 | impl Error for EncodingError {} | |
233 | ||
234 | type Result<T> = result::Result<T, EncodingError>; | |
235 | ||
/// A platform agnostic variant of [`OsStrExt`].
///
/// The trait is sealed through its `private::Sealed` supertrait, so it can
/// only be implemented by this crate.
///
/// For more information, see [the module-level documentation][module].
///
/// [module]: self
/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
pub trait OsStrBytes: private::Sealed + ToOwned {
    /// Converts a byte slice into an equivalent platform-native string.
    ///
    /// Provided byte strings should always be valid for the [unspecified
    /// encoding] used by this crate.
    ///
    /// The argument may be anything convertible into `Cow<'a, [u8]>`, so both
    /// borrowed and owned byte strings are accepted.
    ///
    /// # Errors
    ///
    /// See documentation for [`EncodingError`].
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// use std::ffi::OsStr;
    /// # use std::io;
    ///
    /// use os_str_bytes::OsStrBytes;
    ///
    /// let os_string = env::current_exe()?;
    /// let os_bytes = os_string.to_raw_bytes();
    /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    ///
    /// [unspecified encoding]: self#encoding
    fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
    where
        S: Into<Cow<'a, [u8]>>;

    /// Converts a platform-native string into an equivalent byte slice.
    ///
    /// The returned byte string will use an [unspecified encoding].
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::OsStrBytes;
    ///
    /// let os_string = env::current_exe()?;
    /// println!("{:?}", os_string.to_raw_bytes());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    ///
    /// [unspecified encoding]: self#encoding
    #[must_use]
    fn to_raw_bytes(&self) -> Cow<'_, [u8]>;
}
295 | ||
296 | impl OsStrBytes for OsStr { | |
297 | #[inline] | |
298 | fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> | |
299 | where | |
300 | S: Into<Cow<'a, [u8]>>, | |
301 | { | |
302 | match string.into() { | |
303 | Cow::Borrowed(string) => { | |
304 | imp::os_str_from_bytes(string).map_err(EncodingError) | |
305 | } | |
306 | Cow::Owned(string) => { | |
307 | OsStringBytes::from_raw_vec(string).map(Cow::Owned) | |
308 | } | |
309 | } | |
310 | } | |
311 | ||
312 | #[inline] | |
313 | fn to_raw_bytes(&self) -> Cow<'_, [u8]> { | |
314 | imp::os_str_to_bytes(self) | |
315 | } | |
316 | } | |
317 | ||
318 | impl OsStrBytes for Path { | |
319 | #[inline] | |
320 | fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> | |
321 | where | |
322 | S: Into<Cow<'a, [u8]>>, | |
323 | { | |
324 | OsStr::from_raw_bytes(string).map(|os_string| match os_string { | |
325 | Cow::Borrowed(os_string) => Cow::Borrowed(Self::new(os_string)), | |
326 | Cow::Owned(os_string) => Cow::Owned(os_string.into()), | |
327 | }) | |
328 | } | |
329 | ||
330 | #[inline] | |
331 | fn to_raw_bytes(&self) -> Cow<'_, [u8]> { | |
332 | self.as_os_str().to_raw_bytes() | |
333 | } | |
334 | } | |
335 | ||
/// A platform agnostic variant of [`OsStringExt`].
///
/// The trait is sealed through its `private::Sealed` supertrait, so it can
/// only be implemented by this crate.
///
/// For more information, see [the module-level documentation][module].
///
/// [module]: self
/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
pub trait OsStringBytes: private::Sealed + Sized {
    /// Converts a byte vector into an equivalent platform-native string.
    ///
    /// Provided byte strings should always be valid for the [unspecified
    /// encoding] used by this crate.
    ///
    /// The vector is taken by value.
    ///
    /// # Errors
    ///
    /// See documentation for [`EncodingError`].
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// use std::ffi::OsString;
    /// # use std::io;
    ///
    /// use os_str_bytes::OsStringBytes;
    ///
    /// let os_string = env::current_exe()?;
    /// let os_bytes = os_string.clone().into_raw_vec();
    /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    ///
    /// [unspecified encoding]: self#encoding
    fn from_raw_vec(string: Vec<u8>) -> Result<Self>;

    /// Converts a platform-native string into an equivalent byte vector.
    ///
    /// The returned byte string will use an [unspecified encoding].
    ///
    /// The string is consumed by the conversion.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::env;
    /// # use std::io;
    ///
    /// use os_str_bytes::OsStringBytes;
    ///
    /// let os_string = env::current_exe()?;
    /// println!("{:?}", os_string.into_raw_vec());
    /// #
    /// # Ok::<_, io::Error>(())
    /// ```
    ///
    /// [unspecified encoding]: self#encoding
    #[must_use]
    fn into_raw_vec(self) -> Vec<u8>;
}
393 | ||
394 | impl OsStringBytes for OsString { | |
395 | #[inline] | |
396 | fn from_raw_vec(string: Vec<u8>) -> Result<Self> { | |
397 | imp::os_string_from_vec(string).map_err(EncodingError) | |
398 | } | |
399 | ||
400 | #[inline] | |
401 | fn into_raw_vec(self) -> Vec<u8> { | |
402 | imp::os_string_into_vec(self) | |
403 | } | |
404 | } | |
405 | ||
406 | impl OsStringBytes for PathBuf { | |
407 | #[inline] | |
408 | fn from_raw_vec(string: Vec<u8>) -> Result<Self> { | |
409 | OsString::from_raw_vec(string).map(Into::into) | |
410 | } | |
411 | ||
412 | #[inline] | |
413 | fn into_raw_vec(self) -> Vec<u8> { | |
414 | self.into_os_string().into_raw_vec() | |
415 | } | |
416 | } | |
417 | ||
// Holds the marker trait used as a supertrait of this crate's public traits.
// The module itself is private, so downstream crates cannot name `Sealed` and
// therefore cannot implement those traits.
mod private {
    use std::ffi::{OsStr, OsString};
    use std::path::{Path, PathBuf};

    pub trait Sealed {}

    // Platform string types, borrowed and owned.
    impl Sealed for OsStr {}
    impl Sealed for OsString {}
    impl Sealed for Path {}
    impl Sealed for PathBuf {}

    // Character and string types.
    impl Sealed for char {}
    impl Sealed for &str {}
    impl Sealed for &String {}
}