]> git.proxmox.com Git - rustc.git/blame - vendor/os_str_bytes/src/lib.rs
New upstream version 1.71.1+dfsg1
[rustc.git] / vendor / os_str_bytes / src / lib.rs
CommitLineData
04454e1e
FG
1//! This crate allows interacting with the data stored by [`OsStr`] and
2//! [`OsString`], without resorting to panics or corruption for invalid UTF-8.
3//! Thus, methods can be used that are already defined on [`[u8]`][slice] and
4//! [`Vec<u8>`].
5//!
6//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`]
7//! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which
8//! requires the bytes to be valid in UTF-8. However, since this crate makes
9//! conversions directly between the platform encoding and raw bytes, even some
10//! strings invalid in UTF-8 can be converted.
11//!
12//! In most cases, [`RawOsStr`] and [`RawOsString`] should be used.
13//! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are
14//! easier to misuse.
15//!
16//! # Encoding
17//!
18//! The encoding of bytes returned or accepted by methods of this crate is
19//! intentionally left unspecified. It may vary for different platforms, so
20//! defining it would run contrary to the goal of generic string handling.
21//! However, the following invariants will always be upheld:
22//!
23//! - The encoding will be compatible with UTF-8. In particular, splitting an
24//! encoded byte sequence by a UTF-8–encoded character always produces other
25//! valid byte sequences. They can be re-encoded without error using
26//! [`OsStrBytes::from_raw_bytes`] and similar methods.
27//!
28//! - All characters valid in platform strings are representable. [`OsStr`] and
29//! [`OsString`] can always be losslessly reconstructed from extracted bytes.
30//!
31//! Note that the chosen encoding may not match how Rust stores these strings
32//! internally, which is undocumented. For instance, the result of calling
33//! [`OsStr::len`] will not necessarily match the number of bytes this crate
34//! uses to represent the same string.
35//!
36//! Additionally, concatenation may yield unexpected results without a UTF-8
37//! separator. If two platform strings need to be concatenated, the only safe
38//! way to do so is using [`OsString::push`]. This limitation also makes it
39//! undesirable to use the bytes in interchange.
40//!
41//! Since this encoding can change between versions and platforms, it should
42//! not be used for storage. The standard library provides implementations of
43//! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be
44//! preferred for that use case.
45//!
46//! # User Input
47//!
48//! Traits in this crate should ideally not be used to convert byte sequences
49//! that did not originate from [`OsStr`] or a related struct. The encoding
50//! used by this crate is an implementation detail, so it does not make sense
51//! to expose it to users.
52//!
53//! Crate [bstr] offers some useful alternative methods, such as
54//! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant
55//! for user input. But, they reject some byte sequences used to represent
56//! valid platform strings, which would be undesirable for reliable path
57//! handling. They are best used only when accepting unknown input.
58//!
59//! This crate is meant to help when you already have an instance of [`OsStr`]
60//! and need to modify the data in a lossless way.
61//!
62//! # Features
63//!
64//! These features are optional and can be enabled or disabled in a
65//! "Cargo.toml" file.
66//!
67//! ### Default Features
68//!
69//! - **memchr** -
70//! Changes the implementation to use crate [memchr] for better performance.
71//! This feature is useless when "raw\_os\_str" is disabled.
72//!
73//! For more information, see [`RawOsStr`][memchr complexity].
74//!
75//! - **raw\_os\_str** -
76//! Enables use of [`RawOsStr`] and [`RawOsString`].
77//!
78//! ### Optional Features
79//!
80//! - **print\_bytes** -
81//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and
82//! [`RawOsString`].
83//!
84//! - **uniquote** -
85//! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and
86//! [`RawOsString`].
87//!
88//! # Implementation
89//!
90//! Some methods return [`Cow`] to account for platform differences. However,
91//! no guarantee is made that the same variant of that enum will always be
92//! returned for the same platform. Whichever can be constructed most
93//! efficiently will be returned.
94//!
95//! All traits are [sealed], meaning that they can only be implemented by this
96//! crate. Otherwise, backward compatibility would be more difficult to
97//! maintain for new features.
98//!
99//! # Complexity
100//!
101//! The time complexities of trait methods will vary based on what
102//! functionality is available for the platform. At worst, they will all be
103//! linear, but some can take constant time. For example,
104//! [`OsStringBytes::from_raw_vec`] might be able to reuse the allocation for
105//! its argument.
106//!
107//! # Examples
108//!
109//! ```
110//! # #[cfg(any())]
111//! use std::env;
112//! use std::fs;
113//! # use std::io;
114//!
115//! use os_str_bytes::OsStrBytes;
116//!
117//! # mod env {
118//! # use std::env;
119//! # use std::ffi::OsString;
120//! #
121//! # pub fn args_os() -> impl Iterator<Item = OsString> {
122//! # let mut file = env::temp_dir();
123//! # file.push("os_str_bytes\u{E9}.txt");
124//! # return vec![OsString::new(), file.into_os_string()].into_iter();
125//! # }
126//! # }
127//! #
128//! for file in env::args_os().skip(1) {
129//! if file.to_raw_bytes().first() != Some(&b'-') {
130//! let string = "Hello, world!";
131//! fs::write(&file, string)?;
132//! assert_eq!(string, fs::read_to_string(file)?);
133//! }
134//! }
135//! #
136//! # Ok::<_, io::Error>(())
137//! ```
138//!
139//! [bstr]: https://crates.io/crates/bstr
140//! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str
141//! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string
142//! [memchr complexity]: RawOsStr#complexity
143//! [memchr]: https://crates.io/crates/memchr
144//! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
145//! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
146//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed
147//! [print\_bytes]: https://crates.io/crates/print_bytes
148
149// Only require a nightly compiler when building documentation for docs.rs.
150// This is a private option that should not be used.
151// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407
152// https://github.com/dylni/os_str_bytes/issues/2
153#![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))]
154// Nightly is also currently required for the SGX platform.
155#![cfg_attr(
156 all(target_vendor = "fortanix", target_env = "sgx"),
157 feature(sgx_platform)
158)]
159#![forbid(unsafe_op_in_unsafe_fn)]
160#![warn(unused_results)]
161
162use std::borrow::Cow;
163use std::error::Error;
164use std::ffi::OsStr;
165use std::ffi::OsString;
166use std::fmt;
167use std::fmt::Display;
168use std::fmt::Formatter;
169use std::path::Path;
170use std::path::PathBuf;
171use std::result;
172
// Wraps one or more items so that each of them is compiled only when the
// "raw_os_str" feature is enabled. The `+` repetition means that at least one
// item must be supplied.
macro_rules! if_raw_str {
    ( $( $gated:item )+ ) => {
        $(
            #[cfg(feature = "raw_os_str")]
            $gated
        )+
    };
}
181
182#[cfg_attr(
183 all(target_arch = "wasm32", target_os = "unknown"),
184 path = "wasm32/mod.rs"
185)]
186#[cfg_attr(windows, path = "windows/mod.rs")]
187#[cfg_attr(
188 not(any(all(target_arch = "wasm32", target_os = "unknown"), windows)),
189 path = "common/mod.rs"
190)]
191mod imp;
192
193mod util;
194
195if_raw_str! {
196 pub mod iter;
197
198 mod pattern;
199 pub use pattern::Pattern;
200
201 mod raw_str;
202 pub use raw_str::RawOsStr;
203 pub use raw_str::RawOsString;
204}
205
206/// The error that occurs when a byte sequence is not representable in the
207/// platform encoding.
208///
209/// [`Result::unwrap`] should almost always be called on results containing
210/// this error. It should be known whether or not byte sequences are properly
211/// encoded for the platform, since [the module-level documentation][encoding]
212/// discourages using encoded bytes in interchange. Results are returned
213/// primarily to make panicking behavior explicit.
214///
215/// On Unix, this error is never returned, but [`OsStrExt`] or [`OsStringExt`]
216/// should be used instead if that needs to be guaranteed.
217///
218/// [encoding]: self#encoding
219/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
220/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
221/// [`Result::unwrap`]: ::std::result::Result::unwrap
222#[derive(Debug, Eq, PartialEq)]
223pub struct EncodingError(imp::EncodingError);
224
225impl Display for EncodingError {
226 #[inline]
227 fn fmt(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
228 self.0.fmt(formatter)
229 }
230}
231
232impl Error for EncodingError {}
233
234type Result<T> = result::Result<T, EncodingError>;
235
236/// A platform agnostic variant of [`OsStrExt`].
237///
238/// For more information, see [the module-level documentation][module].
239///
240/// [module]: self
241/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt
242pub trait OsStrBytes: private::Sealed + ToOwned {
243 /// Converts a byte slice into an equivalent platform-native string.
244 ///
245 /// Provided byte strings should always be valid for the [unspecified
246 /// encoding] used by this crate.
247 ///
248 /// # Errors
249 ///
250 /// See documentation for [`EncodingError`].
251 ///
252 /// # Examples
253 ///
254 /// ```
255 /// use std::env;
256 /// use std::ffi::OsStr;
257 /// # use std::io;
258 ///
259 /// use os_str_bytes::OsStrBytes;
260 ///
261 /// let os_string = env::current_exe()?;
262 /// let os_bytes = os_string.to_raw_bytes();
263 /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap());
264 /// #
265 /// # Ok::<_, io::Error>(())
266 /// ```
267 ///
268 /// [unspecified encoding]: self#encoding
269 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
270 where
271 S: Into<Cow<'a, [u8]>>;
272
273 /// Converts a platform-native string into an equivalent byte slice.
274 ///
275 /// The returned bytes string will use an [unspecified encoding].
276 ///
277 /// # Examples
278 ///
279 /// ```
280 /// use std::env;
281 /// # use std::io;
282 ///
283 /// use os_str_bytes::OsStrBytes;
284 ///
285 /// let os_string = env::current_exe()?;
286 /// println!("{:?}", os_string.to_raw_bytes());
287 /// #
288 /// # Ok::<_, io::Error>(())
289 /// ```
290 ///
291 /// [unspecified encoding]: self#encoding
292 #[must_use]
293 fn to_raw_bytes(&self) -> Cow<'_, [u8]>;
294}
295
296impl OsStrBytes for OsStr {
297 #[inline]
298 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
299 where
300 S: Into<Cow<'a, [u8]>>,
301 {
302 match string.into() {
303 Cow::Borrowed(string) => {
304 imp::os_str_from_bytes(string).map_err(EncodingError)
305 }
306 Cow::Owned(string) => {
307 OsStringBytes::from_raw_vec(string).map(Cow::Owned)
308 }
309 }
310 }
311
312 #[inline]
313 fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
314 imp::os_str_to_bytes(self)
315 }
316}
317
318impl OsStrBytes for Path {
319 #[inline]
320 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>>
321 where
322 S: Into<Cow<'a, [u8]>>,
323 {
324 OsStr::from_raw_bytes(string).map(|os_string| match os_string {
325 Cow::Borrowed(os_string) => Cow::Borrowed(Self::new(os_string)),
326 Cow::Owned(os_string) => Cow::Owned(os_string.into()),
327 })
328 }
329
330 #[inline]
331 fn to_raw_bytes(&self) -> Cow<'_, [u8]> {
332 self.as_os_str().to_raw_bytes()
333 }
334}
335
336/// A platform agnostic variant of [`OsStringExt`].
337///
338/// For more information, see [the module-level documentation][module].
339///
340/// [module]: self
341/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt
342pub trait OsStringBytes: private::Sealed + Sized {
343 /// Converts a byte vector into an equivalent platform-native string.
344 ///
345 /// Provided byte strings should always be valid for the [unspecified
346 /// encoding] used by this crate.
347 ///
348 /// # Errors
349 ///
350 /// See documentation for [`EncodingError`].
351 ///
352 /// # Examples
353 ///
354 /// ```
355 /// use std::env;
356 /// use std::ffi::OsString;
357 /// # use std::io;
358 ///
359 /// use os_str_bytes::OsStringBytes;
360 ///
361 /// let os_string = env::current_exe()?;
362 /// let os_bytes = os_string.clone().into_raw_vec();
363 /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap());
364 /// #
365 /// # Ok::<_, io::Error>(())
366 /// ```
367 ///
368 /// [unspecified encoding]: self#encoding
369 fn from_raw_vec(string: Vec<u8>) -> Result<Self>;
370
371 /// Converts a platform-native string into an equivalent byte vector.
372 ///
373 /// The returned byte string will use an [unspecified encoding].
374 ///
375 /// # Examples
376 ///
377 /// ```
378 /// use std::env;
379 /// # use std::io;
380 ///
381 /// use os_str_bytes::OsStringBytes;
382 ///
383 /// let os_string = env::current_exe()?;
384 /// println!("{:?}", os_string.into_raw_vec());
385 /// #
386 /// # Ok::<_, io::Error>(())
387 /// ```
388 ///
389 /// [unspecified encoding]: self#encoding
390 #[must_use]
391 fn into_raw_vec(self) -> Vec<u8>;
392}
393
394impl OsStringBytes for OsString {
395 #[inline]
396 fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
397 imp::os_string_from_vec(string).map_err(EncodingError)
398 }
399
400 #[inline]
401 fn into_raw_vec(self) -> Vec<u8> {
402 imp::os_string_into_vec(self)
403 }
404}
405
406impl OsStringBytes for PathBuf {
407 #[inline]
408 fn from_raw_vec(string: Vec<u8>) -> Result<Self> {
409 OsString::from_raw_vec(string).map(Into::into)
410 }
411
412 #[inline]
413 fn into_raw_vec(self) -> Vec<u8> {
414 self.into_os_string().into_raw_vec()
415 }
416}
417
mod private {
    use std::ffi::{OsStr, OsString};
    use std::path::{Path, PathBuf};

    // Marker trait used as a supertrait bound by the public traits of this
    // crate.
    pub trait Sealed {}

    // Generates an empty `Sealed` implementation for each listed type.
    macro_rules! seal {
        ( $( $sealed:ty ),+ $(,)? ) => {
            $( impl Sealed for $sealed {} )+
        };
    }

    seal!(char, OsStr, OsString, Path, PathBuf, &str, &String);
}