]>
Commit | Line | Data |
---|---|---|
d9579d0f AL |
1 | // Copyright 2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! Windows-specific extensions to the primitives in the `std::ffi` module. | |
abe05a73 XL |
12 | //! |
13 | //! # Overview | |
14 | //! | |
15 | //! For historical reasons, the Windows API uses a form of potentially | |
16 | //! ill-formed UTF-16 encoding for strings. Specifically, the 16-bit | |
17 | //! code units in Windows strings may contain [isolated surrogate code | |
18 | //! points which are not paired together][ill-formed-utf-16]. The | |
19 | //! Unicode standard requires that surrogate code points (those in the | |
20 | //! range U+D800 to U+DFFF) always be *paired*, because in the UTF-16 | |
21 | //! encoding a *surrogate code unit pair* is used to encode a single | |
22 | //! character. For compatibility with code that does not enforce | |
23 | //! these pairings, Windows does not enforce them, either. | |
24 | //! | |
25 | //! While it is not always possible to convert such a string losslessly into | |
26 | //! a valid UTF-16 string (or even UTF-8), it is often desirable to be | |
27 | //! able to round-trip such a string from and to Windows APIs | |
28 | //! losslessly. For example, some Rust code may be "bridging" some | |
29 | //! Windows APIs together, just passing `WCHAR` strings among those | |
30 | //! APIs without ever really looking into the strings. | |
31 | //! | |
32 | //! If Rust code *does* need to look into those strings, it can | |
33 | //! convert them to valid UTF-8, possibly lossily, by substituting | |
34 | //! invalid sequences with U+FFFD REPLACEMENT CHARACTER, as is | |
35 | //! conventionally done in other Rust APIs that deal with string | |
36 | //! encodings. | |
37 | //! | |
38 | //! # `OsStringExt` and `OsStrExt` | |
39 | //! | |
40 | //! [`OsString`] is the Rust wrapper for owned strings in the | |
41 | //! preferred representation of the operating system. On Windows, | |
42 | //! this struct gets augmented with an implementation of the | |
43 | //! [`OsStringExt`] trait, which has a [`from_wide`] method. This | |
44 | //! lets you create an [`OsString`] from a `&[u16]` slice; presumably | |
45 | //! you get such a slice out of a `WCHAR` Windows API. | |
46 | //! | |
47 | //! Similarly, [`OsStr`] is the Rust wrapper for borrowed strings in the | |
48 | //! preferred representation of the operating system. On Windows, the | |
49 | //! [`OsStrExt`] trait provides the [`encode_wide`] method, which | |
50 | //! outputs an [`EncodeWide`] iterator. You can [`collect`] this | |
51 | //! iterator, for example, to obtain a `Vec<u16>`; you can later get a | |
52 | //! pointer to this vector's contents and feed it to Windows APIs. | |
53 | //! | |
54 | //! These traits, along with [`OsString`] and [`OsStr`], work in | |
55 | //! conjunction so that it is possible to **round-trip** strings from | |
56 | //! Windows and back, with no loss of data, even if the strings are | |
57 | //! ill-formed UTF-16. | |
58 | //! | |
59 | //! [ill-formed-utf-16]: https://simonsapin.github.io/wtf-8/#ill-formed-utf-16 | |
60 | //! [`OsString`]: ../../../ffi/struct.OsString.html | |
61 | //! [`OsStr`]: ../../../ffi/struct.OsStr.html | |
62 | //! [`OsStringExt`]: trait.OsStringExt.html | |
63 | //! [`OsStrExt`]: trait.OsStrExt.html | |
64 | //! [`EncodeWide`]: struct.EncodeWide.html | |
65 | //! [`from_wide`]: trait.OsStringExt.html#tymethod.from_wide | |
66 | //! [`encode_wide`]: trait.OsStrExt.html#tymethod.encode_wide | |
67 | //! [`collect`]: ../../../iter/trait.Iterator.html#method.collect | |
d9579d0f AL |
68 | |
69 | #![stable(feature = "rust1", since = "1.0.0")] | |
70 | ||
71 | use ffi::{OsString, OsStr}; | |
72 | use sys::os_str::Buf; | |
73 | use sys_common::wtf8::Wtf8Buf; | |
74 | use sys_common::{FromInner, AsInner}; | |
75 | ||
92a42be0 | 76 | #[stable(feature = "rust1", since = "1.0.0")] |
d9579d0f AL |
77 | pub use sys_common::wtf8::EncodeWide; |
78 | ||
79 | /// Windows-specific extensions to `OsString`. | |
80 | #[stable(feature = "rust1", since = "1.0.0")] | |
81 | pub trait OsStringExt { | |
82 | /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of | |
83 | /// 16-bit code units. | |
84 | /// | |
7cac9316 | 85 | /// This is lossless: calling [`encode_wide`] on the resulting string |
d9579d0f | 86 | /// will always return the original code units. |
7cac9316 XL |
87 | /// |
88 | /// # Examples | |
89 | /// | |
90 | /// ``` | |
91 | /// use std::ffi::OsString; | |
92 | /// use std::os::windows::prelude::*; | |
93 | /// | |
94 | /// // UTF-16 encoding for "Unicode". | |
95 | /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; | |
96 | /// | |
97 | /// let string = OsString::from_wide(&source[..]); | |
98 | /// ``` | |
99 | /// | |
100 | /// [`encode_wide`]: ./trait.OsStrExt.html#tymethod.encode_wide | |
d9579d0f AL |
101 | #[stable(feature = "rust1", since = "1.0.0")] |
102 | fn from_wide(wide: &[u16]) -> Self; | |
103 | } | |
104 | ||
105 | #[stable(feature = "rust1", since = "1.0.0")] | |
106 | impl OsStringExt for OsString { | |
107 | fn from_wide(wide: &[u16]) -> OsString { | |
108 | FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) }) | |
109 | } | |
110 | } | |
111 | ||
112 | /// Windows-specific extensions to `OsStr`. | |
113 | #[stable(feature = "rust1", since = "1.0.0")] | |
114 | pub trait OsStrExt { | |
7cac9316 XL |
115 | /// Re-encodes an `OsStr` as a wide character sequence, i.e. potentially |
116 | /// ill-formed UTF-16. | |
117 | /// | |
118 | /// This is lossless: calling [`OsString::from_wide`] and then | |
119 | /// `encode_wide` on the result will yield the original code units. | |
120 | /// Note that the encoding does not add a final null terminator. | |
121 | /// | |
122 | /// # Examples | |
123 | /// | |
124 | /// ``` | |
125 | /// use std::ffi::OsString; | |
126 | /// use std::os::windows::prelude::*; | |
127 | /// | |
128 | /// // UTF-16 encoding for "Unicode". | |
129 | /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; | |
130 | /// | |
131 | /// let string = OsString::from_wide(&source[..]); | |
132 | /// | |
133 | /// let result: Vec<u16> = string.encode_wide().collect(); | |
134 | /// assert_eq!(&source[..], &result[..]); | |
135 | /// ``` | |
d9579d0f | 136 | /// |
7cac9316 | 137 | /// [`OsString::from_wide`]: ./trait.OsStringExt.html#tymethod.from_wide |
d9579d0f AL |
138 | #[stable(feature = "rust1", since = "1.0.0")] |
139 | fn encode_wide(&self) -> EncodeWide; | |
140 | } | |
141 | ||
142 | #[stable(feature = "rust1", since = "1.0.0")] | |
143 | impl OsStrExt for OsStr { | |
144 | fn encode_wide(&self) -> EncodeWide { | |
145 | self.as_inner().inner.encode_wide() | |
146 | } | |
147 | } |