]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | //! Character conversions. |
2 | ||
48663c56 XL |
3 | use crate::convert::TryFrom; |
4 | use crate::fmt; | |
5 | use crate::mem::transmute; | |
6 | use crate::str::FromStr; | |
7 | ||
83c7162d XL |
8 | use super::MAX; |
9 | ||
10 | /// Converts a `u32` to a `char`. | |
11 | /// | |
12 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 13 | /// `as`: |
83c7162d XL |
14 | /// |
15 | /// ``` | |
16 | /// let c = '💯'; | |
17 | /// let i = c as u32; | |
18 | /// | |
19 | /// assert_eq!(128175, i); | |
20 | /// ``` | |
21 | /// | |
22 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
23 | /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value | |
24 | /// for a [`char`]. | |
25 | /// | |
26 | /// [`char`]: ../../std/primitive.char.html | |
27 | /// [`u32`]: ../../std/primitive.u32.html | |
83c7162d XL |
28 | /// |
29 | /// For an unsafe version of this function which ignores these checks, see | |
30 | /// [`from_u32_unchecked`]. | |
31 | /// | |
32 | /// [`from_u32_unchecked`]: fn.from_u32_unchecked.html | |
33 | /// | |
34 | /// # Examples | |
35 | /// | |
36 | /// Basic usage: | |
37 | /// | |
38 | /// ``` | |
39 | /// use std::char; | |
40 | /// | |
41 | /// let c = char::from_u32(0x2764); | |
42 | /// | |
43 | /// assert_eq!(Some('❤'), c); | |
44 | /// ``` | |
45 | /// | |
46 | /// Returning `None` when the input is not a valid [`char`]: | |
47 | /// | |
48 | /// ``` | |
49 | /// use std::char; | |
50 | /// | |
51 | /// let c = char::from_u32(0x110000); | |
52 | /// | |
53 | /// assert_eq!(None, c); | |
54 | /// ``` | |
55 | #[inline] | |
56 | #[stable(feature = "rust1", since = "1.0.0")] | |
57 | pub fn from_u32(i: u32) -> Option<char> { | |
58 | char::try_from(i).ok() | |
59 | } | |
60 | ||
61 | /// Converts a `u32` to a `char`, ignoring validity. | |
62 | /// | |
63 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 64 | /// `as`: |
83c7162d XL |
65 | /// |
66 | /// ``` | |
67 | /// let c = '💯'; | |
68 | /// let i = c as u32; | |
69 | /// | |
70 | /// assert_eq!(128175, i); | |
71 | /// ``` | |
72 | /// | |
73 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
74 | /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to | |
75 | /// [`char`], possibly creating an invalid one. | |
76 | /// | |
77 | /// [`char`]: ../../std/primitive.char.html | |
78 | /// [`u32`]: ../../std/primitive.u32.html | |
83c7162d XL |
79 | /// |
80 | /// # Safety | |
81 | /// | |
82 | /// This function is unsafe, as it may construct invalid `char` values. | |
83 | /// | |
84 | /// For a safe version of this function, see the [`from_u32`] function. | |
85 | /// | |
86 | /// [`from_u32`]: fn.from_u32.html | |
87 | /// | |
88 | /// # Examples | |
89 | /// | |
90 | /// Basic usage: | |
91 | /// | |
92 | /// ``` | |
93 | /// use std::char; | |
94 | /// | |
95 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; | |
96 | /// | |
97 | /// assert_eq!('❤', c); | |
98 | /// ``` | |
99 | #[inline] | |
100 | #[stable(feature = "char_from_unchecked", since = "1.5.0")] | |
101 | pub unsafe fn from_u32_unchecked(i: u32) -> char { | |
102 | transmute(i) | |
103 | } | |
104 | ||
105 | #[stable(feature = "char_convert", since = "1.13.0")] | |
106 | impl From<char> for u32 { | |
0bf4aa26 XL |
107 | /// Converts a [`char`] into a [`u32`]. |
108 | /// | |
109 | /// # Examples | |
110 | /// | |
111 | /// ``` | |
112 | /// use std::mem; | |
113 | /// | |
e74abb32 XL |
114 | /// let c = 'c'; |
115 | /// let u = u32::from(c); | |
116 | /// assert!(4 == mem::size_of_val(&u)) | |
0bf4aa26 | 117 | /// ``` |
83c7162d XL |
118 | #[inline] |
119 | fn from(c: char) -> Self { | |
120 | c as u32 | |
121 | } | |
122 | } | |
123 | ||
dc9dc135 | 124 | /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF. |
83c7162d XL |
125 | /// |
126 | /// Unicode is designed such that this effectively decodes bytes | |
127 | /// with the character encoding that IANA calls ISO-8859-1. | |
128 | /// This encoding is compatible with ASCII. | |
129 | /// | |
130 | /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), | |
131 | /// which leaves some "blanks", byte values that are not assigned to any character. | |
132 | /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. | |
133 | /// | |
134 | /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, | |
135 | /// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks | |
136 | /// to punctuation and various Latin characters. | |
137 | /// | |
138 | /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) | |
139 | /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases | |
140 | /// for a superset of Windows-1252 that fills the remaining blanks with corresponding | |
141 | /// C0 and C1 control codes. | |
142 | #[stable(feature = "char_convert", since = "1.13.0")] | |
143 | impl From<u8> for char { | |
0bf4aa26 XL |
144 | /// Converts a [`u8`] into a [`char`]. |
145 | /// | |
146 | /// # Examples | |
147 | /// | |
148 | /// ``` | |
149 | /// use std::mem; | |
150 | /// | |
e74abb32 XL |
151 | /// let u = 32 as u8; |
152 | /// let c = char::from(u); | |
153 | /// assert!(4 == mem::size_of_val(&c)) | |
0bf4aa26 | 154 | /// ``` |
83c7162d XL |
155 | #[inline] |
156 | fn from(i: u8) -> Self { | |
157 | i as char | |
158 | } | |
159 | } | |
160 | ||
83c7162d XL |
161 | /// An error which can be returned when parsing a char. |
162 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
163 | #[derive(Clone, Debug, PartialEq, Eq)] | |
164 | pub struct ParseCharError { | |
165 | kind: CharErrorKind, | |
166 | } | |
167 | ||
168 | impl ParseCharError { | |
60c5eb7d XL |
169 | #[unstable( |
170 | feature = "char_error_internals", | |
171 | reason = "this method should not be available publicly", | |
172 | issue = "0" | |
173 | )] | |
83c7162d XL |
174 | #[doc(hidden)] |
175 | pub fn __description(&self) -> &str { | |
176 | match self.kind { | |
60c5eb7d XL |
177 | CharErrorKind::EmptyString => "cannot parse char from empty string", |
178 | CharErrorKind::TooManyChars => "too many characters in string", | |
83c7162d XL |
179 | } |
180 | } | |
181 | } | |
182 | ||
// The reason a string could not be parsed into a single `char`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    // The input string contained no characters at all.
    EmptyString,
    // The input string contained more than one character.
    TooManyChars,
}
188 | ||
189 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
190 | impl fmt::Display for ParseCharError { | |
48663c56 | 191 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
192 | self.__description().fmt(f) |
193 | } | |
194 | } | |
195 | ||
83c7162d XL |
196 | #[stable(feature = "char_from_str", since = "1.20.0")] |
197 | impl FromStr for char { | |
198 | type Err = ParseCharError; | |
199 | ||
200 | #[inline] | |
201 | fn from_str(s: &str) -> Result<Self, Self::Err> { | |
202 | let mut chars = s.chars(); | |
203 | match (chars.next(), chars.next()) { | |
60c5eb7d | 204 | (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }), |
83c7162d | 205 | (Some(c), None) => Ok(c), |
60c5eb7d | 206 | _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }), |
83c7162d XL |
207 | } |
208 | } | |
209 | } | |
210 | ||
9fa01778 | 211 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
212 | impl TryFrom<u32> for char { |
213 | type Error = CharTryFromError; | |
214 | ||
215 | #[inline] | |
216 | fn try_from(i: u32) -> Result<Self, Self::Error> { | |
217 | if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { | |
218 | Err(CharTryFromError(())) | |
219 | } else { | |
60c5eb7d | 220 | // SAFETY: checked that it's a legal unicode value |
83c7162d XL |
221 | Ok(unsafe { from_u32_unchecked(i) }) |
222 | } | |
223 | } | |
224 | } | |
225 | ||
226 | /// The error type returned when a conversion from u32 to char fails. | |
9fa01778 | 227 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
228 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] |
229 | pub struct CharTryFromError(()); | |
230 | ||
9fa01778 | 231 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d | 232 | impl fmt::Display for CharTryFromError { |
48663c56 | 233 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
234 | "converted integer out of range for `char`".fmt(f) |
235 | } | |
236 | } | |
237 | ||
238 | /// Converts a digit in the given radix to a `char`. | |
239 | /// | |
240 | /// A 'radix' here is sometimes also called a 'base'. A radix of two | |
241 | /// indicates a binary number, a radix of ten, decimal, and a radix of | |
242 | /// sixteen, hexadecimal, to give some common values. Arbitrary | |
243 | /// radices are supported. | |
244 | /// | |
245 | /// `from_digit()` will return `None` if the input is not a digit in | |
246 | /// the given radix. | |
247 | /// | |
248 | /// # Panics | |
249 | /// | |
250 | /// Panics if given a radix larger than 36. | |
251 | /// | |
252 | /// # Examples | |
253 | /// | |
254 | /// Basic usage: | |
255 | /// | |
256 | /// ``` | |
257 | /// use std::char; | |
258 | /// | |
259 | /// let c = char::from_digit(4, 10); | |
260 | /// | |
261 | /// assert_eq!(Some('4'), c); | |
262 | /// | |
263 | /// // Decimal 11 is a single digit in base 16 | |
264 | /// let c = char::from_digit(11, 16); | |
265 | /// | |
266 | /// assert_eq!(Some('b'), c); | |
267 | /// ``` | |
268 | /// | |
269 | /// Returning `None` when the input is not a digit: | |
270 | /// | |
271 | /// ``` | |
272 | /// use std::char; | |
273 | /// | |
274 | /// let c = char::from_digit(20, 10); | |
275 | /// | |
276 | /// assert_eq!(None, c); | |
277 | /// ``` | |
278 | /// | |
279 | /// Passing a large radix, causing a panic: | |
280 | /// | |
281 | /// ``` | |
282 | /// use std::thread; | |
283 | /// use std::char; | |
284 | /// | |
285 | /// let result = thread::spawn(|| { | |
286 | /// // this panics | |
287 | /// let c = char::from_digit(1, 37); | |
288 | /// }).join(); | |
289 | /// | |
290 | /// assert!(result.is_err()); | |
291 | /// ``` | |
292 | #[inline] | |
293 | #[stable(feature = "rust1", since = "1.0.0")] | |
294 | pub fn from_digit(num: u32, radix: u32) -> Option<char> { | |
295 | if radix > 36 { | |
296 | panic!("from_digit: radix is too high (maximum 36)"); | |
297 | } | |
298 | if num < radix { | |
299 | let num = num as u8; | |
60c5eb7d | 300 | if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) } |
83c7162d XL |
301 | } else { |
302 | None | |
303 | } | |
304 | } |