]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | //! Character conversions. |
2 | ||
48663c56 XL |
3 | use crate::convert::TryFrom; |
4 | use crate::fmt; | |
5 | use crate::mem::transmute; | |
6 | use crate::str::FromStr; | |
7 | ||
83c7162d XL |
8 | use super::MAX; |
9 | ||
10 | /// Converts a `u32` to a `char`. | |
11 | /// | |
12 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 13 | /// `as`: |
83c7162d XL |
14 | /// |
15 | /// ``` | |
16 | /// let c = '💯'; | |
17 | /// let i = c as u32; | |
18 | /// | |
19 | /// assert_eq!(128175, i); | |
20 | /// ``` | |
21 | /// | |
22 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
23 | /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value | |
24 | /// for a [`char`]. | |
25 | /// | |
83c7162d XL |
26 | /// For an unsafe version of this function which ignores these checks, see |
27 | /// [`from_u32_unchecked`]. | |
28 | /// | |
83c7162d XL |
29 | /// # Examples |
30 | /// | |
31 | /// Basic usage: | |
32 | /// | |
33 | /// ``` | |
34 | /// use std::char; | |
35 | /// | |
36 | /// let c = char::from_u32(0x2764); | |
37 | /// | |
38 | /// assert_eq!(Some('❤'), c); | |
39 | /// ``` | |
40 | /// | |
41 | /// Returning `None` when the input is not a valid [`char`]: | |
42 | /// | |
43 | /// ``` | |
44 | /// use std::char; | |
45 | /// | |
46 | /// let c = char::from_u32(0x110000); | |
47 | /// | |
48 | /// assert_eq!(None, c); | |
49 | /// ``` | |
5869c6ff | 50 | #[doc(alias = "chr")] |
83c7162d XL |
51 | #[inline] |
52 | #[stable(feature = "rust1", since = "1.0.0")] | |
53 | pub fn from_u32(i: u32) -> Option<char> { | |
54 | char::try_from(i).ok() | |
55 | } | |
56 | ||
57 | /// Converts a `u32` to a `char`, ignoring validity. | |
58 | /// | |
59 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 60 | /// `as`: |
83c7162d XL |
61 | /// |
62 | /// ``` | |
63 | /// let c = '💯'; | |
64 | /// let i = c as u32; | |
65 | /// | |
66 | /// assert_eq!(128175, i); | |
67 | /// ``` | |
68 | /// | |
69 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
70 | /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to | |
71 | /// [`char`], possibly creating an invalid one. | |
72 | /// | |
83c7162d XL |
73 | /// # Safety |
74 | /// | |
75 | /// This function is unsafe, as it may construct invalid `char` values. | |
76 | /// | |
77 | /// For a safe version of this function, see the [`from_u32`] function. | |
78 | /// | |
83c7162d XL |
79 | /// # Examples |
80 | /// | |
81 | /// Basic usage: | |
82 | /// | |
83 | /// ``` | |
84 | /// use std::char; | |
85 | /// | |
86 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; | |
87 | /// | |
88 | /// assert_eq!('❤', c); | |
89 | /// ``` | |
90 | #[inline] | |
91 | #[stable(feature = "char_from_unchecked", since = "1.5.0")] | |
92 | pub unsafe fn from_u32_unchecked(i: u32) -> char { | |
f035d41b XL |
93 | // SAFETY: the caller must guarantee that `i` is a valid char value. |
94 | if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } } | |
83c7162d XL |
95 | } |
96 | ||
97 | #[stable(feature = "char_convert", since = "1.13.0")] | |
98 | impl From<char> for u32 { | |
0bf4aa26 XL |
99 | /// Converts a [`char`] into a [`u32`]. |
100 | /// | |
101 | /// # Examples | |
102 | /// | |
103 | /// ``` | |
104 | /// use std::mem; | |
105 | /// | |
e74abb32 XL |
106 | /// let c = 'c'; |
107 | /// let u = u32::from(c); | |
108 | /// assert!(4 == mem::size_of_val(&u)) | |
0bf4aa26 | 109 | /// ``` |
83c7162d XL |
110 | #[inline] |
111 | fn from(c: char) -> Self { | |
112 | c as u32 | |
113 | } | |
114 | } | |
115 | ||
5869c6ff XL |
116 | #[stable(feature = "more_char_conversions", since = "1.51.0")] |
117 | impl From<char> for u64 { | |
118 | /// Converts a [`char`] into a [`u64`]. | |
119 | /// | |
120 | /// # Examples | |
121 | /// | |
122 | /// ``` | |
123 | /// use std::mem; | |
124 | /// | |
125 | /// let c = '👤'; | |
126 | /// let u = u64::from(c); | |
127 | /// assert!(8 == mem::size_of_val(&u)) | |
128 | /// ``` | |
129 | #[inline] | |
130 | fn from(c: char) -> Self { | |
131 | // The char is casted to the value of the code point, then zero-extended to 64 bit. | |
132 | // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics] | |
133 | c as u64 | |
134 | } | |
135 | } | |
136 | ||
137 | #[stable(feature = "more_char_conversions", since = "1.51.0")] | |
138 | impl From<char> for u128 { | |
139 | /// Converts a [`char`] into a [`u128`]. | |
140 | /// | |
141 | /// # Examples | |
142 | /// | |
143 | /// ``` | |
144 | /// use std::mem; | |
145 | /// | |
146 | /// let c = '⚙'; | |
147 | /// let u = u128::from(c); | |
148 | /// assert!(16 == mem::size_of_val(&u)) | |
149 | /// ``` | |
150 | #[inline] | |
151 | fn from(c: char) -> Self { | |
152 | // The char is casted to the value of the code point, then zero-extended to 128 bit. | |
153 | // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics] | |
154 | c as u128 | |
155 | } | |
156 | } | |
157 | ||
dc9dc135 | 158 | /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF. |
83c7162d XL |
159 | /// |
160 | /// Unicode is designed such that this effectively decodes bytes | |
161 | /// with the character encoding that IANA calls ISO-8859-1. | |
162 | /// This encoding is compatible with ASCII. | |
163 | /// | |
164 | /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), | |
165 | /// which leaves some "blanks", byte values that are not assigned to any character. | |
166 | /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. | |
167 | /// | |
168 | /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, | |
169 | /// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks | |
170 | /// to punctuation and various Latin characters. | |
171 | /// | |
172 | /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) | |
173 | /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases | |
174 | /// for a superset of Windows-1252 that fills the remaining blanks with corresponding | |
175 | /// C0 and C1 control codes. | |
176 | #[stable(feature = "char_convert", since = "1.13.0")] | |
177 | impl From<u8> for char { | |
0bf4aa26 XL |
178 | /// Converts a [`u8`] into a [`char`]. |
179 | /// | |
180 | /// # Examples | |
181 | /// | |
182 | /// ``` | |
183 | /// use std::mem; | |
184 | /// | |
e74abb32 XL |
185 | /// let u = 32 as u8; |
186 | /// let c = char::from(u); | |
187 | /// assert!(4 == mem::size_of_val(&c)) | |
0bf4aa26 | 188 | /// ``` |
83c7162d XL |
189 | #[inline] |
190 | fn from(i: u8) -> Self { | |
191 | i as char | |
192 | } | |
193 | } | |
194 | ||
83c7162d XL |
195 | /// An error which can be returned when parsing a char. |
196 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
197 | #[derive(Clone, Debug, PartialEq, Eq)] | |
198 | pub struct ParseCharError { | |
199 | kind: CharErrorKind, | |
200 | } | |
201 | ||
202 | impl ParseCharError { | |
60c5eb7d XL |
203 | #[unstable( |
204 | feature = "char_error_internals", | |
205 | reason = "this method should not be available publicly", | |
dfeec247 | 206 | issue = "none" |
60c5eb7d | 207 | )] |
83c7162d XL |
208 | #[doc(hidden)] |
209 | pub fn __description(&self) -> &str { | |
210 | match self.kind { | |
60c5eb7d XL |
211 | CharErrorKind::EmptyString => "cannot parse char from empty string", |
212 | CharErrorKind::TooManyChars => "too many characters in string", | |
83c7162d XL |
213 | } |
214 | } | |
215 | } | |
216 | ||
/// The reasons parsing a `char` from a string can fail.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
222 | ||
223 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
224 | impl fmt::Display for ParseCharError { | |
48663c56 | 225 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
226 | self.__description().fmt(f) |
227 | } | |
228 | } | |
229 | ||
83c7162d XL |
230 | #[stable(feature = "char_from_str", since = "1.20.0")] |
231 | impl FromStr for char { | |
232 | type Err = ParseCharError; | |
233 | ||
234 | #[inline] | |
235 | fn from_str(s: &str) -> Result<Self, Self::Err> { | |
236 | let mut chars = s.chars(); | |
237 | match (chars.next(), chars.next()) { | |
60c5eb7d | 238 | (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }), |
83c7162d | 239 | (Some(c), None) => Ok(c), |
60c5eb7d | 240 | _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }), |
83c7162d XL |
241 | } |
242 | } | |
243 | } | |
244 | ||
9fa01778 | 245 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
246 | impl TryFrom<u32> for char { |
247 | type Error = CharTryFromError; | |
248 | ||
249 | #[inline] | |
250 | fn try_from(i: u32) -> Result<Self, Self::Error> { | |
251 | if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { | |
252 | Err(CharTryFromError(())) | |
253 | } else { | |
60c5eb7d | 254 | // SAFETY: checked that it's a legal unicode value |
f9f354fc | 255 | Ok(unsafe { transmute(i) }) |
83c7162d XL |
256 | } |
257 | } | |
258 | } | |
259 | ||
260 | /// The error type returned when a conversion from u32 to char fails. | |
9fa01778 | 261 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
262 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] |
263 | pub struct CharTryFromError(()); | |
264 | ||
9fa01778 | 265 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d | 266 | impl fmt::Display for CharTryFromError { |
48663c56 | 267 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
268 | "converted integer out of range for `char`".fmt(f) |
269 | } | |
270 | } | |
271 | ||
272 | /// Converts a digit in the given radix to a `char`. | |
273 | /// | |
274 | /// A 'radix' here is sometimes also called a 'base'. A radix of two | |
275 | /// indicates a binary number, a radix of ten, decimal, and a radix of | |
276 | /// sixteen, hexadecimal, to give some common values. Arbitrary | |
277 | /// radices are supported. | |
278 | /// | |
279 | /// `from_digit()` will return `None` if the input is not a digit in | |
280 | /// the given radix. | |
281 | /// | |
282 | /// # Panics | |
283 | /// | |
284 | /// Panics if given a radix larger than 36. | |
285 | /// | |
286 | /// # Examples | |
287 | /// | |
288 | /// Basic usage: | |
289 | /// | |
290 | /// ``` | |
291 | /// use std::char; | |
292 | /// | |
293 | /// let c = char::from_digit(4, 10); | |
294 | /// | |
295 | /// assert_eq!(Some('4'), c); | |
296 | /// | |
297 | /// // Decimal 11 is a single digit in base 16 | |
298 | /// let c = char::from_digit(11, 16); | |
299 | /// | |
300 | /// assert_eq!(Some('b'), c); | |
301 | /// ``` | |
302 | /// | |
303 | /// Returning `None` when the input is not a digit: | |
304 | /// | |
305 | /// ``` | |
306 | /// use std::char; | |
307 | /// | |
308 | /// let c = char::from_digit(20, 10); | |
309 | /// | |
310 | /// assert_eq!(None, c); | |
311 | /// ``` | |
312 | /// | |
313 | /// Passing a large radix, causing a panic: | |
314 | /// | |
f035d41b | 315 | /// ```should_panic |
83c7162d XL |
316 | /// use std::char; |
317 | /// | |
f035d41b XL |
318 | /// // this panics |
319 | /// let c = char::from_digit(1, 37); | |
83c7162d XL |
320 | /// ``` |
321 | #[inline] | |
322 | #[stable(feature = "rust1", since = "1.0.0")] | |
323 | pub fn from_digit(num: u32, radix: u32) -> Option<char> { | |
324 | if radix > 36 { | |
325 | panic!("from_digit: radix is too high (maximum 36)"); | |
326 | } | |
327 | if num < radix { | |
328 | let num = num as u8; | |
60c5eb7d | 329 | if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) } |
83c7162d XL |
330 | } else { |
331 | None | |
332 | } | |
333 | } |