]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | //! Character conversions. |
2 | ||
48663c56 XL |
3 | use crate::convert::TryFrom; |
4 | use crate::fmt; | |
5 | use crate::mem::transmute; | |
6 | use crate::str::FromStr; | |
7 | ||
83c7162d XL |
8 | use super::MAX; |
9 | ||
10 | /// Converts a `u32` to a `char`. | |
11 | /// | |
12 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 13 | /// `as`: |
83c7162d XL |
14 | /// |
15 | /// ``` | |
16 | /// let c = '💯'; | |
17 | /// let i = c as u32; | |
18 | /// | |
19 | /// assert_eq!(128175, i); | |
20 | /// ``` | |
21 | /// | |
22 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
23 | /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value | |
24 | /// for a [`char`]. | |
25 | /// | |
83c7162d XL |
26 | /// For an unsafe version of this function which ignores these checks, see |
27 | /// [`from_u32_unchecked`]. | |
28 | /// | |
83c7162d XL |
29 | /// # Examples |
30 | /// | |
31 | /// Basic usage: | |
32 | /// | |
33 | /// ``` | |
34 | /// use std::char; | |
35 | /// | |
36 | /// let c = char::from_u32(0x2764); | |
37 | /// | |
38 | /// assert_eq!(Some('❤'), c); | |
39 | /// ``` | |
40 | /// | |
41 | /// Returning `None` when the input is not a valid [`char`]: | |
42 | /// | |
43 | /// ``` | |
44 | /// use std::char; | |
45 | /// | |
46 | /// let c = char::from_u32(0x110000); | |
47 | /// | |
48 | /// assert_eq!(None, c); | |
49 | /// ``` | |
5869c6ff | 50 | #[doc(alias = "chr")] |
c295e0f8 | 51 | #[must_use] |
83c7162d XL |
52 | #[inline] |
53 | #[stable(feature = "rust1", since = "1.0.0")] | |
3c0e092e XL |
54 | #[rustc_const_unstable(feature = "const_char_convert", issue = "89259")] |
55 | pub const fn from_u32(i: u32) -> Option<char> { | |
56 | // FIXME: once Result::ok is const fn, use it here | |
57 | match char_try_from_u32(i) { | |
58 | Ok(c) => Some(c), | |
59 | Err(_) => None, | |
60 | } | |
83c7162d XL |
61 | } |
62 | ||
63 | /// Converts a `u32` to a `char`, ignoring validity. | |
64 | /// | |
65 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 66 | /// `as`: |
83c7162d XL |
67 | /// |
68 | /// ``` | |
69 | /// let c = '💯'; | |
70 | /// let i = c as u32; | |
71 | /// | |
72 | /// assert_eq!(128175, i); | |
73 | /// ``` | |
74 | /// | |
75 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
76 | /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to | |
77 | /// [`char`], possibly creating an invalid one. | |
78 | /// | |
83c7162d XL |
79 | /// # Safety |
80 | /// | |
81 | /// This function is unsafe, as it may construct invalid `char` values. | |
82 | /// | |
83 | /// For a safe version of this function, see the [`from_u32`] function. | |
84 | /// | |
83c7162d XL |
85 | /// # Examples |
86 | /// | |
87 | /// Basic usage: | |
88 | /// | |
89 | /// ``` | |
90 | /// use std::char; | |
91 | /// | |
92 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; | |
93 | /// | |
94 | /// assert_eq!('❤', c); | |
95 | /// ``` | |
96 | #[inline] | |
c295e0f8 | 97 | #[must_use] |
83c7162d | 98 | #[stable(feature = "char_from_unchecked", since = "1.5.0")] |
3c0e092e XL |
99 | #[rustc_const_unstable(feature = "const_char_convert", issue = "89259")] |
100 | pub const unsafe fn from_u32_unchecked(i: u32) -> char { | |
f035d41b XL |
101 | // SAFETY: the caller must guarantee that `i` is a valid char value. |
102 | if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } } | |
83c7162d XL |
103 | } |
104 | ||
105 | #[stable(feature = "char_convert", since = "1.13.0")] | |
3c0e092e XL |
106 | #[rustc_const_unstable(feature = "const_convert", issue = "88674")] |
107 | impl const From<char> for u32 { | |
0bf4aa26 XL |
108 | /// Converts a [`char`] into a [`u32`]. |
109 | /// | |
110 | /// # Examples | |
111 | /// | |
112 | /// ``` | |
113 | /// use std::mem; | |
114 | /// | |
e74abb32 XL |
115 | /// let c = 'c'; |
116 | /// let u = u32::from(c); | |
117 | /// assert!(4 == mem::size_of_val(&u)) | |
0bf4aa26 | 118 | /// ``` |
83c7162d XL |
119 | #[inline] |
120 | fn from(c: char) -> Self { | |
121 | c as u32 | |
122 | } | |
123 | } | |
124 | ||
5869c6ff | 125 | #[stable(feature = "more_char_conversions", since = "1.51.0")] |
3c0e092e XL |
126 | #[rustc_const_unstable(feature = "const_convert", issue = "88674")] |
127 | impl const From<char> for u64 { | |
5869c6ff XL |
128 | /// Converts a [`char`] into a [`u64`]. |
129 | /// | |
130 | /// # Examples | |
131 | /// | |
132 | /// ``` | |
133 | /// use std::mem; | |
134 | /// | |
135 | /// let c = '👤'; | |
136 | /// let u = u64::from(c); | |
137 | /// assert!(8 == mem::size_of_val(&u)) | |
138 | /// ``` | |
139 | #[inline] | |
140 | fn from(c: char) -> Self { | |
141 | // The char is casted to the value of the code point, then zero-extended to 64 bit. | |
142 | // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics] | |
143 | c as u64 | |
144 | } | |
145 | } | |
146 | ||
147 | #[stable(feature = "more_char_conversions", since = "1.51.0")] | |
3c0e092e XL |
148 | #[rustc_const_unstable(feature = "const_convert", issue = "88674")] |
149 | impl const From<char> for u128 { | |
5869c6ff XL |
150 | /// Converts a [`char`] into a [`u128`]. |
151 | /// | |
152 | /// # Examples | |
153 | /// | |
154 | /// ``` | |
155 | /// use std::mem; | |
156 | /// | |
157 | /// let c = '⚙'; | |
158 | /// let u = u128::from(c); | |
159 | /// assert!(16 == mem::size_of_val(&u)) | |
160 | /// ``` | |
161 | #[inline] | |
162 | fn from(c: char) -> Self { | |
163 | // The char is casted to the value of the code point, then zero-extended to 128 bit. | |
164 | // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics] | |
165 | c as u128 | |
166 | } | |
167 | } | |
168 | ||
dc9dc135 | 169 | /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF. |
83c7162d XL |
170 | /// |
171 | /// Unicode is designed such that this effectively decodes bytes | |
172 | /// with the character encoding that IANA calls ISO-8859-1. | |
173 | /// This encoding is compatible with ASCII. | |
174 | /// | |
175 | /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), | |
176 | /// which leaves some "blanks", byte values that are not assigned to any character. | |
177 | /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. | |
178 | /// | |
179 | /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, | |
180 | /// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks | |
181 | /// to punctuation and various Latin characters. | |
182 | /// | |
183 | /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) | |
184 | /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases | |
185 | /// for a superset of Windows-1252 that fills the remaining blanks with corresponding | |
186 | /// C0 and C1 control codes. | |
187 | #[stable(feature = "char_convert", since = "1.13.0")] | |
3c0e092e XL |
188 | #[rustc_const_unstable(feature = "const_convert", issue = "88674")] |
189 | impl const From<u8> for char { | |
0bf4aa26 XL |
190 | /// Converts a [`u8`] into a [`char`]. |
191 | /// | |
192 | /// # Examples | |
193 | /// | |
194 | /// ``` | |
195 | /// use std::mem; | |
196 | /// | |
e74abb32 XL |
197 | /// let u = 32 as u8; |
198 | /// let c = char::from(u); | |
199 | /// assert!(4 == mem::size_of_val(&c)) | |
0bf4aa26 | 200 | /// ``` |
83c7162d XL |
201 | #[inline] |
202 | fn from(i: u8) -> Self { | |
203 | i as char | |
204 | } | |
205 | } | |
206 | ||
83c7162d XL |
207 | /// An error which can be returned when parsing a char. |
208 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
209 | #[derive(Clone, Debug, PartialEq, Eq)] | |
210 | pub struct ParseCharError { | |
211 | kind: CharErrorKind, | |
212 | } | |
213 | ||
214 | impl ParseCharError { | |
60c5eb7d XL |
215 | #[unstable( |
216 | feature = "char_error_internals", | |
217 | reason = "this method should not be available publicly", | |
dfeec247 | 218 | issue = "none" |
60c5eb7d | 219 | )] |
83c7162d XL |
220 | #[doc(hidden)] |
221 | pub fn __description(&self) -> &str { | |
222 | match self.kind { | |
60c5eb7d XL |
223 | CharErrorKind::EmptyString => "cannot parse char from empty string", |
224 | CharErrorKind::TooManyChars => "too many characters in string", | |
83c7162d XL |
225 | } |
226 | } | |
227 | } | |
228 | ||
229 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] | |
230 | enum CharErrorKind { | |
231 | EmptyString, | |
232 | TooManyChars, | |
233 | } | |
234 | ||
235 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
236 | impl fmt::Display for ParseCharError { | |
48663c56 | 237 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
238 | self.__description().fmt(f) |
239 | } | |
240 | } | |
241 | ||
83c7162d XL |
242 | #[stable(feature = "char_from_str", since = "1.20.0")] |
243 | impl FromStr for char { | |
244 | type Err = ParseCharError; | |
245 | ||
246 | #[inline] | |
247 | fn from_str(s: &str) -> Result<Self, Self::Err> { | |
248 | let mut chars = s.chars(); | |
249 | match (chars.next(), chars.next()) { | |
60c5eb7d | 250 | (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }), |
83c7162d | 251 | (Some(c), None) => Ok(c), |
60c5eb7d | 252 | _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }), |
83c7162d XL |
253 | } |
254 | } | |
255 | } | |
256 | ||
3c0e092e XL |
257 | #[inline] |
258 | const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> { | |
259 | if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { | |
260 | Err(CharTryFromError(())) | |
261 | } else { | |
262 | // SAFETY: checked that it's a legal unicode value | |
263 | Ok(unsafe { transmute(i) }) | |
264 | } | |
265 | } | |
266 | ||
9fa01778 | 267 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
268 | impl TryFrom<u32> for char { |
269 | type Error = CharTryFromError; | |
270 | ||
271 | #[inline] | |
272 | fn try_from(i: u32) -> Result<Self, Self::Error> { | |
3c0e092e | 273 | char_try_from_u32(i) |
83c7162d XL |
274 | } |
275 | } | |
276 | ||
277 | /// The error type returned when a conversion from u32 to char fails. | |
9fa01778 | 278 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
279 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] |
280 | pub struct CharTryFromError(()); | |
281 | ||
9fa01778 | 282 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d | 283 | impl fmt::Display for CharTryFromError { |
48663c56 | 284 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
285 | "converted integer out of range for `char`".fmt(f) |
286 | } | |
287 | } | |
288 | ||
289 | /// Converts a digit in the given radix to a `char`. | |
290 | /// | |
291 | /// A 'radix' here is sometimes also called a 'base'. A radix of two | |
292 | /// indicates a binary number, a radix of ten, decimal, and a radix of | |
293 | /// sixteen, hexadecimal, to give some common values. Arbitrary | |
294 | /// radices are supported. | |
295 | /// | |
296 | /// `from_digit()` will return `None` if the input is not a digit in | |
297 | /// the given radix. | |
298 | /// | |
299 | /// # Panics | |
300 | /// | |
301 | /// Panics if given a radix larger than 36. | |
302 | /// | |
303 | /// # Examples | |
304 | /// | |
305 | /// Basic usage: | |
306 | /// | |
307 | /// ``` | |
308 | /// use std::char; | |
309 | /// | |
310 | /// let c = char::from_digit(4, 10); | |
311 | /// | |
312 | /// assert_eq!(Some('4'), c); | |
313 | /// | |
314 | /// // Decimal 11 is a single digit in base 16 | |
315 | /// let c = char::from_digit(11, 16); | |
316 | /// | |
317 | /// assert_eq!(Some('b'), c); | |
318 | /// ``` | |
319 | /// | |
320 | /// Returning `None` when the input is not a digit: | |
321 | /// | |
322 | /// ``` | |
323 | /// use std::char; | |
324 | /// | |
325 | /// let c = char::from_digit(20, 10); | |
326 | /// | |
327 | /// assert_eq!(None, c); | |
328 | /// ``` | |
329 | /// | |
330 | /// Passing a large radix, causing a panic: | |
331 | /// | |
f035d41b | 332 | /// ```should_panic |
83c7162d XL |
333 | /// use std::char; |
334 | /// | |
f035d41b XL |
335 | /// // this panics |
336 | /// let c = char::from_digit(1, 37); | |
83c7162d XL |
337 | /// ``` |
338 | #[inline] | |
c295e0f8 | 339 | #[must_use] |
83c7162d | 340 | #[stable(feature = "rust1", since = "1.0.0")] |
3c0e092e XL |
341 | #[rustc_const_unstable(feature = "const_char_convert", issue = "89259")] |
342 | pub const fn from_digit(num: u32, radix: u32) -> Option<char> { | |
83c7162d XL |
343 | if radix > 36 { |
344 | panic!("from_digit: radix is too high (maximum 36)"); | |
345 | } | |
346 | if num < radix { | |
347 | let num = num as u8; | |
60c5eb7d | 348 | if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) } |
83c7162d XL |
349 | } else { |
350 | None | |
351 | } | |
352 | } |