]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | //! Character conversions. |
2 | ||
48663c56 XL |
3 | use crate::convert::TryFrom; |
4 | use crate::fmt; | |
5 | use crate::mem::transmute; | |
6 | use crate::str::FromStr; | |
7 | ||
83c7162d XL |
8 | use super::MAX; |
9 | ||
10 | /// Converts a `u32` to a `char`. | |
11 | /// | |
12 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 13 | /// `as`: |
83c7162d XL |
14 | /// |
15 | /// ``` | |
16 | /// let c = '💯'; | |
17 | /// let i = c as u32; | |
18 | /// | |
19 | /// assert_eq!(128175, i); | |
20 | /// ``` | |
21 | /// | |
22 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
23 | /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value | |
24 | /// for a [`char`]. | |
25 | /// | |
26 | /// [`char`]: ../../std/primitive.char.html | |
27 | /// [`u32`]: ../../std/primitive.u32.html | |
83c7162d XL |
28 | /// |
29 | /// For an unsafe version of this function which ignores these checks, see | |
30 | /// [`from_u32_unchecked`]. | |
31 | /// | |
32 | /// [`from_u32_unchecked`]: fn.from_u32_unchecked.html | |
33 | /// | |
34 | /// # Examples | |
35 | /// | |
36 | /// Basic usage: | |
37 | /// | |
38 | /// ``` | |
39 | /// use std::char; | |
40 | /// | |
41 | /// let c = char::from_u32(0x2764); | |
42 | /// | |
43 | /// assert_eq!(Some('❤'), c); | |
44 | /// ``` | |
45 | /// | |
46 | /// Returning `None` when the input is not a valid [`char`]: | |
47 | /// | |
48 | /// ``` | |
49 | /// use std::char; | |
50 | /// | |
51 | /// let c = char::from_u32(0x110000); | |
52 | /// | |
53 | /// assert_eq!(None, c); | |
54 | /// ``` | |
55 | #[inline] | |
56 | #[stable(feature = "rust1", since = "1.0.0")] | |
pub fn from_u32(i: u32) -> Option<char> {
    // Validation lives in the `TryFrom<u32>` impl; here the error payload is
    // simply discarded in favour of `None`.
    match char::try_from(i) {
        Ok(c) => Some(c),
        Err(_) => None,
    }
}
60 | ||
61 | /// Converts a `u32` to a `char`, ignoring validity. | |
62 | /// | |
63 | /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with | |
13cf67c4 | 64 | /// `as`: |
83c7162d XL |
65 | /// |
66 | /// ``` | |
67 | /// let c = '💯'; | |
68 | /// let i = c as u32; | |
69 | /// | |
70 | /// assert_eq!(128175, i); | |
71 | /// ``` | |
72 | /// | |
73 | /// However, the reverse is not true: not all valid [`u32`]s are valid | |
74 | /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to | |
75 | /// [`char`], possibly creating an invalid one. | |
76 | /// | |
77 | /// [`char`]: ../../std/primitive.char.html | |
78 | /// [`u32`]: ../../std/primitive.u32.html | |
83c7162d XL |
79 | /// |
80 | /// # Safety | |
81 | /// | |
82 | /// This function is unsafe, as it may construct invalid `char` values. | |
83 | /// | |
84 | /// For a safe version of this function, see the [`from_u32`] function. | |
85 | /// | |
86 | /// [`from_u32`]: fn.from_u32.html | |
87 | /// | |
88 | /// # Examples | |
89 | /// | |
90 | /// Basic usage: | |
91 | /// | |
92 | /// ``` | |
93 | /// use std::char; | |
94 | /// | |
95 | /// let c = unsafe { char::from_u32_unchecked(0x2764) }; | |
96 | /// | |
97 | /// assert_eq!('❤', c); | |
98 | /// ``` | |
99 | #[inline] | |
100 | #[stable(feature = "char_from_unchecked", since = "1.5.0")] | |
pub unsafe fn from_u32_unchecked(i: u32) -> char {
    // Reinterprets the bits of `i` as a `char` with no validation whatsoever;
    // it is the caller's job to pass a value that is valid for `char`.
    transmute::<u32, char>(i)
}
104 | ||
105 | #[stable(feature = "char_convert", since = "1.13.0")] | |
106 | impl From<char> for u32 { | |
0bf4aa26 XL |
107 | /// Converts a [`char`] into a [`u32`]. |
108 | /// | |
109 | /// # Examples | |
110 | /// | |
111 | /// ``` | |
112 | /// use std::mem; | |
113 | /// | |
114 | /// fn main() { | |
115 | /// let c = 'c'; | |
116 | /// let u = u32::from(c); | |
117 | /// assert!(4 == mem::size_of_val(&u)) | |
118 | /// } | |
119 | /// ``` | |
83c7162d XL |
120 | #[inline] |
121 | fn from(c: char) -> Self { | |
122 | c as u32 | |
123 | } | |
124 | } | |
125 | ||
dc9dc135 | 126 | /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF. |
83c7162d XL |
127 | /// |
128 | /// Unicode is designed such that this effectively decodes bytes | |
129 | /// with the character encoding that IANA calls ISO-8859-1. | |
130 | /// This encoding is compatible with ASCII. | |
131 | /// | |
132 | /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), | |
133 | /// which leaves some "blanks", byte values that are not assigned to any character. | |
134 | /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. | |
135 | /// | |
136 | /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, | |
137 | /// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks | |
138 | /// to punctuation and various Latin characters. | |
139 | /// | |
140 | /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) | |
141 | /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases | |
142 | /// for a superset of Windows-1252 that fills the remaining blanks with corresponding | |
143 | /// C0 and C1 control codes. | |
144 | #[stable(feature = "char_convert", since = "1.13.0")] | |
145 | impl From<u8> for char { | |
0bf4aa26 XL |
146 | /// Converts a [`u8`] into a [`char`]. |
147 | /// | |
148 | /// # Examples | |
149 | /// | |
150 | /// ``` | |
151 | /// use std::mem; | |
152 | /// | |
153 | /// fn main() { | |
154 | /// let u = 32 as u8; | |
155 | /// let c = char::from(u); | |
156 | /// assert!(4 == mem::size_of_val(&c)) | |
157 | /// } | |
158 | /// ``` | |
83c7162d XL |
159 | #[inline] |
160 | fn from(i: u8) -> Self { | |
161 | i as char | |
162 | } | |
163 | } | |
164 | ||
165 | ||
166 | /// An error which can be returned when parsing a char. | |
167 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
168 | #[derive(Clone, Debug, PartialEq, Eq)] | |
169 | pub struct ParseCharError { | |
170 | kind: CharErrorKind, | |
171 | } | |
172 | ||
173 | impl ParseCharError { | |
174 | #[unstable(feature = "char_error_internals", | |
175 | reason = "this method should not be available publicly", | |
176 | issue = "0")] | |
177 | #[doc(hidden)] | |
178 | pub fn __description(&self) -> &str { | |
179 | match self.kind { | |
180 | CharErrorKind::EmptyString => { | |
181 | "cannot parse char from empty string" | |
182 | }, | |
183 | CharErrorKind::TooManyChars => "too many characters in string" | |
184 | } | |
185 | } | |
186 | } | |
187 | ||
/// The reasons parsing a `char` from a string can fail.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
193 | ||
194 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
195 | impl fmt::Display for ParseCharError { | |
48663c56 | 196 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
197 | self.__description().fmt(f) |
198 | } | |
199 | } | |
200 | ||
201 | ||
202 | #[stable(feature = "char_from_str", since = "1.20.0")] | |
203 | impl FromStr for char { | |
204 | type Err = ParseCharError; | |
205 | ||
206 | #[inline] | |
207 | fn from_str(s: &str) -> Result<Self, Self::Err> { | |
208 | let mut chars = s.chars(); | |
209 | match (chars.next(), chars.next()) { | |
210 | (None, _) => { | |
211 | Err(ParseCharError { kind: CharErrorKind::EmptyString }) | |
212 | }, | |
213 | (Some(c), None) => Ok(c), | |
214 | _ => { | |
215 | Err(ParseCharError { kind: CharErrorKind::TooManyChars }) | |
216 | } | |
217 | } | |
218 | } | |
219 | } | |
220 | ||
221 | ||
9fa01778 | 222 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
223 | impl TryFrom<u32> for char { |
224 | type Error = CharTryFromError; | |
225 | ||
226 | #[inline] | |
227 | fn try_from(i: u32) -> Result<Self, Self::Error> { | |
228 | if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { | |
229 | Err(CharTryFromError(())) | |
230 | } else { | |
231 | Ok(unsafe { from_u32_unchecked(i) }) | |
232 | } | |
233 | } | |
234 | } | |
235 | ||
236 | /// The error type returned when a conversion from u32 to char fails. | |
9fa01778 | 237 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d XL |
238 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] |
239 | pub struct CharTryFromError(()); | |
240 | ||
9fa01778 | 241 | #[stable(feature = "try_from", since = "1.34.0")] |
83c7162d | 242 | impl fmt::Display for CharTryFromError { |
48663c56 | 243 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
83c7162d XL |
244 | "converted integer out of range for `char`".fmt(f) |
245 | } | |
246 | } | |
247 | ||
248 | /// Converts a digit in the given radix to a `char`. | |
249 | /// | |
250 | /// A 'radix' here is sometimes also called a 'base'. A radix of two | |
251 | /// indicates a binary number, a radix of ten, decimal, and a radix of | |
252 | /// sixteen, hexadecimal, to give some common values. Arbitrary | |
253 | /// radices are supported. | |
254 | /// | |
255 | /// `from_digit()` will return `None` if the input is not a digit in | |
256 | /// the given radix. | |
257 | /// | |
258 | /// # Panics | |
259 | /// | |
260 | /// Panics if given a radix larger than 36. | |
261 | /// | |
262 | /// # Examples | |
263 | /// | |
264 | /// Basic usage: | |
265 | /// | |
266 | /// ``` | |
267 | /// use std::char; | |
268 | /// | |
269 | /// let c = char::from_digit(4, 10); | |
270 | /// | |
271 | /// assert_eq!(Some('4'), c); | |
272 | /// | |
273 | /// // Decimal 11 is a single digit in base 16 | |
274 | /// let c = char::from_digit(11, 16); | |
275 | /// | |
276 | /// assert_eq!(Some('b'), c); | |
277 | /// ``` | |
278 | /// | |
279 | /// Returning `None` when the input is not a digit: | |
280 | /// | |
281 | /// ``` | |
282 | /// use std::char; | |
283 | /// | |
284 | /// let c = char::from_digit(20, 10); | |
285 | /// | |
286 | /// assert_eq!(None, c); | |
287 | /// ``` | |
288 | /// | |
289 | /// Passing a large radix, causing a panic: | |
290 | /// | |
291 | /// ``` | |
292 | /// use std::thread; | |
293 | /// use std::char; | |
294 | /// | |
295 | /// let result = thread::spawn(|| { | |
296 | /// // this panics | |
297 | /// let c = char::from_digit(1, 37); | |
298 | /// }).join(); | |
299 | /// | |
300 | /// assert!(result.is_err()); | |
301 | /// ``` | |
302 | #[inline] | |
303 | #[stable(feature = "rust1", since = "1.0.0")] | |
pub fn from_digit(num: u32, radix: u32) -> Option<char> {
    if radix > 36 {
        panic!("from_digit: radix is too high (maximum 36)");
    }

    // Not a digit in this radix.
    if num >= radix {
        return None;
    }

    // Here `num < radix <= 36`, so narrowing to `u8` cannot lose information.
    let digit = num as u8;
    let ascii = match digit {
        0..=9 => b'0' + digit,
        // Values 10 and above map onto lowercase letters starting at 'a'.
        _ => b'a' + digit - 10,
    };
    Some(ascii as char)
}