]> git.proxmox.com Git - rustc.git/blob - src/libcore/char/convert.rs
New upstream version 1.41.1+dfsg1
[rustc.git] / src / libcore / char / convert.rs
1 //! Character conversions.
2
3 use crate::convert::TryFrom;
4 use crate::fmt;
5 use crate::mem::transmute;
6 use crate::str::FromStr;
7
8 use super::MAX;
9
10 /// Converts a `u32` to a `char`.
11 ///
12 /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
13 /// `as`:
14 ///
15 /// ```
16 /// let c = '💯';
17 /// let i = c as u32;
18 ///
19 /// assert_eq!(128175, i);
20 /// ```
21 ///
22 /// However, the reverse is not true: not all valid [`u32`]s are valid
23 /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
24 /// for a [`char`].
25 ///
26 /// [`char`]: ../../std/primitive.char.html
27 /// [`u32`]: ../../std/primitive.u32.html
28 ///
29 /// For an unsafe version of this function which ignores these checks, see
30 /// [`from_u32_unchecked`].
31 ///
32 /// [`from_u32_unchecked`]: fn.from_u32_unchecked.html
33 ///
34 /// # Examples
35 ///
36 /// Basic usage:
37 ///
38 /// ```
39 /// use std::char;
40 ///
41 /// let c = char::from_u32(0x2764);
42 ///
43 /// assert_eq!(Some('❤'), c);
44 /// ```
45 ///
46 /// Returning `None` when the input is not a valid [`char`]:
47 ///
48 /// ```
49 /// use std::char;
50 ///
51 /// let c = char::from_u32(0x110000);
52 ///
53 /// assert_eq!(None, c);
54 /// ```
55 #[inline]
56 #[stable(feature = "rust1", since = "1.0.0")]
pub fn from_u32(i: u32) -> Option<char> {
    // Validation lives in the `TryFrom<u32>` impl; the error carries no
    // information, so a rejected value simply becomes `None`.
    match char::try_from(i) {
        Ok(c) => Some(c),
        Err(_) => None,
    }
}
60
61 /// Converts a `u32` to a `char`, ignoring validity.
62 ///
63 /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
64 /// `as`:
65 ///
66 /// ```
67 /// let c = '💯';
68 /// let i = c as u32;
69 ///
70 /// assert_eq!(128175, i);
71 /// ```
72 ///
73 /// However, the reverse is not true: not all valid [`u32`]s are valid
74 /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
75 /// [`char`], possibly creating an invalid one.
76 ///
77 /// [`char`]: ../../std/primitive.char.html
78 /// [`u32`]: ../../std/primitive.u32.html
79 ///
80 /// # Safety
81 ///
82 /// This function is unsafe, as it may construct invalid `char` values: `i` must be a value [`from_u32`] accepts.
83 ///
84 /// For a safe version of this function, see the [`from_u32`] function.
85 ///
86 /// [`from_u32`]: fn.from_u32.html
87 ///
88 /// # Examples
89 ///
90 /// Basic usage:
91 ///
92 /// ```
93 /// use std::char;
94 ///
95 /// let c = unsafe { char::from_u32_unchecked(0x2764) };
96 ///
97 /// assert_eq!('❤', c);
98 /// ```
99 #[inline]
100 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
pub unsafe fn from_u32_unchecked(i: u32) -> char {
    // In builds with debug assertions, catch callers that break the safety
    // contract instead of silently producing an invalid `char`. The check is
    // spelled out with literals (surrogates 0xD800..=0xDFFF excluded, upper
    // bound 0x10FFFF) rather than going through `char::try_from`, because
    // that impl calls this function and would recurse.
    debug_assert!(
        i < 0xD800 || (i > 0xDFFF && i <= 0x10FFFF),
        "from_u32_unchecked: {:#x} is not a valid `char` value",
        i
    );
    // The caller guarantees `i` is a valid `char` value, so reinterpreting
    // the bits is sound; no check is performed when debug assertions are off.
    transmute::<u32, char>(i)
}
104
105 #[stable(feature = "char_convert", since = "1.13.0")]
106 impl From<char> for u32 {
107 /// Converts a [`char`] into a [`u32`].
108 ///
109 /// # Examples
110 ///
111 /// ```
112 /// use std::mem;
113 ///
114 /// let c = 'c';
115 /// let u = u32::from(c);
116 /// assert!(4 == mem::size_of_val(&u))
117 /// ```
118 #[inline]
119 fn from(c: char) -> Self {
120 c as u32
121 }
122 }
123
124 /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
125 ///
126 /// Unicode is designed such that this effectively decodes bytes
127 /// with the character encoding that IANA calls ISO-8859-1.
128 /// This encoding is compatible with ASCII.
129 ///
130 /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
131 /// which leaves some "blanks", byte values that are not assigned to any character.
132 /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
133 ///
134 /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
135 /// which is a superset of ISO/IEC 8859-1 that assigns some (not all!) blanks
136 /// to punctuation and various Latin characters.
137 ///
138 /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
139 /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
140 /// for a superset of Windows-1252 that fills the remaining blanks with corresponding
141 /// C0 and C1 control codes.
142 #[stable(feature = "char_convert", since = "1.13.0")]
143 impl From<u8> for char {
144 /// Converts a [`u8`] into a [`char`].
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// use std::mem;
150 ///
151 /// let u = 32 as u8;
152 /// let c = char::from(u);
153 /// assert!(4 == mem::size_of_val(&c))
154 /// ```
155 #[inline]
156 fn from(i: u8) -> Self {
157 i as char
158 }
159 }
160
161 /// An error which can be returned when parsing a char.
162 #[stable(feature = "char_from_str", since = "1.20.0")]
163 #[derive(Clone, Debug, PartialEq, Eq)]
164 pub struct ParseCharError {
165 kind: CharErrorKind,
166 }
167
168 impl ParseCharError {
169 #[unstable(
170 feature = "char_error_internals",
171 reason = "this method should not be available publicly",
172 issue = "0"
173 )]
174 #[doc(hidden)]
175 pub fn __description(&self) -> &str {
176 match self.kind {
177 CharErrorKind::EmptyString => "cannot parse char from empty string",
178 CharErrorKind::TooManyChars => "too many characters in string",
179 }
180 }
181 }
182
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
188
189 #[stable(feature = "char_from_str", since = "1.20.0")]
190 impl fmt::Display for ParseCharError {
191 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
192 self.__description().fmt(f)
193 }
194 }
195
196 #[stable(feature = "char_from_str", since = "1.20.0")]
197 impl FromStr for char {
198 type Err = ParseCharError;
199
200 #[inline]
201 fn from_str(s: &str) -> Result<Self, Self::Err> {
202 let mut chars = s.chars();
203 match (chars.next(), chars.next()) {
204 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
205 (Some(c), None) => Ok(c),
206 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
207 }
208 }
209 }
210
211 #[stable(feature = "try_from", since = "1.34.0")]
212 impl TryFrom<u32> for char {
213 type Error = CharTryFromError;
214
215 #[inline]
216 fn try_from(i: u32) -> Result<Self, Self::Error> {
217 if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
218 Err(CharTryFromError(()))
219 } else {
220 // SAFETY: checked that it's a legal unicode value
221 Ok(unsafe { from_u32_unchecked(i) })
222 }
223 }
224 }
225
226 /// The error type returned when a conversion from u32 to char fails.
227 #[stable(feature = "try_from", since = "1.34.0")]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct CharTryFromError(
    // Private unit field: the error carries no data.
    (),
);
230
231 #[stable(feature = "try_from", since = "1.34.0")]
232 impl fmt::Display for CharTryFromError {
233 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
234 "converted integer out of range for `char`".fmt(f)
235 }
236 }
237
238 /// Converts a digit in the given radix to a `char`.
239 ///
240 /// A 'radix' here is sometimes also called a 'base'. A radix of two
241 /// indicates a binary number, a radix of ten, decimal, and a radix of
242 /// sixteen, hexadecimal, to give some common values. Arbitrary
243 /// radices are supported.
244 ///
245 /// `from_digit()` will return `None` if the input is not a digit in
246 /// the given radix.
247 ///
248 /// # Panics
249 ///
250 /// Panics if given a radix larger than 36.
251 ///
252 /// # Examples
253 ///
254 /// Basic usage:
255 ///
256 /// ```
257 /// use std::char;
258 ///
259 /// let c = char::from_digit(4, 10);
260 ///
261 /// assert_eq!(Some('4'), c);
262 ///
263 /// // Decimal 11 is a single digit in base 16
264 /// let c = char::from_digit(11, 16);
265 ///
266 /// assert_eq!(Some('b'), c);
267 /// ```
268 ///
269 /// Returning `None` when the input is not a digit:
270 ///
271 /// ```
272 /// use std::char;
273 ///
274 /// let c = char::from_digit(20, 10);
275 ///
276 /// assert_eq!(None, c);
277 /// ```
278 ///
279 /// Passing a large radix, causing a panic:
280 ///
281 /// ```
282 /// use std::thread;
283 /// use std::char;
284 ///
285 /// let result = thread::spawn(|| {
286 /// // this panics
287 /// let c = char::from_digit(1, 37);
288 /// }).join();
289 ///
290 /// assert!(result.is_err());
291 /// ```
292 #[inline]
293 #[stable(feature = "rust1", since = "1.0.0")]
pub fn from_digit(num: u32, radix: u32) -> Option<char> {
    if radix > 36 {
        panic!("from_digit: radix is too high (maximum 36)");
    }
    // Not a digit in this radix.
    if num >= radix {
        return None;
    }

    // `num < radix <= 36` here, so narrowing to `u8` loses nothing.
    let digit = num as u8;
    let ascii = match digit {
        0..=9 => b'0' + digit,
        // Values 10..=35 map onto the lowercase letters 'a'..='z'.
        _ => b'a' + (digit - 10),
    };
    Some(ascii as char)
}