]> git.proxmox.com Git - rustc.git/blame - library/core/src/char/convert.rs
New upstream version 1.51.0+dfsg1
[rustc.git] / library / core / src / char / convert.rs
CommitLineData
83c7162d
XL
1//! Character conversions.
2
48663c56
XL
3use crate::convert::TryFrom;
4use crate::fmt;
5use crate::mem::transmute;
6use crate::str::FromStr;
7
83c7162d
XL
8use super::MAX;
9
10/// Converts a `u32` to a `char`.
11///
12/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
13cf67c4 13/// `as`:
83c7162d
XL
14///
15/// ```
16/// let c = '💯';
17/// let i = c as u32;
18///
19/// assert_eq!(128175, i);
20/// ```
21///
22/// However, the reverse is not true: not all valid [`u32`]s are valid
23/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
24/// for a [`char`].
25///
83c7162d
XL
26/// For an unsafe version of this function which ignores these checks, see
27/// [`from_u32_unchecked`].
28///
83c7162d
XL
29/// # Examples
30///
31/// Basic usage:
32///
33/// ```
34/// use std::char;
35///
36/// let c = char::from_u32(0x2764);
37///
38/// assert_eq!(Some('❤'), c);
39/// ```
40///
41/// Returning `None` when the input is not a valid [`char`]:
42///
43/// ```
44/// use std::char;
45///
46/// let c = char::from_u32(0x110000);
47///
48/// assert_eq!(None, c);
49/// ```
5869c6ff 50#[doc(alias = "chr")]
83c7162d
XL
51#[inline]
52#[stable(feature = "rust1", since = "1.0.0")]
53pub fn from_u32(i: u32) -> Option<char> {
54 char::try_from(i).ok()
55}
56
57/// Converts a `u32` to a `char`, ignoring validity.
58///
59/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
13cf67c4 60/// `as`:
83c7162d
XL
61///
62/// ```
63/// let c = '💯';
64/// let i = c as u32;
65///
66/// assert_eq!(128175, i);
67/// ```
68///
69/// However, the reverse is not true: not all valid [`u32`]s are valid
70/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
71/// [`char`], possibly creating an invalid one.
72///
83c7162d
XL
73/// # Safety
74///
75/// This function is unsafe, as it may construct invalid `char` values.
76///
77/// For a safe version of this function, see the [`from_u32`] function.
78///
83c7162d
XL
79/// # Examples
80///
81/// Basic usage:
82///
83/// ```
84/// use std::char;
85///
86/// let c = unsafe { char::from_u32_unchecked(0x2764) };
87///
88/// assert_eq!('❤', c);
89/// ```
90#[inline]
91#[stable(feature = "char_from_unchecked", since = "1.5.0")]
92pub unsafe fn from_u32_unchecked(i: u32) -> char {
f035d41b
XL
93 // SAFETY: the caller must guarantee that `i` is a valid char value.
94 if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } }
83c7162d
XL
95}
96
97#[stable(feature = "char_convert", since = "1.13.0")]
98impl From<char> for u32 {
0bf4aa26
XL
99 /// Converts a [`char`] into a [`u32`].
100 ///
101 /// # Examples
102 ///
103 /// ```
104 /// use std::mem;
105 ///
e74abb32
XL
106 /// let c = 'c';
107 /// let u = u32::from(c);
108 /// assert!(4 == mem::size_of_val(&u))
0bf4aa26 109 /// ```
83c7162d
XL
110 #[inline]
111 fn from(c: char) -> Self {
112 c as u32
113 }
114}
115
5869c6ff
XL
116#[stable(feature = "more_char_conversions", since = "1.51.0")]
117impl From<char> for u64 {
118 /// Converts a [`char`] into a [`u64`].
119 ///
120 /// # Examples
121 ///
122 /// ```
123 /// use std::mem;
124 ///
125 /// let c = '👤';
126 /// let u = u64::from(c);
127 /// assert!(8 == mem::size_of_val(&u))
128 /// ```
129 #[inline]
130 fn from(c: char) -> Self {
131 // The char is casted to the value of the code point, then zero-extended to 64 bit.
132 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
133 c as u64
134 }
135}
136
137#[stable(feature = "more_char_conversions", since = "1.51.0")]
138impl From<char> for u128 {
139 /// Converts a [`char`] into a [`u128`].
140 ///
141 /// # Examples
142 ///
143 /// ```
144 /// use std::mem;
145 ///
146 /// let c = '⚙';
147 /// let u = u128::from(c);
148 /// assert!(16 == mem::size_of_val(&u))
149 /// ```
150 #[inline]
151 fn from(c: char) -> Self {
152 // The char is casted to the value of the code point, then zero-extended to 128 bit.
153 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
154 c as u128
155 }
156}
157
dc9dc135 158/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
83c7162d
XL
159///
160/// Unicode is designed such that this effectively decodes bytes
161/// with the character encoding that IANA calls ISO-8859-1.
162/// This encoding is compatible with ASCII.
163///
164/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
165/// which leaves some "blanks", byte values that are not assigned to any character.
166/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
167///
168/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
169/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
170/// to punctuation and various Latin characters.
171///
172/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
173/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
174/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
175/// C0 and C1 control codes.
176#[stable(feature = "char_convert", since = "1.13.0")]
177impl From<u8> for char {
0bf4aa26
XL
178 /// Converts a [`u8`] into a [`char`].
179 ///
180 /// # Examples
181 ///
182 /// ```
183 /// use std::mem;
184 ///
e74abb32
XL
185 /// let u = 32 as u8;
186 /// let c = char::from(u);
187 /// assert!(4 == mem::size_of_val(&c))
0bf4aa26 188 /// ```
83c7162d
XL
189 #[inline]
190 fn from(i: u8) -> Self {
191 i as char
192 }
193}
194
83c7162d
XL
195/// An error which can be returned when parsing a char.
196#[stable(feature = "char_from_str", since = "1.20.0")]
197#[derive(Clone, Debug, PartialEq, Eq)]
198pub struct ParseCharError {
199 kind: CharErrorKind,
200}
201
202impl ParseCharError {
60c5eb7d
XL
203 #[unstable(
204 feature = "char_error_internals",
205 reason = "this method should not be available publicly",
dfeec247 206 issue = "none"
60c5eb7d 207 )]
83c7162d
XL
208 #[doc(hidden)]
209 pub fn __description(&self) -> &str {
210 match self.kind {
60c5eb7d
XL
211 CharErrorKind::EmptyString => "cannot parse char from empty string",
212 CharErrorKind::TooManyChars => "too many characters in string",
83c7162d
XL
213 }
214 }
215}
216
/// The reasons why parsing a `char` from a string can fail.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string was empty.
    EmptyString,
    /// The input string held too many characters.
    TooManyChars,
}
222
223#[stable(feature = "char_from_str", since = "1.20.0")]
224impl fmt::Display for ParseCharError {
48663c56 225 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
226 self.__description().fmt(f)
227 }
228}
229
83c7162d
XL
230#[stable(feature = "char_from_str", since = "1.20.0")]
231impl FromStr for char {
232 type Err = ParseCharError;
233
234 #[inline]
235 fn from_str(s: &str) -> Result<Self, Self::Err> {
236 let mut chars = s.chars();
237 match (chars.next(), chars.next()) {
60c5eb7d 238 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
83c7162d 239 (Some(c), None) => Ok(c),
60c5eb7d 240 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
83c7162d
XL
241 }
242 }
243}
244
9fa01778 245#[stable(feature = "try_from", since = "1.34.0")]
83c7162d
XL
246impl TryFrom<u32> for char {
247 type Error = CharTryFromError;
248
249 #[inline]
250 fn try_from(i: u32) -> Result<Self, Self::Error> {
251 if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
252 Err(CharTryFromError(()))
253 } else {
60c5eb7d 254 // SAFETY: checked that it's a legal unicode value
f9f354fc 255 Ok(unsafe { transmute(i) })
83c7162d
XL
256 }
257 }
258}
259
260/// The error type returned when a conversion from u32 to char fails.
9fa01778 261#[stable(feature = "try_from", since = "1.34.0")]
83c7162d
XL
262#[derive(Copy, Clone, Debug, PartialEq, Eq)]
263pub struct CharTryFromError(());
264
9fa01778 265#[stable(feature = "try_from", since = "1.34.0")]
83c7162d 266impl fmt::Display for CharTryFromError {
48663c56 267 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
268 "converted integer out of range for `char`".fmt(f)
269 }
270}
271
272/// Converts a digit in the given radix to a `char`.
273///
274/// A 'radix' here is sometimes also called a 'base'. A radix of two
275/// indicates a binary number, a radix of ten, decimal, and a radix of
276/// sixteen, hexadecimal, to give some common values. Arbitrary
277/// radices are supported.
278///
279/// `from_digit()` will return `None` if the input is not a digit in
280/// the given radix.
281///
282/// # Panics
283///
284/// Panics if given a radix larger than 36.
285///
286/// # Examples
287///
288/// Basic usage:
289///
290/// ```
291/// use std::char;
292///
293/// let c = char::from_digit(4, 10);
294///
295/// assert_eq!(Some('4'), c);
296///
297/// // Decimal 11 is a single digit in base 16
298/// let c = char::from_digit(11, 16);
299///
300/// assert_eq!(Some('b'), c);
301/// ```
302///
303/// Returning `None` when the input is not a digit:
304///
305/// ```
306/// use std::char;
307///
308/// let c = char::from_digit(20, 10);
309///
310/// assert_eq!(None, c);
311/// ```
312///
313/// Passing a large radix, causing a panic:
314///
f035d41b 315/// ```should_panic
83c7162d
XL
316/// use std::char;
317///
f035d41b
XL
318/// // this panics
319/// let c = char::from_digit(1, 37);
83c7162d
XL
320/// ```
321#[inline]
322#[stable(feature = "rust1", since = "1.0.0")]
323pub fn from_digit(num: u32, radix: u32) -> Option<char> {
324 if radix > 36 {
325 panic!("from_digit: radix is too high (maximum 36)");
326 }
327 if num < radix {
328 let num = num as u8;
60c5eb7d 329 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
83c7162d
XL
330 } else {
331 None
332 }
333}