]> git.proxmox.com Git - rustc.git/blame - library/core/src/char/convert.rs
New upstream version 1.58.1+dfsg1
[rustc.git] / library / core / src / char / convert.rs
CommitLineData
83c7162d
XL
1//! Character conversions.
2
48663c56
XL
3use crate::convert::TryFrom;
4use crate::fmt;
5use crate::mem::transmute;
6use crate::str::FromStr;
7
83c7162d
XL
8use super::MAX;
9
10/// Converts a `u32` to a `char`.
11///
12/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
13cf67c4 13/// `as`:
83c7162d
XL
14///
15/// ```
16/// let c = '💯';
17/// let i = c as u32;
18///
19/// assert_eq!(128175, i);
20/// ```
21///
22/// However, the reverse is not true: not all valid [`u32`]s are valid
23/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
24/// for a [`char`].
25///
83c7162d
XL
26/// For an unsafe version of this function which ignores these checks, see
27/// [`from_u32_unchecked`].
28///
83c7162d
XL
29/// # Examples
30///
31/// Basic usage:
32///
33/// ```
34/// use std::char;
35///
36/// let c = char::from_u32(0x2764);
37///
38/// assert_eq!(Some('❤'), c);
39/// ```
40///
41/// Returning `None` when the input is not a valid [`char`]:
42///
43/// ```
44/// use std::char;
45///
46/// let c = char::from_u32(0x110000);
47///
48/// assert_eq!(None, c);
49/// ```
5869c6ff 50#[doc(alias = "chr")]
c295e0f8 51#[must_use]
83c7162d
XL
52#[inline]
53#[stable(feature = "rust1", since = "1.0.0")]
3c0e092e
XL
54#[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
55pub const fn from_u32(i: u32) -> Option<char> {
56 // FIXME: once Result::ok is const fn, use it here
57 match char_try_from_u32(i) {
58 Ok(c) => Some(c),
59 Err(_) => None,
60 }
83c7162d
XL
61}
62
63/// Converts a `u32` to a `char`, ignoring validity.
64///
65/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
13cf67c4 66/// `as`:
83c7162d
XL
67///
68/// ```
69/// let c = '💯';
70/// let i = c as u32;
71///
72/// assert_eq!(128175, i);
73/// ```
74///
75/// However, the reverse is not true: not all valid [`u32`]s are valid
76/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
77/// [`char`], possibly creating an invalid one.
78///
83c7162d
XL
79/// # Safety
80///
81/// This function is unsafe, as it may construct invalid `char` values.
82///
83/// For a safe version of this function, see the [`from_u32`] function.
84///
83c7162d
XL
85/// # Examples
86///
87/// Basic usage:
88///
89/// ```
90/// use std::char;
91///
92/// let c = unsafe { char::from_u32_unchecked(0x2764) };
93///
94/// assert_eq!('❤', c);
95/// ```
96#[inline]
c295e0f8 97#[must_use]
83c7162d 98#[stable(feature = "char_from_unchecked", since = "1.5.0")]
3c0e092e
XL
99#[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
100pub const unsafe fn from_u32_unchecked(i: u32) -> char {
f035d41b
XL
101 // SAFETY: the caller must guarantee that `i` is a valid char value.
102 if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } }
83c7162d
XL
103}
104
105#[stable(feature = "char_convert", since = "1.13.0")]
3c0e092e
XL
106#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
107impl const From<char> for u32 {
0bf4aa26
XL
108 /// Converts a [`char`] into a [`u32`].
109 ///
110 /// # Examples
111 ///
112 /// ```
113 /// use std::mem;
114 ///
e74abb32
XL
115 /// let c = 'c';
116 /// let u = u32::from(c);
117 /// assert!(4 == mem::size_of_val(&u))
0bf4aa26 118 /// ```
83c7162d
XL
119 #[inline]
120 fn from(c: char) -> Self {
121 c as u32
122 }
123}
124
5869c6ff 125#[stable(feature = "more_char_conversions", since = "1.51.0")]
3c0e092e
XL
126#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
127impl const From<char> for u64 {
5869c6ff
XL
128 /// Converts a [`char`] into a [`u64`].
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// use std::mem;
134 ///
135 /// let c = '👤';
136 /// let u = u64::from(c);
137 /// assert!(8 == mem::size_of_val(&u))
138 /// ```
139 #[inline]
140 fn from(c: char) -> Self {
141 // The char is casted to the value of the code point, then zero-extended to 64 bit.
142 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
143 c as u64
144 }
145}
146
147#[stable(feature = "more_char_conversions", since = "1.51.0")]
3c0e092e
XL
148#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
149impl const From<char> for u128 {
5869c6ff
XL
150 /// Converts a [`char`] into a [`u128`].
151 ///
152 /// # Examples
153 ///
154 /// ```
155 /// use std::mem;
156 ///
157 /// let c = '⚙';
158 /// let u = u128::from(c);
159 /// assert!(16 == mem::size_of_val(&u))
160 /// ```
161 #[inline]
162 fn from(c: char) -> Self {
163 // The char is casted to the value of the code point, then zero-extended to 128 bit.
164 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
165 c as u128
166 }
167}
168
dc9dc135 169/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
83c7162d
XL
170///
171/// Unicode is designed such that this effectively decodes bytes
172/// with the character encoding that IANA calls ISO-8859-1.
173/// This encoding is compatible with ASCII.
174///
175/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
176/// which leaves some "blanks", byte values that are not assigned to any character.
177/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
178///
179/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
180/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
181/// to punctuation and various Latin characters.
182///
183/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
184/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
185/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
186/// C0 and C1 control codes.
187#[stable(feature = "char_convert", since = "1.13.0")]
3c0e092e
XL
188#[rustc_const_unstable(feature = "const_convert", issue = "88674")]
189impl const From<u8> for char {
0bf4aa26
XL
190 /// Converts a [`u8`] into a [`char`].
191 ///
192 /// # Examples
193 ///
194 /// ```
195 /// use std::mem;
196 ///
e74abb32
XL
197 /// let u = 32 as u8;
198 /// let c = char::from(u);
199 /// assert!(4 == mem::size_of_val(&c))
0bf4aa26 200 /// ```
83c7162d
XL
201 #[inline]
202 fn from(i: u8) -> Self {
203 i as char
204 }
205}
206
83c7162d
XL
207/// An error which can be returned when parsing a char.
208#[stable(feature = "char_from_str", since = "1.20.0")]
209#[derive(Clone, Debug, PartialEq, Eq)]
210pub struct ParseCharError {
211 kind: CharErrorKind,
212}
213
214impl ParseCharError {
60c5eb7d
XL
215 #[unstable(
216 feature = "char_error_internals",
217 reason = "this method should not be available publicly",
dfeec247 218 issue = "none"
60c5eb7d 219 )]
83c7162d
XL
220 #[doc(hidden)]
221 pub fn __description(&self) -> &str {
222 match self.kind {
60c5eb7d
XL
223 CharErrorKind::EmptyString => "cannot parse char from empty string",
224 CharErrorKind::TooManyChars => "too many characters in string",
83c7162d
XL
225 }
226 }
227}
228
/// The reason a string could not be parsed into a single `char`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
234
235#[stable(feature = "char_from_str", since = "1.20.0")]
236impl fmt::Display for ParseCharError {
48663c56 237 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
238 self.__description().fmt(f)
239 }
240}
241
83c7162d
XL
242#[stable(feature = "char_from_str", since = "1.20.0")]
243impl FromStr for char {
244 type Err = ParseCharError;
245
246 #[inline]
247 fn from_str(s: &str) -> Result<Self, Self::Err> {
248 let mut chars = s.chars();
249 match (chars.next(), chars.next()) {
60c5eb7d 250 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
83c7162d 251 (Some(c), None) => Ok(c),
60c5eb7d 252 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
83c7162d
XL
253 }
254 }
255}
256
3c0e092e
XL
257#[inline]
258const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
259 if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
260 Err(CharTryFromError(()))
261 } else {
262 // SAFETY: checked that it's a legal unicode value
263 Ok(unsafe { transmute(i) })
264 }
265}
266
9fa01778 267#[stable(feature = "try_from", since = "1.34.0")]
83c7162d
XL
268impl TryFrom<u32> for char {
269 type Error = CharTryFromError;
270
271 #[inline]
272 fn try_from(i: u32) -> Result<Self, Self::Error> {
3c0e092e 273 char_try_from_u32(i)
83c7162d
XL
274 }
275}
276
277/// The error type returned when a conversion from u32 to char fails.
9fa01778 278#[stable(feature = "try_from", since = "1.34.0")]
83c7162d
XL
279#[derive(Copy, Clone, Debug, PartialEq, Eq)]
280pub struct CharTryFromError(());
281
9fa01778 282#[stable(feature = "try_from", since = "1.34.0")]
83c7162d 283impl fmt::Display for CharTryFromError {
48663c56 284 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
285 "converted integer out of range for `char`".fmt(f)
286 }
287}
288
289/// Converts a digit in the given radix to a `char`.
290///
291/// A 'radix' here is sometimes also called a 'base'. A radix of two
292/// indicates a binary number, a radix of ten, decimal, and a radix of
293/// sixteen, hexadecimal, to give some common values. Arbitrary
294/// radices are supported.
295///
296/// `from_digit()` will return `None` if the input is not a digit in
297/// the given radix.
298///
299/// # Panics
300///
301/// Panics if given a radix larger than 36.
302///
303/// # Examples
304///
305/// Basic usage:
306///
307/// ```
308/// use std::char;
309///
310/// let c = char::from_digit(4, 10);
311///
312/// assert_eq!(Some('4'), c);
313///
314/// // Decimal 11 is a single digit in base 16
315/// let c = char::from_digit(11, 16);
316///
317/// assert_eq!(Some('b'), c);
318/// ```
319///
320/// Returning `None` when the input is not a digit:
321///
322/// ```
323/// use std::char;
324///
325/// let c = char::from_digit(20, 10);
326///
327/// assert_eq!(None, c);
328/// ```
329///
330/// Passing a large radix, causing a panic:
331///
f035d41b 332/// ```should_panic
83c7162d
XL
333/// use std::char;
334///
f035d41b
XL
335/// // this panics
336/// let c = char::from_digit(1, 37);
83c7162d
XL
337/// ```
338#[inline]
c295e0f8 339#[must_use]
83c7162d 340#[stable(feature = "rust1", since = "1.0.0")]
3c0e092e
XL
341#[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
342pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
83c7162d
XL
343 if radix > 36 {
344 panic!("from_digit: radix is too high (maximum 36)");
345 }
346 if num < radix {
347 let num = num as u8;
60c5eb7d 348 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
83c7162d
XL
349 } else {
350 None
351 }
352}