]> git.proxmox.com Git - rustc.git/blob - src/libcore/char/convert.rs
New upstream version 1.46.0~beta.2+dfsg1
[rustc.git] / src / libcore / char / convert.rs
1 //! Character conversions.
2
3 use crate::convert::TryFrom;
4 use crate::fmt;
5 use crate::mem::transmute;
6 use crate::str::FromStr;
7
8 use super::MAX;
9
10 /// Converts a `u32` to a `char`.
11 ///
12 /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
13 /// `as`:
14 ///
15 /// ```
16 /// let c = '💯';
17 /// let i = c as u32;
18 ///
19 /// assert_eq!(128175, i);
20 /// ```
21 ///
22 /// However, the reverse is not true: not all valid [`u32`]s are valid
23 /// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
24 /// for a [`char`].
25 ///
26 /// [`char`]: ../../std/primitive.char.html
27 /// [`u32`]: ../../std/primitive.u32.html
28 ///
29 /// For an unsafe version of this function which ignores these checks, see
30 /// [`from_u32_unchecked`].
31 ///
32 /// [`from_u32_unchecked`]: fn.from_u32_unchecked.html
33 ///
34 /// # Examples
35 ///
36 /// Basic usage:
37 ///
38 /// ```
39 /// use std::char;
40 ///
41 /// let c = char::from_u32(0x2764);
42 ///
43 /// assert_eq!(Some('❤'), c);
44 /// ```
45 ///
46 /// Returning `None` when the input is not a valid [`char`]:
47 ///
48 /// ```
49 /// use std::char;
50 ///
51 /// let c = char::from_u32(0x110000);
52 ///
53 /// assert_eq!(None, c);
54 /// ```
55 #[inline]
56 #[stable(feature = "rust1", since = "1.0.0")]
57 pub fn from_u32(i: u32) -> Option<char> {
58 char::try_from(i).ok()
59 }
60
61 /// Converts a `u32` to a `char`, ignoring validity.
62 ///
63 /// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
64 /// `as`:
65 ///
66 /// ```
67 /// let c = '💯';
68 /// let i = c as u32;
69 ///
70 /// assert_eq!(128175, i);
71 /// ```
72 ///
73 /// However, the reverse is not true: not all valid [`u32`]s are valid
74 /// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
75 /// [`char`], possibly creating an invalid one.
76 ///
77 /// [`char`]: ../../std/primitive.char.html
78 /// [`u32`]: ../../std/primitive.u32.html
79 ///
80 /// # Safety
81 ///
82 /// This function is unsafe, as it may construct invalid `char` values.
83 ///
84 /// For a safe version of this function, see the [`from_u32`] function.
85 ///
86 /// [`from_u32`]: fn.from_u32.html
87 ///
88 /// # Examples
89 ///
90 /// Basic usage:
91 ///
92 /// ```
93 /// use std::char;
94 ///
95 /// let c = unsafe { char::from_u32_unchecked(0x2764) };
96 ///
97 /// assert_eq!('❤', c);
98 /// ```
99 #[inline]
100 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
101 pub unsafe fn from_u32_unchecked(i: u32) -> char {
102 // SAFETY: the caller must guarantee that `i` is a valid char value.
103 if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } }
104 }
105
106 #[stable(feature = "char_convert", since = "1.13.0")]
107 impl From<char> for u32 {
108 /// Converts a [`char`] into a [`u32`].
109 ///
110 /// # Examples
111 ///
112 /// ```
113 /// use std::mem;
114 ///
115 /// let c = 'c';
116 /// let u = u32::from(c);
117 /// assert!(4 == mem::size_of_val(&u))
118 /// ```
119 #[inline]
120 fn from(c: char) -> Self {
121 c as u32
122 }
123 }
124
125 /// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
126 ///
127 /// Unicode is designed such that this effectively decodes bytes
128 /// with the character encoding that IANA calls ISO-8859-1.
129 /// This encoding is compatible with ASCII.
130 ///
131 /// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
132 /// which leaves some "blanks", byte values that are not assigned to any character.
133 /// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
134 ///
135 /// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
136 /// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
137 /// to punctuation and various Latin characters.
138 ///
139 /// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
140 /// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
141 /// for a superset of Windows-1252 that fills the remaining blanks with corresponding
142 /// C0 and C1 control codes.
143 #[stable(feature = "char_convert", since = "1.13.0")]
144 impl From<u8> for char {
145 /// Converts a [`u8`] into a [`char`].
146 ///
147 /// # Examples
148 ///
149 /// ```
150 /// use std::mem;
151 ///
152 /// let u = 32 as u8;
153 /// let c = char::from(u);
154 /// assert!(4 == mem::size_of_val(&c))
155 /// ```
156 #[inline]
157 fn from(i: u8) -> Self {
158 i as char
159 }
160 }
161
162 /// An error which can be returned when parsing a char.
163 #[stable(feature = "char_from_str", since = "1.20.0")]
164 #[derive(Clone, Debug, PartialEq, Eq)]
165 pub struct ParseCharError {
166 kind: CharErrorKind,
167 }
168
169 impl ParseCharError {
170 #[unstable(
171 feature = "char_error_internals",
172 reason = "this method should not be available publicly",
173 issue = "none"
174 )]
175 #[doc(hidden)]
176 pub fn __description(&self) -> &str {
177 match self.kind {
178 CharErrorKind::EmptyString => "cannot parse char from empty string",
179 CharErrorKind::TooManyChars => "too many characters in string",
180 }
181 }
182 }
183
/// The reasons for which parsing a `char` from a string is rejected.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum CharErrorKind {
    /// The string held no characters at all.
    EmptyString,
    /// The string held more than one character.
    TooManyChars,
}
189
190 #[stable(feature = "char_from_str", since = "1.20.0")]
191 impl fmt::Display for ParseCharError {
192 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
193 self.__description().fmt(f)
194 }
195 }
196
197 #[stable(feature = "char_from_str", since = "1.20.0")]
198 impl FromStr for char {
199 type Err = ParseCharError;
200
201 #[inline]
202 fn from_str(s: &str) -> Result<Self, Self::Err> {
203 let mut chars = s.chars();
204 match (chars.next(), chars.next()) {
205 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
206 (Some(c), None) => Ok(c),
207 _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
208 }
209 }
210 }
211
212 #[stable(feature = "try_from", since = "1.34.0")]
213 impl TryFrom<u32> for char {
214 type Error = CharTryFromError;
215
216 #[inline]
217 fn try_from(i: u32) -> Result<Self, Self::Error> {
218 if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
219 Err(CharTryFromError(()))
220 } else {
221 // SAFETY: checked that it's a legal unicode value
222 Ok(unsafe { transmute(i) })
223 }
224 }
225 }
226
227 /// The error type returned when a conversion from u32 to char fails.
228 #[stable(feature = "try_from", since = "1.34.0")]
229 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
230 pub struct CharTryFromError(());
231
232 #[stable(feature = "try_from", since = "1.34.0")]
233 impl fmt::Display for CharTryFromError {
234 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
235 "converted integer out of range for `char`".fmt(f)
236 }
237 }
238
239 /// Converts a digit in the given radix to a `char`.
240 ///
241 /// A 'radix' here is sometimes also called a 'base'. A radix of two
242 /// indicates a binary number, a radix of ten, decimal, and a radix of
243 /// sixteen, hexadecimal, to give some common values. Arbitrary
244 /// radices are supported.
245 ///
246 /// `from_digit()` will return `None` if the input is not a digit in
247 /// the given radix.
248 ///
249 /// # Panics
250 ///
251 /// Panics if given a radix larger than 36.
252 ///
253 /// # Examples
254 ///
255 /// Basic usage:
256 ///
257 /// ```
258 /// use std::char;
259 ///
260 /// let c = char::from_digit(4, 10);
261 ///
262 /// assert_eq!(Some('4'), c);
263 ///
264 /// // Decimal 11 is a single digit in base 16
265 /// let c = char::from_digit(11, 16);
266 ///
267 /// assert_eq!(Some('b'), c);
268 /// ```
269 ///
270 /// Returning `None` when the input is not a digit:
271 ///
272 /// ```
273 /// use std::char;
274 ///
275 /// let c = char::from_digit(20, 10);
276 ///
277 /// assert_eq!(None, c);
278 /// ```
279 ///
280 /// Passing a large radix, causing a panic:
281 ///
282 /// ```should_panic
283 /// use std::char;
284 ///
285 /// // this panics
286 /// let c = char::from_digit(1, 37);
287 /// ```
288 #[inline]
289 #[stable(feature = "rust1", since = "1.0.0")]
290 pub fn from_digit(num: u32, radix: u32) -> Option<char> {
291 if radix > 36 {
292 panic!("from_digit: radix is too high (maximum 36)");
293 }
294 if num < radix {
295 let num = num as u8;
296 if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
297 } else {
298 None
299 }
300 }