]> git.proxmox.com Git - rustc.git/blame - library/core/src/char/mod.rs
New upstream version 1.66.0+dfsg1
[rustc.git] / library / core / src / char / mod.rs
CommitLineData
f2b60f7d
FG
1//! Utilities for the `char` primitive type.
2//!
3//! *[See also the `char` primitive type](primitive@char).*
83c7162d
XL
4//!
5//! The `char` type represents a single character. More specifically, since
6//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
7//! scalar value]', which is similar to, but not the same as, a '[Unicode code
8//! point]'.
9//!
136023e0
XL
10//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
11//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
83c7162d
XL
12//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
83c7162d
XL
15//!
16//! This module is the home of the iterator implementations for the iterators
17//! implemented on `char`, as well as some useful constants and conversion
18//! functions that convert various types to `char`.
19
20#![allow(non_snake_case)]
21#![stable(feature = "core_char", since = "1.2.0")]
22
23mod convert;
24mod decode;
25mod methods;
26
27// stable re-exports
9fa01778 28#[stable(feature = "try_from", since = "1.34.0")]
83c7162d 29pub use self::convert::CharTryFromError;
60c5eb7d
XL
30#[stable(feature = "char_from_str", since = "1.20.0")]
31pub use self::convert::ParseCharError;
83c7162d 32#[stable(feature = "decode_utf16", since = "1.9.0")]
04454e1e 33pub use self::decode::{DecodeUtf16, DecodeUtf16Error};
83c7162d 34
f9f354fc
XL
35// perma-unstable re-exports
36#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
37pub use self::methods::encode_utf16_raw;
38#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39pub use self::methods::encode_utf8_raw;
40
f2b60f7d 41use crate::error::Error;
48663c56
XL
42use crate::fmt::{self, Write};
43use crate::iter::FusedIterator;
83c7162d 44
cdc7bbd5
XL
45pub(crate) use self::methods::EscapeDebugExtArgs;
46
// UTF-8 encoding parameters: the tag bits that identify each kind of byte and
// the code point thresholds at which an encoding needs one more byte.

/// Tag bits of a UTF-8 continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Tag bits of the leading byte of a two-byte sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Tag bits of the leading byte of a three-byte sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Tag bits of the leading byte of a four-byte sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// `0x80`: the first code point that no longer fits in one UTF-8 byte.
const MAX_ONE_B: u32 = 1 << 7;
/// `0x800`: the first code point that no longer fits in two UTF-8 bytes.
const MAX_TWO_B: u32 = 1 << 11;
/// `0x10000`: the first code point that no longer fits in three UTF-8 bytes.
const MAX_THREE_B: u32 = 1 << 16;
83c7162d
XL
55
56/*
57 Lu Uppercase_Letter an uppercase letter
58 Ll Lowercase_Letter a lowercase letter
59 Lt Titlecase_Letter a digraphic character, with first part uppercase
60 Lm Modifier_Letter a modifier letter
61 Lo Other_Letter other letters, including syllables and ideographs
62 Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
63 Mc Spacing_Mark a spacing combining mark (positive advance width)
64 Me Enclosing_Mark an enclosing combining mark
65 Nd Decimal_Number a decimal digit
66 Nl Letter_Number a letterlike numeric character
67 No Other_Number a numeric character of other type
68 Pc Connector_Punctuation a connecting punctuation mark, like a tie
69 Pd Dash_Punctuation a dash or hyphen punctuation mark
70 Ps Open_Punctuation an opening punctuation mark (of a pair)
71 Pe Close_Punctuation a closing punctuation mark (of a pair)
72 Pi Initial_Punctuation an initial quotation mark
73 Pf Final_Punctuation a final quotation mark
74 Po Other_Punctuation a punctuation mark of other type
75 Sm Math_Symbol a symbol of primarily mathematical use
76 Sc Currency_Symbol a currency sign
77 Sk Modifier_Symbol a non-letterlike modifier symbol
78 So Other_Symbol a symbol of other type
79 Zs Space_Separator a space character (of various non-zero widths)
80 Zl Line_Separator U+2028 LINE SEPARATOR only
81 Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
82 Cc Control a C0 or C1 control code
83 Cf Format a format control character
84 Cs Surrogate a surrogate code point
85 Co Private_Use a private-use character
86 Cn Unassigned a reserved unassigned code point or a noncharacter
87*/
88
04454e1e 89/// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
83c7162d 90#[stable(feature = "rust1", since = "1.0.0")]
f9f354fc 91pub const MAX: char = char::MAX;
83c7162d
XL
92
93/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
04454e1e 94/// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
83c7162d 95#[stable(feature = "decode_utf16", since = "1.9.0")]
f9f354fc 96pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
83c7162d 97
04454e1e
FG
98/// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
99/// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
100#[stable(feature = "unicode_version", since = "1.45.0")]
101pub const UNICODE_VERSION: (u8, u8, u8) = char::UNICODE_VERSION;
102
103/// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
104/// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
105#[stable(feature = "decode_utf16", since = "1.9.0")]
106#[inline]
107pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
108 self::decode::decode_utf16(iter)
109}
110
111/// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
112#[stable(feature = "rust1", since = "1.0.0")]
113#[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
114#[must_use]
115#[inline]
116pub const fn from_u32(i: u32) -> Option<char> {
117 self::convert::from_u32(i)
118}
119
120/// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`].
121/// instead.
122#[stable(feature = "char_from_unchecked", since = "1.5.0")]
123#[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
124#[must_use]
125#[inline]
126pub const unsafe fn from_u32_unchecked(i: u32) -> char {
127 // SAFETY: the safety contract must be upheld by the caller.
128 unsafe { self::convert::from_u32_unchecked(i) }
129}
130
131/// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
132#[stable(feature = "rust1", since = "1.0.0")]
133#[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
134#[must_use]
135#[inline]
136pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
137 self::convert::from_digit(num, radix)
138}
139
83c7162d
XL
140/// Returns an iterator that yields the hexadecimal Unicode escape of a
141/// character, as `char`s.
142///
143/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
144/// its documentation for more.
145///
3dfed10e 146/// [`escape_unicode`]: char::escape_unicode
83c7162d
XL
147#[derive(Clone, Debug)]
148#[stable(feature = "rust1", since = "1.0.0")]
149pub struct EscapeUnicode {
150 c: char,
151 state: EscapeUnicodeState,
152
153 // The index of the next hex digit to be printed (0 if none),
0731742a 154 // i.e., the number of remaining hex digits to be printed;
83c7162d
XL
155 // increasing from the least significant digit: 0x543210
156 hex_digit_idx: usize,
157}
158
// Progress through a `\u{…}` escape sequence.
//
// The variants are declared in this order on purpose: each variant's
// representation then equals the number of characters still to be produced
// (not counting the hexadecimal digits that have not been reached yet). This
// likely makes `len()` a single load from memory, and inline-worthy.
#[derive(Clone, Debug)]
enum EscapeUnicodeState {
    // Nothing is left to produce.
    Done,
    // Only the closing `}` is left.
    RightBrace,
    // Hexadecimal digits are being produced.
    Value,
    // The opening `{` comes next.
    LeftBrace,
    // The `u` comes next.
    Type,
    // The leading `\` comes next.
    Backslash,
}
171
172#[stable(feature = "rust1", since = "1.0.0")]
173impl Iterator for EscapeUnicode {
174 type Item = char;
175
176 fn next(&mut self) -> Option<char> {
177 match self.state {
178 EscapeUnicodeState::Backslash => {
179 self.state = EscapeUnicodeState::Type;
180 Some('\\')
181 }
182 EscapeUnicodeState::Type => {
183 self.state = EscapeUnicodeState::LeftBrace;
184 Some('u')
185 }
186 EscapeUnicodeState::LeftBrace => {
187 self.state = EscapeUnicodeState::Value;
188 Some('{')
189 }
190 EscapeUnicodeState::Value => {
191 let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
192 let c = from_digit(hex_digit, 16).unwrap();
193 if self.hex_digit_idx == 0 {
194 self.state = EscapeUnicodeState::RightBrace;
195 } else {
196 self.hex_digit_idx -= 1;
197 }
198 Some(c)
199 }
200 EscapeUnicodeState::RightBrace => {
201 self.state = EscapeUnicodeState::Done;
202 Some('}')
203 }
204 EscapeUnicodeState::Done => None,
205 }
206 }
207
208 #[inline]
209 fn size_hint(&self) -> (usize, Option<usize>) {
210 let n = self.len();
211 (n, Some(n))
212 }
213
214 #[inline]
215 fn count(self) -> usize {
216 self.len()
217 }
218
219 fn last(self) -> Option<char> {
220 match self.state {
221 EscapeUnicodeState::Done => None,
222
60c5eb7d
XL
223 EscapeUnicodeState::RightBrace
224 | EscapeUnicodeState::Value
225 | EscapeUnicodeState::LeftBrace
226 | EscapeUnicodeState::Type
227 | EscapeUnicodeState::Backslash => Some('}'),
83c7162d
XL
228 }
229 }
230}
231
232#[stable(feature = "exact_size_escape", since = "1.11.0")]
233impl ExactSizeIterator for EscapeUnicode {
234 #[inline]
235 fn len(&self) -> usize {
236 // The match is a single memory access with no branching
60c5eb7d
XL
237 self.hex_digit_idx
238 + match self.state {
239 EscapeUnicodeState::Done => 0,
240 EscapeUnicodeState::RightBrace => 1,
241 EscapeUnicodeState::Value => 2,
242 EscapeUnicodeState::LeftBrace => 3,
243 EscapeUnicodeState::Type => 4,
244 EscapeUnicodeState::Backslash => 5,
245 }
83c7162d
XL
246 }
247}
248
249#[stable(feature = "fused", since = "1.26.0")]
250impl FusedIterator for EscapeUnicode {}
251
252#[stable(feature = "char_struct_display", since = "1.16.0")]
253impl fmt::Display for EscapeUnicode {
48663c56 254 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
255 for c in self.clone() {
256 f.write_char(c)?;
257 }
258 Ok(())
259 }
260}
261
262/// An iterator that yields the literal escape code of a `char`.
263///
264/// This `struct` is created by the [`escape_default`] method on [`char`]. See
265/// its documentation for more.
266///
3dfed10e 267/// [`escape_default`]: char::escape_default
83c7162d
XL
268#[derive(Clone, Debug)]
269#[stable(feature = "rust1", since = "1.0.0")]
270pub struct EscapeDefault {
60c5eb7d 271 state: EscapeDefaultState,
83c7162d
XL
272}
273
274#[derive(Clone, Debug)]
275enum EscapeDefaultState {
276 Done,
277 Char(char),
278 Backslash(char),
279 Unicode(EscapeUnicode),
280}
281
282#[stable(feature = "rust1", since = "1.0.0")]
283impl Iterator for EscapeDefault {
284 type Item = char;
285
286 fn next(&mut self) -> Option<char> {
287 match self.state {
288 EscapeDefaultState::Backslash(c) => {
289 self.state = EscapeDefaultState::Char(c);
290 Some('\\')
291 }
292 EscapeDefaultState::Char(c) => {
293 self.state = EscapeDefaultState::Done;
294 Some(c)
295 }
296 EscapeDefaultState::Done => None,
297 EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
298 }
299 }
300
301 #[inline]
302 fn size_hint(&self) -> (usize, Option<usize>) {
303 let n = self.len();
304 (n, Some(n))
305 }
306
307 #[inline]
308 fn count(self) -> usize {
309 self.len()
310 }
311
312 fn nth(&mut self, n: usize) -> Option<char> {
313 match self.state {
314 EscapeDefaultState::Backslash(c) if n == 0 => {
315 self.state = EscapeDefaultState::Char(c);
316 Some('\\')
60c5eb7d 317 }
83c7162d
XL
318 EscapeDefaultState::Backslash(c) if n == 1 => {
319 self.state = EscapeDefaultState::Done;
320 Some(c)
60c5eb7d 321 }
83c7162d
XL
322 EscapeDefaultState::Backslash(_) => {
323 self.state = EscapeDefaultState::Done;
324 None
60c5eb7d 325 }
83c7162d
XL
326 EscapeDefaultState::Char(c) => {
327 self.state = EscapeDefaultState::Done;
328
60c5eb7d
XL
329 if n == 0 { Some(c) } else { None }
330 }
b7449926
XL
331 EscapeDefaultState::Done => None,
332 EscapeDefaultState::Unicode(ref mut i) => i.nth(n),
83c7162d
XL
333 }
334 }
335
336 fn last(self) -> Option<char> {
337 match self.state {
338 EscapeDefaultState::Unicode(iter) => iter.last(),
339 EscapeDefaultState::Done => None,
340 EscapeDefaultState::Backslash(c) | EscapeDefaultState::Char(c) => Some(c),
341 }
342 }
343}
344
345#[stable(feature = "exact_size_escape", since = "1.11.0")]
346impl ExactSizeIterator for EscapeDefault {
347 fn len(&self) -> usize {
348 match self.state {
349 EscapeDefaultState::Done => 0,
350 EscapeDefaultState::Char(_) => 1,
351 EscapeDefaultState::Backslash(_) => 2,
352 EscapeDefaultState::Unicode(ref iter) => iter.len(),
353 }
354 }
355}
356
357#[stable(feature = "fused", since = "1.26.0")]
358impl FusedIterator for EscapeDefault {}
359
360#[stable(feature = "char_struct_display", since = "1.16.0")]
361impl fmt::Display for EscapeDefault {
48663c56 362 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
363 for c in self.clone() {
364 f.write_char(c)?;
365 }
366 Ok(())
367 }
368}
369
370/// An iterator that yields the literal escape code of a `char`.
371///
372/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
373/// documentation for more.
374///
3dfed10e 375/// [`escape_debug`]: char::escape_debug
83c7162d
XL
376#[stable(feature = "char_escape_debug", since = "1.20.0")]
377#[derive(Clone, Debug)]
378pub struct EscapeDebug(EscapeDefault);
379
380#[stable(feature = "char_escape_debug", since = "1.20.0")]
381impl Iterator for EscapeDebug {
382 type Item = char;
60c5eb7d
XL
383 fn next(&mut self) -> Option<char> {
384 self.0.next()
385 }
386 fn size_hint(&self) -> (usize, Option<usize>) {
387 self.0.size_hint()
388 }
83c7162d
XL
389}
390
391#[stable(feature = "char_escape_debug", since = "1.20.0")]
60c5eb7d 392impl ExactSizeIterator for EscapeDebug {}
83c7162d
XL
393
394#[stable(feature = "fused", since = "1.26.0")]
395impl FusedIterator for EscapeDebug {}
396
397#[stable(feature = "char_escape_debug", since = "1.20.0")]
398impl fmt::Display for EscapeDebug {
48663c56 399 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
400 fmt::Display::fmt(&self.0, f)
401 }
402}
403
404/// Returns an iterator that yields the lowercase equivalent of a `char`.
405///
406/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
407/// its documentation for more.
408///
3dfed10e 409/// [`to_lowercase`]: char::to_lowercase
83c7162d
XL
410#[stable(feature = "rust1", since = "1.0.0")]
411#[derive(Debug, Clone)]
412pub struct ToLowercase(CaseMappingIter);
413
414#[stable(feature = "rust1", since = "1.0.0")]
415impl Iterator for ToLowercase {
416 type Item = char;
417 fn next(&mut self) -> Option<char> {
418 self.0.next()
419 }
532ac7d7
XL
420 fn size_hint(&self) -> (usize, Option<usize>) {
421 self.0.size_hint()
422 }
83c7162d
XL
423}
424
a2a8927a
XL
425#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
426impl DoubleEndedIterator for ToLowercase {
427 fn next_back(&mut self) -> Option<char> {
428 self.0.next_back()
429 }
430}
431
83c7162d
XL
432#[stable(feature = "fused", since = "1.26.0")]
433impl FusedIterator for ToLowercase {}
434
532ac7d7
XL
435#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
436impl ExactSizeIterator for ToLowercase {}
437
83c7162d
XL
438/// Returns an iterator that yields the uppercase equivalent of a `char`.
439///
440/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
441/// its documentation for more.
442///
3dfed10e 443/// [`to_uppercase`]: char::to_uppercase
83c7162d
XL
444#[stable(feature = "rust1", since = "1.0.0")]
445#[derive(Debug, Clone)]
446pub struct ToUppercase(CaseMappingIter);
447
448#[stable(feature = "rust1", since = "1.0.0")]
449impl Iterator for ToUppercase {
450 type Item = char;
451 fn next(&mut self) -> Option<char> {
452 self.0.next()
453 }
532ac7d7
XL
454 fn size_hint(&self) -> (usize, Option<usize>) {
455 self.0.size_hint()
456 }
83c7162d
XL
457}
458
a2a8927a
XL
459#[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
460impl DoubleEndedIterator for ToUppercase {
461 fn next_back(&mut self) -> Option<char> {
462 self.0.next_back()
463 }
464}
465
83c7162d
XL
466#[stable(feature = "fused", since = "1.26.0")]
467impl FusedIterator for ToUppercase {}
468
532ac7d7
XL
469#[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
470impl ExactSizeIterator for ToUppercase {}
471
83c7162d
XL
// The characters of a case mapping that are still to be yielded: at most
// three, in order.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    // Builds the iterator from a fixed three-slot array in which trailing
    // `'\0'` entries mark unused slots. The first slot is always kept, so an
    // all-`'\0'` array still yields one `'\0'`.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        match chars {
            [only, '\0', '\0'] => CaseMappingIter::One(only), // Including if only == '\0'
            [first, second, '\0'] => CaseMappingIter::Two(first, second),
            [first, second, third] => CaseMappingIter::Three(first, second, third),
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    // Hands out the front character and shrinks to the next smaller variant.
    fn next(&mut self) -> Option<char> {
        let (front, rest) = match *self {
            CaseMappingIter::Zero => return None,
            CaseMappingIter::One(only) => (only, CaseMappingIter::Zero),
            CaseMappingIter::Two(first, second) => (first, CaseMappingIter::One(second)),
            CaseMappingIter::Three(first, second, third) => {
                (first, CaseMappingIter::Two(second, third))
            }
        };
        *self = rest;
        Some(front)
    }

    // The variant itself says exactly how many characters are left.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Zero => 0,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::Three(..) => 3,
        };
        (remaining, Some(remaining))
    }
}

impl DoubleEndedIterator for CaseMappingIter {
    // Hands out the back character and shrinks to the next smaller variant.
    fn next_back(&mut self) -> Option<char> {
        let (back, rest) = match *self {
            CaseMappingIter::Zero => return None,
            CaseMappingIter::One(only) => (only, CaseMappingIter::Zero),
            CaseMappingIter::Two(first, second) => (second, CaseMappingIter::One(first)),
            CaseMappingIter::Three(first, second, third) => {
                (third, CaseMappingIter::Two(first, second))
            }
        };
        *self = rest;
        Some(back)
    }
}

impl fmt::Display for CaseMappingIter {
    // Writes the characters that are still pending, front to back, working on
    // a copy so the iterator itself is left untouched; stops at the first
    // write error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.clone().try_for_each(|ch| f.write_char(ch))
    }
}
562
563#[stable(feature = "char_struct_display", since = "1.16.0")]
564impl fmt::Display for ToLowercase {
48663c56 565 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
566 fmt::Display::fmt(&self.0, f)
567 }
568}
569
570#[stable(feature = "char_struct_display", since = "1.16.0")]
571impl fmt::Display for ToUppercase {
48663c56 572 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83c7162d
XL
573 fmt::Display::fmt(&self.0, f)
574 }
575}
a2a8927a
XL
576
577/// The error type returned when a checked char conversion fails.
578#[stable(feature = "u8_from_char", since = "1.59.0")]
579#[derive(Debug, Copy, Clone, PartialEq, Eq)]
580pub struct TryFromCharError(pub(crate) ());
581
582#[stable(feature = "u8_from_char", since = "1.59.0")]
583impl fmt::Display for TryFromCharError {
584 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
585 "unicode code point out of range".fmt(fmt)
586 }
587}
f2b60f7d 588
f2b60f7d
FG
589#[stable(feature = "u8_from_char", since = "1.59.0")]
590impl Error for TryFromCharError {}