//! Utilities for the `char` primitive type.
//!
//! *[See also the `char` primitive type](primitive@char).*
//!
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! [Unicode scalar value]: https://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: https://www.unicode.org/glossary/#code_point
//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type][char] itself.
//!
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.
20 #![allow(non_snake_case)]
21 #![stable(feature = "core_char", since = "1.2.0")]
28 #[stable(feature = "try_from", since = "1.34.0")]
29 pub use self::convert
::CharTryFromError
;
30 #[stable(feature = "char_from_str", since = "1.20.0")]
31 pub use self::convert
::ParseCharError
;
32 #[stable(feature = "decode_utf16", since = "1.9.0")]
33 pub use self::decode
::{DecodeUtf16, DecodeUtf16Error}
;
35 // perma-unstable re-exports
36 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
37 pub use self::methods
::encode_utf16_raw
;
38 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
39 pub use self::methods
::encode_utf8_raw
;
41 #[cfg(not(bootstrap))]
42 use crate::error
::Error
;
43 use crate::fmt
::{self, Write}
;
44 use crate::iter
::FusedIterator
;
46 pub(crate) use self::methods
::EscapeDebugExtArgs
;
// UTF-8 ranges and tags for encoding characters.
//
// `TAG_*` are the marker bits of a continuation byte and of the leading byte
// of a two-, three- and four-byte sequence. `MAX_*` are the exclusive upper
// bounds of the code points that fit in one, two and three bytes.
const TAG_CONT: u8 = 0b1000_0000;
const TAG_TWO_B: u8 = 0b1100_0000;
const TAG_THREE_B: u8 = 0b1110_0000;
const TAG_FOUR_B: u8 = 0b1111_0000;
const MAX_ONE_B: u32 = 0x80;
const MAX_TWO_B: u32 = 0x800;
const MAX_THREE_B: u32 = 0x10000;
/*
    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/
90 /// The highest valid code point a `char` can have, `'\u{10FFFF}'`. Use [`char::MAX`] instead.
91 #[stable(feature = "rust1", since = "1.0.0")]
92 pub const MAX
: char = char::MAX
;
94 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
95 /// decoding error. Use [`char::REPLACEMENT_CHARACTER`] instead.
96 #[stable(feature = "decode_utf16", since = "1.9.0")]
97 pub const REPLACEMENT_CHARACTER
: char = char::REPLACEMENT_CHARACTER
;
99 /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
100 /// `char` and `str` methods are based on. Use [`char::UNICODE_VERSION`] instead.
101 #[stable(feature = "unicode_version", since = "1.45.0")]
102 pub const UNICODE_VERSION
: (u8, u8, u8) = char::UNICODE_VERSION
;
104 /// Creates an iterator over the UTF-16 encoded code points in `iter`, returning
105 /// unpaired surrogates as `Err`s. Use [`char::decode_utf16`] instead.
106 #[stable(feature = "decode_utf16", since = "1.9.0")]
108 pub fn decode_utf16
<I
: IntoIterator
<Item
= u16>>(iter
: I
) -> DecodeUtf16
<I
::IntoIter
> {
109 self::decode
::decode_utf16(iter
)
112 /// Converts a `u32` to a `char`. Use [`char::from_u32`] instead.
113 #[stable(feature = "rust1", since = "1.0.0")]
114 #[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
117 pub const fn from_u32(i
: u32) -> Option
<char> {
118 self::convert
::from_u32(i
)
121 /// Converts a `u32` to a `char`, ignoring validity. Use [`char::from_u32_unchecked`].
123 #[stable(feature = "char_from_unchecked", since = "1.5.0")]
124 #[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
127 pub const unsafe fn from_u32_unchecked(i
: u32) -> char {
128 // SAFETY: the safety contract must be upheld by the caller.
129 unsafe { self::convert::from_u32_unchecked(i) }
132 /// Converts a digit in the given radix to a `char`. Use [`char::from_digit`] instead.
133 #[stable(feature = "rust1", since = "1.0.0")]
134 #[rustc_const_unstable(feature = "const_char_convert", issue = "89259")]
137 pub const fn from_digit(num
: u32, radix
: u32) -> Option
<char> {
138 self::convert
::from_digit(num
, radix
)
141 /// Returns an iterator that yields the hexadecimal Unicode escape of a
142 /// character, as `char`s.
144 /// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
145 /// its documentation for more.
147 /// [`escape_unicode`]: char::escape_unicode
148 #[derive(Clone, Debug)]
149 #[stable(feature = "rust1", since = "1.0.0")]
150 pub struct EscapeUnicode
{
152 state
: EscapeUnicodeState
,
154 // The index of the next hex digit to be printed (0 if none),
155 // i.e., the number of remaining hex digits to be printed;
156 // increasing from the least significant digit: 0x543210
157 hex_digit_idx
: usize,
// The enum values are ordered so that their representation is the
// same as the remaining length (besides the hexadecimal digits). This
// likely makes `len()` a single load from memory, and inline-worthy.
#[derive(Clone, Debug)]
enum EscapeUnicodeState {
    // Nothing left to emit.
    Done,
    // Only the closing `}` is left.
    RightBrace,
    // One or more hex digits, then `}`.
    Value,
    // The opening `{` comes next.
    LeftBrace,
    // The escape-type letter comes next.
    Type,
    // Initial state: the leading backslash comes next.
    Backslash,
}
173 #[stable(feature = "rust1", since = "1.0.0")]
174 impl Iterator
for EscapeUnicode
{
177 fn next(&mut self) -> Option
<char> {
179 EscapeUnicodeState
::Backslash
=> {
180 self.state
= EscapeUnicodeState
::Type
;
183 EscapeUnicodeState
::Type
=> {
184 self.state
= EscapeUnicodeState
::LeftBrace
;
187 EscapeUnicodeState
::LeftBrace
=> {
188 self.state
= EscapeUnicodeState
::Value
;
191 EscapeUnicodeState
::Value
=> {
192 let hex_digit
= ((self.c
as u32) >> (self.hex_digit_idx
* 4)) & 0xf;
193 let c
= from_digit(hex_digit
, 16).unwrap();
194 if self.hex_digit_idx
== 0 {
195 self.state
= EscapeUnicodeState
::RightBrace
;
197 self.hex_digit_idx
-= 1;
201 EscapeUnicodeState
::RightBrace
=> {
202 self.state
= EscapeUnicodeState
::Done
;
205 EscapeUnicodeState
::Done
=> None
,
210 fn size_hint(&self) -> (usize, Option
<usize>) {
216 fn count(self) -> usize {
220 fn last(self) -> Option
<char> {
222 EscapeUnicodeState
::Done
=> None
,
224 EscapeUnicodeState
::RightBrace
225 | EscapeUnicodeState
::Value
226 | EscapeUnicodeState
::LeftBrace
227 | EscapeUnicodeState
::Type
228 | EscapeUnicodeState
::Backslash
=> Some('
}'
),
233 #[stable(feature = "exact_size_escape", since = "1.11.0")]
234 impl ExactSizeIterator
for EscapeUnicode
{
236 fn len(&self) -> usize {
237 // The match is a single memory access with no branching
240 EscapeUnicodeState
::Done
=> 0,
241 EscapeUnicodeState
::RightBrace
=> 1,
242 EscapeUnicodeState
::Value
=> 2,
243 EscapeUnicodeState
::LeftBrace
=> 3,
244 EscapeUnicodeState
::Type
=> 4,
245 EscapeUnicodeState
::Backslash
=> 5,
250 #[stable(feature = "fused", since = "1.26.0")]
251 impl FusedIterator
for EscapeUnicode {}
253 #[stable(feature = "char_struct_display", since = "1.16.0")]
254 impl fmt
::Display
for EscapeUnicode
{
255 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
256 for c
in self.clone() {
263 /// An iterator that yields the literal escape code of a `char`.
265 /// This `struct` is created by the [`escape_default`] method on [`char`]. See
266 /// its documentation for more.
268 /// [`escape_default`]: char::escape_default
269 #[derive(Clone, Debug)]
270 #[stable(feature = "rust1", since = "1.0.0")]
271 pub struct EscapeDefault
{
272 state
: EscapeDefaultState
,
275 #[derive(Clone, Debug)]
276 enum EscapeDefaultState
{
280 Unicode(EscapeUnicode
),
283 #[stable(feature = "rust1", since = "1.0.0")]
284 impl Iterator
for EscapeDefault
{
287 fn next(&mut self) -> Option
<char> {
289 EscapeDefaultState
::Backslash(c
) => {
290 self.state
= EscapeDefaultState
::Char(c
);
293 EscapeDefaultState
::Char(c
) => {
294 self.state
= EscapeDefaultState
::Done
;
297 EscapeDefaultState
::Done
=> None
,
298 EscapeDefaultState
::Unicode(ref mut iter
) => iter
.next(),
303 fn size_hint(&self) -> (usize, Option
<usize>) {
309 fn count(self) -> usize {
313 fn nth(&mut self, n
: usize) -> Option
<char> {
315 EscapeDefaultState
::Backslash(c
) if n
== 0 => {
316 self.state
= EscapeDefaultState
::Char(c
);
319 EscapeDefaultState
::Backslash(c
) if n
== 1 => {
320 self.state
= EscapeDefaultState
::Done
;
323 EscapeDefaultState
::Backslash(_
) => {
324 self.state
= EscapeDefaultState
::Done
;
327 EscapeDefaultState
::Char(c
) => {
328 self.state
= EscapeDefaultState
::Done
;
330 if n
== 0 { Some(c) }
else { None }
332 EscapeDefaultState
::Done
=> None
,
333 EscapeDefaultState
::Unicode(ref mut i
) => i
.nth(n
),
337 fn last(self) -> Option
<char> {
339 EscapeDefaultState
::Unicode(iter
) => iter
.last(),
340 EscapeDefaultState
::Done
=> None
,
341 EscapeDefaultState
::Backslash(c
) | EscapeDefaultState
::Char(c
) => Some(c
),
346 #[stable(feature = "exact_size_escape", since = "1.11.0")]
347 impl ExactSizeIterator
for EscapeDefault
{
348 fn len(&self) -> usize {
350 EscapeDefaultState
::Done
=> 0,
351 EscapeDefaultState
::Char(_
) => 1,
352 EscapeDefaultState
::Backslash(_
) => 2,
353 EscapeDefaultState
::Unicode(ref iter
) => iter
.len(),
358 #[stable(feature = "fused", since = "1.26.0")]
359 impl FusedIterator
for EscapeDefault {}
361 #[stable(feature = "char_struct_display", since = "1.16.0")]
362 impl fmt
::Display
for EscapeDefault
{
363 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
364 for c
in self.clone() {
371 /// An iterator that yields the literal escape code of a `char`.
373 /// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
374 /// documentation for more.
376 /// [`escape_debug`]: char::escape_debug
377 #[stable(feature = "char_escape_debug", since = "1.20.0")]
378 #[derive(Clone, Debug)]
379 pub struct EscapeDebug(EscapeDefault
);
381 #[stable(feature = "char_escape_debug", since = "1.20.0")]
382 impl Iterator
for EscapeDebug
{
384 fn next(&mut self) -> Option
<char> {
387 fn size_hint(&self) -> (usize, Option
<usize>) {
392 #[stable(feature = "char_escape_debug", since = "1.20.0")]
393 impl ExactSizeIterator
for EscapeDebug {}
395 #[stable(feature = "fused", since = "1.26.0")]
396 impl FusedIterator
for EscapeDebug {}
398 #[stable(feature = "char_escape_debug", since = "1.20.0")]
399 impl fmt
::Display
for EscapeDebug
{
400 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
401 fmt
::Display
::fmt(&self.0, f
)
405 /// Returns an iterator that yields the lowercase equivalent of a `char`.
407 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
408 /// its documentation for more.
410 /// [`to_lowercase`]: char::to_lowercase
411 #[stable(feature = "rust1", since = "1.0.0")]
412 #[derive(Debug, Clone)]
413 pub struct ToLowercase(CaseMappingIter
);
415 #[stable(feature = "rust1", since = "1.0.0")]
416 impl Iterator
for ToLowercase
{
418 fn next(&mut self) -> Option
<char> {
421 fn size_hint(&self) -> (usize, Option
<usize>) {
426 #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
427 impl DoubleEndedIterator
for ToLowercase
{
428 fn next_back(&mut self) -> Option
<char> {
433 #[stable(feature = "fused", since = "1.26.0")]
434 impl FusedIterator
for ToLowercase {}
436 #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
437 impl ExactSizeIterator
for ToLowercase {}
439 /// Returns an iterator that yields the uppercase equivalent of a `char`.
441 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
442 /// its documentation for more.
444 /// [`to_uppercase`]: char::to_uppercase
445 #[stable(feature = "rust1", since = "1.0.0")]
446 #[derive(Debug, Clone)]
447 pub struct ToUppercase(CaseMappingIter
);
449 #[stable(feature = "rust1", since = "1.0.0")]
450 impl Iterator
for ToUppercase
{
452 fn next(&mut self) -> Option
<char> {
455 fn size_hint(&self) -> (usize, Option
<usize>) {
460 #[stable(feature = "case_mapping_double_ended", since = "1.59.0")]
461 impl DoubleEndedIterator
for ToUppercase
{
462 fn next_back(&mut self) -> Option
<char> {
467 #[stable(feature = "fused", since = "1.26.0")]
468 impl FusedIterator
for ToUppercase {}
470 #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")]
471 impl ExactSizeIterator
for ToUppercase {}
/// Up to three characters produced by a case mapping, consumable from either end.
///
/// The variant records how many characters are still pending.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    /// Builds the iterator from a fixed-size table entry in which unused
    /// trailing slots hold `'\0'`.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] == '\0' {
            if chars[1] == '\0' {
                CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
            } else {
                CaseMappingIter::Two(chars[0], chars[1])
            }
        } else {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    /// Yields the front character and shrinks to the next smaller variant.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // The variant encodes the exact number of pending characters.
        let size = match self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Zero => 0,
        };
        (size, Some(size))
    }
}

impl DoubleEndedIterator for CaseMappingIter {
    /// Yields the back character and shrinks to the next smaller variant.
    fn next_back(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(a, b);
                Some(c)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(b);
                Some(c)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }
}

impl fmt::Display for CaseMappingIter {
    /// Writes the pending characters in front-to-back order without consuming them.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                f.write_char(a)?;
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::Two(b, c) => {
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::One(c) => f.write_char(c),
            CaseMappingIter::Zero => Ok(()),
        }
    }
}
564 #[stable(feature = "char_struct_display", since = "1.16.0")]
565 impl fmt
::Display
for ToLowercase
{
566 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
567 fmt
::Display
::fmt(&self.0, f
)
571 #[stable(feature = "char_struct_display", since = "1.16.0")]
572 impl fmt
::Display
for ToUppercase
{
573 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
574 fmt
::Display
::fmt(&self.0, f
)
578 /// The error type returned when a checked char conversion fails.
579 #[stable(feature = "u8_from_char", since = "1.59.0")]
580 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
581 pub struct TryFromCharError(pub(crate) ());
583 #[stable(feature = "u8_from_char", since = "1.59.0")]
584 impl fmt
::Display
for TryFromCharError
{
585 fn fmt(&self, fmt
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
586 "unicode code point out of range".fmt(fmt
)
590 #[cfg(not(bootstrap))]
591 #[stable(feature = "u8_from_char", since = "1.59.0")]
592 impl Error
for TryFromCharError {}