1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 //! The `char` type represents a single character. More specifically, since
14 //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
15 //! scalar value]', which is similar to, but not the same as, a '[Unicode code point]'.
18 //! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
19 //! [Unicode code point]: http://www.unicode.org/glossary/#code_point
21 //! This module exists for technical reasons; the primary documentation for
22 //! `char` is directly on [the `char` primitive type](../../std/primitive.char.html).
25 //! This module is the home of the iterator implementations for the iterators
26 //! implemented on `char`, as well as some useful constants and conversion
27 //! functions that convert various types to `char`.
29 #![stable(feature = "rust1", since = "1.0.0")]
31 use core
::char::CharExt
as C
;
32 use core
::iter
::FusedIterator
;
33 use core
::fmt
::{self, Write}
;
34 use tables
::{conversions, derived_property, general_category, property}
;
37 #[stable(feature = "rust1", since = "1.0.0")]
38 pub use core
::char::{MAX, from_digit, from_u32, from_u32_unchecked}
;
39 #[stable(feature = "rust1", since = "1.0.0")]
40 pub use core
::char::{EscapeDebug, EscapeDefault, EscapeUnicode}
;
41 #[stable(feature = "char_from_str", since = "1.20.0")]
42 pub use core
::char::ParseCharError
;
45 #[unstable(feature = "try_from", issue = "33417")]
46 pub use core
::char::CharTryFromError
;
47 #[unstable(feature = "decode_utf8", issue = "33906")]
48 pub use core
::char::{DecodeUtf8, decode_utf8}
;
49 #[unstable(feature = "unicode", issue = "27783")]
50 pub use tables
::{UnicodeVersion, UNICODE_VERSION}
;
52 /// An iterator that yields the lowercase equivalent of a `char`.
///
54 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
55 /// its documentation for more.
///
57 /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
58 /// [`char`]: ../../std/primitive.char.html
// Newtype over the private `CaseMappingIter`, which stores the mapped
// characters (up to three) that remain to be yielded.
59 #[stable(feature = "rust1", since = "1.0.0")]
60 pub struct ToLowercase(CaseMappingIter
);
62 #[stable(feature = "rust1", since = "1.0.0")]
63 impl Iterator
for ToLowercase
{
65 fn next(&mut self) -> Option
<char> {
// Marker impl: declares that `ToLowercase` keeps returning `None` once it has
// been exhausted. The wrapped `CaseMappingIter` returns `None` from its `Zero`
// state without leaving that state.
// NOTE(review): presumably `ToLowercase::next` only forwards to the wrapped
// iterator — confirm against the full `Iterator` impl.
70 #[unstable(feature = "fused", issue = "35602")]
71 impl FusedIterator
for ToLowercase {}
73 /// An iterator that yields the uppercase equivalent of a `char`.
///
75 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
76 /// its documentation for more.
///
78 /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
79 /// [`char`]: ../../std/primitive.char.html
// Newtype over the private `CaseMappingIter`, which stores the mapped
// characters (up to three) that remain to be yielded.
80 #[stable(feature = "rust1", since = "1.0.0")]
81 pub struct ToUppercase(CaseMappingIter
);
83 #[stable(feature = "rust1", since = "1.0.0")]
84 impl Iterator
for ToUppercase
{
86 fn next(&mut self) -> Option
<char> {
// Marker impl: declares that `ToUppercase` keeps returning `None` once it has
// been exhausted. The wrapped `CaseMappingIter` returns `None` from its `Zero`
// state without leaving that state.
// NOTE(review): presumably `ToUppercase::next` only forwards to the wrapped
// iterator — confirm against the full `Iterator` impl.
91 #[unstable(feature = "fused", issue = "35602")]
92 impl FusedIterator
for ToUppercase {}
94 enum CaseMappingIter
{
95 Three(char, char, char),
101 impl CaseMappingIter
{
102 fn new(chars
: [char; 3]) -> CaseMappingIter
{
103 if chars
[2] == '
\0'
{
104 if chars
[1] == '
\0'
{
105 CaseMappingIter
::One(chars
[0]) // Including if chars[0] == '\0'
107 CaseMappingIter
::Two(chars
[0], chars
[1])
110 CaseMappingIter
::Three(chars
[0], chars
[1], chars
[2])
115 impl Iterator
for CaseMappingIter
{
117 fn next(&mut self) -> Option
<char> {
119 CaseMappingIter
::Three(a
, b
, c
) => {
120 *self = CaseMappingIter
::Two(b
, c
);
123 CaseMappingIter
::Two(b
, c
) => {
124 *self = CaseMappingIter
::One(c
);
127 CaseMappingIter
::One(c
) => {
128 *self = CaseMappingIter
::Zero
;
131 CaseMappingIter
::Zero
=> None
,
136 impl fmt
::Display
for CaseMappingIter
{
137 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
139 CaseMappingIter
::Three(a
, b
, c
) => {
144 CaseMappingIter
::Two(b
, c
) => {
148 CaseMappingIter
::One(c
) => {
151 CaseMappingIter
::Zero
=> Ok(()),
156 #[stable(feature = "char_struct_display", since = "1.16.0")]
157 impl fmt
::Display
for ToLowercase
{
158 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
159 fmt
::Display
::fmt(&self.0, f
)
163 #[stable(feature = "char_struct_display", since = "1.16.0")]
164 impl fmt
::Display
for ToUppercase
{
165 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
166 fmt
::Display
::fmt(&self.0, f
)
172 /// Checks if a `char` is a digit in the given radix.
174 /// A 'radix' here is sometimes also called a 'base'. A radix of two
175 /// indicates a binary number, a radix of ten, decimal, and a radix of
176 /// sixteen, hexadecimal, to give some common values. Arbitrary
177 /// radices are supported.
179 /// Compared to `is_numeric()`, this function only recognizes the characters
180 /// `0-9`, `a-z` and `A-Z`.
182 /// 'Digit' is defined to be only the following characters:
188 /// For a more comprehensive understanding of 'digit', see [`is_numeric`][is_numeric].
190 /// [is_numeric]: #method.is_numeric
194 /// Panics if given a radix larger than 36.
201 /// assert!('1'.is_digit(10));
202 /// assert!('f'.is_digit(16));
203 /// assert!(!'f'.is_digit(10));
206 /// Passing a large radix, causing a panic:
211 /// let result = thread::spawn(|| {
213 /// '1'.is_digit(37);
216 /// assert!(result.is_err());
218 #[stable(feature = "rust1", since = "1.0.0")]
220 pub fn is_digit(self, radix
: u32) -> bool
{
221 C
::is_digit(self, radix
)
224 /// Converts a `char` to a digit in the given radix.
226 /// A 'radix' here is sometimes also called a 'base'. A radix of two
227 /// indicates a binary number, a radix of ten, decimal, and a radix of
228 /// sixteen, hexadecimal, to give some common values. Arbitrary
229 /// radices are supported.
231 /// 'Digit' is defined to be only the following characters:
239 /// Returns `None` if the `char` does not refer to a digit in the given radix.
243 /// Panics if given a radix larger than 36.
250 /// assert_eq!('1'.to_digit(10), Some(1));
251 /// assert_eq!('f'.to_digit(16), Some(15));
254 /// Passing a non-digit results in failure:
257 /// assert_eq!('f'.to_digit(10), None);
258 /// assert_eq!('z'.to_digit(16), None);
261 /// Passing a large radix, causing a panic:
266 /// let result = thread::spawn(|| {
267 /// '1'.to_digit(37);
270 /// assert!(result.is_err());
272 #[stable(feature = "rust1", since = "1.0.0")]
274 pub fn to_digit(self, radix
: u32) -> Option
<u32> {
275 C
::to_digit(self, radix
)
278 /// Returns an iterator that yields the hexadecimal Unicode escape of a
279 /// character as `char`s.
281 /// This will escape characters with the Rust syntax of the form
282 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
289 /// for c in '❤'.escape_unicode() {
295 /// Using `println!` directly:
298 /// println!("{}", '❤'.escape_unicode());
301 /// Both are equivalent to:
304 /// println!("\\u{{2764}}");
307 /// Using `to_string`:
310 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
312 #[stable(feature = "rust1", since = "1.0.0")]
314 pub fn escape_unicode(self) -> EscapeUnicode
{
315 C
::escape_unicode(self)
318 /// Returns an iterator that yields the literal escape code of a character as `char`s.
321 /// This will escape the characters similar to the `Debug` implementations
322 /// of `str` or `char`.
329 /// for c in '\n'.escape_debug() {
335 /// Using `println!` directly:
338 /// println!("{}", '\n'.escape_debug());
341 /// Both are equivalent to:
347 /// Using `to_string`:
350 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
352 #[stable(feature = "char_escape_debug", since = "1.20.0")]
354 pub fn escape_debug(self) -> EscapeDebug
{
355 C
::escape_debug(self)
358 /// Returns an iterator that yields the literal escape code of a character as `char`s.
361 /// The default is chosen with a bias toward producing literals that are
362 /// legal in a variety of languages, including C++11 and similar C-family
363 /// languages. The exact rules are:
365 /// * Tab is escaped as `\t`.
366 /// * Carriage return is escaped as `\r`.
367 /// * Line feed is escaped as `\n`.
368 /// * Single quote is escaped as `\'`.
369 /// * Double quote is escaped as `\"`.
370 /// * Backslash is escaped as `\\`.
371 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
372 /// inclusive is not escaped.
373 /// * All other characters are given hexadecimal Unicode escapes; see
374 /// [`escape_unicode`][escape_unicode].
376 /// [escape_unicode]: #method.escape_unicode
383 /// for c in '"'.escape_default() {
389 /// Using `println!` directly:
392 /// println!("{}", '"'.escape_default());
396 /// Both are equivalent to:
399 /// println!("\\\"");
402 /// Using `to_string`:
405 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
407 #[stable(feature = "rust1", since = "1.0.0")]
409 pub fn escape_default(self) -> EscapeDefault
{
410 C
::escape_default(self)
413 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
415 /// That number of bytes is always between 1 and 4, inclusive.
422 /// let len = 'A'.len_utf8();
423 /// assert_eq!(len, 1);
425 /// let len = 'ß'.len_utf8();
426 /// assert_eq!(len, 2);
428 /// let len = 'ℝ'.len_utf8();
429 /// assert_eq!(len, 3);
431 /// let len = '💣'.len_utf8();
432 /// assert_eq!(len, 4);
435 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
436 /// would take if each code point was represented as a `char` vs in the `&str` itself:
440 /// let eastern = '東';
441 /// let capital = '京';
443 /// // both can be represented as three bytes
444 /// assert_eq!(3, eastern.len_utf8());
445 /// assert_eq!(3, capital.len_utf8());
447 /// // as a &str, these two are encoded in UTF-8
448 /// let tokyo = "東京";
450 /// let len = eastern.len_utf8() + capital.len_utf8();
452 /// // we can see that they take six bytes total...
453 /// assert_eq!(6, tokyo.len());
455 /// // ... just like the &str
456 /// assert_eq!(len, tokyo.len());
458 #[stable(feature = "rust1", since = "1.0.0")]
460 pub fn len_utf8(self) -> usize {
464 /// Returns the number of 16-bit code units this `char` would need if
465 /// encoded in UTF-16.
467 /// See the documentation for [`len_utf8`] for more explanation of this
468 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
470 /// [`len_utf8`]: #method.len_utf8
477 /// let n = 'ß'.len_utf16();
478 /// assert_eq!(n, 1);
480 /// let len = '💣'.len_utf16();
481 /// assert_eq!(len, 2);
483 #[stable(feature = "rust1", since = "1.0.0")]
485 pub fn len_utf16(self) -> usize {
489 /// Encodes this character as UTF-8 into the provided byte buffer,
490 /// and then returns the subslice of the buffer that contains the encoded character.
494 /// Panics if the buffer is not large enough.
495 /// A buffer of length four is large enough to encode any `char`.
499 /// In both of these examples, 'ß' takes two bytes to encode.
502 /// let mut b = [0; 2];
504 /// let result = 'ß'.encode_utf8(&mut b);
506 /// assert_eq!(result, "ß");
508 /// assert_eq!(result.len(), 2);
511 /// A buffer that's too small:
516 /// let result = thread::spawn(|| {
517 /// let mut b = [0; 1];
520 /// 'ß'.encode_utf8(&mut b);
523 /// assert!(result.is_err());
525 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
527 pub fn encode_utf8(self, dst
: &mut [u8]) -> &mut str {
528 C
::encode_utf8(self, dst
)
531 /// Encodes this character as UTF-16 into the provided `u16` buffer,
532 /// and then returns the subslice of the buffer that contains the encoded character.
536 /// Panics if the buffer is not large enough.
537 /// A buffer of length 2 is large enough to encode any `char`.
541 /// In both of these examples, '𝕊' takes two `u16`s to encode.
544 /// let mut b = [0; 2];
546 /// let result = '𝕊'.encode_utf16(&mut b);
548 /// assert_eq!(result.len(), 2);
551 /// A buffer that's too small:
556 /// let result = thread::spawn(|| {
557 /// let mut b = [0; 1];
560 /// '𝕊'.encode_utf16(&mut b);
563 /// assert!(result.is_err());
565 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
567 pub fn encode_utf16(self, dst
: &mut [u16]) -> &mut [u16] {
568 C
::encode_utf16(self, dst
)
571 /// Returns true if this `char` is an alphabetic code point, and false if not.
578 /// assert!('a'.is_alphabetic());
579 /// assert!('京'.is_alphabetic());
582 /// // love is many things, but it is not alphabetic
583 /// assert!(!c.is_alphabetic());
585 #[stable(feature = "rust1", since = "1.0.0")]
587 pub fn is_alphabetic(self) -> bool
{
589 'a'
...'z'
| 'A'
...'Z'
=> true,
590 c
if c
> '
\x7f'
=> derived_property
::Alphabetic(c
),
595 /// Returns true if this `char` satisfies the 'XID_Start' Unicode property, and false otherwise.
598 /// 'XID_Start' is a Unicode Derived Property specified in
599 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
600 /// mostly similar to `ID_Start` but modified for closure under `NFKx`.
601 #[unstable(feature = "rustc_private",
602 reason
= "mainly needed for compiler internals",
605 pub fn is_xid_start(self) -> bool
{
606 derived_property
::XID_Start(self)
609 /// Returns true if this `char` satisfies the 'XID_Continue' Unicode property, and false otherwise.
612 /// 'XID_Continue' is a Unicode Derived Property specified in
613 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
614 /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
615 #[unstable(feature = "rustc_private",
616 reason
= "mainly needed for compiler internals",
619 pub fn is_xid_continue(self) -> bool
{
620 derived_property
::XID_Continue(self)
623 /// Returns true if this `char` is lowercase, and false otherwise.
625 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
626 /// Property `Lowercase`.
633 /// assert!('a'.is_lowercase());
634 /// assert!('δ'.is_lowercase());
635 /// assert!(!'A'.is_lowercase());
636 /// assert!(!'Δ'.is_lowercase());
638 /// // The various Chinese scripts do not have case, and so:
639 /// assert!(!'中'.is_lowercase());
641 #[stable(feature = "rust1", since = "1.0.0")]
643 pub fn is_lowercase(self) -> bool
{
646 c
if c
> '
\x7f'
=> derived_property
::Lowercase(c
),
651 /// Returns true if this `char` is uppercase, and false otherwise.
653 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
654 /// Property `Uppercase`.
661 /// assert!(!'a'.is_uppercase());
662 /// assert!(!'δ'.is_uppercase());
663 /// assert!('A'.is_uppercase());
664 /// assert!('Δ'.is_uppercase());
666 /// // The various Chinese scripts do not have case, and so:
667 /// assert!(!'中'.is_uppercase());
669 #[stable(feature = "rust1", since = "1.0.0")]
671 pub fn is_uppercase(self) -> bool
{
674 c
if c
> '
\x7f'
=> derived_property
::Uppercase(c
),
679 /// Returns true if this `char` is whitespace, and false otherwise.
681 /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
682 /// Property `White_Space`.
689 /// assert!(' '.is_whitespace());
691 /// // a non-breaking space
692 /// assert!('\u{A0}'.is_whitespace());
694 /// assert!(!'越'.is_whitespace());
696 #[stable(feature = "rust1", since = "1.0.0")]
698 pub fn is_whitespace(self) -> bool
{
700 ' '
| '
\x09'
...'
\x0d'
=> true,
701 c
if c
> '
\x7f'
=> property
::White_Space(c
),
706 /// Returns true if this `char` is alphanumeric, and false otherwise.
708 /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
709 /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
716 /// assert!('٣'.is_alphanumeric());
717 /// assert!('7'.is_alphanumeric());
718 /// assert!('৬'.is_alphanumeric());
719 /// assert!('K'.is_alphanumeric());
720 /// assert!('و'.is_alphanumeric());
721 /// assert!('藏'.is_alphanumeric());
722 /// assert!(!'¾'.is_alphanumeric());
723 /// assert!(!'①'.is_alphanumeric());
725 #[stable(feature = "rust1", since = "1.0.0")]
727 pub fn is_alphanumeric(self) -> bool
{
728 self.is_alphabetic() || self.is_numeric()
731 /// Returns true if this `char` is a control code point, and false otherwise.
733 /// 'Control code point' is defined in terms of the Unicode General Category `Cc`.
741 /// // U+009C, STRING TERMINATOR
742 /// assert!('\u{009c}'.is_control());
743 /// assert!(!'q'.is_control());
745 #[stable(feature = "rust1", since = "1.0.0")]
747 pub fn is_control(self) -> bool
{
748 general_category
::Cc(self)
751 /// Returns true if this `char` is numeric, and false otherwise.
753 /// 'Numeric'-ness is defined in terms of the Unicode General Categories
754 /// 'Nd', 'Nl', 'No'.
761 /// assert!('٣'.is_numeric());
762 /// assert!('7'.is_numeric());
763 /// assert!('৬'.is_numeric());
764 /// assert!(!'K'.is_numeric());
765 /// assert!(!'و'.is_numeric());
766 /// assert!(!'藏'.is_numeric());
767 /// assert!(!'¾'.is_numeric());
768 /// assert!(!'①'.is_numeric());
770 #[stable(feature = "rust1", since = "1.0.0")]
772 pub fn is_numeric(self) -> bool
{
775 c
if c
> '
\x7f'
=> general_category
::N(c
),
780 /// Returns an iterator that yields the lowercase equivalent of a `char`
781 /// as one or more `char`s.
783 /// If a character does not have a lowercase equivalent, the same character
784 /// will be returned back by the iterator.
786 /// This performs complex unconditional mappings with no tailoring: it maps
787 /// one Unicode character to its lowercase equivalent according to the
788 /// [Unicode database] and the additional complex mappings
789 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
790 /// language) are not considered here.
792 /// For a full reference, see [here][reference].
794 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
796 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
798 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
805 /// for c in 'İ'.to_lowercase() {
811 /// Using `println!` directly:
814 /// println!("{}", 'İ'.to_lowercase());
817 /// Both are equivalent to:
820 /// println!("i\u{307}");
823 /// Using `to_string`:
826 /// assert_eq!('C'.to_lowercase().to_string(), "c");
828 /// // Sometimes the result is more than one character:
829 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
831 /// // Characters that do not have both uppercase and lowercase
832 /// // convert into themselves.
833 /// assert_eq!('山'.to_lowercase().to_string(), "山");
835 #[stable(feature = "rust1", since = "1.0.0")]
837 pub fn to_lowercase(self) -> ToLowercase
{
838 ToLowercase(CaseMappingIter
::new(conversions
::to_lower(self)))
841 /// Returns an iterator that yields the uppercase equivalent of a `char`
842 /// as one or more `char`s.
844 /// If a character does not have an uppercase equivalent, the same character
845 /// will be returned back by the iterator.
847 /// This performs complex unconditional mappings with no tailoring: it maps
848 /// one Unicode character to its uppercase equivalent according to the
849 /// [Unicode database] and the additional complex mappings
850 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
851 /// language) are not considered here.
853 /// For a full reference, see [here][reference].
855 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
857 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
859 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
866 /// for c in 'ß'.to_uppercase() {
872 /// Using `println!` directly:
875 /// println!("{}", 'ß'.to_uppercase());
878 /// Both are equivalent to:
884 /// Using `to_string`:
887 /// assert_eq!('c'.to_uppercase().to_string(), "C");
889 /// // Sometimes the result is more than one character:
890 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
892 /// // Characters that do not have both uppercase and lowercase
893 /// // convert into themselves.
894 /// assert_eq!('山'.to_uppercase().to_string(), "山");
899 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
901 /// * 'Dotless': I / ı, sometimes written ï
902 /// * 'Dotted': İ / i
904 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
907 /// let upper_i = 'i'.to_uppercase().to_string();
910 /// The value of `upper_i` here relies on the language of the text: if we're
911 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
912 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
915 /// let upper_i = 'i'.to_uppercase().to_string();
917 /// assert_eq!(upper_i, "I");
920 /// holds across languages.
921 #[stable(feature = "rust1", since = "1.0.0")]
923 pub fn to_uppercase(self) -> ToUppercase
{
924 ToUppercase(CaseMappingIter
::new(conversions
::to_upper(self)))
927 /// Checks if the value is within the ASCII range.
933 /// let non_ascii = '❤';
935 /// assert!(ascii.is_ascii());
936 /// assert!(!non_ascii.is_ascii());
938 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
940 pub fn is_ascii(&self) -> bool
{
944 /// Makes a copy of the value in its ASCII upper case equivalent.
946 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
947 /// but non-ASCII letters are unchanged.
949 /// To uppercase the value in-place, use [`make_ascii_uppercase`].
951 /// To uppercase ASCII characters in addition to non-ASCII characters, use
952 /// [`to_uppercase`].
958 /// let non_ascii = '❤';
960 /// assert_eq!('A', ascii.to_ascii_uppercase());
961 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
964 /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
965 /// [`to_uppercase`]: #method.to_uppercase
966 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
968 pub fn to_ascii_uppercase(&self) -> char {
970 (*self as u8).to_ascii_uppercase() as char
976 /// Makes a copy of the value in its ASCII lower case equivalent.
978 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
979 /// but non-ASCII letters are unchanged.
981 /// To lowercase the value in-place, use [`make_ascii_lowercase`].
983 /// To lowercase ASCII characters in addition to non-ASCII characters, use
984 /// [`to_lowercase`].
990 /// let non_ascii = '❤';
992 /// assert_eq!('a', ascii.to_ascii_lowercase());
993 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
996 /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
997 /// [`to_lowercase`]: #method.to_lowercase
998 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1000 pub fn to_ascii_lowercase(&self) -> char {
1001 if self.is_ascii() {
1002 (*self as u8).to_ascii_lowercase() as char
1008 /// Checks that two values are an ASCII case-insensitive match.
1010 /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
1015 /// let upper_a = 'A';
1016 /// let lower_a = 'a';
1017 /// let lower_z = 'z';
1019 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1020 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1021 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1023 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1025 pub fn eq_ignore_ascii_case(&self, other
: &char) -> bool
{
1026 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
1029 /// Converts this type to its ASCII upper case equivalent in-place.
1031 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1032 /// but non-ASCII letters are unchanged.
1034 /// To return a new uppercased value without modifying the existing one, use
1035 /// [`to_ascii_uppercase`].
1040 /// let mut ascii = 'a';
1042 /// ascii.make_ascii_uppercase();
1044 /// assert_eq!('A', ascii);
1047 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
1048 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1050 pub fn make_ascii_uppercase(&mut self) {
1051 *self = self.to_ascii_uppercase();
1054 /// Converts this type to its ASCII lower case equivalent in-place.
1056 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1057 /// but non-ASCII letters are unchanged.
1059 /// To return a new lowercased value without modifying the existing one, use
1060 /// [`to_ascii_lowercase`].
1065 /// let mut ascii = 'A';
1067 /// ascii.make_ascii_lowercase();
1069 /// assert_eq!('a', ascii);
1072 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
1073 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1075 pub fn make_ascii_lowercase(&mut self) {
1076 *self = self.to_ascii_lowercase();
1079 /// Checks if the value is an ASCII alphabetic character:
1081 /// - U+0041 'A' ... U+005A 'Z', or
1082 /// - U+0061 'a' ... U+007A 'z'.
1087 /// #![feature(ascii_ctype)]
1089 /// let uppercase_a = 'A';
1090 /// let uppercase_g = 'G';
1094 /// let percent = '%';
1095 /// let space = ' ';
1097 /// let esc: char = 0x1b_u8.into();
1099 /// assert!(uppercase_a.is_ascii_alphabetic());
1100 /// assert!(uppercase_g.is_ascii_alphabetic());
1101 /// assert!(a.is_ascii_alphabetic());
1102 /// assert!(g.is_ascii_alphabetic());
1103 /// assert!(!zero.is_ascii_alphabetic());
1104 /// assert!(!percent.is_ascii_alphabetic());
1105 /// assert!(!space.is_ascii_alphabetic());
1106 /// assert!(!lf.is_ascii_alphabetic());
1107 /// assert!(!esc.is_ascii_alphabetic());
1109 #[unstable(feature = "ascii_ctype", issue = "39658")]
1111 pub fn is_ascii_alphabetic(&self) -> bool
{
1112 self.is_ascii() && (*self as u8).is_ascii_alphabetic()
1115 /// Checks if the value is an ASCII uppercase character:
1116 /// U+0041 'A' ... U+005A 'Z'.
1121 /// #![feature(ascii_ctype)]
1123 /// let uppercase_a = 'A';
1124 /// let uppercase_g = 'G';
1128 /// let percent = '%';
1129 /// let space = ' ';
1131 /// let esc: char = 0x1b_u8.into();
1133 /// assert!(uppercase_a.is_ascii_uppercase());
1134 /// assert!(uppercase_g.is_ascii_uppercase());
1135 /// assert!(!a.is_ascii_uppercase());
1136 /// assert!(!g.is_ascii_uppercase());
1137 /// assert!(!zero.is_ascii_uppercase());
1138 /// assert!(!percent.is_ascii_uppercase());
1139 /// assert!(!space.is_ascii_uppercase());
1140 /// assert!(!lf.is_ascii_uppercase());
1141 /// assert!(!esc.is_ascii_uppercase());
1143 #[unstable(feature = "ascii_ctype", issue = "39658")]
1145 pub fn is_ascii_uppercase(&self) -> bool
{
1146 self.is_ascii() && (*self as u8).is_ascii_uppercase()
1149 /// Checks if the value is an ASCII lowercase character:
1150 /// U+0061 'a' ... U+007A 'z'.
1155 /// #![feature(ascii_ctype)]
1157 /// let uppercase_a = 'A';
1158 /// let uppercase_g = 'G';
1162 /// let percent = '%';
1163 /// let space = ' ';
1165 /// let esc: char = 0x1b_u8.into();
1167 /// assert!(!uppercase_a.is_ascii_lowercase());
1168 /// assert!(!uppercase_g.is_ascii_lowercase());
1169 /// assert!(a.is_ascii_lowercase());
1170 /// assert!(g.is_ascii_lowercase());
1171 /// assert!(!zero.is_ascii_lowercase());
1172 /// assert!(!percent.is_ascii_lowercase());
1173 /// assert!(!space.is_ascii_lowercase());
1174 /// assert!(!lf.is_ascii_lowercase());
1175 /// assert!(!esc.is_ascii_lowercase());
1177 #[unstable(feature = "ascii_ctype", issue = "39658")]
1179 pub fn is_ascii_lowercase(&self) -> bool
{
1180 self.is_ascii() && (*self as u8).is_ascii_lowercase()
1183 /// Checks if the value is an ASCII alphanumeric character:
1185 /// - U+0041 'A' ... U+005A 'Z', or
1186 /// - U+0061 'a' ... U+007A 'z', or
1187 /// - U+0030 '0' ... U+0039 '9'.
1192 /// #![feature(ascii_ctype)]
1194 /// let uppercase_a = 'A';
1195 /// let uppercase_g = 'G';
1199 /// let percent = '%';
1200 /// let space = ' ';
1202 /// let esc: char = 0x1b_u8.into();
1204 /// assert!(uppercase_a.is_ascii_alphanumeric());
1205 /// assert!(uppercase_g.is_ascii_alphanumeric());
1206 /// assert!(a.is_ascii_alphanumeric());
1207 /// assert!(g.is_ascii_alphanumeric());
1208 /// assert!(zero.is_ascii_alphanumeric());
1209 /// assert!(!percent.is_ascii_alphanumeric());
1210 /// assert!(!space.is_ascii_alphanumeric());
1211 /// assert!(!lf.is_ascii_alphanumeric());
1212 /// assert!(!esc.is_ascii_alphanumeric());
1214 #[unstable(feature = "ascii_ctype", issue = "39658")]
1216 pub fn is_ascii_alphanumeric(&self) -> bool
{
1217 self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
1220 /// Checks if the value is an ASCII decimal digit:
1221 /// U+0030 '0' ... U+0039 '9'.
1226 /// #![feature(ascii_ctype)]
1228 /// let uppercase_a = 'A';
1229 /// let uppercase_g = 'G';
1233 /// let percent = '%';
1234 /// let space = ' ';
1236 /// let esc: char = 0x1b_u8.into();
1238 /// assert!(!uppercase_a.is_ascii_digit());
1239 /// assert!(!uppercase_g.is_ascii_digit());
1240 /// assert!(!a.is_ascii_digit());
1241 /// assert!(!g.is_ascii_digit());
1242 /// assert!(zero.is_ascii_digit());
1243 /// assert!(!percent.is_ascii_digit());
1244 /// assert!(!space.is_ascii_digit());
1245 /// assert!(!lf.is_ascii_digit());
1246 /// assert!(!esc.is_ascii_digit());
1248 #[unstable(feature = "ascii_ctype", issue = "39658")]
1250 pub fn is_ascii_digit(&self) -> bool
{
1251 self.is_ascii() && (*self as u8).is_ascii_digit()
1254 /// Checks if the value is an ASCII hexadecimal digit:
1256 /// - U+0030 '0' ... U+0039 '9', or
1257 /// - U+0041 'A' ... U+0046 'F', or
1258 /// - U+0061 'a' ... U+0066 'f'.
1263 /// #![feature(ascii_ctype)]
1265 /// let uppercase_a = 'A';
1266 /// let uppercase_g = 'G';
1270 /// let percent = '%';
1271 /// let space = ' ';
1273 /// let esc: char = 0x1b_u8.into();
1275 /// assert!(uppercase_a.is_ascii_hexdigit());
1276 /// assert!(!uppercase_g.is_ascii_hexdigit());
1277 /// assert!(a.is_ascii_hexdigit());
1278 /// assert!(!g.is_ascii_hexdigit());
1279 /// assert!(zero.is_ascii_hexdigit());
1280 /// assert!(!percent.is_ascii_hexdigit());
1281 /// assert!(!space.is_ascii_hexdigit());
1282 /// assert!(!lf.is_ascii_hexdigit());
1283 /// assert!(!esc.is_ascii_hexdigit());
1285 #[unstable(feature = "ascii_ctype", issue = "39658")]
1287 pub fn is_ascii_hexdigit(&self) -> bool
{
1288 self.is_ascii() && (*self as u8).is_ascii_hexdigit()
1291 /// Checks if the value is an ASCII punctuation character:
1293 /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
1294 /// - U+003A ... U+0040 `: ; < = > ? @`, or
1295 /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
1296 /// - U+007B ... U+007E `{ | } ~`
1301 /// #![feature(ascii_ctype)]
1303 /// let uppercase_a = 'A';
1304 /// let uppercase_g = 'G';
1308 /// let percent = '%';
1309 /// let space = ' ';
1311 /// let esc: char = 0x1b_u8.into();
1313 /// assert!(!uppercase_a.is_ascii_punctuation());
1314 /// assert!(!uppercase_g.is_ascii_punctuation());
1315 /// assert!(!a.is_ascii_punctuation());
1316 /// assert!(!g.is_ascii_punctuation());
1317 /// assert!(!zero.is_ascii_punctuation());
1318 /// assert!(percent.is_ascii_punctuation());
1319 /// assert!(!space.is_ascii_punctuation());
1320 /// assert!(!lf.is_ascii_punctuation());
1321 /// assert!(!esc.is_ascii_punctuation());
1323 #[unstable(feature = "ascii_ctype", issue = "39658")]
1325 pub fn is_ascii_punctuation(&self) -> bool
{
1326 self.is_ascii() && (*self as u8).is_ascii_punctuation()
1329 /// Checks if the value is an ASCII graphic character:
1330 /// U+0021 '!' ... U+007E '~'.
1335 /// #![feature(ascii_ctype)]
1337 /// let uppercase_a = 'A';
1338 /// let uppercase_g = 'G';
1342 /// let percent = '%';
1343 /// let space = ' ';
1345 /// let esc: char = 0x1b_u8.into();
1347 /// assert!(uppercase_a.is_ascii_graphic());
1348 /// assert!(uppercase_g.is_ascii_graphic());
1349 /// assert!(a.is_ascii_graphic());
1350 /// assert!(g.is_ascii_graphic());
1351 /// assert!(zero.is_ascii_graphic());
1352 /// assert!(percent.is_ascii_graphic());
1353 /// assert!(!space.is_ascii_graphic());
1354 /// assert!(!lf.is_ascii_graphic());
1355 /// assert!(!esc.is_ascii_graphic());
1357 #[unstable(feature = "ascii_ctype", issue = "39658")]
1359 pub fn is_ascii_graphic(&self) -> bool
{
1360 self.is_ascii() && (*self as u8).is_ascii_graphic()
1363 /// Checks if the value is an ASCII whitespace character:
1364 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1365 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
///
/// Any non-ASCII `char` yields `false`.
1367 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1368 /// whitespace][infra-aw]. There are several other definitions in
1369 /// wide use. For instance, [the POSIX locale][pct] includes
1370 /// U+000B VERTICAL TAB as well as all the above characters,
1371 /// but—from the very same specification—[the default rule for
1372 /// "field splitting" in the Bourne shell][bfs] considers *only*
1373 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1375 /// If you are writing a program that will process an existing
1376 /// file format, check what that format's definition of whitespace is
1377 /// before using this function.
1379 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1380 /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1381 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1386 /// #![feature(ascii_ctype)]
1388 /// let uppercase_a = 'A';
1389 /// let uppercase_g = 'G';
1393 /// let percent = '%';
1394 /// let space = ' ';
1396 /// let esc: char = 0x1b_u8.into();
1398 /// assert!(!uppercase_a.is_ascii_whitespace());
1399 /// assert!(!uppercase_g.is_ascii_whitespace());
1400 /// assert!(!a.is_ascii_whitespace());
1401 /// assert!(!g.is_ascii_whitespace());
1402 /// assert!(!zero.is_ascii_whitespace());
1403 /// assert!(!percent.is_ascii_whitespace());
1404 /// assert!(space.is_ascii_whitespace());
1405 /// assert!(lf.is_ascii_whitespace());
1406 /// assert!(!esc.is_ascii_whitespace());
1408 #[unstable(feature = "ascii_ctype", issue = "39658")]
1410 pub fn is_ascii_whitespace(&self) -> bool
{
// `is_ascii()` must be checked first: the `as u8` cast truncates, so
// without the guard a non-ASCII `char` could alias an ASCII byte.
1411 self.is_ascii() && (*self as u8).is_ascii_whitespace()
1414 /// Checks if the value is an ASCII control character:
1415 /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE.
1416 /// Note that most ASCII whitespace characters are control
1417 /// characters, but SPACE is not.
///
/// Any non-ASCII `char` yields `false`.
1422 /// #![feature(ascii_ctype)]
1424 /// let uppercase_a = 'A';
1425 /// let uppercase_g = 'G';
1429 /// let percent = '%';
1430 /// let space = ' ';
1432 /// let esc: char = 0x1b_u8.into();
1434 /// assert!(!uppercase_a.is_ascii_control());
1435 /// assert!(!uppercase_g.is_ascii_control());
1436 /// assert!(!a.is_ascii_control());
1437 /// assert!(!g.is_ascii_control());
1438 /// assert!(!zero.is_ascii_control());
1439 /// assert!(!percent.is_ascii_control());
1440 /// assert!(!space.is_ascii_control());
1441 /// assert!(lf.is_ascii_control());
1442 /// assert!(esc.is_ascii_control());
1444 #[unstable(feature = "ascii_ctype", issue = "39658")]
1446 pub fn is_ascii_control(&self) -> bool
{
// `is_ascii()` must be checked first: the `as u8` cast truncates, so
// without the guard a non-ASCII `char` could alias an ASCII byte.
1447 self.is_ascii() && (*self as u8).is_ascii_control()
1451 /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
///
/// Values of this type are returned by the `decode_utf16` function; each
/// item produced is a `Result<char, DecodeUtf16Error>`.
1452 #[stable(feature = "decode_utf16", since = "1.9.0")]
1454 pub struct DecodeUtf16
<I
>
1455 where I
: Iterator
<Item
= u16>
1461 /// An error returned by the `DecodeUtf16` iterator when it encounters an unpaired surrogate.
1462 #[stable(feature = "decode_utf16", since = "1.9.0")]
1463 #[derive(Debug, Clone, Eq, PartialEq)]
1464 pub struct DecodeUtf16Error
{
1468 /// Create an iterator over the UTF-16 encoded code points in `iter`,
1469 /// returning unpaired surrogates as `Err`s.
///
/// `iter` may be any `IntoIterator` whose items are `u16` code units.
1476 /// use std::char::decode_utf16;
1479 /// // 𝄞mus<invalid>ic<invalid>
1480 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
1481 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
1484 /// assert_eq!(decode_utf16(v.iter().cloned())
1485 /// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
1486 /// .collect::<Vec<_>>(),
1488 /// Ok('m'), Ok('u'), Ok('s'),
1490 /// Ok('i'), Ok('c'),
1495 /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
1498 /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
1501 /// // 𝄞mus<invalid>ic<invalid>
1502 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
1503 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
1506 /// assert_eq!(decode_utf16(v.iter().cloned())
1507 /// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
1508 /// .collect::<String>(),
1512 #[stable(feature = "decode_utf16", since = "1.9.0")]
1514 pub fn decode_utf16
<I
: IntoIterator
<Item
= u16>>(iter
: I
) -> DecodeUtf16
<I
::IntoIter
> {
// Convert the argument with `into_iter()` so the decoder holds the
// iterator itself (`I::IntoIter`) rather than the original collection.
1516 iter
: iter
.into_iter(),
1521 #[stable(feature = "decode_utf16", since = "1.9.0")]
1522 impl<I
: Iterator
<Item
= u16>> Iterator
for DecodeUtf16
<I
> {
1523 type Item
= Result
<char, DecodeUtf16Error
>;
1525 fn next(&mut self) -> Option
<Result
<char, DecodeUtf16Error
>> {
// `buf` holds a code unit pushed back by an earlier call (see the rewind
// further down); it is taken here before the inner iterator is consulted.
1526 let u
= match self.buf
.take() {
1529 match self.iter
.next() {
1531 None
=> return None
,
1536 if u
< 0xD800 || 0xDFFF < u
{
// SAFETY: `u` is outside the surrogate range 0xD800..=0xDFFF here, and
// every other 16-bit value is a valid Unicode scalar value.
1538 Some(Ok(unsafe { from_u32_unchecked(u as u32) }
))
1539 } else if u
>= 0xDC00 {
1540 // a trailing surrogate
1541 Some(Err(DecodeUtf16Error { code: u }
))
1543 let u2
= match self.iter
.next() {
1546 None
=> return Some(Err(DecodeUtf16Error { code: u }
)),
1548 if u2
< 0xDC00 || u2
> 0xDFFF {
1549 // not a trailing surrogate so we're not a valid
1550 // surrogate pair, so rewind to redecode u2 next time.
1551 self.buf
= Some(u2
);
1552 return Some(Err(DecodeUtf16Error { code: u }
));
1555 // all ok, so let's decode it.
// `u` contributes the high ten bits and `u2` the low ten bits; adding
// 0x1_0000 moves the result past the Basic Multilingual Plane.
1556 let c
= (((u
- 0xD800) as u32) << 10 | (u2
- 0xDC00) as u32) + 0x1_0000;
// SAFETY: both halves were range-checked above, so `c` lies in
// 0x1_0000..=0x10_FFFF, which contains only valid scalar values.
1557 Some(Ok(unsafe { from_u32_unchecked(c) }
))
1562 fn size_hint(&self) -> (usize, Option
<usize>) {
1563 let (low
, high
) = self.iter
.size_hint();
1564 // we could be entirely valid surrogates (2 elements per
1565 // char), or entirely non-surrogates (1 element per char)
1570 impl DecodeUtf16Error
{
1571 /// Returns the unpaired surrogate which caused this error.
///
/// The surrogate is reported as a `u16` code unit.
1572 #[stable(feature = "decode_utf16", since = "1.9.0")]
1573 pub fn unpaired_surrogate(&self) -> u16 {
1578 #[stable(feature = "decode_utf16", since = "1.9.0")]
1579 impl fmt
::Display
for DecodeUtf16Error
{
1580 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
// Writes "unpaired surrogate found: " followed by the offending code
// unit in lowercase hexadecimal (`{:x}`), with no `0x` prefix.
1581 write
!(f
, "unpaired surrogate found: {:x}", self.code
)
1585 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
1588 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
1589 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
///
/// It can likewise stand in for the `Err` items produced by `decode_utf16`
/// when a lossy UTF-16 decoding is wanted.
1590 #[stable(feature = "decode_utf16", since = "1.9.0")]
1591 pub const REPLACEMENT_CHARACTER
: char = '
\u{FFFD}'
;