1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 //! The `char` type represents a single character. More specifically, since
14 //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
15 //! scalar value]', which is similar to, but not the same as, a '[Unicode code
18 //! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
19 //! [Unicode code point]: http://www.unicode.org/glossary/#code_point
21 //! This module exists for technical reasons, the primary documentation for
22 //! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
25 //! This module is the home of the iterator implementations for the iterators
26 //! implemented on `char`, as well as some useful constants and conversion
27 //! functions that convert various types to `char`.
29 #![stable(feature = "rust1", since = "1.0.0")]
31 use core
::char::CharExt
as C
;
32 use core
::iter
::FusedIterator
;
33 use core
::fmt
::{self, Write}
;
34 use tables
::{conversions, derived_property, general_category, property}
;
37 #[stable(feature = "rust1", since = "1.0.0")]
38 pub use core
::char::{MAX, from_digit, from_u32, from_u32_unchecked}
;
39 #[stable(feature = "rust1", since = "1.0.0")]
40 pub use core
::char::{EscapeDebug, EscapeDefault, EscapeUnicode}
;
41 #[stable(feature = "char_from_str", since = "1.20.0")]
42 pub use core
::char::ParseCharError
;
44 // unstable re-exports
45 #[unstable(feature = "try_from", issue = "33417")]
46 pub use core
::char::CharTryFromError
;
47 #[unstable(feature = "decode_utf8", issue = "33906")]
48 pub use core
::char::{DecodeUtf8, decode_utf8}
;
49 #[unstable(feature = "unicode", issue = "27783")]
50 pub use tables
::{UNICODE_VERSION}
;
51 #[unstable(feature = "unicode", issue = "27783")]
52 pub use version
::UnicodeVersion
;
54 /// An iterator that yields the lowercase equivalent of a `char`.
56 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
57 /// its documentation for more.
59 /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
60 /// [`char`]: ../../std/primitive.char.html
61 #[stable(feature = "rust1", since = "1.0.0")]
63 pub struct ToLowercase(CaseMappingIter
);
65 #[stable(feature = "rust1", since = "1.0.0")]
66 impl Iterator
for ToLowercase
{
68 fn next(&mut self) -> Option
<char> {
73 #[unstable(feature = "fused", issue = "35602")]
74 impl FusedIterator
for ToLowercase {}
76 /// An iterator that yields the uppercase equivalent of a `char`.
78 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
79 /// its documentation for more.
81 /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
82 /// [`char`]: ../../std/primitive.char.html
83 #[stable(feature = "rust1", since = "1.0.0")]
85 pub struct ToUppercase(CaseMappingIter
);
87 #[stable(feature = "rust1", since = "1.0.0")]
88 impl Iterator
for ToUppercase
{
90 fn next(&mut self) -> Option
<char> {
95 #[unstable(feature = "fused", issue = "35602")]
96 impl FusedIterator
for ToUppercase {}
99 enum CaseMappingIter
{
100 Three(char, char, char),
106 impl CaseMappingIter
{
107 fn new(chars
: [char; 3]) -> CaseMappingIter
{
108 if chars
[2] == '
\0'
{
109 if chars
[1] == '
\0'
{
110 CaseMappingIter
::One(chars
[0]) // Including if chars[0] == '\0'
112 CaseMappingIter
::Two(chars
[0], chars
[1])
115 CaseMappingIter
::Three(chars
[0], chars
[1], chars
[2])
120 impl Iterator
for CaseMappingIter
{
122 fn next(&mut self) -> Option
<char> {
124 CaseMappingIter
::Three(a
, b
, c
) => {
125 *self = CaseMappingIter
::Two(b
, c
);
128 CaseMappingIter
::Two(b
, c
) => {
129 *self = CaseMappingIter
::One(c
);
132 CaseMappingIter
::One(c
) => {
133 *self = CaseMappingIter
::Zero
;
136 CaseMappingIter
::Zero
=> None
,
141 impl fmt
::Display
for CaseMappingIter
{
142 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
144 CaseMappingIter
::Three(a
, b
, c
) => {
149 CaseMappingIter
::Two(b
, c
) => {
153 CaseMappingIter
::One(c
) => {
156 CaseMappingIter
::Zero
=> Ok(()),
161 #[stable(feature = "char_struct_display", since = "1.16.0")]
162 impl fmt
::Display
for ToLowercase
{
163 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
164 fmt
::Display
::fmt(&self.0, f
)
168 #[stable(feature = "char_struct_display", since = "1.16.0")]
169 impl fmt
::Display
for ToUppercase
{
170 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
171 fmt
::Display
::fmt(&self.0, f
)
177 /// Checks if a `char` is a digit in the given radix.
179 /// A 'radix' here is sometimes also called a 'base'. A radix of two
180 /// indicates a binary number, a radix of ten, decimal, and a radix of
181 /// sixteen, hexadecimal, to give some common values. Arbitrary
182 /// radices are supported.
184 /// Compared to `is_numeric()`, this function only recognizes the characters
185 /// `0-9`, `a-z` and `A-Z`.
187 /// 'Digit' is defined to be only the following characters:
193 /// For a more comprehensive understanding of 'digit', see [`is_numeric`][is_numeric].
195 /// [is_numeric]: #method.is_numeric
199 /// Panics if given a radix larger than 36.
206 /// assert!('1'.is_digit(10));
207 /// assert!('f'.is_digit(16));
208 /// assert!(!'f'.is_digit(10));
211 /// Passing a large radix, causing a panic:
216 /// let result = thread::spawn(|| {
218 /// '1'.is_digit(37);
221 /// assert!(result.is_err());
223 #[stable(feature = "rust1", since = "1.0.0")]
225 pub fn is_digit(self, radix
: u32) -> bool
{
226 C
::is_digit(self, radix
)
229 /// Converts a `char` to a digit in the given radix.
231 /// A 'radix' here is sometimes also called a 'base'. A radix of two
232 /// indicates a binary number, a radix of ten, decimal, and a radix of
233 /// sixteen, hexadecimal, to give some common values. Arbitrary
234 /// radices are supported.
236 /// 'Digit' is defined to be only the following characters:
244 /// Returns `None` if the `char` does not refer to a digit in the given radix.
248 /// Panics if given a radix larger than 36.
255 /// assert_eq!('1'.to_digit(10), Some(1));
256 /// assert_eq!('f'.to_digit(16), Some(15));
259 /// Passing a non-digit results in failure:
262 /// assert_eq!('f'.to_digit(10), None);
263 /// assert_eq!('z'.to_digit(16), None);
266 /// Passing a large radix, causing a panic:
271 /// let result = thread::spawn(|| {
272 /// '1'.to_digit(37);
275 /// assert!(result.is_err());
277 #[stable(feature = "rust1", since = "1.0.0")]
279 pub fn to_digit(self, radix
: u32) -> Option
<u32> {
280 C
::to_digit(self, radix
)
283 /// Returns an iterator that yields the hexadecimal Unicode escape of a
284 /// character as `char`s.
286 /// This will escape characters with the Rust syntax of the form
287 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
294 /// for c in '❤'.escape_unicode() {
300 /// Using `println!` directly:
303 /// println!("{}", '❤'.escape_unicode());
306 /// Both are equivalent to:
309 /// println!("\\u{{2764}}");
312 /// Using `to_string`:
315 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
317 #[stable(feature = "rust1", since = "1.0.0")]
319 pub fn escape_unicode(self) -> EscapeUnicode
{
320 C
::escape_unicode(self)
323 /// Returns an iterator that yields the literal escape code of a character
326 /// This will escape the characters similar to the `Debug` implementations
327 /// of `str` or `char`.
334 /// for c in '\n'.escape_debug() {
340 /// Using `println!` directly:
343 /// println!("{}", '\n'.escape_debug());
346 /// Both are equivalent to:
352 /// Using `to_string`:
355 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
357 #[stable(feature = "char_escape_debug", since = "1.20.0")]
359 pub fn escape_debug(self) -> EscapeDebug
{
360 C
::escape_debug(self)
363 /// Returns an iterator that yields the literal escape code of a character
366 /// The default is chosen with a bias toward producing literals that are
367 /// legal in a variety of languages, including C++11 and similar C-family
368 /// languages. The exact rules are:
370 /// * Tab is escaped as `\t`.
371 /// * Carriage return is escaped as `\r`.
372 /// * Line feed is escaped as `\n`.
373 /// * Single quote is escaped as `\'`.
374 /// * Double quote is escaped as `\"`.
375 /// * Backslash is escaped as `\\`.
376 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
377 /// inclusive is not escaped.
378 /// * All other characters are given hexadecimal Unicode escapes; see
379 /// [`escape_unicode`][escape_unicode].
381 /// [escape_unicode]: #method.escape_unicode
388 /// for c in '"'.escape_default() {
394 /// Using `println!` directly:
397 /// println!("{}", '"'.escape_default());
401 /// Both are equivalent to:
404 /// println!("\\\"");
407 /// Using `to_string`:
410 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
412 #[stable(feature = "rust1", since = "1.0.0")]
414 pub fn escape_default(self) -> EscapeDefault
{
415 C
::escape_default(self)
418 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
420 /// That number of bytes is always between 1 and 4, inclusive.
427 /// let len = 'A'.len_utf8();
428 /// assert_eq!(len, 1);
430 /// let len = 'ß'.len_utf8();
431 /// assert_eq!(len, 2);
433 /// let len = 'ℝ'.len_utf8();
434 /// assert_eq!(len, 3);
436 /// let len = '💣'.len_utf8();
437 /// assert_eq!(len, 4);
440 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
441 /// would take if each code point was represented as a `char` vs in the `&str` itself:
445 /// let eastern = '東';
446 /// let capital = '京';
448 /// // both can be represented as three bytes
449 /// assert_eq!(3, eastern.len_utf8());
450 /// assert_eq!(3, capital.len_utf8());
452 /// // as a &str, these two are encoded in UTF-8
453 /// let tokyo = "東京";
455 /// let len = eastern.len_utf8() + capital.len_utf8();
457 /// // we can see that they take six bytes total...
458 /// assert_eq!(6, tokyo.len());
460 /// // ... just like the &str
461 /// assert_eq!(len, tokyo.len());
463 #[stable(feature = "rust1", since = "1.0.0")]
465 pub fn len_utf8(self) -> usize {
469 /// Returns the number of 16-bit code units this `char` would need if
470 /// encoded in UTF-16.
472 /// See the documentation for [`len_utf8`] for more explanation of this
473 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
475 /// [`len_utf8`]: #method.len_utf8
482 /// let n = 'ß'.len_utf16();
483 /// assert_eq!(n, 1);
485 /// let len = '💣'.len_utf16();
486 /// assert_eq!(len, 2);
488 #[stable(feature = "rust1", since = "1.0.0")]
490 pub fn len_utf16(self) -> usize {
494 /// Encodes this character as UTF-8 into the provided byte buffer,
495 /// and then returns the subslice of the buffer that contains the encoded character.
499 /// Panics if the buffer is not large enough.
500 /// A buffer of length four is large enough to encode any `char`.
504 /// In both of these examples, 'ß' takes two bytes to encode.
507 /// let mut b = [0; 2];
509 /// let result = 'ß'.encode_utf8(&mut b);
511 /// assert_eq!(result, "ß");
513 /// assert_eq!(result.len(), 2);
516 /// A buffer that's too small:
521 /// let result = thread::spawn(|| {
522 /// let mut b = [0; 1];
525 /// 'ß'.encode_utf8(&mut b);
528 /// assert!(result.is_err());
530 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
532 pub fn encode_utf8(self, dst
: &mut [u8]) -> &mut str {
533 C
::encode_utf8(self, dst
)
536 /// Encodes this character as UTF-16 into the provided `u16` buffer,
537 /// and then returns the subslice of the buffer that contains the encoded character.
541 /// Panics if the buffer is not large enough.
542 /// A buffer of length 2 is large enough to encode any `char`.
546 /// In both of these examples, '𝕊' takes two `u16`s to encode.
549 /// let mut b = [0; 2];
551 /// let result = '𝕊'.encode_utf16(&mut b);
553 /// assert_eq!(result.len(), 2);
556 /// A buffer that's too small:
561 /// let result = thread::spawn(|| {
562 /// let mut b = [0; 1];
565 /// '𝕊'.encode_utf16(&mut b);
568 /// assert!(result.is_err());
570 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
572 pub fn encode_utf16(self, dst
: &mut [u16]) -> &mut [u16] {
573 C
::encode_utf16(self, dst
)
576 /// Returns true if this `char` is an alphabetic code point, and false if not.
583 /// assert!('a'.is_alphabetic());
584 /// assert!('京'.is_alphabetic());
587 /// // love is many things, but it is not alphabetic
588 /// assert!(!c.is_alphabetic());
590 #[stable(feature = "rust1", since = "1.0.0")]
592 pub fn is_alphabetic(self) -> bool
{
594 'a'
...'z'
| 'A'
...'Z'
=> true,
595 c
if c
> '
\x7f'
=> derived_property
::Alphabetic(c
),
600 /// Returns true if this `char` satisfies the 'XID_Start' Unicode property, and false
603 /// 'XID_Start' is a Unicode Derived Property specified in
604 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
605 /// mostly similar to `ID_Start` but modified for closure under `NFKx`.
606 #[unstable(feature = "rustc_private",
607 reason
= "mainly needed for compiler internals",
610 pub fn is_xid_start(self) -> bool
{
611 derived_property
::XID_Start(self)
614 /// Returns true if this `char` satisfies the 'XID_Continue' Unicode property, and false
617 /// 'XID_Continue' is a Unicode Derived Property specified in
618 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
619 /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
620 #[unstable(feature = "rustc_private",
621 reason
= "mainly needed for compiler internals",
624 pub fn is_xid_continue(self) -> bool
{
625 derived_property
::XID_Continue(self)
628 /// Returns true if this `char` is lowercase, and false otherwise.
630 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
631 /// Property `Lowercase`.
638 /// assert!('a'.is_lowercase());
639 /// assert!('δ'.is_lowercase());
640 /// assert!(!'A'.is_lowercase());
641 /// assert!(!'Δ'.is_lowercase());
643 /// // The various Chinese scripts do not have case, and so:
644 /// assert!(!'中'.is_lowercase());
646 #[stable(feature = "rust1", since = "1.0.0")]
648 pub fn is_lowercase(self) -> bool
{
651 c
if c
> '
\x7f'
=> derived_property
::Lowercase(c
),
656 /// Returns true if this `char` is uppercase, and false otherwise.
658 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
659 /// Property `Uppercase`.
666 /// assert!(!'a'.is_uppercase());
667 /// assert!(!'δ'.is_uppercase());
668 /// assert!('A'.is_uppercase());
669 /// assert!('Δ'.is_uppercase());
671 /// // The various Chinese scripts do not have case, and so:
672 /// assert!(!'中'.is_uppercase());
674 #[stable(feature = "rust1", since = "1.0.0")]
676 pub fn is_uppercase(self) -> bool
{
679 c
if c
> '
\x7f'
=> derived_property
::Uppercase(c
),
684 /// Returns true if this `char` is whitespace, and false otherwise.
686 /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
687 /// Property `White_Space`.
694 /// assert!(' '.is_whitespace());
696 /// // a non-breaking space
697 /// assert!('\u{A0}'.is_whitespace());
699 /// assert!(!'越'.is_whitespace());
701 #[stable(feature = "rust1", since = "1.0.0")]
703 pub fn is_whitespace(self) -> bool
{
705 ' '
| '
\x09'
...'
\x0d'
=> true,
706 c
if c
> '
\x7f'
=> property
::White_Space(c
),
711 /// Returns true if this `char` is alphanumeric, and false otherwise.
713 /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
714 /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
721 /// assert!('٣'.is_alphanumeric());
722 /// assert!('7'.is_alphanumeric());
723 /// assert!('৬'.is_alphanumeric());
724 /// assert!('K'.is_alphanumeric());
725 /// assert!('و'.is_alphanumeric());
726 /// assert!('藏'.is_alphanumeric());
727 /// assert!('¾'.is_alphanumeric());
728 /// assert!('①'.is_alphanumeric());
730 #[stable(feature = "rust1", since = "1.0.0")]
732 pub fn is_alphanumeric(self) -> bool
{
733 self.is_alphabetic() || self.is_numeric()
736 /// Returns true if this `char` is a control code point, and false otherwise.
738 /// 'Control code point' is defined in terms of the Unicode General
746 /// // U+009C, STRING TERMINATOR
747 /// assert!('\u{009c}'.is_control());
748 /// assert!(!'q'.is_control());
750 #[stable(feature = "rust1", since = "1.0.0")]
752 pub fn is_control(self) -> bool
{
753 general_category
::Cc(self)
756 /// Returns true if this `char` is numeric, and false otherwise.
758 /// 'Numeric'-ness is defined in terms of the Unicode General Categories
759 /// 'Nd', 'Nl', 'No'.
766 /// assert!('٣'.is_numeric());
767 /// assert!('7'.is_numeric());
768 /// assert!('৬'.is_numeric());
769 /// assert!(!'K'.is_numeric());
770 /// assert!(!'و'.is_numeric());
771 /// assert!(!'藏'.is_numeric());
772 /// assert!('¾'.is_numeric());
773 /// assert!('①'.is_numeric());
775 #[stable(feature = "rust1", since = "1.0.0")]
777 pub fn is_numeric(self) -> bool
{
780 c
if c
> '
\x7f'
=> general_category
::N(c
),
785 /// Returns an iterator that yields the lowercase equivalent of a `char`
786 /// as one or more `char`s.
788 /// If a character does not have a lowercase equivalent, the same character
789 /// will be returned back by the iterator.
791 /// This performs complex unconditional mappings with no tailoring: it maps
792 /// one Unicode character to its lowercase equivalent according to the
793 /// [Unicode database] and the additional complex mappings
794 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
795 /// language) are not considered here.
797 /// For a full reference, see [here][reference].
799 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
801 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
803 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
810 /// for c in 'İ'.to_lowercase() {
816 /// Using `println!` directly:
819 /// println!("{}", 'İ'.to_lowercase());
822 /// Both are equivalent to:
825 /// println!("i\u{307}");
828 /// Using `to_string`:
831 /// assert_eq!('C'.to_lowercase().to_string(), "c");
833 /// // Sometimes the result is more than one character:
834 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
836 /// // Characters that do not have both uppercase and lowercase
837 /// // convert into themselves.
838 /// assert_eq!('山'.to_lowercase().to_string(), "山");
840 #[stable(feature = "rust1", since = "1.0.0")]
842 pub fn to_lowercase(self) -> ToLowercase
{
843 ToLowercase(CaseMappingIter
::new(conversions
::to_lower(self)))
846 /// Returns an iterator that yields the uppercase equivalent of a `char`
847 /// as one or more `char`s.
849 /// If a character does not have an uppercase equivalent, the same character
850 /// will be returned back by the iterator.
852 /// This performs complex unconditional mappings with no tailoring: it maps
853 /// one Unicode character to its uppercase equivalent according to the
854 /// [Unicode database] and the additional complex mappings
855 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
856 /// language) are not considered here.
858 /// For a full reference, see [here][reference].
860 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
862 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
864 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
871 /// for c in 'ß'.to_uppercase() {
877 /// Using `println!` directly:
880 /// println!("{}", 'ß'.to_uppercase());
883 /// Both are equivalent to:
889 /// Using `to_string`:
892 /// assert_eq!('c'.to_uppercase().to_string(), "C");
894 /// // Sometimes the result is more than one character:
895 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
897 /// // Characters that do not have both uppercase and lowercase
898 /// // convert into themselves.
899 /// assert_eq!('山'.to_uppercase().to_string(), "山");
904 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
906 /// * 'Dotless': I / ı, sometimes written ï
907 /// * 'Dotted': İ / i
909 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
912 /// let upper_i = 'i'.to_uppercase().to_string();
915 /// The value of `upper_i` here relies on the language of the text: if we're
916 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
917 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
920 /// let upper_i = 'i'.to_uppercase().to_string();
922 /// assert_eq!(upper_i, "I");
925 /// holds across languages.
926 #[stable(feature = "rust1", since = "1.0.0")]
928 pub fn to_uppercase(self) -> ToUppercase
{
929 ToUppercase(CaseMappingIter
::new(conversions
::to_upper(self)))
932 /// Checks if the value is within the ASCII range.
938 /// let non_ascii = '❤';
940 /// assert!(ascii.is_ascii());
941 /// assert!(!non_ascii.is_ascii());
943 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
945 pub fn is_ascii(&self) -> bool
{
949 /// Makes a copy of the value in its ASCII upper case equivalent.
951 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
952 /// but non-ASCII letters are unchanged.
954 /// To uppercase the value in-place, use [`make_ascii_uppercase`].
956 /// To uppercase ASCII characters in addition to non-ASCII characters, use
957 /// [`to_uppercase`].
963 /// let non_ascii = '❤';
965 /// assert_eq!('A', ascii.to_ascii_uppercase());
966 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
969 /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
970 /// [`to_uppercase`]: #method.to_uppercase
971 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
973 pub fn to_ascii_uppercase(&self) -> char {
975 (*self as u8).to_ascii_uppercase() as char
981 /// Makes a copy of the value in its ASCII lower case equivalent.
983 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
984 /// but non-ASCII letters are unchanged.
986 /// To lowercase the value in-place, use [`make_ascii_lowercase`].
988 /// To lowercase ASCII characters in addition to non-ASCII characters, use
989 /// [`to_lowercase`].
995 /// let non_ascii = '❤';
997 /// assert_eq!('a', ascii.to_ascii_lowercase());
998 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1001 /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
1002 /// [`to_lowercase`]: #method.to_lowercase
1003 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1005 pub fn to_ascii_lowercase(&self) -> char {
1006 if self.is_ascii() {
1007 (*self as u8).to_ascii_lowercase() as char
1013 /// Checks that two values are an ASCII case-insensitive match.
1015 /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
1020 /// let upper_a = 'A';
1021 /// let lower_a = 'a';
1022 /// let lower_z = 'z';
1024 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1025 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1026 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1028 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1030 pub fn eq_ignore_ascii_case(&self, other
: &char) -> bool
{
1031 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
1034 /// Converts this type to its ASCII upper case equivalent in-place.
1036 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1037 /// but non-ASCII letters are unchanged.
1039 /// To return a new uppercased value without modifying the existing one, use
1040 /// [`to_ascii_uppercase`].
1045 /// let mut ascii = 'a';
1047 /// ascii.make_ascii_uppercase();
1049 /// assert_eq!('A', ascii);
1052 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
1053 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1055 pub fn make_ascii_uppercase(&mut self) {
1056 *self = self.to_ascii_uppercase();
1059 /// Converts this type to its ASCII lower case equivalent in-place.
1061 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1062 /// but non-ASCII letters are unchanged.
1064 /// To return a new lowercased value without modifying the existing one, use
1065 /// [`to_ascii_lowercase`].
1070 /// let mut ascii = 'A';
1072 /// ascii.make_ascii_lowercase();
1074 /// assert_eq!('a', ascii);
1077 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
1078 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1080 pub fn make_ascii_lowercase(&mut self) {
1081 *self = self.to_ascii_lowercase();
1084 /// Checks if the value is an ASCII alphabetic character:
1086 /// - U+0041 'A' ... U+005A 'Z', or
1087 /// - U+0061 'a' ... U+007A 'z'.
1092 /// #![feature(ascii_ctype)]
1094 /// let uppercase_a = 'A';
1095 /// let uppercase_g = 'G';
1099 /// let percent = '%';
1100 /// let space = ' ';
1102 /// let esc: char = 0x1b_u8.into();
1104 /// assert!(uppercase_a.is_ascii_alphabetic());
1105 /// assert!(uppercase_g.is_ascii_alphabetic());
1106 /// assert!(a.is_ascii_alphabetic());
1107 /// assert!(g.is_ascii_alphabetic());
1108 /// assert!(!zero.is_ascii_alphabetic());
1109 /// assert!(!percent.is_ascii_alphabetic());
1110 /// assert!(!space.is_ascii_alphabetic());
1111 /// assert!(!lf.is_ascii_alphabetic());
1112 /// assert!(!esc.is_ascii_alphabetic());
1114 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1116 pub fn is_ascii_alphabetic(&self) -> bool
{
1117 self.is_ascii() && (*self as u8).is_ascii_alphabetic()
1120 /// Checks if the value is an ASCII uppercase character:
1121 /// U+0041 'A' ... U+005A 'Z'.
1126 /// #![feature(ascii_ctype)]
1128 /// let uppercase_a = 'A';
1129 /// let uppercase_g = 'G';
1133 /// let percent = '%';
1134 /// let space = ' ';
1136 /// let esc: char = 0x1b_u8.into();
1138 /// assert!(uppercase_a.is_ascii_uppercase());
1139 /// assert!(uppercase_g.is_ascii_uppercase());
1140 /// assert!(!a.is_ascii_uppercase());
1141 /// assert!(!g.is_ascii_uppercase());
1142 /// assert!(!zero.is_ascii_uppercase());
1143 /// assert!(!percent.is_ascii_uppercase());
1144 /// assert!(!space.is_ascii_uppercase());
1145 /// assert!(!lf.is_ascii_uppercase());
1146 /// assert!(!esc.is_ascii_uppercase());
1148 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1150 pub fn is_ascii_uppercase(&self) -> bool
{
1151 self.is_ascii() && (*self as u8).is_ascii_uppercase()
1154 /// Checks if the value is an ASCII lowercase character:
1155 /// U+0061 'a' ... U+007A 'z'.
1160 /// #![feature(ascii_ctype)]
1162 /// let uppercase_a = 'A';
1163 /// let uppercase_g = 'G';
1167 /// let percent = '%';
1168 /// let space = ' ';
1170 /// let esc: char = 0x1b_u8.into();
1172 /// assert!(!uppercase_a.is_ascii_lowercase());
1173 /// assert!(!uppercase_g.is_ascii_lowercase());
1174 /// assert!(a.is_ascii_lowercase());
1175 /// assert!(g.is_ascii_lowercase());
1176 /// assert!(!zero.is_ascii_lowercase());
1177 /// assert!(!percent.is_ascii_lowercase());
1178 /// assert!(!space.is_ascii_lowercase());
1179 /// assert!(!lf.is_ascii_lowercase());
1180 /// assert!(!esc.is_ascii_lowercase());
1182 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1184 pub fn is_ascii_lowercase(&self) -> bool
{
1185 self.is_ascii() && (*self as u8).is_ascii_lowercase()
1188 /// Checks if the value is an ASCII alphanumeric character:
1190 /// - U+0041 'A' ... U+005A 'Z', or
1191 /// - U+0061 'a' ... U+007A 'z', or
1192 /// - U+0030 '0' ... U+0039 '9'.
1197 /// #![feature(ascii_ctype)]
1199 /// let uppercase_a = 'A';
1200 /// let uppercase_g = 'G';
1204 /// let percent = '%';
1205 /// let space = ' ';
1207 /// let esc: char = 0x1b_u8.into();
1209 /// assert!(uppercase_a.is_ascii_alphanumeric());
1210 /// assert!(uppercase_g.is_ascii_alphanumeric());
1211 /// assert!(a.is_ascii_alphanumeric());
1212 /// assert!(g.is_ascii_alphanumeric());
1213 /// assert!(zero.is_ascii_alphanumeric());
1214 /// assert!(!percent.is_ascii_alphanumeric());
1215 /// assert!(!space.is_ascii_alphanumeric());
1216 /// assert!(!lf.is_ascii_alphanumeric());
1217 /// assert!(!esc.is_ascii_alphanumeric());
1219 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1221 pub fn is_ascii_alphanumeric(&self) -> bool
{
1222 self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
1225 /// Checks if the value is an ASCII decimal digit:
1226 /// U+0030 '0' ... U+0039 '9'.
1231 /// #![feature(ascii_ctype)]
1233 /// let uppercase_a = 'A';
1234 /// let uppercase_g = 'G';
1238 /// let percent = '%';
1239 /// let space = ' ';
1241 /// let esc: char = 0x1b_u8.into();
1243 /// assert!(!uppercase_a.is_ascii_digit());
1244 /// assert!(!uppercase_g.is_ascii_digit());
1245 /// assert!(!a.is_ascii_digit());
1246 /// assert!(!g.is_ascii_digit());
1247 /// assert!(zero.is_ascii_digit());
1248 /// assert!(!percent.is_ascii_digit());
1249 /// assert!(!space.is_ascii_digit());
1250 /// assert!(!lf.is_ascii_digit());
1251 /// assert!(!esc.is_ascii_digit());
1253 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1255 pub fn is_ascii_digit(&self) -> bool
{
1256 self.is_ascii() && (*self as u8).is_ascii_digit()
1259 /// Checks if the value is an ASCII hexadecimal digit:
1261 /// - U+0030 '0' ... U+0039 '9', or
1262 /// - U+0041 'A' ... U+0046 'F', or
1263 /// - U+0061 'a' ... U+0066 'f'.
1268 /// #![feature(ascii_ctype)]
1270 /// let uppercase_a = 'A';
1271 /// let uppercase_g = 'G';
1275 /// let percent = '%';
1276 /// let space = ' ';
1278 /// let esc: char = 0x1b_u8.into();
1280 /// assert!(uppercase_a.is_ascii_hexdigit());
1281 /// assert!(!uppercase_g.is_ascii_hexdigit());
1282 /// assert!(a.is_ascii_hexdigit());
1283 /// assert!(!g.is_ascii_hexdigit());
1284 /// assert!(zero.is_ascii_hexdigit());
1285 /// assert!(!percent.is_ascii_hexdigit());
1286 /// assert!(!space.is_ascii_hexdigit());
1287 /// assert!(!lf.is_ascii_hexdigit());
1288 /// assert!(!esc.is_ascii_hexdigit());
1290 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1292 pub fn is_ascii_hexdigit(&self) -> bool
{
1293 self.is_ascii() && (*self as u8).is_ascii_hexdigit()
1296 /// Checks if the value is an ASCII punctuation character:
1298 /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
1299 /// - U+003A ... U+0040 `: ; < = > ? @`, or
1300 /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
1301 /// - U+007B ... U+007E `{ | } ~`
1306 /// #![feature(ascii_ctype)]
1308 /// let uppercase_a = 'A';
1309 /// let uppercase_g = 'G';
1313 /// let percent = '%';
1314 /// let space = ' ';
1316 /// let esc: char = 0x1b_u8.into();
1318 /// assert!(!uppercase_a.is_ascii_punctuation());
1319 /// assert!(!uppercase_g.is_ascii_punctuation());
1320 /// assert!(!a.is_ascii_punctuation());
1321 /// assert!(!g.is_ascii_punctuation());
1322 /// assert!(!zero.is_ascii_punctuation());
1323 /// assert!(percent.is_ascii_punctuation());
1324 /// assert!(!space.is_ascii_punctuation());
1325 /// assert!(!lf.is_ascii_punctuation());
1326 /// assert!(!esc.is_ascii_punctuation());
1328 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1330 pub fn is_ascii_punctuation(&self) -> bool
{
1331 self.is_ascii() && (*self as u8).is_ascii_punctuation()
1334 /// Checks if the value is an ASCII graphic character:
1335 /// U+0021 '!' ... U+007E '~'.
1340 /// #![feature(ascii_ctype)]
1342 /// let uppercase_a = 'A';
1343 /// let uppercase_g = 'G';
1347 /// let percent = '%';
1348 /// let space = ' ';
1350 /// let esc: char = 0x1b_u8.into();
1352 /// assert!(uppercase_a.is_ascii_graphic());
1353 /// assert!(uppercase_g.is_ascii_graphic());
1354 /// assert!(a.is_ascii_graphic());
1355 /// assert!(g.is_ascii_graphic());
1356 /// assert!(zero.is_ascii_graphic());
1357 /// assert!(percent.is_ascii_graphic());
1358 /// assert!(!space.is_ascii_graphic());
1359 /// assert!(!lf.is_ascii_graphic());
1360 /// assert!(!esc.is_ascii_graphic());
1362 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1364 pub fn is_ascii_graphic(&self) -> bool
{
1365 self.is_ascii() && (*self as u8).is_ascii_graphic()
1368 /// Checks if the value is an ASCII whitespace character:
1369 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1370 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1372 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1373 /// whitespace][infra-aw]. There are several other definitions in
1374 /// wide use. For instance, [the POSIX locale][pct] includes
1375 /// U+000B VERTICAL TAB as well as all the above characters,
1376 /// but—from the very same specification—[the default rule for
1377 /// "field splitting" in the Bourne shell][bfs] considers *only*
1378 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1380 /// If you are writing a program that will process an existing
1381 /// file format, check what that format's definition of whitespace is
1382 /// before using this function.
1384 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1385 /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1386 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1391 /// #![feature(ascii_ctype)]
1393 /// let uppercase_a = 'A';
1394 /// let uppercase_g = 'G';
1398 /// let percent = '%';
1399 /// let space = ' ';
1401 /// let esc: char = 0x1b_u8.into();
1403 /// assert!(!uppercase_a.is_ascii_whitespace());
1404 /// assert!(!uppercase_g.is_ascii_whitespace());
1405 /// assert!(!a.is_ascii_whitespace());
1406 /// assert!(!g.is_ascii_whitespace());
1407 /// assert!(!zero.is_ascii_whitespace());
1408 /// assert!(!percent.is_ascii_whitespace());
1409 /// assert!(space.is_ascii_whitespace());
1410 /// assert!(lf.is_ascii_whitespace());
1411 /// assert!(!esc.is_ascii_whitespace());
1413 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1415 pub fn is_ascii_whitespace(&self) -> bool
{
1416 self.is_ascii() && (*self as u8).is_ascii_whitespace()
1419 /// Checks if the value is an ASCII control character:
1420 /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE.
1421 /// Note that most ASCII whitespace characters are control
1422 /// characters, but SPACE is not.
1427 /// #![feature(ascii_ctype)]
1429 /// let uppercase_a = 'A';
1430 /// let uppercase_g = 'G';
1434 /// let percent = '%';
1435 /// let space = ' ';
1437 /// let esc: char = 0x1b_u8.into();
1439 /// assert!(!uppercase_a.is_ascii_control());
1440 /// assert!(!uppercase_g.is_ascii_control());
1441 /// assert!(!a.is_ascii_control());
1442 /// assert!(!g.is_ascii_control());
1443 /// assert!(!zero.is_ascii_control());
1444 /// assert!(!percent.is_ascii_control());
1445 /// assert!(!space.is_ascii_control());
1446 /// assert!(lf.is_ascii_control());
1447 /// assert!(esc.is_ascii_control());
1449 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1451 pub fn is_ascii_control(&self) -> bool
{
1452 self.is_ascii() && (*self as u8).is_ascii_control()
1456 /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
1457 #[stable(feature = "decode_utf16", since = "1.9.0")]
1458 #[derive(Clone, Debug)]
1459 pub struct DecodeUtf16
<I
>
1460 where I
: Iterator
<Item
= u16>
1466 /// An error that can be returned when decoding UTF-16 code points.
1467 #[stable(feature = "decode_utf16", since = "1.9.0")]
1468 #[derive(Debug, Clone, Eq, PartialEq)]
1469 pub struct DecodeUtf16Error
{
1473 /// Create an iterator over the UTF-16 encoded code points in `iter`,
1474 /// returning unpaired surrogates as `Err`s.
1481 /// use std::char::decode_utf16;
1484 /// // 𝄞mus<invalid>ic<invalid>
1485 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
1486 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
1489 /// assert_eq!(decode_utf16(v.iter().cloned())
1490 /// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
1491 /// .collect::<Vec<_>>(),
1493 /// Ok('m'), Ok('u'), Ok('s'),
1495 /// Ok('i'), Ok('c'),
1500 /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
1503 /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
1506 /// // 𝄞mus<invalid>ic<invalid>
1507 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
1508 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
1511 /// assert_eq!(decode_utf16(v.iter().cloned())
1512 /// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
1513 /// .collect::<String>(),
1517 #[stable(feature = "decode_utf16", since = "1.9.0")]
1519 pub fn decode_utf16
<I
: IntoIterator
<Item
= u16>>(iter
: I
) -> DecodeUtf16
<I
::IntoIter
> {
1521 iter
: iter
.into_iter(),
1526 #[stable(feature = "decode_utf16", since = "1.9.0")]
1527 impl<I
: Iterator
<Item
= u16>> Iterator
for DecodeUtf16
<I
> {
1528 type Item
= Result
<char, DecodeUtf16Error
>;
1530 fn next(&mut self) -> Option
<Result
<char, DecodeUtf16Error
>> {
1531 let u
= match self.buf
.take() {
1533 None
=> self.iter
.next()?
1536 if u
< 0xD800 || 0xDFFF < u
{
1538 Some(Ok(unsafe { from_u32_unchecked(u as u32) }
))
1539 } else if u
>= 0xDC00 {
1540 // a trailing surrogate
1541 Some(Err(DecodeUtf16Error { code: u }
))
1543 let u2
= match self.iter
.next() {
1546 None
=> return Some(Err(DecodeUtf16Error { code: u }
)),
1548 if u2
< 0xDC00 || u2
> 0xDFFF {
1549 // not a trailing surrogate so we're not a valid
1550 // surrogate pair, so rewind to redecode u2 next time.
1551 self.buf
= Some(u2
);
1552 return Some(Err(DecodeUtf16Error { code: u }
));
1555 // all ok, so lets decode it.
1556 let c
= (((u
- 0xD800) as u32) << 10 | (u2
- 0xDC00) as u32) + 0x1_0000;
1557 Some(Ok(unsafe { from_u32_unchecked(c) }
))
1562 fn size_hint(&self) -> (usize, Option
<usize>) {
1563 let (low
, high
) = self.iter
.size_hint();
1564 // we could be entirely valid surrogates (2 elements per
1565 // char), or entirely non-surrogates (1 element per char)
1570 impl DecodeUtf16Error
{
1571 /// Returns the unpaired surrogate which caused this error.
1572 #[stable(feature = "decode_utf16", since = "1.9.0")]
1573 pub fn unpaired_surrogate(&self) -> u16 {
1578 #[stable(feature = "decode_utf16", since = "1.9.0")]
1579 impl fmt
::Display
for DecodeUtf16Error
{
1580 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
1581 write
!(f
, "unpaired surrogate found: {:x}", self.code
)
1585 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
1588 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
1589 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
1590 #[stable(feature = "decode_utf16", since = "1.9.0")]
1591 pub const REPLACEMENT_CHARACTER
: char = '
\u{FFFD}'
;