[rustc.git] / src / libcore / char / mod.rs

// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! A character type.
//!
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
//!
//! This module exists for technical reasons; the primary documentation for
//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
//! itself.
//!
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.

#![allow(non_snake_case)]
#![stable(feature = "core_char", since = "1.2.0")]

mod convert;
mod decode;
mod methods;

// stable re-exports
#[stable(feature = "rust1", since = "1.0.0")]
pub use self::convert::{from_u32, from_digit};
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
pub use self::convert::from_u32_unchecked;
#[stable(feature = "char_from_str", since = "1.20.0")]
pub use self::convert::ParseCharError;
#[unstable(feature = "try_from", issue = "33417")]
pub use self::convert::CharTryFromError;
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};

// unstable re-exports
#[unstable(feature = "unicode_version", issue = "49726")]
pub use unicode::tables::UNICODE_VERSION;
#[unstable(feature = "unicode_version", issue = "49726")]
pub use unicode::version::UnicodeVersion;
#[unstable(feature = "decode_utf8", issue = "33906")]
#[rustc_deprecated(since = "1.27.0", reason = "Use str::from_utf8 instead:
    https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html#examples")]
#[allow(deprecated)]
pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence};

use fmt::{self, Write};
use iter::FusedIterator;

// UTF-8 ranges and tags for encoding characters.
//
// The `TAG_*` values are the fixed high bits of a UTF-8 byte: `TAG_CONT`
// marks a continuation byte (`10xxxxxx`), while `TAG_TWO_B`, `TAG_THREE_B`
// and `TAG_FOUR_B` mark the leading byte of a two-, three- and four-byte
// sequence respectively.
//
// The `MAX_*` values are exclusive upper bounds on the code point: a value
// below `MAX_ONE_B` fits in one byte, below `MAX_TWO_B` in two bytes, and
// below `MAX_THREE_B` in three bytes.
//
// NOTE(review): nothing in this file references these constants; presumably
// they are consumed by the sibling submodules (e.g. `methods`) -- verify.
const TAG_CONT: u8     = 0b1000_0000;
const TAG_TWO_B: u8    = 0b1100_0000;
const TAG_THREE_B: u8  = 0b1110_0000;
const TAG_FOUR_B: u8   = 0b1111_0000;
const MAX_ONE_B: u32   =     0x80;
const MAX_TWO_B: u32   =    0x800;
const MAX_THREE_B: u32 =  0x10000;

/*
    Lu  Uppercase_Letter        an uppercase letter
    Ll  Lowercase_Letter        a lowercase letter
    Lt  Titlecase_Letter        a digraphic character, with first part uppercase
    Lm  Modifier_Letter         a modifier letter
    Lo  Other_Letter            other letters, including syllables and ideographs
    Mn  Nonspacing_Mark         a nonspacing combining mark (zero advance width)
    Mc  Spacing_Mark            a spacing combining mark (positive advance width)
    Me  Enclosing_Mark          an enclosing combining mark
    Nd  Decimal_Number          a decimal digit
    Nl  Letter_Number           a letterlike numeric character
    No  Other_Number            a numeric character of other type
    Pc  Connector_Punctuation   a connecting punctuation mark, like a tie
    Pd  Dash_Punctuation        a dash or hyphen punctuation mark
    Ps  Open_Punctuation        an opening punctuation mark (of a pair)
    Pe  Close_Punctuation       a closing punctuation mark (of a pair)
    Pi  Initial_Punctuation     an initial quotation mark
    Pf  Final_Punctuation       a final quotation mark
    Po  Other_Punctuation       a punctuation mark of other type
    Sm  Math_Symbol             a symbol of primarily mathematical use
    Sc  Currency_Symbol         a currency sign
    Sk  Modifier_Symbol         a non-letterlike modifier symbol
    So  Other_Symbol            a symbol of other type
    Zs  Space_Separator         a space character (of various non-zero widths)
    Zl  Line_Separator          U+2028 LINE SEPARATOR only
    Zp  Paragraph_Separator     U+2029 PARAGRAPH SEPARATOR only
    Cc  Control                 a C0 or C1 control code
    Cf  Format                  a format control character
    Cs  Surrogate               a surrogate code point
    Co  Private_Use             a private-use character
    Cn  Unassigned              a reserved unassigned code point or a noncharacter
*/

/// The highest valid code point a `char` can have.
///
/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
/// Point], but only one that lies within a certain range. `MAX` is the
/// highest code point that is also a valid [Unicode Scalar Value].
///
/// [`char`]: ../../std/primitive.char.html
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
/// [Code Point]: http://www.unicode.org/glossary/#code_point
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = '\u{10ffff}';

/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
/// decoding error.
///
/// It can occur, for example, when giving ill-formed UTF-8 bytes to
/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
#[stable(feature = "decode_utf16", since = "1.9.0")]
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';

/// An iterator that yields the hexadecimal Unicode escape of a character, as
/// `char`s.
///
/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
/// its documentation for more.
///
/// [`escape_unicode`]: ../../std/primitive.char.html#method.escape_unicode
/// [`char`]: ../../std/primitive.char.html
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeUnicode {
    // The character whose code point supplies the hex digits.
    c: char,
    // Which part of the escape sequence is emitted next.
    state: EscapeUnicodeState,

    // The index of the next hex digit to be printed, counted upwards from
    // the least significant digit: 0x543210. Equivalently, it is the number
    // of hex digits that will still remain once that digit has been printed;
    // it stays at 0 after the last digit has been emitted.
    hex_digit_idx: usize,
}

// The enum values are ordered so that their representation is the
// same as the remaining length (besides the hexadecimal digits, which
// `len()` accounts for by adding `hex_digit_idx`). This likely makes
// `len()` a single load from memory and worth inlining.
#[derive(Clone, Debug)]
enum EscapeUnicodeState {
    // The whole escape has been emitted.
    Done,
    // The closing `}` is emitted next.
    RightBrace,
    // A hex digit is emitted next.
    Value,
    // The opening `{` is emitted next.
    LeftBrace,
    // The `u` is emitted next.
    Type,
    // The leading `\` is emitted next.
    Backslash,
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for EscapeUnicode {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Work out the character to emit together with the state to move
        // into, then commit the transition in one place.
        let (emitted, following) = match self.state {
            EscapeUnicodeState::Done => return None,
            EscapeUnicodeState::Backslash => ('\\', EscapeUnicodeState::Type),
            EscapeUnicodeState::Type => ('u', EscapeUnicodeState::LeftBrace),
            EscapeUnicodeState::LeftBrace => ('{', EscapeUnicodeState::Value),
            EscapeUnicodeState::RightBrace => ('}', EscapeUnicodeState::Done),
            EscapeUnicodeState::Value => {
                // Pick out the nibble selected by `hex_digit_idx`.
                let shift = self.hex_digit_idx * 4;
                let nibble = ((self.c as u32) >> shift) & 0xf;
                let digit = from_digit(nibble, 16).unwrap();

                // Stay in `Value` until the least significant digit is out.
                let following = if self.hex_digit_idx == 0 {
                    EscapeUnicodeState::RightBrace
                } else {
                    self.hex_digit_idx -= 1;
                    EscapeUnicodeState::Value
                };
                (digit, following)
            }
        };

        self.state = following;
        Some(emitted)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The remaining length is known exactly.
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    #[inline]
    fn count(self) -> usize {
        self.len()
    }

    fn last(self) -> Option<char> {
        // Unless the iterator is already exhausted, the final character of
        // the escape is always the closing brace.
        if let EscapeUnicodeState::Done = self.state {
            None
        } else {
            Some('}')
        }
    }
}

#[stable(feature = "exact_size_escape", since = "1.11.0")]
impl ExactSizeIterator for EscapeUnicode {
    #[inline]
    fn len(&self) -> usize {
        // Number of characters still owed by the current state, not counting
        // the `hex_digit_idx` additional hex digits. The match is a single
        // memory access with no branching.
        let owed_by_state = match self.state {
            EscapeUnicodeState::Backslash => 5,
            EscapeUnicodeState::Type => 4,
            EscapeUnicodeState::LeftBrace => 3,
            EscapeUnicodeState::Value => 2,
            EscapeUnicodeState::RightBrace => 1,
            EscapeUnicodeState::Done => 0,
        };
        owed_by_state + self.hex_digit_idx
    }
}

// Once the `Done` state is reached, `next()` keeps returning `None`.
#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeUnicode {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeUnicode {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Walk a copy so that formatting leaves `self` untouched; writing
        // stops at the first error, which is handed back to the caller.
        self.clone().try_for_each(|ch| f.write_char(ch))
    }
}

/// An iterator that yields the literal escape code of a `char`.
///
/// This `struct` is created by the [`escape_default`] method on [`char`]. See
/// its documentation for more.
///
/// [`escape_default`]: ../../std/primitive.char.html#method.escape_default
/// [`char`]: ../../std/primitive.char.html
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeDefault {
    // What is left to yield; see `EscapeDefaultState`.
    state: EscapeDefaultState
}

// The state machine behind `EscapeDefault`.
#[derive(Clone, Debug)]
enum EscapeDefaultState {
    // Nothing is left to yield.
    Done,
    // Exactly one character, the payload, is left to yield.
    Char(char),
    // A backslash is yielded next, followed by the payload character.
    Backslash(char),
    // All remaining output comes from the wrapped `EscapeUnicode` iterator.
    Unicode(EscapeUnicode),
}

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for EscapeDefault {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // The delegating and exhausted states leave `self.state` alone; the
        // two literal states emit one character and step forward.
        let (emitted, successor) = match self.state {
            EscapeDefaultState::Unicode(ref mut inner) => return inner.next(),
            EscapeDefaultState::Done => return None,
            EscapeDefaultState::Backslash(c) => ('\\', EscapeDefaultState::Char(c)),
            EscapeDefaultState::Char(c) => (c, EscapeDefaultState::Done),
        };

        self.state = successor;
        Some(emitted)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // The remaining length is known exactly.
        let remaining = self.len();
        (remaining, Some(remaining))
    }

    #[inline]
    fn count(self) -> usize {
        self.len()
    }

    fn nth(&mut self, n: usize) -> Option<char> {
        // Jump straight to the requested element instead of stepping through
        // `next()`; anything skipped over is consumed as well.
        let (yielded, successor) = match self.state {
            EscapeDefaultState::Unicode(ref mut inner) => return inner.nth(n),
            EscapeDefaultState::Done => return None,
            EscapeDefaultState::Backslash(c) => match n {
                0 => (Some('\\'), EscapeDefaultState::Char(c)),
                1 => (Some(c), EscapeDefaultState::Done),
                // Skipping past both characters exhausts the iterator.
                _ => (None, EscapeDefaultState::Done),
            },
            EscapeDefaultState::Char(c) => {
                let only = if n == 0 { Some(c) } else { None };
                (only, EscapeDefaultState::Done)
            }
        };

        self.state = successor;
        yielded
    }

    fn last(self) -> Option<char> {
        match self.state {
            EscapeDefaultState::Done => None,
            // With or without a pending backslash, the payload comes last.
            EscapeDefaultState::Char(c) => Some(c),
            EscapeDefaultState::Backslash(c) => Some(c),
            EscapeDefaultState::Unicode(inner) => inner.last(),
        }
    }
}

#[stable(feature = "exact_size_escape", since = "1.11.0")]
impl ExactSizeIterator for EscapeDefault {
    fn len(&self) -> usize {
        match self.state {
            // The Unicode escape reports its own remaining length.
            EscapeDefaultState::Unicode(ref inner) => inner.len(),
            // A backslash plus the payload character.
            EscapeDefaultState::Backslash(_) => 2,
            // Only the payload character.
            EscapeDefaultState::Char(_) => 1,
            EscapeDefaultState::Done => 0,
        }
    }
}

// Once the `Done` state is reached, `next()` keeps returning `None`.
#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeDefault {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for EscapeDefault {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Walk a copy so that formatting leaves `self` untouched; writing
        // stops at the first error, which is handed back to the caller.
        self.clone().try_for_each(|ch| f.write_char(ch))
    }
}

/// An iterator that yields the literal escape code of a `char`.
///
/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
/// documentation for more.
///
/// [`escape_debug`]: ../../std/primitive.char.html#method.escape_debug
/// [`char`]: ../../std/primitive.char.html
// Thin wrapper: iteration and formatting are delegated to the inner
// `EscapeDefault`.
#[stable(feature = "char_escape_debug", since = "1.20.0")]
#[derive(Clone, Debug)]
pub struct EscapeDebug(EscapeDefault);

#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl Iterator for EscapeDebug {
    type Item = char;
    fn next(&mut self) -> Option<char> { self.0.next() }
    fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
}

#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl ExactSizeIterator for EscapeDebug { }

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for EscapeDebug {}

#[stable(feature = "char_escape_debug", since = "1.20.0")]
impl fmt::Display for EscapeDebug {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

/// An iterator that yields the lowercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
/// [`char`]: ../../std/primitive.char.html
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct ToLowercase(CaseMappingIter);

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for ToLowercase {
    type Item = char;

    // Yields the next character of the mapping from the wrapped iterator.
    fn next(&mut self) -> Option<char> {
        self.0.next()
    }

    // Forward the wrapped iterator's hint rather than falling back to the
    // trait default of `(0, None)`, so that whatever bounds the inner
    // iterator reports reach consumers such as `collect()`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToLowercase {}

/// An iterator that yields the uppercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
/// [`char`]: ../../std/primitive.char.html
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct ToUppercase(CaseMappingIter);

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for ToUppercase {
    type Item = char;

    // Yields the next character of the mapping from the wrapped iterator.
    fn next(&mut self) -> Option<char> {
        self.0.next()
    }

    // Forward the wrapped iterator's hint rather than falling back to the
    // trait default of `(0, None)`, so that whatever bounds the inner
    // iterator reports reach consumers such as `collect()`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToUppercase {}

// Iterator over the characters of a case mapping.
//
// Each variant stores the characters that have not been yielded yet, in the
// order in which they will be yielded; `Zero` is the exhausted state.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    // Builds the iterator from a three-slot array in which unused trailing
    // slots hold `'\0'`. The first slot is always yielded, so the result
    // contains between one and three characters.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] == '\0' {
            if chars[1] == '\0' {
                CaseMappingIter::One(chars[0])  // Including if chars[0] == '\0'
            } else {
                CaseMappingIter::Two(chars[0], chars[1])
            }
        } else {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    // Yields the front character and shrinks to the next smaller variant.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    // The variant encodes exactly how many characters remain, so report a
    // precise hint instead of the trait default of `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Zero => 0,
        };
        (remaining, Some(remaining))
    }
}

impl fmt::Display for CaseMappingIter {
    // Writes the characters that have not been yielded yet, in order,
    // without consuming the iterator.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                f.write_char(a)?;
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::Two(b, c) => {
                f.write_char(b)?;
                f.write_char(c)
            }
            CaseMappingIter::One(c) => {
                f.write_char(c)
            }
            CaseMappingIter::Zero => Ok(()),
        }
    }
}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToLowercase {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToUppercase {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
Commit	Line	Data
83c7162d XL	1	// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
	2	// file at the top-level directory of this distribution and at
	3	// http://rust-lang.org/COPYRIGHT.
	4	//
	5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	8	// option. This file may not be copied, modified, or distributed
	9	// except according to those terms.
	10
	11	//! A character type.
	12	//!
	13	//! The `char` type represents a single character. More specifically, since
	14	//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
	15	//! scalar value]', which is similar to, but not the same as, a '[Unicode code
	16	//! point]'.
	17	//!
	18	//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
	19	//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
	20	//!
	21	//! This module exists for technical reasons, the primary documentation for
	22	//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
	23	//! itself.
	24	//!
	25	//! This module is the home of the iterator implementations for the iterators
	26	//! implemented on `char`, as well as some useful constants and conversion
	27	//! functions that convert various types to `char`.
	28
	29	#![allow(non_snake_case)]
	30	#![stable(feature = "core_char", since = "1.2.0")]
	31
	32	mod convert;
	33	mod decode;
	34	mod methods;
	35
	36	// stable re-exports
	37	#[stable(feature = "rust1", since = "1.0.0")]
	38	pub use self::convert::{from_u32, from_digit};
	39	#[stable(feature = "char_from_unchecked", since = "1.5.0")]
	40	pub use self::convert::from_u32_unchecked;
	41	#[stable(feature = "char_from_str", since = "1.20.0")]
	42	pub use self::convert::ParseCharError;
	43	#[unstable(feature = "try_from", issue = "33417")]
	44	pub use self::convert::CharTryFromError;
	45	#[stable(feature = "decode_utf16", since = "1.9.0")]
	46	pub use self::decode::{decode_utf16, DecodeUtf16, DecodeUtf16Error};
	47
	48	// unstable re-exports
	49	#[unstable(feature = "unicode_version", issue = "49726")]
	50	pub use unicode::tables::UNICODE_VERSION;
	51	#[unstable(feature = "unicode_version", issue = "49726")]
	52	pub use unicode::version::UnicodeVersion;
	53	#[unstable(feature = "decode_utf8", issue = "33906")]
	54	#[rustc_deprecated(since = "1.27.0", reason = "Use str::from_utf8 instead:
	55	https://doc.rust-lang.org/nightly/std/str/struct.Utf8Error.html#examples")]
	56	#[allow(deprecated)]
	57	pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence};
	58
	59	use fmt::{self, Write};
	60	use iter::FusedIterator;
	61
	62	// UTF-8 ranges and tags for encoding characters
94b46f34 XL	63	const TAG_CONT: u8 = 0b1000_0000;
	64	const TAG_TWO_B: u8 = 0b1100_0000;
	65	const TAG_THREE_B: u8 = 0b1110_0000;
	66	const TAG_FOUR_B: u8 = 0b1111_0000;
83c7162d XL	67	const MAX_ONE_B: u32 = 0x80;
	68	const MAX_TWO_B: u32 = 0x800;
	69	const MAX_THREE_B: u32 = 0x10000;
	70
	71	/*
	72	Lu Uppercase_Letter an uppercase letter
	73	Ll Lowercase_Letter a lowercase letter
	74	Lt Titlecase_Letter a digraphic character, with first part uppercase
	75	Lm Modifier_Letter a modifier letter
	76	Lo Other_Letter other letters, including syllables and ideographs
	77	Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
	78	Mc Spacing_Mark a spacing combining mark (positive advance width)
	79	Me Enclosing_Mark an enclosing combining mark
	80	Nd Decimal_Number a decimal digit
	81	Nl Letter_Number a letterlike numeric character
	82	No Other_Number a numeric character of other type
	83	Pc Connector_Punctuation a connecting punctuation mark, like a tie
	84	Pd Dash_Punctuation a dash or hyphen punctuation mark
	85	Ps Open_Punctuation an opening punctuation mark (of a pair)
	86	Pe Close_Punctuation a closing punctuation mark (of a pair)
	87	Pi Initial_Punctuation an initial quotation mark
	88	Pf Final_Punctuation a final quotation mark
	89	Po Other_Punctuation a punctuation mark of other type
	90	Sm Math_Symbol a symbol of primarily mathematical use
	91	Sc Currency_Symbol a currency sign
	92	Sk Modifier_Symbol a non-letterlike modifier symbol
	93	So Other_Symbol a symbol of other type
	94	Zs Space_Separator a space character (of various non-zero widths)
	95	Zl Line_Separator U+2028 LINE SEPARATOR only
	96	Zp Paragraph_Separator U+2029 PARAGRAPH SEPARATOR only
	97	Cc Control a C0 or C1 control code
	98	Cf Format a format control character
	99	Cs Surrogate a surrogate code point
	100	Co Private_Use a private-use character
	101	Cn Unassigned a reserved unassigned code point or a noncharacter
	102	*/
	103
	104	/// The highest valid code point a `char` can have.
	105	///
	106	/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
	107	/// Point], but only ones within a certain range. `MAX` is the highest valid
	108	/// code point that's a valid [Unicode Scalar Value].
	109	///
	110	/// [`char`]: ../../std/primitive.char.html
	111	/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
	112	/// [Code Point]: http://www.unicode.org/glossary/#code_point
	113	#[stable(feature = "rust1", since = "1.0.0")]
	114	pub const MAX: char = '\u{10ffff}';
	115
	116	/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
	117	/// decoding error.
	118	///
	119	/// It can occur, for example, when giving ill-formed UTF-8 bytes to
	120	/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
	121	#[stable(feature = "decode_utf16", since = "1.9.0")]
	122	pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
	123
	124	/// Returns an iterator that yields the hexadecimal Unicode escape of a
	125	/// character, as `char`s.
	126	///
	127	/// This `struct` is created by the [`escape_unicode`] method on [`char`]. See
	128	/// its documentation for more.
	129	///
	130	/// [`escape_unicode`]: ../../std/primitive.char.html#method.escape_unicode
131	/// [`char`]: ../../std/primitive.char.html
132	#[derive(Clone, Debug)]
133	#[stable(feature = "rust1", since = "1.0.0")]
134	pub struct EscapeUnicode {
135	c: char,
136	state: EscapeUnicodeState,
137
138	// The index of the next hex digit to be printed (0 if none),
139	// i.e. the number of remaining hex digits to be printed;
140	// increasing from the least significant digit: 0x543210
141	hex_digit_idx: usize,
142	}
143
144	// The enum values are ordered so that their representation is the
145	// same as the remaining length (besides the hexadecimal digits). This
146	// likely makes `len()` a single load from memory) and inline-worth.
147	#[derive(Clone, Debug)]
148	enum EscapeUnicodeState {
149	Done,
150	RightBrace,
151	Value,
152	LeftBrace,
153	Type,
154	Backslash,
155	}
156
157	#[stable(feature = "rust1", since = "1.0.0")]
158	impl Iterator for EscapeUnicode {
159	type Item = char;
160
161	fn next(&mut self) -> Option<char> {
162	match self.state {
163	EscapeUnicodeState::Backslash => {
164	self.state = EscapeUnicodeState::Type;
165	Some('\\')
166	}
167	EscapeUnicodeState::Type => {
168	self.state = EscapeUnicodeState::LeftBrace;
169	Some('u')
170	}
171	EscapeUnicodeState::LeftBrace => {
172	self.state = EscapeUnicodeState::Value;
173	Some('{')
174	}
175	EscapeUnicodeState::Value => {
176	let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
177	let c = from_digit(hex_digit, 16).unwrap();
178	if self.hex_digit_idx == 0 {
179	self.state = EscapeUnicodeState::RightBrace;
180	} else {
181	self.hex_digit_idx -= 1;
182	}
183	Some(c)
184	}
185	EscapeUnicodeState::RightBrace => {
186	self.state = EscapeUnicodeState::Done;
187	Some('}')
188	}
189	EscapeUnicodeState::Done => None,
190	}
191	}
192
193	#[inline]
194	fn size_hint(&self) -> (usize, Option<usize>) {
195	let n = self.len();
196	(n, Some(n))
197	}
198
199	#[inline]
200	fn count(self) -> usize {
201	self.len()
202	}
203
204	fn last(self) -> Option<char> {
205	match self.state {
206	EscapeUnicodeState::Done => None,
207
208	EscapeUnicodeState::RightBrace \|
209	EscapeUnicodeState::Value \|
210	EscapeUnicodeState::LeftBrace \|
211	EscapeUnicodeState::Type \|
212	EscapeUnicodeState::Backslash => Some('}'),
213	}
214	}
215	}
216
217	#[stable(feature = "exact_size_escape", since = "1.11.0")]
218	impl ExactSizeIterator for EscapeUnicode {
219	#[inline]
220	fn len(&self) -> usize {
221	// The match is a single memory access with no branching
222	self.hex_digit_idx + match self.state {
223	EscapeUnicodeState::Done => 0,
224	EscapeUnicodeState::RightBrace => 1,
225	EscapeUnicodeState::Value => 2,
226	EscapeUnicodeState::LeftBrace => 3,
227	EscapeUnicodeState::Type => 4,
228	EscapeUnicodeState::Backslash => 5,
229	}
230	}
231	}
232
233	#[stable(feature = "fused", since = "1.26.0")]
234	impl FusedIterator for EscapeUnicode {}
235
236	#[stable(feature = "char_struct_display", since = "1.16.0")]
237	impl fmt::Display for EscapeUnicode {
238	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
239	for c in self.clone() {
240	f.write_char(c)?;
241	}
242	Ok(())
243	}
244	}
245
246	/// An iterator that yields the literal escape code of a `char`.
247	///
248	/// This `struct` is created by the [`escape_default`] method on [`char`]. See
249	/// its documentation for more.
250	///
251	/// [`escape_default`]: ../../std/primitive.char.html#method.escape_default
252	/// [`char`]: ../../std/primitive.char.html
253	#[derive(Clone, Debug)]
254	#[stable(feature = "rust1", since = "1.0.0")]
255	pub struct EscapeDefault {
256	state: EscapeDefaultState
257	}
258
259	#[derive(Clone, Debug)]
260	enum EscapeDefaultState {
261	Done,
262	Char(char),
263	Backslash(char),
264	Unicode(EscapeUnicode),
265	}
266
267	#[stable(feature = "rust1", since = "1.0.0")]
268	impl Iterator for EscapeDefault {
269	type Item = char;
270
271	fn next(&mut self) -> Option<char> {
272	match self.state {
273	EscapeDefaultState::Backslash(c) => {
274	self.state = EscapeDefaultState::Char(c);
275	Some('\\')
276	}
277	EscapeDefaultState::Char(c) => {
278	self.state = EscapeDefaultState::Done;
279	Some(c)
280	}
281	EscapeDefaultState::Done => None,
282	EscapeDefaultState::Unicode(ref mut iter) => iter.next(),
283	}
284	}
285
286	#[inline]
287	fn size_hint(&self) -> (usize, Option<usize>) {
288	let n = self.len();
289	(n, Some(n))
290	}
291
292	#[inline]
293	fn count(self) -> usize {
294	self.len()
295	}
296
297	fn nth(&mut self, n: usize) -> Option<char> {
298	match self.state {
299	EscapeDefaultState::Backslash(c) if n == 0 => {
300	self.state = EscapeDefaultState::Char(c);
301	Some('\\')
302	},
303	EscapeDefaultState::Backslash(c) if n == 1 => {
304	self.state = EscapeDefaultState::Done;
305	Some(c)
306	},
307	EscapeDefaultState::Backslash(_) => {
308	self.state = EscapeDefaultState::Done;
309	None
310	},
311	EscapeDefaultState::Char(c) => {
312	self.state = EscapeDefaultState::Done;
313
314	if n == 0 {
315	Some(c)
316	} else {
317	None
318	}
319	},
320	EscapeDefaultState::Done => return None,
321	EscapeDefaultState::Unicode(ref mut i) => return i.nth(n),
322	}
323	}
324
325	fn last(self) -> Option<char> {
326	match self.state {
327	EscapeDefaultState::Unicode(iter) => iter.last(),
328	EscapeDefaultState::Done => None,
329	EscapeDefaultState::Backslash(c) \| EscapeDefaultState::Char(c) => Some(c),
330	}
331	}
332	}
333
334	#[stable(feature = "exact_size_escape", since = "1.11.0")]
335	impl ExactSizeIterator for EscapeDefault {
336	fn len(&self) -> usize {
337	match self.state {
338	EscapeDefaultState::Done => 0,
339	EscapeDefaultState::Char(_) => 1,
340	EscapeDefaultState::Backslash(_) => 2,
341	EscapeDefaultState::Unicode(ref iter) => iter.len(),
342	}
343	}
344	}
345
346	#[stable(feature = "fused", since = "1.26.0")]
347	impl FusedIterator for EscapeDefault {}
348
349	#[stable(feature = "char_struct_display", since = "1.16.0")]
350	impl fmt::Display for EscapeDefault {
351	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
352	for c in self.clone() {
353	f.write_char(c)?;
354	}
355	Ok(())
356	}
357	}
358
359	/// An iterator that yields the literal escape code of a `char`.
360	///
361	/// This `struct` is created by the [`escape_debug`] method on [`char`]. See its
362	/// documentation for more.
363	///
364	/// [`escape_debug`]: ../../std/primitive.char.html#method.escape_debug
365	/// [`char`]: ../../std/primitive.char.html
366	#[stable(feature = "char_escape_debug", since = "1.20.0")]
367	#[derive(Clone, Debug)]
368	pub struct EscapeDebug(EscapeDefault);
369
370	#[stable(feature = "char_escape_debug", since = "1.20.0")]
371	impl Iterator for EscapeDebug {
372	type Item = char;
373	fn next(&mut self) -> Option<char> { self.0.next() }
374	fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
375	}
376
377	#[stable(feature = "char_escape_debug", since = "1.20.0")]
378	impl ExactSizeIterator for EscapeDebug { }
379
380	#[stable(feature = "fused", since = "1.26.0")]
381	impl FusedIterator for EscapeDebug {}
382
383	#[stable(feature = "char_escape_debug", since = "1.20.0")]
384	impl fmt::Display for EscapeDebug {
385	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
386	fmt::Display::fmt(&self.0, f)
387	}
388	}
389
390	/// Returns an iterator that yields the lowercase equivalent of a `char`.
391	///
392	/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
393	/// its documentation for more.
394	///
395	/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
396	/// [`char`]: ../../std/primitive.char.html
397	#[stable(feature = "rust1", since = "1.0.0")]
398	#[derive(Debug, Clone)]
399	pub struct ToLowercase(CaseMappingIter);
400
401	#[stable(feature = "rust1", since = "1.0.0")]
402	impl Iterator for ToLowercase {
403	type Item = char;
404	fn next(&mut self) -> Option<char> {
405	self.0.next()
406	}
407	}
408
409	#[stable(feature = "fused", since = "1.26.0")]
410	impl FusedIterator for ToLowercase {}
411
412	/// Returns an iterator that yields the uppercase equivalent of a `char`.
413	///
414	/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
415	/// its documentation for more.
416	///
417	/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
418	/// [`char`]: ../../std/primitive.char.html
419	#[stable(feature = "rust1", since = "1.0.0")]
420	#[derive(Debug, Clone)]
421	pub struct ToUppercase(CaseMappingIter);
422
423	#[stable(feature = "rust1", since = "1.0.0")]
424	impl Iterator for ToUppercase {
425	type Item = char;
426	fn next(&mut self) -> Option<char> {
427	self.0.next()
428	}
429	}
430
431	#[stable(feature = "fused", since = "1.26.0")]
432	impl FusedIterator for ToUppercase {}
433
434	#[derive(Debug, Clone)]
435	enum CaseMappingIter {
436	Three(char, char, char),
437	Two(char, char),
438	One(char),
439	Zero,
440	}
441
442	impl CaseMappingIter {
443	fn new(chars: [char; 3]) -> CaseMappingIter {
444	if chars[2] == '\0' {
445	if chars[1] == '\0' {
446	CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
447	} else {
448	CaseMappingIter::Two(chars[0], chars[1])
449	}
450	} else {
451	CaseMappingIter::Three(chars[0], chars[1], chars[2])
452	}
453	}
454	}
455
456	impl Iterator for CaseMappingIter {
457	type Item = char;
458	fn next(&mut self) -> Option<char> {
459	match *self {
460	CaseMappingIter::Three(a, b, c) => {
461	*self = CaseMappingIter::Two(b, c);
462	Some(a)
463	}
464	CaseMappingIter::Two(b, c) => {
465	*self = CaseMappingIter::One(c);
466	Some(b)
467	}
468	CaseMappingIter::One(c) => {
469	*self = CaseMappingIter::Zero;
470	Some(c)
471	}
472	CaseMappingIter::Zero => None,
473	}
474	}
475	}
476
477	impl fmt::Display for CaseMappingIter {
478	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
479	match *self {
480	CaseMappingIter::Three(a, b, c) => {
481	f.write_char(a)?;
482	f.write_char(b)?;
483	f.write_char(c)
484	}
485	CaseMappingIter::Two(b, c) => {
486	f.write_char(b)?;
487	f.write_char(c)
488	}
489	CaseMappingIter::One(c) => {
490	f.write_char(c)
491	}
492	CaseMappingIter::Zero => Ok(()),
493	}
494	}
495	}
496
497	#[stable(feature = "char_struct_display", since = "1.16.0")]
498	impl fmt::Display for ToLowercase {
499	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
500	fmt::Display::fmt(&self.0, f)
501	}
502	}
503
504	#[stable(feature = "char_struct_display", since = "1.16.0")]
505	impl fmt::Display for ToUppercase {
506	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
507	fmt::Display::fmt(&self.0, f)
508	}
509	}