library/core/src/char/methods.rs

   1 //! impl char {}
   2
   3 use crate::slice;
   4 use crate::str::from_utf8_unchecked_mut;
   5 use crate::unicode::printable::is_printable;
   6 use crate::unicode::{self, conversions};
   7
   8 use super::*;
   9
  10 #[lang = "char"]
  11 impl char {
  12     /// The highest valid code point a `char` can have.
  13     ///
  14     /// A `char` is a [Unicode Scalar Value], which means that it is a [Code
  15     /// Point], but only ones within a certain range. `MAX` is the highest valid
  16     /// code point that's a valid [Unicode Scalar Value].
  17     ///
  18     /// [Unicode Scalar Value]: https://www.unicode.org/glossary/#unicode_scalar_value
  19     /// [Code Point]: https://www.unicode.org/glossary/#code_point
  20     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
  21     pub const MAX: char = '\u{10ffff}';
  22
  23     /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
  24     /// decoding error.
  25     ///
  26     /// It can occur, for example, when giving ill-formed UTF-8 bytes to
  27     /// [`String::from_utf8_lossy`](string/struct.String.html#method.from_utf8_lossy).
  28     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
  29     pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
  30
  31     /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
  32     /// `char` and `str` methods are based on.
  33     ///
  34     /// New versions of Unicode are released regularly and subsequently all methods
  35     /// in the standard library depending on Unicode are updated. Therefore the
  36     /// behavior of some `char` and `str` methods and the value of this constant
  37     /// changes over time. This is *not* considered to be a breaking change.
  38     ///
  39     /// The version numbering scheme is explained in
  40     /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
  41     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
  42     pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
  43
  44     /// Creates an iterator over the UTF-16 encoded code points in `iter`,
  45     /// returning unpaired surrogates as `Err`s.
  46     ///
  47     /// # Examples
  48     ///
  49     /// Basic usage:
  50     ///
  51     /// ```
  52     /// use std::char::decode_utf16;
  53     ///
  54     /// // 𝄞mus<invalid>ic<invalid>
  55     /// let v = [
  56     ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
  57     /// ];
  58     ///
  59     /// assert_eq!(
  60     ///     decode_utf16(v)
  61     ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
  62     ///         .collect::<Vec<_>>(),
  63     ///     vec![
  64     ///         Ok('𝄞'),
  65     ///         Ok('m'), Ok('u'), Ok('s'),
  66     ///         Err(0xDD1E),
  67     ///         Ok('i'), Ok('c'),
  68     ///         Err(0xD834)
  69     ///     ]
  70     /// );
  71     /// ```
  72     ///
  73     /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
  74     ///
  75     /// ```
  76     /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
  77     ///
  78     /// // 𝄞mus<invalid>ic<invalid>
  79     /// let v = [
  80     ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
  81     /// ];
  82     ///
  83     /// assert_eq!(
  84     ///     decode_utf16(v)
  85     ///        .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
  86     ///        .collect::<String>(),
  87     ///     "𝄞mus�ic�"
  88     /// );
  89     /// ```
  90     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
  91     #[inline]
  92     pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
             // Associated-function spelling of the free function in the sibling `decode`
             // module: `iter` is handed over untouched and the resulting `DecodeUtf16`
             // adapter is returned as-is.
  93         super::decode::decode_utf16(iter)
  94     }
  95
  96     /// Converts a `u32` to a `char`.
  97     ///
  98     /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
  99     /// `as`:
 100     ///
 101     /// ```
 102     /// let c = '💯';
 103     /// let i = c as u32;
 104     ///
 105     /// assert_eq!(128175, i);
 106     /// ```
 107     ///
 108     /// However, the reverse is not true: not all valid [`u32`]s are valid
 109     /// `char`s. `from_u32()` will return `None` if the input is not a valid value
 110     /// for a `char`.
 111     ///
 112     /// For an unsafe version of this function which ignores these checks, see
 113     /// [`from_u32_unchecked`].
 114     ///
 115     /// [`from_u32_unchecked`]: #method.from_u32_unchecked
 116     ///
 117     /// # Examples
 118     ///
 119     /// Basic usage:
 120     ///
 121     /// ```
 122     /// use std::char;
 123     ///
 124     /// let c = char::from_u32(0x2764);
 125     ///
 126     /// assert_eq!(Some('❤'), c);
 127     /// ```
 128     ///
 129     /// Returning `None` when the input is not a valid `char`:
 130     ///
 131     /// ```
 132     /// use std::char;
 133     ///
 134     /// let c = char::from_u32(0x110000);
 135     ///
 136     /// assert_eq!(None, c);
 137     /// ```
 138     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 139     #[inline]
 140     pub fn from_u32(i: u32) -> Option<char> {
             // No checking is done at this layer; deciding whether `i` is a valid `char`
             // is left entirely to `super::convert::from_u32`, whose result is returned.
 141         super::convert::from_u32(i)
 142     }
 143
 144     /// Converts a `u32` to a `char`, ignoring validity.
 145     ///
 146     /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
 147     /// `as`:
 148     ///
 149     /// ```
 150     /// let c = '💯';
 151     /// let i = c as u32;
 152     ///
 153     /// assert_eq!(128175, i);
 154     /// ```
 155     ///
 156     /// However, the reverse is not true: not all valid [`u32`]s are valid
 157     /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
 158     /// `char`, possibly creating an invalid one.
 159     ///
 160     /// # Safety
 161     ///
 162     /// This function is unsafe, as it may construct invalid `char` values.
 163     ///
 164     /// For a safe version of this function, see the [`from_u32`] function.
 165     ///
 166     /// [`from_u32`]: #method.from_u32
 167     ///
 168     /// # Examples
 169     ///
 170     /// Basic usage:
 171     ///
 172     /// ```
 173     /// use std::char;
 174     ///
 175     /// let c = unsafe { char::from_u32_unchecked(0x2764) };
 176     ///
 177     /// assert_eq!('❤', c);
 178     /// ```
 179     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 180     #[inline]
 181     pub unsafe fn from_u32_unchecked(i: u32) -> char {
             // `i` is passed straight through to `super::convert::from_u32_unchecked`;
             // this wrapper adds no validation of its own.
 182         // SAFETY: the safety contract must be upheld by the caller.
 183         unsafe { super::convert::from_u32_unchecked(i) }
 184     }
 185
 186     /// Converts a digit in the given radix to a `char`.
 187     ///
 188     /// A 'radix' here is sometimes also called a 'base'. A radix of two
 189     /// indicates a binary number, a radix of ten, decimal, and a radix of
 190     /// sixteen, hexadecimal, to give some common values. Arbitrary
 191     /// radices are supported.
 192     ///
 193     /// `from_digit()` will return `None` if the input is not a digit in
 194     /// the given radix.
 195     ///
 196     /// # Panics
 197     ///
 198     /// Panics if given a radix larger than 36.
 199     ///
 200     /// # Examples
 201     ///
 202     /// Basic usage:
 203     ///
 204     /// ```
 205     /// use std::char;
 206     ///
 207     /// let c = char::from_digit(4, 10);
 208     ///
 209     /// assert_eq!(Some('4'), c);
 210     ///
 211     /// // Decimal 11 is a single digit in base 16
 212     /// let c = char::from_digit(11, 16);
 213     ///
 214     /// assert_eq!(Some('b'), c);
 215     /// ```
 216     ///
 217     /// Returning `None` when the input is not a digit:
 218     ///
 219     /// ```
 220     /// use std::char;
 221     ///
 222     /// let c = char::from_digit(20, 10);
 223     ///
 224     /// assert_eq!(None, c);
 225     /// ```
 226     ///
 227     /// Passing a large radix, causing a panic:
 228     ///
 229     /// ```should_panic
 230     /// use std::char;
 231     ///
 232     /// // this panics
 233     /// char::from_digit(1, 37);
 234     /// ```
 235     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 236     #[inline]
 237     pub fn from_digit(num: u32, radix: u32) -> Option<char> {
             // Both arguments are forwarded unchanged. This wrapper contains no checks, so
             // any `None` result or radix panic originates in `super::convert::from_digit`.
 238         super::convert::from_digit(num, radix)
 239     }
 240
 241     /// Checks if a `char` is a digit in the given radix.
 242     ///
 243     /// A 'radix' here is sometimes also called a 'base'. A radix of two
 244     /// indicates a binary number, a radix of ten, decimal, and a radix of
 245     /// sixteen, hexadecimal, to give some common values. Arbitrary
 246     /// radices are supported.
 247     ///
 248     /// Compared to [`is_numeric()`], this function only recognizes the characters
 249     /// `0-9`, `a-z` and `A-Z`.
 250     ///
 251     /// 'Digit' is defined to be only the following characters:
 252     ///
 253     /// * `0-9`
 254     /// * `a-z`
 255     /// * `A-Z`
 256     ///
 257     /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
 258     ///
 259     /// [`is_numeric()`]: #method.is_numeric
 260     ///
 261     /// # Panics
 262     ///
 263     /// Panics if given a radix larger than 36.
 264     ///
 265     /// # Examples
 266     ///
 267     /// Basic usage:
 268     ///
 269     /// ```
 270     /// assert!('1'.is_digit(10));
 271     /// assert!('f'.is_digit(16));
 272     /// assert!(!'f'.is_digit(10));
 273     /// ```
 274     ///
 275     /// Passing a large radix, causing a panic:
 276     ///
 277     /// ```should_panic
 278     /// // this panics
 279     /// '1'.is_digit(37);
 280     /// ```
 281     #[stable(feature = "rust1", since = "1.0.0")]
 282     #[inline]
 283     pub fn is_digit(self, radix: u32) -> bool {
             // A character is a digit exactly when `to_digit` produces a value for it.
             // Because `to_digit` is called unconditionally, its radix assertion (and
             // therefore its panic for `radix > 36`) applies to this method as well.
 284         self.to_digit(radix).is_some()
 285     }
 286
 287     /// Converts a `char` to a digit in the given radix.
 288     ///
 289     /// A 'radix' here is sometimes also called a 'base'. A radix of two
 290     /// indicates a binary number, a radix of ten, decimal, and a radix of
 291     /// sixteen, hexadecimal, to give some common values. Arbitrary
 292     /// radices are supported.
 293     ///
 294     /// 'Digit' is defined to be only the following characters:
 295     ///
 296     /// * `0-9`
 297     /// * `a-z`
 298     /// * `A-Z`
 299     ///
 300     /// # Errors
 301     ///
 302     /// Returns `None` if the `char` does not refer to a digit in the given radix.
 303     ///
 304     /// # Panics
 305     ///
 306     /// Panics if given a radix larger than 36.
 307     ///
 308     /// # Examples
 309     ///
 310     /// Basic usage:
 311     ///
 312     /// ```
 313     /// assert_eq!('1'.to_digit(10), Some(1));
 314     /// assert_eq!('f'.to_digit(16), Some(15));
 315     /// ```
 316     ///
 317     /// Passing a non-digit results in failure:
 318     ///
 319     /// ```
 320     /// assert_eq!('f'.to_digit(10), None);
 321     /// assert_eq!('z'.to_digit(16), None);
 322     /// ```
 323     ///
 324     /// Passing a large radix, causing a panic:
 325     ///
 326     /// ```should_panic
 327     /// // this panics
 328     /// '1'.to_digit(37);
 329     /// ```
 330     #[stable(feature = "rust1", since = "1.0.0")]
 331     #[inline]
 332     pub fn to_digit(self, radix: u32) -> Option<u32> {
             // Reject unsupported radices up front; everything below relies on
             // `radix <= 36` (ten decimal digits plus twenty-six letters).
 333         assert!(radix <= 36, "to_digit: radix is too high (maximum 36)");
 334         // If not a digit, a number greater than radix will be created.
             // (`wrapping_sub` maps '0'..='9' to 0..=9; code points below '0' wrap around
             // to values near `u32::MAX`, and code points above '9' give values >= 10.)
 335         let mut digit = (self as u32).wrapping_sub('0' as u32);
             // Letters can only be digits when the radix exceeds 10.
 336         if radix > 10 {
                 // A decimal digit is valid in every radix above 10, so return it directly
                 // without the final `digit < radix` comparison.
 337             if digit < 10 {
 338                 return Some(digit);
 339             }
 340             // Force the 6th bit to be set to ensure ascii is lower case.
                 // After that, `wrapping_sub('a')` maps 'a'..='z' (and 'A'..='Z') to 0..=25
                 // and `+ 10` moves them to 10..=35. `saturating_add` keeps a wrapped-around
                 // value pinned at `u32::MAX` instead of overflowing back into the valid
                 // digit range.
 341             digit = (self as u32 | 0b10_0000).wrapping_sub('a' as u32).saturating_add(10);
 342         }
             // Any non-digit has been turned into a value >= `radix` by now, so a single
             // comparison decides between `Some(digit)` and `None`.
 343         (digit < radix).then_some(digit)
 344     }
 345
 346     /// Returns an iterator that yields the hexadecimal Unicode escape of a
 347     /// character as `char`s.
 348     ///
 349     /// This will escape characters with the Rust syntax of the form
 350     /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
 351     ///
 352     /// # Examples
 353     ///
 354     /// As an iterator:
 355     ///
 356     /// ```
 357     /// for c in '❤'.escape_unicode() {
 358     ///     print!("{}", c);
 359     /// }
 360     /// println!();
 361     /// ```
 362     ///
 363     /// Using `println!` directly:
 364     ///
 365     /// ```
 366     /// println!("{}", '❤'.escape_unicode());
 367     /// ```
 368     ///
 369     /// Both are equivalent to:
 370     ///
 371     /// ```
 372     /// println!("\\u{{2764}}");
 373     /// ```
 374     ///
 375     /// Using `to_string`:
 376     ///
 377     /// ```
 378     /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
 379     /// ```
 380     #[stable(feature = "rust1", since = "1.0.0")]
 381     #[inline]
 382     pub fn escape_unicode(self) -> EscapeUnicode {
             // Work on the numeric code point so that bit operations are available.
 383         let c = self as u32;
 384
 385         // or-ing 1 ensures that for c==0 the code computes that one
 386         // digit should be printed and (which is the same) avoids the
 387         // (31 - 32) underflow
             // `msb` is the zero-based position of the highest set bit of `c | 1`.
 388         let msb = 31 - (c | 1).leading_zeros();
 389
 390         // the index of the most significant hex digit
             // (each hex digit covers four bits, hence the division by 4)
 391         let ms_hex_digit = msb / 4;
             // The iterator starts in the `Backslash` state and records the index of the
             // most significant hex digit. NOTE(review): presumably the iterator counts
             // `hex_digit_idx` down as it emits digits; confirm in `EscapeUnicode`.
 392         EscapeUnicode {
 393             c: self,
 394             state: EscapeUnicodeState::Backslash,
 395             hex_digit_idx: ms_hex_digit as usize,
 396         }
 397     }
 398
 399     /// An extended version of `escape_debug` that optionally permits escaping
 400     /// Extended Grapheme codepoints, single quotes, and double quotes. This
 401     /// allows us to format characters like nonspacing marks better when they're
 402     /// at the start of a string, and allows escaping single quotes in
 403     /// characters, and double quotes in strings.
 404     #[inline]
 405     pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
             // Picks the initial iterator state for `self`. Match arms are tried top to
             // bottom, so the order below is the escape priority.
 406         let init_state = match self {
                 // `Backslash(ch)` is the state for a two-character escape: a backslash
                 // followed by `ch` (compare the `\t` / `\\` rules documented on
                 // `escape_default`). Tab, CR and LF use their short escape letters.
 407             '\t' => EscapeDefaultState::Backslash('t'),
 408             '\r' => EscapeDefaultState::Backslash('r'),
 409             '\n' => EscapeDefaultState::Backslash('n'),
                 // A backslash always takes the `Backslash` state with itself as payload.
 410             '\\' => EscapeDefaultState::Backslash(self),
                 // Quotes take the `Backslash` state only when the matching flag in `args`
                 // is set; otherwise the guard fails and they fall through to the arms below.
 411             '"' if args.escape_double_quote => EscapeDefaultState::Backslash(self),
 412             '\'' if args.escape_single_quote => EscapeDefaultState::Backslash(self),
                 // When requested, grapheme-extending characters get a Unicode escape; this
                 // arm takes priority over the printable check that follows.
 413             _ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
 414                 EscapeDefaultState::Unicode(self.escape_unicode())
 415             }
                 // Characters accepted by `is_printable` are kept as a plain `Char` state.
 416             _ if is_printable(self) => EscapeDefaultState::Char(self),
                 // Everything else falls back to a Unicode escape.
 417             _ => EscapeDefaultState::Unicode(self.escape_unicode()),
 418         };
             // `EscapeDebug` is a wrapper around an `EscapeDefault` iterator that starts in
             // the state chosen above.
 419         EscapeDebug(EscapeDefault { state: init_state })
 420     }
 421
 422     /// Returns an iterator that yields the literal escape code of a character
 423     /// as `char`s.
 424     ///
 425     /// This will escape the characters similar to the `Debug` implementations
 426     /// of `str` or `char`.
 427     ///
 428     /// # Examples
 429     ///
 430     /// As an iterator:
 431     ///
 432     /// ```
 433     /// for c in '\n'.escape_debug() {
 434     ///     print!("{}", c);
 435     /// }
 436     /// println!();
 437     /// ```
 438     ///
 439     /// Using `println!` directly:
 440     ///
 441     /// ```
 442     /// println!("{}", '\n'.escape_debug());
 443     /// ```
 444     ///
 445     /// Both are equivalent to:
 446     ///
 447     /// ```
 448     /// println!("\\n");
 449     /// ```
 450     ///
 451     /// Using `to_string`:
 452     ///
 453     /// ```
 454     /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
 455     /// ```
 456     #[stable(feature = "char_escape_debug", since = "1.20.0")]
 457     #[inline]
 458     pub fn escape_debug(self) -> EscapeDebug {
             // Public entry point: delegates to `escape_debug_ext` with the `ESCAPE_ALL`
             // preset. NOTE(review): presumably that preset enables every optional escape
             // (grapheme-extended, single quote, double quote); confirm at its definition.
 459         self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
 460     }
 461
 462     /// Returns an iterator that yields the literal escape code of a character
 463     /// as `char`s.
 464     ///
 465     /// The default is chosen with a bias toward producing literals that are
 466     /// legal in a variety of languages, including C++11 and similar C-family
 467     /// languages. The exact rules are:
 468     ///
 469     /// * Tab is escaped as `\t`.
 470     /// * Carriage return is escaped as `\r`.
 471     /// * Line feed is escaped as `\n`.
 472     /// * Single quote is escaped as `\'`.
 473     /// * Double quote is escaped as `\"`.
 474     /// * Backslash is escaped as `\\`.
 475     /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
 476     ///   inclusive is not escaped.
 477     /// * All other characters are given hexadecimal Unicode escapes; see
 478     ///   [`escape_unicode`].
 479     ///
 480     /// [`escape_unicode`]: #method.escape_unicode
 481     ///
 482     /// # Examples
 483     ///
 484     /// As an iterator:
 485     ///
 486     /// ```
 487     /// for c in '"'.escape_default() {
 488     ///     print!("{}", c);
 489     /// }
 490     /// println!();
 491     /// ```
 492     ///
 493     /// Using `println!` directly:
 494     ///
 495     /// ```
 496     /// println!("{}", '"'.escape_default());
 497     /// ```
 498     ///
 499     /// Both are equivalent to:
 500     ///
 501     /// ```
 502     /// println!("\\\"");
 503     /// ```
 504     ///
 505     /// Using `to_string`:
 506     ///
 507     /// ```
 508     /// assert_eq!('"'.escape_default().to_string(), "\\\"");
 509     /// ```
 510     #[stable(feature = "rust1", since = "1.0.0")]
 511     #[inline]
 512     pub fn escape_default(self) -> EscapeDefault {
             // Picks the initial iterator state; arms are tried in order, so the special
             // cases must stay above the printable-ASCII range that also contains them.
 513         let init_state = match self {
                 // Tab, CR and LF are escaped with their short escape letters.
 514             '\t' => EscapeDefaultState::Backslash('t'),
 515             '\r' => EscapeDefaultState::Backslash('r'),
 516             '\n' => EscapeDefaultState::Backslash('n'),
                 // Backslash and both quote characters are escaped unconditionally, each
                 // as a backslash followed by the character itself.
 517             '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
                 // Remaining printable ASCII (0x20 through 0x7e inclusive) is not escaped.
 518             '\x20'..='\x7e' => EscapeDefaultState::Char(self),
                 // Every other character, including all non-ASCII, gets a Unicode escape.
 519             _ => EscapeDefaultState::Unicode(self.escape_unicode()),
 520         };
 521         EscapeDefault { state: init_state }
 522     }
 523
 524     /// Returns the number of bytes this `char` would need if encoded in UTF-8.
 525     ///
 526     /// That number of bytes is always between 1 and 4, inclusive.
 527     ///
 528     /// # Examples
 529     ///
 530     /// Basic usage:
 531     ///
 532     /// ```
 533     /// let len = 'A'.len_utf8();
 534     /// assert_eq!(len, 1);
 535     ///
 536     /// let len = 'ß'.len_utf8();
 537     /// assert_eq!(len, 2);
 538     ///
 539     /// let len = 'ℝ'.len_utf8();
 540     /// assert_eq!(len, 3);
 541     ///
 542     /// let len = '💣'.len_utf8();
 543     /// assert_eq!(len, 4);
 544     /// ```
 545     ///
 546     /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
 547     /// would take if each code point was represented as a `char` vs in the `&str` itself:
 548     ///
 549     /// ```
 550     /// // as chars
 551     /// let eastern = '東';
 552     /// let capital = '京';
 553     ///
 554     /// // both can be represented as three bytes
 555     /// assert_eq!(3, eastern.len_utf8());
 556     /// assert_eq!(3, capital.len_utf8());
 557     ///
 558     /// // as a &str, these two are encoded in UTF-8
 559     /// let tokyo = "東京";
 560     ///
 561     /// let len = eastern.len_utf8() + capital.len_utf8();
 562     ///
 563     /// // we can see that they take six bytes total...
 564     /// assert_eq!(6, tokyo.len());
 565     ///
 566     /// // ... just like the &str
 567     /// assert_eq!(len, tokyo.len());
 568     /// ```
 569     #[stable(feature = "rust1", since = "1.0.0")]
 570     #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
 571     #[inline]
 572     pub const fn len_utf8(self) -> usize {
             // The unqualified call names a free function that takes the raw code point,
             // not this method (which is only reachable as `char::len_utf8` or through
             // method-call syntax). NOTE(review): presumably it is brought into scope by
             // `use super::*`; confirm.
 573         len_utf8(self as u32)
 574     }
 575
 576     /// Returns the number of 16-bit code units this `char` would need if
 577     /// encoded in UTF-16.
 578     ///
 579     /// See the documentation for [`len_utf8()`] for more explanation of this
 580     /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
 581     ///
 582     /// [`len_utf8()`]: #method.len_utf8
 583     ///
 584     /// # Examples
 585     ///
 586     /// Basic usage:
 587     ///
 588     /// ```
 589     /// let n = 'ß'.len_utf16();
 590     /// assert_eq!(n, 1);
 591     ///
 592     /// let len = '💣'.len_utf16();
 593     /// assert_eq!(len, 2);
 594     /// ```
 595     #[stable(feature = "rust1", since = "1.0.0")]
 596     #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
 597     #[inline]
 598     pub const fn len_utf16(self) -> usize {
 599         let ch = self as u32;
             // Masking with 0xFFFF leaves `ch` unchanged only when no bit above the low 16
             // is set, i.e. `ch <= 0xFFFF`. Those code points need one 16-bit unit; all
             // others need two.
 600         if (ch & 0xFFFF) == ch { 1 } else { 2 }
 601     }
 602
 603     /// Encodes this character as UTF-8 into the provided byte buffer,
 604     /// and then returns the subslice of the buffer that contains the encoded character.
 605     ///
 606     /// # Panics
 607     ///
 608     /// Panics if the buffer is not large enough.
 609     /// A buffer of length four is large enough to encode any `char`.
 610     ///
 611     /// # Examples
 612     ///
 613     /// In both of these examples, 'ß' takes two bytes to encode.
 614     ///
 615     /// ```
 616     /// let mut b = [0; 2];
 617     ///
 618     /// let result = 'ß'.encode_utf8(&mut b);
 619     ///
 620     /// assert_eq!(result, "ß");
 621     ///
 622     /// assert_eq!(result.len(), 2);
 623     /// ```
 624     ///
 625     /// A buffer that's too small:
 626     ///
 627     /// ```should_panic
 628     /// let mut b = [0; 1];
 629     ///
 630     /// // this panics
 631     /// 'ß'.encode_utf8(&mut b);
 632     /// ```
 633     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
 634     #[inline]
 635     pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
             // `encode_utf8_raw` receives the code point and the caller's buffer; the byte
             // slice it returns is reinterpreted as `&mut str` without being re-validated.
 636         // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
 637         unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
 638     }
 639
 640     /// Encodes this character as UTF-16 into the provided `u16` buffer,
 641     /// and then returns the subslice of the buffer that contains the encoded character.
 642     ///
 643     /// # Panics
 644     ///
 645     /// Panics if the buffer is not large enough.
 646     /// A buffer of length 2 is large enough to encode any `char`.
 647     ///
 648     /// # Examples
 649     ///
 650     /// In both of these examples, '𝕊' takes two `u16`s to encode.
 651     ///
 652     /// ```
 653     /// let mut b = [0; 2];
 654     ///
 655     /// let result = '𝕊'.encode_utf16(&mut b);
 656     ///
 657     /// assert_eq!(result.len(), 2);
 658     /// ```
 659     ///
 660     /// A buffer that's too small:
 661     ///
 662     /// ```should_panic
 663     /// let mut b = [0; 1];
 664     ///
 665     /// // this panics
 666     /// '𝕊'.encode_utf16(&mut b);
 667     /// ```
 668     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
 669     #[inline]
 670     pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
             // Forwards the code point and the caller's buffer to `encode_utf16_raw` and
             // returns the slice it hands back; no `unsafe` is needed because the result
             // stays a plain `u16` slice.
 671         encode_utf16_raw(self as u32, dst)
 672     }
 673
 674     /// Returns `true` if this `char` has the `Alphabetic` property.
 675     ///
 676     /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
 677     /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
 678     ///
 679     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 680     /// [ucd]: https://www.unicode.org/reports/tr44/
 681     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 682     ///
 683     /// # Examples
 684     ///
 685     /// Basic usage:
 686     ///
 687     /// ```
 688     /// assert!('a'.is_alphabetic());
 689     /// assert!('京'.is_alphabetic());
 690     ///
 691     /// let c = '💝';
 692     /// // love is many things, but it is not alphabetic
 693     /// assert!(!c.is_alphabetic());
 694     /// ```
 695     #[stable(feature = "rust1", since = "1.0.0")]
 696     #[inline]
 697     pub fn is_alphabetic(self) -> bool {
 698         match self {
                 // ASCII letters are answered directly by the range patterns.
 699             'a'..='z' | 'A'..='Z' => true,
                 // `&&` short-circuits, so any remaining ASCII character yields `false`
                 // without calling `unicode::Alphabetic`; only non-ASCII reaches the lookup.
 700             c => c > '\x7f' && unicode::Alphabetic(c),
 701         }
 702     }
 703
 704     /// Returns `true` if this `char` has the `Lowercase` property.
 705     ///
 706     /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
 707     /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
 708     ///
 709     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 710     /// [ucd]: https://www.unicode.org/reports/tr44/
 711     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 712     ///
 713     /// # Examples
 714     ///
 715     /// Basic usage:
 716     ///
 717     /// ```
 718     /// assert!('a'.is_lowercase());
 719     /// assert!('δ'.is_lowercase());
 720     /// assert!(!'A'.is_lowercase());
 721     /// assert!(!'Δ'.is_lowercase());
 722     ///
 723     /// // The various Chinese scripts and punctuation do not have case, and so:
 724     /// assert!(!'中'.is_lowercase());
 725     /// assert!(!' '.is_lowercase());
 726     /// ```
 727     #[stable(feature = "rust1", since = "1.0.0")]
 728     #[inline]
 729     pub fn is_lowercase(self) -> bool {
 730         match self {
                 // ASCII lowercase letters are answered directly by the range pattern.
 731             'a'..='z' => true,
                 // `&&` short-circuits, so any remaining ASCII character yields `false`
                 // without calling `unicode::Lowercase`; only non-ASCII reaches the lookup.
 732             c => c > '\x7f' && unicode::Lowercase(c),
 733         }
 734     }
 735
 736     /// Returns `true` if this `char` has the `Uppercase` property.
 737     ///
 738     /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
 739     /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
 740     ///
 741     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 742     /// [ucd]: https://www.unicode.org/reports/tr44/
 743     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 744     ///
 745     /// # Examples
 746     ///
 747     /// Basic usage:
 748     ///
 749     /// ```
 750     /// assert!(!'a'.is_uppercase());
 751     /// assert!(!'δ'.is_uppercase());
 752     /// assert!('A'.is_uppercase());
 753     /// assert!('Δ'.is_uppercase());
 754     ///
 755     /// // The various Chinese scripts and punctuation do not have case, and so:
 756     /// assert!(!'中'.is_uppercase());
 757     /// assert!(!' '.is_uppercase());
 758     /// ```
 759     #[stable(feature = "rust1", since = "1.0.0")]
 760     #[inline]
 761     pub fn is_uppercase(self) -> bool {
 762         match self {
                 // ASCII uppercase letters are answered directly by the range pattern.
 763             'A'..='Z' => true,
                 // `&&` short-circuits, so any remaining ASCII character yields `false`
                 // without calling `unicode::Uppercase`; only non-ASCII reaches the lookup.
 764             c => c > '\x7f' && unicode::Uppercase(c),
 765         }
 766     }
 767
 768     /// Returns `true` if this `char` has the `White_Space` property.
 769     ///
 770     /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
 771     ///
 772     /// [ucd]: https://www.unicode.org/reports/tr44/
 773     /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
 774     ///
 775     /// # Examples
 776     ///
 777     /// Basic usage:
 778     ///
 779     /// ```
 780     /// assert!(' '.is_whitespace());
 781     ///
 782     /// // a non-breaking space
 783     /// assert!('\u{A0}'.is_whitespace());
 784     ///
 785     /// assert!(!'越'.is_whitespace());
 786     /// ```
 787     #[stable(feature = "rust1", since = "1.0.0")]
 788     #[inline]
 789     pub fn is_whitespace(self) -> bool {
 790         match self {
                 // ASCII fast path: the space character plus U+0009 through U+000D
                 // (TAB, LF, VT, FF and CR).
 791             ' ' | '\x09'..='\x0d' => true,
                 // `&&` short-circuits, so any remaining ASCII character yields `false`
                 // without calling `unicode::White_Space`; only non-ASCII reaches the lookup.
 792             c => c > '\x7f' && unicode::White_Space(c),
 793         }
 794     }
 795
 796     /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
 797     ///
 798     /// [`is_alphabetic()`]: #method.is_alphabetic
 799     /// [`is_numeric()`]: #method.is_numeric
 800     ///
 801     /// # Examples
 802     ///
 803     /// Basic usage:
 804     ///
 805     /// ```
 806     /// assert!('٣'.is_alphanumeric());
 807     /// assert!('7'.is_alphanumeric());
 808     /// assert!('৬'.is_alphanumeric());
 809     /// assert!('¾'.is_alphanumeric());
 810     /// assert!('①'.is_alphanumeric());
 811     /// assert!('K'.is_alphanumeric());
 812     /// assert!('و'.is_alphanumeric());
 813     /// assert!('藏'.is_alphanumeric());
 814     /// ```
 815     #[stable(feature = "rust1", since = "1.0.0")]
 816     #[inline]
 817     pub fn is_alphanumeric(self) -> bool {
             // Plain disjunction of the two predicates; `||` short-circuits, so
             // `is_numeric` is only evaluated when `is_alphabetic` returned `false`.
 818         self.is_alphabetic() || self.is_numeric()
 819     }
 820
 821     /// Returns `true` if this `char` has the general category for control codes.
 822     ///
 823     /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
 824     /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
 825     /// Database][ucd] [`UnicodeData.txt`].
 826     ///
 827     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 828     /// [ucd]: https://www.unicode.org/reports/tr44/
 829     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 830     ///
 831     /// # Examples
 832     ///
 833     /// Basic usage:
 834     ///
 835     /// ```
 836     /// // U+009C, STRING TERMINATOR
 837     /// assert!('\u{009c}'.is_control());
 838     /// assert!(!'q'.is_control());
 839     /// ```
 840     #[stable(feature = "rust1", since = "1.0.0")]
 841     #[inline]
 842     pub fn is_control(self) -> bool {
             // No ASCII fast path here: every character, ASCII or not, is answered by
             // `unicode::Cc`.
 843         unicode::Cc(self)
 844     }
 845
 846     /// Returns `true` if this `char` has the `Grapheme_Extend` property.
 847     ///
 848     /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
 849     /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
 850     /// [`DerivedCoreProperties.txt`].
 851     ///
 852     /// [uax29]: https://www.unicode.org/reports/tr29/
 853     /// [ucd]: https://www.unicode.org/reports/tr44/
 854     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 855     #[inline]
 856     pub(crate) fn is_grapheme_extended(self) -> bool {
             // Crate-internal helper (used by `escape_debug_ext`); the answer comes
             // straight from `unicode::Grapheme_Extend` with no ASCII fast path.
 857         unicode::Grapheme_Extend(self)
 858     }
 859
 860     /// Returns `true` if this `char` has one of the general categories for numbers.
 861     ///
 862     /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
 863     /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
 864     /// Database][ucd] [`UnicodeData.txt`].
 865     ///
 866     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 867     /// [ucd]: https://www.unicode.org/reports/tr44/
 868     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 869     ///
 870     /// # Examples
 871     ///
 872     /// Basic usage:
 873     ///
 874     /// ```
 875     /// assert!('٣'.is_numeric());
 876     /// assert!('7'.is_numeric());
 877     /// assert!('৬'.is_numeric());
 878     /// assert!('¾'.is_numeric());
 879     /// assert!('①'.is_numeric());
 880     /// assert!(!'K'.is_numeric());
 881     /// assert!(!'و'.is_numeric());
 882     /// assert!(!'藏'.is_numeric());
 883     /// ```
 884     #[stable(feature = "rust1", since = "1.0.0")]
 885     #[inline]
 886     pub fn is_numeric(self) -> bool {
 887         match self {
 888             '0'..='9' => true,
 889             c => c > '\x7f' && unicode::N(c),
 890         }
 891     }
 892
 893     /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
 894     /// `char`s.
 895     ///
 896     /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
 897     ///
 898     /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
 899     /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
 900     ///
 901     /// [ucd]: https://www.unicode.org/reports/tr44/
 902     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 903     ///
 904     /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
 905     /// the `char`(s) given by [`SpecialCasing.txt`].
 906     ///
 907     /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
 908     ///
 909     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
 910     /// is independent of context and language.
 911     ///
 912     /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
 913     /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
 914     ///
 915     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 916     ///
 917     /// # Examples
 918     ///
 919     /// As an iterator:
 920     ///
 921     /// ```
 922     /// for c in 'İ'.to_lowercase() {
 923     ///     print!("{}", c);
 924     /// }
 925     /// println!();
 926     /// ```
 927     ///
 928     /// Using `println!` directly:
 929     ///
 930     /// ```
 931     /// println!("{}", 'İ'.to_lowercase());
 932     /// ```
 933     ///
 934     /// Both are equivalent to:
 935     ///
 936     /// ```
 937     /// println!("i\u{307}");
 938     /// ```
 939     ///
 940     /// Using `to_string`:
 941     ///
 942     /// ```
 943     /// assert_eq!('C'.to_lowercase().to_string(), "c");
 944     ///
 945     /// // Sometimes the result is more than one character:
 946     /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
 947     ///
 948     /// // Characters that do not have both uppercase and lowercase
 949     /// // convert into themselves.
 950     /// assert_eq!('山'.to_lowercase().to_string(), "山");
 951     /// ```
 952     #[stable(feature = "rust1", since = "1.0.0")]
 953     #[inline]
 954     pub fn to_lowercase(self) -> ToLowercase {
 955         ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
 956     }
 957
 958     /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
 959     /// `char`s.
 960     ///
 961     /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
 962     ///
 963     /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
 964     /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
 965     ///
 966     /// [ucd]: https://www.unicode.org/reports/tr44/
 967     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 968     ///
 969     /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
 970     /// the `char`(s) given by [`SpecialCasing.txt`].
 971     ///
 972     /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
 973     ///
 974     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
 975     /// is independent of context and language.
 976     ///
 977     /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
 978     /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
 979     ///
 980     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 981     ///
 982     /// # Examples
 983     ///
 984     /// As an iterator:
 985     ///
 986     /// ```
 987     /// for c in 'ß'.to_uppercase() {
 988     ///     print!("{}", c);
 989     /// }
 990     /// println!();
 991     /// ```
 992     ///
 993     /// Using `println!` directly:
 994     ///
 995     /// ```
 996     /// println!("{}", 'ß'.to_uppercase());
 997     /// ```
 998     ///
 999     /// Both are equivalent to:
1000     ///
1001     /// ```
1002     /// println!("SS");
1003     /// ```
1004     ///
1005     /// Using `to_string`:
1006     ///
1007     /// ```
1008     /// assert_eq!('c'.to_uppercase().to_string(), "C");
1009     ///
1010     /// // Sometimes the result is more than one character:
1011     /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
1012     ///
1013     /// // Characters that do not have both uppercase and lowercase
1014     /// // convert into themselves.
1015     /// assert_eq!('山'.to_uppercase().to_string(), "山");
1016     /// ```
1017     ///
1018     /// # Note on locale
1019     ///
1020     /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1021     ///
1022     /// * 'Dotless': I / ı, sometimes written ï
1023     /// * 'Dotted': İ / i
1024     ///
1025     /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1026     ///
1027     /// ```
1028     /// let upper_i = 'i'.to_uppercase().to_string();
1029     /// ```
1030     ///
1031     /// The value of `upper_i` here relies on the language of the text: if we're
    /// in `en-US`, it should be `"I"`, but if we're in `tr-TR`, it should
1033     /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1034     ///
1035     /// ```
1036     /// let upper_i = 'i'.to_uppercase().to_string();
1037     ///
1038     /// assert_eq!(upper_i, "I");
1039     /// ```
1040     ///
1041     /// holds across languages.
1042     #[stable(feature = "rust1", since = "1.0.0")]
1043     #[inline]
1044     pub fn to_uppercase(self) -> ToUppercase {
1045         ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1046     }
1047
1048     /// Checks if the value is within the ASCII range.
1049     ///
1050     /// # Examples
1051     ///
1052     /// ```
1053     /// let ascii = 'a';
1054     /// let non_ascii = '❤';
1055     ///
1056     /// assert!(ascii.is_ascii());
1057     /// assert!(!non_ascii.is_ascii());
1058     /// ```
1059     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1060     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.32.0")]
1061     #[inline]
1062     pub const fn is_ascii(&self) -> bool {
1063         *self as u32 <= 0x7F
1064     }
1065
1066     /// Makes a copy of the value in its ASCII upper case equivalent.
1067     ///
1068     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1069     /// but non-ASCII letters are unchanged.
1070     ///
1071     /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1072     ///
1073     /// To uppercase ASCII characters in addition to non-ASCII characters, use
1074     /// [`to_uppercase()`].
1075     ///
1076     /// # Examples
1077     ///
1078     /// ```
1079     /// let ascii = 'a';
1080     /// let non_ascii = '❤';
1081     ///
1082     /// assert_eq!('A', ascii.to_ascii_uppercase());
1083     /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1084     /// ```
1085     ///
1086     /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1087     /// [`to_uppercase()`]: #method.to_uppercase
1088     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1089     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1090     #[inline]
1091     pub const fn to_ascii_uppercase(&self) -> char {
1092         if self.is_ascii_lowercase() {
1093             (*self as u8).ascii_change_case_unchecked() as char
1094         } else {
1095             *self
1096         }
1097     }
1098
1099     /// Makes a copy of the value in its ASCII lower case equivalent.
1100     ///
1101     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1102     /// but non-ASCII letters are unchanged.
1103     ///
1104     /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1105     ///
1106     /// To lowercase ASCII characters in addition to non-ASCII characters, use
1107     /// [`to_lowercase()`].
1108     ///
1109     /// # Examples
1110     ///
1111     /// ```
1112     /// let ascii = 'A';
1113     /// let non_ascii = '❤';
1114     ///
1115     /// assert_eq!('a', ascii.to_ascii_lowercase());
1116     /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1117     /// ```
1118     ///
1119     /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1120     /// [`to_lowercase()`]: #method.to_lowercase
1121     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1122     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1123     #[inline]
1124     pub const fn to_ascii_lowercase(&self) -> char {
1125         if self.is_ascii_uppercase() {
1126             (*self as u8).ascii_change_case_unchecked() as char
1127         } else {
1128             *self
1129         }
1130     }
1131
1132     /// Checks that two values are an ASCII case-insensitive match.
1133     ///
1134     /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
1135     ///
1136     /// # Examples
1137     ///
1138     /// ```
1139     /// let upper_a = 'A';
1140     /// let lower_a = 'a';
1141     /// let lower_z = 'z';
1142     ///
1143     /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1144     /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1145     /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1146     /// ```
1147     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1148     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1149     #[inline]
1150     pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1151         self.to_ascii_lowercase() == other.to_ascii_lowercase()
1152     }
1153
1154     /// Converts this type to its ASCII upper case equivalent in-place.
1155     ///
1156     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1157     /// but non-ASCII letters are unchanged.
1158     ///
1159     /// To return a new uppercased value without modifying the existing one, use
1160     /// [`to_ascii_uppercase()`].
1161     ///
1162     /// # Examples
1163     ///
1164     /// ```
1165     /// let mut ascii = 'a';
1166     ///
1167     /// ascii.make_ascii_uppercase();
1168     ///
1169     /// assert_eq!('A', ascii);
1170     /// ```
1171     ///
1172     /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1173     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1174     #[inline]
1175     pub fn make_ascii_uppercase(&mut self) {
1176         *self = self.to_ascii_uppercase();
1177     }
1178
1179     /// Converts this type to its ASCII lower case equivalent in-place.
1180     ///
1181     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1182     /// but non-ASCII letters are unchanged.
1183     ///
1184     /// To return a new lowercased value without modifying the existing one, use
1185     /// [`to_ascii_lowercase()`].
1186     ///
1187     /// # Examples
1188     ///
1189     /// ```
1190     /// let mut ascii = 'A';
1191     ///
1192     /// ascii.make_ascii_lowercase();
1193     ///
1194     /// assert_eq!('a', ascii);
1195     /// ```
1196     ///
1197     /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
1198     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1199     #[inline]
1200     pub fn make_ascii_lowercase(&mut self) {
1201         *self = self.to_ascii_lowercase();
1202     }
1203
1204     /// Checks if the value is an ASCII alphabetic character:
1205     ///
1206     /// - U+0041 'A' ..= U+005A 'Z', or
1207     /// - U+0061 'a' ..= U+007A 'z'.
1208     ///
1209     /// # Examples
1210     ///
1211     /// ```
1212     /// let uppercase_a = 'A';
1213     /// let uppercase_g = 'G';
1214     /// let a = 'a';
1215     /// let g = 'g';
1216     /// let zero = '0';
1217     /// let percent = '%';
1218     /// let space = ' ';
1219     /// let lf = '\n';
1220     /// let esc: char = 0x1b_u8.into();
1221     ///
1222     /// assert!(uppercase_a.is_ascii_alphabetic());
1223     /// assert!(uppercase_g.is_ascii_alphabetic());
1224     /// assert!(a.is_ascii_alphabetic());
1225     /// assert!(g.is_ascii_alphabetic());
1226     /// assert!(!zero.is_ascii_alphabetic());
1227     /// assert!(!percent.is_ascii_alphabetic());
1228     /// assert!(!space.is_ascii_alphabetic());
1229     /// assert!(!lf.is_ascii_alphabetic());
1230     /// assert!(!esc.is_ascii_alphabetic());
1231     /// ```
1232     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1233     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1234     #[inline]
1235     pub const fn is_ascii_alphabetic(&self) -> bool {
1236         matches!(*self, 'A'..='Z' | 'a'..='z')
1237     }
1238
1239     /// Checks if the value is an ASCII uppercase character:
1240     /// U+0041 'A' ..= U+005A 'Z'.
1241     ///
1242     /// # Examples
1243     ///
1244     /// ```
1245     /// let uppercase_a = 'A';
1246     /// let uppercase_g = 'G';
1247     /// let a = 'a';
1248     /// let g = 'g';
1249     /// let zero = '0';
1250     /// let percent = '%';
1251     /// let space = ' ';
1252     /// let lf = '\n';
1253     /// let esc: char = 0x1b_u8.into();
1254     ///
1255     /// assert!(uppercase_a.is_ascii_uppercase());
1256     /// assert!(uppercase_g.is_ascii_uppercase());
1257     /// assert!(!a.is_ascii_uppercase());
1258     /// assert!(!g.is_ascii_uppercase());
1259     /// assert!(!zero.is_ascii_uppercase());
1260     /// assert!(!percent.is_ascii_uppercase());
1261     /// assert!(!space.is_ascii_uppercase());
1262     /// assert!(!lf.is_ascii_uppercase());
1263     /// assert!(!esc.is_ascii_uppercase());
1264     /// ```
1265     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1266     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1267     #[inline]
1268     pub const fn is_ascii_uppercase(&self) -> bool {
1269         matches!(*self, 'A'..='Z')
1270     }
1271
1272     /// Checks if the value is an ASCII lowercase character:
1273     /// U+0061 'a' ..= U+007A 'z'.
1274     ///
1275     /// # Examples
1276     ///
1277     /// ```
1278     /// let uppercase_a = 'A';
1279     /// let uppercase_g = 'G';
1280     /// let a = 'a';
1281     /// let g = 'g';
1282     /// let zero = '0';
1283     /// let percent = '%';
1284     /// let space = ' ';
1285     /// let lf = '\n';
1286     /// let esc: char = 0x1b_u8.into();
1287     ///
1288     /// assert!(!uppercase_a.is_ascii_lowercase());
1289     /// assert!(!uppercase_g.is_ascii_lowercase());
1290     /// assert!(a.is_ascii_lowercase());
1291     /// assert!(g.is_ascii_lowercase());
1292     /// assert!(!zero.is_ascii_lowercase());
1293     /// assert!(!percent.is_ascii_lowercase());
1294     /// assert!(!space.is_ascii_lowercase());
1295     /// assert!(!lf.is_ascii_lowercase());
1296     /// assert!(!esc.is_ascii_lowercase());
1297     /// ```
1298     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1299     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1300     #[inline]
1301     pub const fn is_ascii_lowercase(&self) -> bool {
1302         matches!(*self, 'a'..='z')
1303     }
1304
1305     /// Checks if the value is an ASCII alphanumeric character:
1306     ///
1307     /// - U+0041 'A' ..= U+005A 'Z', or
1308     /// - U+0061 'a' ..= U+007A 'z', or
1309     /// - U+0030 '0' ..= U+0039 '9'.
1310     ///
1311     /// # Examples
1312     ///
1313     /// ```
1314     /// let uppercase_a = 'A';
1315     /// let uppercase_g = 'G';
1316     /// let a = 'a';
1317     /// let g = 'g';
1318     /// let zero = '0';
1319     /// let percent = '%';
1320     /// let space = ' ';
1321     /// let lf = '\n';
1322     /// let esc: char = 0x1b_u8.into();
1323     ///
1324     /// assert!(uppercase_a.is_ascii_alphanumeric());
1325     /// assert!(uppercase_g.is_ascii_alphanumeric());
1326     /// assert!(a.is_ascii_alphanumeric());
1327     /// assert!(g.is_ascii_alphanumeric());
1328     /// assert!(zero.is_ascii_alphanumeric());
1329     /// assert!(!percent.is_ascii_alphanumeric());
1330     /// assert!(!space.is_ascii_alphanumeric());
1331     /// assert!(!lf.is_ascii_alphanumeric());
1332     /// assert!(!esc.is_ascii_alphanumeric());
1333     /// ```
1334     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1335     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1336     #[inline]
1337     pub const fn is_ascii_alphanumeric(&self) -> bool {
1338         matches!(*self, '0'..='9' | 'A'..='Z' | 'a'..='z')
1339     }
1340
1341     /// Checks if the value is an ASCII decimal digit:
1342     /// U+0030 '0' ..= U+0039 '9'.
1343     ///
1344     /// # Examples
1345     ///
1346     /// ```
1347     /// let uppercase_a = 'A';
1348     /// let uppercase_g = 'G';
1349     /// let a = 'a';
1350     /// let g = 'g';
1351     /// let zero = '0';
1352     /// let percent = '%';
1353     /// let space = ' ';
1354     /// let lf = '\n';
1355     /// let esc: char = 0x1b_u8.into();
1356     ///
1357     /// assert!(!uppercase_a.is_ascii_digit());
1358     /// assert!(!uppercase_g.is_ascii_digit());
1359     /// assert!(!a.is_ascii_digit());
1360     /// assert!(!g.is_ascii_digit());
1361     /// assert!(zero.is_ascii_digit());
1362     /// assert!(!percent.is_ascii_digit());
1363     /// assert!(!space.is_ascii_digit());
1364     /// assert!(!lf.is_ascii_digit());
1365     /// assert!(!esc.is_ascii_digit());
1366     /// ```
1367     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1368     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1369     #[inline]
1370     pub const fn is_ascii_digit(&self) -> bool {
1371         matches!(*self, '0'..='9')
1372     }
1373
1374     /// Checks if the value is an ASCII hexadecimal digit:
1375     ///
1376     /// - U+0030 '0' ..= U+0039 '9', or
1377     /// - U+0041 'A' ..= U+0046 'F', or
1378     /// - U+0061 'a' ..= U+0066 'f'.
1379     ///
1380     /// # Examples
1381     ///
1382     /// ```
1383     /// let uppercase_a = 'A';
1384     /// let uppercase_g = 'G';
1385     /// let a = 'a';
1386     /// let g = 'g';
1387     /// let zero = '0';
1388     /// let percent = '%';
1389     /// let space = ' ';
1390     /// let lf = '\n';
1391     /// let esc: char = 0x1b_u8.into();
1392     ///
1393     /// assert!(uppercase_a.is_ascii_hexdigit());
1394     /// assert!(!uppercase_g.is_ascii_hexdigit());
1395     /// assert!(a.is_ascii_hexdigit());
1396     /// assert!(!g.is_ascii_hexdigit());
1397     /// assert!(zero.is_ascii_hexdigit());
1398     /// assert!(!percent.is_ascii_hexdigit());
1399     /// assert!(!space.is_ascii_hexdigit());
1400     /// assert!(!lf.is_ascii_hexdigit());
1401     /// assert!(!esc.is_ascii_hexdigit());
1402     /// ```
1403     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1404     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1405     #[inline]
1406     pub const fn is_ascii_hexdigit(&self) -> bool {
1407         matches!(*self, '0'..='9' | 'A'..='F' | 'a'..='f')
1408     }
1409
1410     /// Checks if the value is an ASCII punctuation character:
1411     ///
1412     /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
1413     /// - U+003A ..= U+0040 `: ; < = > ? @`, or
1414     /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1415     /// - U+007B ..= U+007E `{ | } ~`
1416     ///
1417     /// # Examples
1418     ///
1419     /// ```
1420     /// let uppercase_a = 'A';
1421     /// let uppercase_g = 'G';
1422     /// let a = 'a';
1423     /// let g = 'g';
1424     /// let zero = '0';
1425     /// let percent = '%';
1426     /// let space = ' ';
1427     /// let lf = '\n';
1428     /// let esc: char = 0x1b_u8.into();
1429     ///
1430     /// assert!(!uppercase_a.is_ascii_punctuation());
1431     /// assert!(!uppercase_g.is_ascii_punctuation());
1432     /// assert!(!a.is_ascii_punctuation());
1433     /// assert!(!g.is_ascii_punctuation());
1434     /// assert!(!zero.is_ascii_punctuation());
1435     /// assert!(percent.is_ascii_punctuation());
1436     /// assert!(!space.is_ascii_punctuation());
1437     /// assert!(!lf.is_ascii_punctuation());
1438     /// assert!(!esc.is_ascii_punctuation());
1439     /// ```
1440     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1441     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1442     #[inline]
1443     pub const fn is_ascii_punctuation(&self) -> bool {
1444         matches!(*self, '!'..='/' | ':'..='@' | '['..='`' | '{'..='~')
1445     }
1446
1447     /// Checks if the value is an ASCII graphic character:
1448     /// U+0021 '!' ..= U+007E '~'.
1449     ///
1450     /// # Examples
1451     ///
1452     /// ```
1453     /// let uppercase_a = 'A';
1454     /// let uppercase_g = 'G';
1455     /// let a = 'a';
1456     /// let g = 'g';
1457     /// let zero = '0';
1458     /// let percent = '%';
1459     /// let space = ' ';
1460     /// let lf = '\n';
1461     /// let esc: char = 0x1b_u8.into();
1462     ///
1463     /// assert!(uppercase_a.is_ascii_graphic());
1464     /// assert!(uppercase_g.is_ascii_graphic());
1465     /// assert!(a.is_ascii_graphic());
1466     /// assert!(g.is_ascii_graphic());
1467     /// assert!(zero.is_ascii_graphic());
1468     /// assert!(percent.is_ascii_graphic());
1469     /// assert!(!space.is_ascii_graphic());
1470     /// assert!(!lf.is_ascii_graphic());
1471     /// assert!(!esc.is_ascii_graphic());
1472     /// ```
1473     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1474     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1475     #[inline]
1476     pub const fn is_ascii_graphic(&self) -> bool {
1477         matches!(*self, '!'..='~')
1478     }
1479
1480     /// Checks if the value is an ASCII whitespace character:
1481     /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1482     /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1483     ///
1484     /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1485     /// whitespace][infra-aw]. There are several other definitions in
1486     /// wide use. For instance, [the POSIX locale][pct] includes
1487     /// U+000B VERTICAL TAB as well as all the above characters,
1488     /// but—from the very same specification—[the default rule for
1489     /// "field splitting" in the Bourne shell][bfs] considers *only*
1490     /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1491     ///
1492     /// If you are writing a program that will process an existing
1493     /// file format, check what that format's definition of whitespace is
1494     /// before using this function.
1495     ///
1496     /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1497     /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1498     /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1499     ///
1500     /// # Examples
1501     ///
1502     /// ```
1503     /// let uppercase_a = 'A';
1504     /// let uppercase_g = 'G';
1505     /// let a = 'a';
1506     /// let g = 'g';
1507     /// let zero = '0';
1508     /// let percent = '%';
1509     /// let space = ' ';
1510     /// let lf = '\n';
1511     /// let esc: char = 0x1b_u8.into();
1512     ///
1513     /// assert!(!uppercase_a.is_ascii_whitespace());
1514     /// assert!(!uppercase_g.is_ascii_whitespace());
1515     /// assert!(!a.is_ascii_whitespace());
1516     /// assert!(!g.is_ascii_whitespace());
1517     /// assert!(!zero.is_ascii_whitespace());
1518     /// assert!(!percent.is_ascii_whitespace());
1519     /// assert!(space.is_ascii_whitespace());
1520     /// assert!(lf.is_ascii_whitespace());
1521     /// assert!(!esc.is_ascii_whitespace());
1522     /// ```
1523     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1524     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1525     #[inline]
1526     pub const fn is_ascii_whitespace(&self) -> bool {
1527         matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
1528     }
1529
1530     /// Checks if the value is an ASCII control character:
1531     /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
1532     /// Note that most ASCII whitespace characters are control
1533     /// characters, but SPACE is not.
1534     ///
1535     /// # Examples
1536     ///
1537     /// ```
1538     /// let uppercase_a = 'A';
1539     /// let uppercase_g = 'G';
1540     /// let a = 'a';
1541     /// let g = 'g';
1542     /// let zero = '0';
1543     /// let percent = '%';
1544     /// let space = ' ';
1545     /// let lf = '\n';
1546     /// let esc: char = 0x1b_u8.into();
1547     ///
1548     /// assert!(!uppercase_a.is_ascii_control());
1549     /// assert!(!uppercase_g.is_ascii_control());
1550     /// assert!(!a.is_ascii_control());
1551     /// assert!(!g.is_ascii_control());
1552     /// assert!(!zero.is_ascii_control());
1553     /// assert!(!percent.is_ascii_control());
1554     /// assert!(!space.is_ascii_control());
1555     /// assert!(lf.is_ascii_control());
1556     /// assert!(esc.is_ascii_control());
1557     /// ```
1558     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1559     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1560     #[inline]
1561     pub const fn is_ascii_control(&self) -> bool {
1562         matches!(*self, '\0'..='\x1F' | '\x7F')
1563     }
1564 }
1565
/// Three independent yes/no switches selecting optional kinds of escaping.
pub(crate) struct EscapeDebugExtArgs {
    /// Whether Extended Grapheme code points are to be escaped.
    pub(crate) escape_grapheme_extended: bool,

    /// Whether the single quote (`'`) is to be escaped.
    pub(crate) escape_single_quote: bool,

    /// Whether the double quote (`"`) is to be escaped.
    pub(crate) escape_double_quote: bool,
}

impl EscapeDebugExtArgs {
    /// Preset with every switch turned on.
    pub(crate) const ESCAPE_ALL: EscapeDebugExtArgs = EscapeDebugExtArgs {
        escape_double_quote: true,
        escape_single_quote: true,
        escape_grapheme_extended: true,
    };
}
1584
1585 #[inline]
1586 const fn len_utf8(code: u32) -> usize {
1587     if code < MAX_ONE_B {
1588         1
1589     } else if code < MAX_TWO_B {
1590         2
1591     } else if code < MAX_THREE_B {
1592         3
1593     } else {
1594         4
1595     }
1596 }
1597
1598 /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1599 /// and then returns the subslice of the buffer that contains the encoded character.
1600 ///
1601 /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1602 /// (Creating a `char` in the surrogate range is UB.)
1603 /// The result is valid [generalized UTF-8] but not valid UTF-8.
1604 ///
1605 /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1606 ///
1607 /// # Panics
1608 ///
1609 /// Panics if the buffer is not large enough.
1610 /// A buffer of length four is large enough to encode any `char`.
1611 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1612 #[doc(hidden)]
1613 #[inline]
1614 pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1615     let len = len_utf8(code);
1616     match (len, &mut dst[..]) {
1617         (1, [a, ..]) => {
1618             *a = code as u8;
1619         }
1620         (2, [a, b, ..]) => {
1621             *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
1622             *b = (code & 0x3F) as u8 | TAG_CONT;
1623         }
1624         (3, [a, b, c, ..]) => {
1625             *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
1626             *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1627             *c = (code & 0x3F) as u8 | TAG_CONT;
1628         }
1629         (4, [a, b, c, d, ..]) => {
1630             *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
1631             *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1632             *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1633             *d = (code & 0x3F) as u8 | TAG_CONT;
1634         }
1635         _ => panic!(
1636             "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
1637             len,
1638             code,
1639             dst.len(),
1640         ),
1641     };
1642     &mut dst[..len]
1643 }
1644
1645 /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1646 /// and then returns the subslice of the buffer that contains the encoded character.
1647 ///
1648 /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
1649 /// (Creating a `char` in the surrogate range is UB.)
1650 ///
1651 /// # Panics
1652 ///
1653 /// Panics if the buffer is not large enough.
1654 /// A buffer of length 2 is large enough to encode any `char`.
1655 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1656 #[doc(hidden)]
1657 #[inline]
pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
    // Code units required: one for any value that fits in 16 bits (surrogate
    // code points included), two for the supplementary planes.
    let len: usize = if (code & 0xFFFF) == code { 1 } else { 2 };

    // Same shape as `encode_utf8_raw`: the slice patterns prove the buffer is
    // long enough, so the writes need no `unsafe`.
    match (len, &mut dst[..]) {
        (1, [a, ..]) => {
            *a = code as u16;
        }
        (2, [a, b, ..]) => {
            // Supplementary planes break into surrogates.
            code -= 0x1_0000;
            *a = 0xD800 | ((code >> 10) as u16);
            *b = 0xDC00 | ((code as u16) & 0x3FF);
        }
        // Buffer too small. The required length is reported from `len` instead
        // of building a `char` from `code`: `code` may be a surrogate here, and
        // creating a `char` in the surrogate range is undefined behaviour.
        _ => panic!(
            "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
            len,
            code,
            dst.len(),
        ),
    };

    // SAFETY: a non-panicking arm above matched a slice pattern with at least
    // `len` elements, so the first `len` units of `dst` are in bounds, and the
    // returned slice inherits the exclusive borrow of `dst`.
    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
}