library/core/src/char/methods.rs

   1 //! impl char {}
   2
   3 use crate::ascii;
   4 use crate::slice;
   5 use crate::str::from_utf8_unchecked_mut;
   6 use crate::unicode::printable::is_printable;
   7 use crate::unicode::{self, conversions};
   8
   9 use super::*;
  10
  11 impl char {
  12     /// The lowest valid code point a `char` can have, `'\0'`.
  13     ///
  14     /// Unlike integer types, the valid values of `char` are not contiguous:
  15     /// there is a gap in the middle, so there are fewer possible `char`s than
  16     /// the distance between `MIN` and [`MAX`] suggests. Ranges of `char` will
  17     /// automatically hop this gap for you:
  18     ///
  19     /// ```
  20     /// #![feature(char_min)]
  21     /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
  22     /// let size = (char::MIN..=char::MAX).count() as u32;
  23     /// assert!(size < dist);
  24     /// ```
  25     ///
  26     /// Despite this gap, `MIN` and [`MAX`] can be used as the lower and upper
  27     /// bounds for all `char` values.
  28     ///
  29     /// [`MAX`]: char::MAX
  30     ///
  31     /// # Examples
  32     ///
  33     /// ```
  34     /// #![feature(char_min)]
  35     /// # fn something_which_returns_char() -> char { 'a' }
  36     /// let c: char = something_which_returns_char();
  37     /// assert!(char::MIN <= c);
  38     ///
  39     /// let value_at_min = u32::from(char::MIN);
  40     /// assert_eq!(char::from_u32(value_at_min), Some('\0'));
  41     /// ```
  42     #[unstable(feature = "char_min", issue = "114298")]
  43     pub const MIN: char = '\0';
  44
  45     /// The highest valid code point a `char` can have, `'\u{10FFFF}'`.
  46     ///
  47     /// Unlike integer types, the valid values of `char` are not contiguous:
  48     /// there is a gap in the middle, so there are fewer possible `char`s than
  49     /// the distance between [`MIN`] and `MAX` suggests. Ranges of `char` will
  50     /// automatically hop this gap for you:
  51     ///
  52     /// ```
  53     /// #![feature(char_min)]
  54     /// let dist = u32::from(char::MAX) - u32::from(char::MIN);
  55     /// let size = (char::MIN..=char::MAX).count() as u32;
  56     /// assert!(size < dist);
  57     /// ```
  58     ///
  59     /// Despite this gap, [`MIN`] and `MAX` can be used as the lower and upper
  60     /// bounds for all `char` values.
  61     ///
  62     /// [`MIN`]: char::MIN
  63     ///
  64     /// # Examples
  65     ///
  66     /// ```
  67     /// # fn something_which_returns_char() -> char { 'a' }
  68     /// let c: char = something_which_returns_char();
  69     /// assert!(c <= char::MAX);
  70     ///
  71     /// let value_at_max = u32::from(char::MAX);
  72     /// assert_eq!(char::from_u32(value_at_max), Some('\u{10FFFF}'));
  73     /// assert_eq!(char::from_u32(value_at_max + 1), None);
  74     /// ```
  75     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
  76     pub const MAX: char = '\u{10ffff}';
  77
  78     /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
  79     /// decoding error.
  80     ///
  81     /// It can appear in decoded text, for example, when ill-formed UTF-8 bytes are given to
  82     /// [`String::from_utf8_lossy`](../std/string/struct.String.html#method.from_utf8_lossy).
  83     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
  84     pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
  85
  86     /// The version of [Unicode](https://www.unicode.org/) that the Unicode parts of
  87     /// `char` and `str` methods are based on, as a tuple of three version components.
  88     ///
  89     /// New versions of Unicode are released regularly, and all methods in the
  90     /// standard library that depend on Unicode are subsequently updated. The
  91     /// behavior of some `char` and `str` methods, and the value of this constant,
  92     /// therefore change over time. This is *not* considered to be a breaking change.
  93     ///
  94     /// The version numbering scheme is explained in
  95     /// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
  96     #[stable(feature = "assoc_char_consts", since = "1.52.0")]
  97     pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
  98
  99     /// Creates an iterator that decodes the UTF-16 code units yielded by `iter`
 100     /// into `char`s, returning unpaired surrogates as `Err`s.
 101     ///
 102     /// # Examples
 103     ///
 104     /// Basic usage:
 105     ///
 106     /// ```
 107     /// // 𝄞mus<invalid>ic<invalid>
 108     /// let v = [
 109     ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
 110     /// ];
 111     ///
 112     /// assert_eq!(
 113     ///     char::decode_utf16(v)
 114     ///         .map(|r| r.map_err(|e| e.unpaired_surrogate()))
 115     ///         .collect::<Vec<_>>(),
 116     ///     vec![
 117     ///         Ok('𝄞'),
 118     ///         Ok('m'), Ok('u'), Ok('s'),
 119     ///         Err(0xDD1E),
 120     ///         Ok('i'), Ok('c'),
 121     ///         Err(0xD834)
 122     ///     ]
 123     /// );
 124     /// ```
 125     ///
 126     /// A lossy decoder can be obtained by replacing each `Err` with the replacement character:
 127     ///
 128     /// ```
 129     /// // 𝄞mus<invalid>ic<invalid>
 130     /// let v = [
 131     ///     0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
 132     /// ];
 133     ///
 134     /// assert_eq!(
 135     ///     char::decode_utf16(v)
 136     ///        .map(|r| r.unwrap_or(char::REPLACEMENT_CHARACTER))
 137     ///        .collect::<String>(),
 138     ///     "𝄞mus�ic�"
 139     /// );
 140     /// ```
 141     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 142     #[inline]
 143     pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
 144         super::decode::decode_utf16(iter)
 145     }
 146
 147     /// Converts a `u32` to a `char`, returning `None` if it is not a valid `char`.
 148     ///
 149     /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
 150     /// [`as`](../std/keyword.as.html):
 151     ///
 152     /// ```
 153     /// let c = '💯';
 154     /// let i = c as u32;
 155     ///
 156     /// assert_eq!(128175, i);
 157     /// ```
 158     ///
 159     /// However, the reverse is not true: not all valid [`u32`]s are valid
 160     /// `char`s. `from_u32()` will return `None` if the input is not a valid value
 161     /// for a `char`.
 162     ///
 163     /// For an unsafe version of this function which skips this validity check, see
 164     /// [`from_u32_unchecked`].
 165     ///
 166     /// [`from_u32_unchecked`]: #method.from_u32_unchecked
 167     ///
 168     /// # Examples
 169     ///
 170     /// Basic usage:
 171     ///
 172     /// ```
 173     /// let c = char::from_u32(0x2764);
 174     ///
 175     /// assert_eq!(Some('❤'), c);
 176     /// ```
 177     ///
 178     /// Returning `None` when the input is not a valid `char`:
 179     ///
 180     /// ```
 181     /// let c = char::from_u32(0x110000);
 182     ///
 183     /// assert_eq!(None, c);
 184     /// ```
 185     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 186     #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
 187     #[must_use]
 188     #[inline]
 189     pub const fn from_u32(i: u32) -> Option<char> {
 190         super::convert::from_u32(i)
 191     }
 192
 193     /// Converts a `u32` to a `char`, ignoring validity.
 194     ///
 195     /// Note that all `char`s are valid [`u32`]s, and can be cast to one with
 196     /// `as`:
 197     ///
 198     /// ```
 199     /// let c = '💯';
 200     /// let i = c as u32;
 201     ///
 202     /// assert_eq!(128175, i);
 203     /// ```
 204     ///
 205     /// However, the reverse is not true: not all valid [`u32`]s are valid
 206     /// `char`s. `from_u32_unchecked()` will ignore this, and blindly cast to
 207     /// `char`, possibly creating an invalid one.
 208     ///
 209     /// # Safety
 210     ///
 211     /// `i` must be a valid value for a `char`; otherwise this function constructs an invalid `char`.
 212     ///
 213     /// For a safe version of this function, see the [`from_u32`] function.
 214     ///
 215     /// [`from_u32`]: #method.from_u32
 216     ///
 217     /// # Examples
 218     ///
 219     /// Basic usage:
 220     ///
 221     /// ```
 222     /// let c = unsafe { char::from_u32_unchecked(0x2764) };
 223     ///
 224     /// assert_eq!('❤', c);
 225     /// ```
 226     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 227     #[rustc_const_unstable(feature = "const_char_from_u32_unchecked", issue = "89259")]
 228     #[must_use]
 229     #[inline]
 230     pub const unsafe fn from_u32_unchecked(i: u32) -> char {
 231         // SAFETY: the caller of this function must uphold the safety contract documented above.
 232         unsafe { super::convert::from_u32_unchecked(i) }
 233     }
 234
 235     /// Converts a digit in the given radix to a `char`.
 236     ///
 237     /// A 'radix' here is sometimes also called a 'base'. A radix of two
 238     /// indicates a binary number, a radix of ten, decimal, and a radix of
 239     /// sixteen, hexadecimal, to give some common values. Radices other than
 240     /// these are supported as well.
 241     ///
 242     /// `from_digit()` will return `None` if the input `num` is not a digit in
 243     /// the given radix.
 244     ///
 245     /// # Panics
 246     ///
 247     /// Panics if given a radix larger than 36.
 248     ///
 249     /// # Examples
 250     ///
 251     /// Basic usage:
 252     ///
 253     /// ```
 254     /// let c = char::from_digit(4, 10);
 255     ///
 256     /// assert_eq!(Some('4'), c);
 257     ///
 258     /// // Decimal 11 is a single digit in base 16
 259     /// let c = char::from_digit(11, 16);
 260     ///
 261     /// assert_eq!(Some('b'), c);
 262     /// ```
 263     ///
 264     /// Returning `None` when the input is not a digit:
 265     ///
 266     /// ```
 267     /// let c = char::from_digit(20, 10);
 268     ///
 269     /// assert_eq!(None, c);
 270     /// ```
 271     ///
 272     /// Passing a large radix, causing a panic:
 273     ///
 274     /// ```should_panic
 275     /// // this panics
 276     /// let _c = char::from_digit(1, 37);
 277     /// ```
 278     #[stable(feature = "assoc_char_funcs", since = "1.52.0")]
 279     #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
 280     #[must_use]
 281     #[inline]
 282     pub const fn from_digit(num: u32, radix: u32) -> Option<char> {
 283         super::convert::from_digit(num, radix)
 284     }
 285
 286     /// Checks if a `char` is a digit in the given radix.
 287     ///
 288     /// A 'radix' here is sometimes also called a 'base'. A radix of two
 289     /// indicates a binary number, a radix of ten, decimal, and a radix of
 290     /// sixteen, hexadecimal, to give some common values. Any radix up to
 291     /// 36 is supported.
 292     ///
 293     /// This returns `true` exactly when [`to_digit()`](#method.to_digit) returns
 294     /// `Some` for the same radix.
 295     ///
 296     /// 'Digit' is defined to be only the following characters:
 297     ///
 298     /// * `0-9`
 299     /// * `a-z`
 300     /// * `A-Z`
 301     ///
 302     /// For a more comprehensive understanding of 'digit', see [`is_numeric()`].
 303     ///
 304     /// [`is_numeric()`]: #method.is_numeric
 305     ///
 306     /// # Panics
 307     ///
 308     /// Panics if given a radix larger than 36.
 309     ///
 310     /// # Examples
 311     ///
 312     /// Basic usage:
 313     ///
 314     /// ```
 315     /// assert!('1'.is_digit(10));
 316     /// assert!('f'.is_digit(16));
 317     /// assert!(!'f'.is_digit(10));
 318     /// ```
 319     ///
 320     /// Passing a large radix, causing a panic:
 321     ///
 322     /// ```should_panic
 323     /// // this panics
 324     /// '1'.is_digit(37);
 325     /// ```
 326     #[stable(feature = "rust1", since = "1.0.0")]
 327     #[inline]
 328     pub fn is_digit(self, radix: u32) -> bool {
 329         self.to_digit(radix).is_some()
 330     }
 331
 332     /// Converts a `char` to a digit in the given radix.
 333     ///
 334     /// A 'radix' here is sometimes also called a 'base'. Common values are two
 335     /// (binary), ten (decimal) and sixteen (hexadecimal), but any radix up to
 336     /// 36 is supported.
 337     ///
 338     /// 'Digit' is defined to be only the following characters (letters in either case):
 339     ///
 340     /// * `0-9`
 341     /// * `a-z`
 342     /// * `A-Z`
 343     ///
 344     /// # Errors
 345     ///
 346     /// Returns `None` if the `char` does not refer to a digit in the given radix.
 347     ///
 348     /// # Panics
 349     ///
 350     /// Panics if given a radix larger than 36.
 351     ///
 352     /// # Examples
 353     ///
 354     /// Basic usage:
 355     ///
 356     /// ```
 357     /// assert_eq!('1'.to_digit(10), Some(1));
 358     /// assert_eq!('f'.to_digit(16), Some(15));
 359     /// ```
 360     ///
 361     /// Passing a non-digit results in failure:
 362     ///
 363     /// ```
 364     /// assert_eq!('f'.to_digit(10), None);
 365     /// assert_eq!('z'.to_digit(16), None);
 366     /// ```
 367     ///
 368     /// Passing a large radix, causing a panic:
 369     ///
 370     /// ```should_panic
 371     /// // this panics
 372     /// let _ = '1'.to_digit(37);
 373     /// ```
 374     #[stable(feature = "rust1", since = "1.0.0")]
 375     #[rustc_const_stable(feature = "const_char_convert", since = "1.67.0")]
 376     #[must_use = "this returns the result of the operation, \
 377                   without modifying the original"]
 378     #[inline]
 379     pub const fn to_digit(self, radix: u32) -> Option<u32> {
 380         let code = self as u32;
 381         // Distance from '0'; anything that is not `0-9` wraps around to 10 or more.
 382         let decimal = code.wrapping_sub('0' as u32);
 383         if radix <= 10 {
 384             return if decimal < radix { Some(decimal) } else { None };
 385         }
 386         assert!(radix <= 36, "to_digit: radix is too high (maximum 36)");
 387         if decimal < 10 {
 388             return Some(decimal);
 389         }
 390         // Setting the 6th bit folds ASCII upper case onto lower case before measuring from 'a'.
 391         let letter = (code | 0b10_0000).wrapping_sub('a' as u32).saturating_add(10);
 392         if letter < radix { Some(letter) } else { None }
 393     }
 394
 395     /// Returns an iterator that yields the hexadecimal Unicode escape of a
 396     /// character as `char`s.
 397     ///
 398     /// This will escape characters with the Rust syntax of the form
 399     /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
 400     ///
 401     /// # Examples
 402     ///
 403     /// As an iterator:
 404     ///
 405     /// ```
 406     /// for c in '❤'.escape_unicode() {
 407     ///     print!("{c}");
 408     /// }
 409     /// println!();
 410     /// ```
 411     ///
 412     /// Using `println!` directly (the returned iterator can be formatted with `{}`):
 413     ///
 414     /// ```
 415     /// println!("{}", '❤'.escape_unicode());
 416     /// ```
 417     ///
 418     /// Both are equivalent to:
 419     ///
 420     /// ```
 421     /// println!("\\u{{2764}}");
 422     /// ```
 423     ///
 424     /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
 425     ///
 426     /// ```
 427     /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
 428     /// ```
 429     #[must_use = "this returns the escaped char as an iterator, \
 430                   without modifying the original"]
 431     #[stable(feature = "rust1", since = "1.0.0")]
 432     #[inline]
 433     pub fn escape_unicode(self) -> EscapeUnicode {
 434         EscapeUnicode::new(self)
 435     }
 436
 437     /// Shared implementation behind `escape_debug`, with each optional escape selected by `args`:
 438     /// escaping of Extended Grapheme codepoints, of single quotes, and of double quotes. This lets
 439     /// characters like nonspacing marks be formatted better when they're at the start of a
 440     /// string, and lets single quotes be escaped in characters and double quotes in strings.
 441     #[inline]
 442     pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
 443         let letter = match self {
 444             '\0' => ascii::Char::Digit0,
 445             '\t' => ascii::Char::SmallT,
 446             '\r' => ascii::Char::SmallR,
 447             '\n' => ascii::Char::SmallN,
 448             '\\' => ascii::Char::ReverseSolidus,
 449             '\"' if args.escape_double_quote => ascii::Char::QuotationMark,
 450             '\'' if args.escape_single_quote => ascii::Char::Apostrophe,
 451             _ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
 452                 return EscapeDebug::from_unicode(self.escape_unicode());
 453             }
 454             _ if is_printable(self) => return EscapeDebug::printable(self),
 455             _ => return EscapeDebug::from_unicode(self.escape_unicode()),
 456         };
 457         EscapeDebug::backslash(letter)
 458     }
 459
 460     /// Returns an iterator that yields the literal escape code of a character
 461     /// as `char`s.
 462     ///
 463     /// This will escape the characters similar to the [`Debug`](core::fmt::Debug) implementations
 464     /// of `str` or `char`, with every optional escape enabled.
 465     ///
 466     /// # Examples
 467     ///
 468     /// As an iterator:
 469     ///
 470     /// ```
 471     /// for c in '\n'.escape_debug() {
 472     ///     print!("{c}");
 473     /// }
 474     /// println!();
 475     /// ```
 476     ///
 477     /// Using `println!` directly (the returned iterator can be formatted with `{}`):
 478     ///
 479     /// ```
 480     /// println!("{}", '\n'.escape_debug());
 481     /// ```
 482     ///
 483     /// Both are equivalent to:
 484     ///
 485     /// ```
 486     /// println!("\\n");
 487     /// ```
 488     ///
 489     /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
 490     ///
 491     /// ```
 492     /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
 493     /// ```
 494     #[must_use = "this returns the escaped char as an iterator, \
 495                   without modifying the original"]
 496     #[stable(feature = "char_escape_debug", since = "1.20.0")]
 497     #[inline]
 498     pub fn escape_debug(self) -> EscapeDebug {
 499         self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
 500     }
 501
 502     /// Returns an iterator that yields the literal escape code of a character
 503     /// as `char`s.
 504     ///
 505     /// The default is chosen with a bias toward producing literals that are
 506     /// legal in a variety of languages, including C++11 and similar C-family
 507     /// languages. The exact rules are:
 508     ///
 509     /// * Tab, carriage return and line feed are escaped as `\t`, `\r` and `\n`.
 510     /// * Single quote and double quote are escaped as `\'` and `\"`.
 511     /// * Backslash is escaped as `\\`.
 512     /// * Any other character in the 'printable ASCII' range `0x20` .. `0x7e`
 513     ///   inclusive is not escaped.
 514     /// * All other characters are given hexadecimal Unicode escapes; see
 515     ///   [`escape_unicode`].
 516     ///
 517     /// [`escape_unicode`]: #method.escape_unicode
 518     ///
 519     /// # Examples
 520     ///
 521     /// As an iterator:
 522     ///
 523     /// ```
 524     /// for c in '"'.escape_default() {
 525     ///     print!("{c}");
 526     /// }
 527     /// println!();
 528     /// ```
 529     ///
 530     /// Using `println!` directly:
 531     ///
 532     /// ```
 533     /// println!("{}", '"'.escape_default());
 534     /// ```
 535     ///
 536     /// Both are equivalent to:
 537     ///
 538     /// ```
 539     /// println!("\\\"");
 540     /// ```
 541     ///
 542     /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
 543     ///
 544     /// ```
 545     /// assert_eq!('"'.escape_default().to_string(), "\\\"");
 546     /// ```
 547     #[must_use = "this returns the escaped char as an iterator, \
 548                   without modifying the original"]
 549     #[stable(feature = "rust1", since = "1.0.0")]
 550     #[inline]
 551     pub fn escape_default(self) -> EscapeDefault {
 552         let letter = match self {
 553             '\t' => ascii::Char::SmallT,
 554             '\r' => ascii::Char::SmallR,
 555             '\n' => ascii::Char::SmallN,
 556             '\\' => ascii::Char::ReverseSolidus,
 557             '\'' => ascii::Char::Apostrophe,
 558             '"' => ascii::Char::QuotationMark,
 559             '\x20'..='\x7e' => return EscapeDefault::printable(self.as_ascii().unwrap()),
 560             _ => return EscapeDefault::from_unicode(self.escape_unicode()),
 561         };
 562         EscapeDefault::backslash(letter)
 563     }
 564
 565     /// Returns the number of bytes this `char` would need if encoded in UTF-8.
 566     ///
 567     /// That number of bytes is always between 1 and 4, inclusive.
 568     ///
 569     /// # Examples
 570     ///
 571     /// Basic usage:
 572     ///
 573     /// ```
 574     /// let len = 'A'.len_utf8();
 575     /// assert_eq!(len, 1);
 576     ///
 577     /// let len = 'ß'.len_utf8();
 578     /// assert_eq!(len, 2);
 579     ///
 580     /// let len = 'ℝ'.len_utf8();
 581     /// assert_eq!(len, 3);
 582     ///
 583     /// let len = '💣'.len_utf8();
 584     /// assert_eq!(len, 4);
 585     /// ```
 586     ///
 587     /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
 588     /// would take if each code point was represented as a `char` vs in the `&str` itself:
 589     ///
 590     /// ```
 591     /// // as chars
 592     /// let eastern = '東';
 593     /// let capital = '京';
 594     ///
 595     /// // both can be represented as three bytes
 596     /// assert_eq!(3, eastern.len_utf8());
 597     /// assert_eq!(3, capital.len_utf8());
 598     ///
 599     /// // as a &str, these two are encoded in UTF-8
 600     /// let tokyo = "東京";
 601     ///
 602     /// let len = eastern.len_utf8() + capital.len_utf8();
 603     ///
 604     /// // we can see that the &str takes six bytes in total...
 605     /// assert_eq!(6, tokyo.len());
 606     ///
 607     /// // ... which is exactly the sum of the two chars' UTF-8 lengths
 608     /// assert_eq!(len, tokyo.len());
 609     /// ```
 610     #[stable(feature = "rust1", since = "1.0.0")]
 611     #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
 612     #[inline]
 613     pub const fn len_utf8(self) -> usize {
 614         len_utf8(self as u32)
 615     }
 616
 617     /// Returns how many 16-bit code units are needed to encode this `char`
 618     /// in UTF-16.
 619     ///
 620     /// The result is always either 1 or 2: 1 for unicode scalar values in the
 621     /// [basic multilingual plane], 2 for those in the [supplementary planes].
 622     ///
 623     /// This is the UTF-16 mirror of [`len_utf8()`]; see the documentation of
 624     /// that method for more explanation of the concept.
 625     ///
 626     /// [basic multilingual plane]: http://www.unicode.org/glossary/#basic_multilingual_plane
 627     /// [supplementary planes]: http://www.unicode.org/glossary/#supplementary_planes
 628     /// [`len_utf8()`]: #method.len_utf8
 629     ///
 630     /// # Examples
 631     ///
 632     /// Basic usage:
 633     ///
 634     /// ```
 635     /// let n = 'ß'.len_utf16();
 636     /// assert_eq!(n, 1);
 637     ///
 638     /// let len = '💣'.len_utf16();
 639     /// assert_eq!(len, 2);
 640     /// ```
 641     #[stable(feature = "rust1", since = "1.0.0")]
 642     #[rustc_const_stable(feature = "const_char_len_utf", since = "1.52.0")]
 643     #[inline]
 644     pub const fn len_utf16(self) -> usize {
 645         // A value that fits in 16 bits takes a single code unit; anything larger takes two.
 646         if (self as u32) <= 0xFFFF { 1 } else { 2 }
 647     }
 648
 649     /// Encodes this character as UTF-8 into the provided byte buffer `dst`,
 650     /// and then returns the subslice of the buffer that contains the encoded character.
 651     ///
 652     /// # Panics
 653     ///
 654     /// Panics if the buffer is not large enough.
 655     /// A buffer of length four is large enough to encode any `char`.
 656     ///
 657     /// # Examples
 658     ///
 659     /// In both of these examples, 'ß' takes two bytes to encode.
 660     ///
 661     /// ```
 662     /// let mut b = [0; 2];
 663     ///
 664     /// let result = 'ß'.encode_utf8(&mut b);
 665     ///
 666     /// assert_eq!(result, "ß");
 667     ///
 668     /// assert_eq!(result.len(), 2);
 669     /// ```
 670     ///
 671     /// A buffer that's too small:
 672     ///
 673     /// ```should_panic
 674     /// let mut b = [0; 1];
 675     ///
 676     /// // this panics
 677     /// 'ß'.encode_utf8(&mut b);
 678     /// ```
 679     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
 680     #[inline]
 681     pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
 682         // SAFETY: `char` is not a surrogate, so this is valid UTF-8.
 683         unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
 684     }
 685
 686     /// Encodes this character as UTF-16 into the provided `u16` buffer `dst`,
 687     /// and then returns the subslice of the buffer that contains the encoded character.
 688     ///
 689     /// # Panics
 690     ///
 691     /// Panics if the buffer is not large enough.
 692     /// A buffer of length 2 is large enough to encode any `char`.
 693     ///
 694     /// # Examples
 695     ///
 696     /// In both of these examples, '𝕊' takes two `u16`s to encode.
 697     ///
 698     /// ```
 699     /// let mut b = [0; 2];
 700     ///
 701     /// let result = '𝕊'.encode_utf16(&mut b);
 702     ///
 703     /// assert_eq!(result.len(), 2);
 704     /// ```
 705     ///
 706     /// A buffer that's too small:
 707     ///
 708     /// ```should_panic
 709     /// let mut b = [0; 1];
 710     ///
 711     /// // this panics
 712     /// '𝕊'.encode_utf16(&mut b);
 713     /// ```
 714     #[stable(feature = "unicode_encode_char", since = "1.15.0")]
 715     #[inline]
 716     pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
 717         encode_utf16_raw(self as u32, dst)
 718     }
 719
 720     /// Checks whether this `char` has the Unicode `Alphabetic` property.
 721     ///
 722     /// The `Alphabetic` property is described in Chapter 4 (Character Properties) of the
 723     /// [Unicode Standard] and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
 724     ///
 725     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 726     /// [ucd]: https://www.unicode.org/reports/tr44/
 727     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 728     ///
 729     /// # Examples
 730     ///
 731     /// ```
 732     /// assert!('a'.is_alphabetic());
 733     /// assert!('京'.is_alphabetic());
 734     ///
 735     /// let c = '💝';
 736     /// // love is many things, but it is not alphabetic
 737     /// assert!(!c.is_alphabetic());
 738     /// ```
 739     #[must_use]
 740     #[stable(feature = "rust1", since = "1.0.0")]
 741     #[inline]
 742     pub fn is_alphabetic(self) -> bool {
 743         match self {
 744             'A'..='Z' | 'a'..='z' => true,
 745             // Every other ASCII character is answered without calling `unicode::Alphabetic`.
 746             '\0'..='\x7f' => false,
 747             _ => unicode::Alphabetic(self),
 748         }
 749     }
 750
 751     /// Checks whether this `char` has the Unicode `Lowercase` property.
 752     ///
 753     /// The `Lowercase` property is described in Chapter 4 (Character Properties) of the
 754     /// [Unicode Standard] and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
 755     ///
 756     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 757     /// [ucd]: https://www.unicode.org/reports/tr44/
 758     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 759     ///
 760     /// # Examples
 761     ///
 762     /// ```
 763     /// assert!('a'.is_lowercase());
 764     /// assert!('δ'.is_lowercase());
 765     /// assert!(!'A'.is_lowercase());
 766     /// assert!(!'Δ'.is_lowercase());
 767     ///
 768     /// // The various Chinese scripts and punctuation do not have case, and so:
 769     /// assert!(!'中'.is_lowercase());
 770     /// assert!(!' '.is_lowercase());
 771     /// ```
 772     ///
 773     /// In a const context:
 774     ///
 775     /// ```
 776     /// #![feature(const_unicode_case_lookup)]
 777     /// const CAPITAL_DELTA_IS_LOWERCASE: bool = 'Δ'.is_lowercase();
 778     /// assert!(!CAPITAL_DELTA_IS_LOWERCASE);
 779     /// ```
 780     #[must_use]
 781     #[stable(feature = "rust1", since = "1.0.0")]
 782     #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
 783     #[inline]
 784     pub const fn is_lowercase(self) -> bool {
 785         match self {
 786             'a'..='z' => true,
 787             // Every other ASCII character is answered without calling `unicode::Lowercase`.
 788             '\0'..='\x7f' => false,
 789             _ => unicode::Lowercase(self),
 790         }
 791     }
 792
 793     /// Checks whether this `char` has the Unicode `Uppercase` property.
 794     ///
 795     /// The `Uppercase` property is described in Chapter 4 (Character Properties) of the
 796     /// [Unicode Standard] and specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
 797     ///
 798     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 799     /// [ucd]: https://www.unicode.org/reports/tr44/
 800     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 801     ///
 802     /// # Examples
 803     ///
 804     /// ```
 805     /// assert!(!'a'.is_uppercase());
 806     /// assert!(!'δ'.is_uppercase());
 807     /// assert!('A'.is_uppercase());
 808     /// assert!('Δ'.is_uppercase());
 809     ///
 810     /// // The various Chinese scripts and punctuation do not have case, and so:
 811     /// assert!(!'中'.is_uppercase());
 812     /// assert!(!' '.is_uppercase());
 813     /// ```
 814     ///
 815     /// In a const context:
 816     ///
 817     /// ```
 818     /// #![feature(const_unicode_case_lookup)]
 819     /// const CAPITAL_DELTA_IS_UPPERCASE: bool = 'Δ'.is_uppercase();
 820     /// assert!(CAPITAL_DELTA_IS_UPPERCASE);
 821     /// ```
 822     #[must_use]
 823     #[stable(feature = "rust1", since = "1.0.0")]
 824     #[rustc_const_unstable(feature = "const_unicode_case_lookup", issue = "101400")]
 825     #[inline]
 826     pub const fn is_uppercase(self) -> bool {
 827         match self {
 828             'A'..='Z' => true,
 829             // Every other ASCII character is answered without calling `unicode::Uppercase`.
 830             '\0'..='\x7f' => false,
 831             _ => unicode::Uppercase(self),
 832         }
 833     }
 834
 835     /// Checks whether this `char` has the Unicode `White_Space` property.
 836     ///
 837     /// The `White_Space` property is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
 838     ///
 839     /// [ucd]: https://www.unicode.org/reports/tr44/
 840     /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
 841     ///
 842     /// # Examples
 843     ///
 844     /// ```
 845     /// assert!(' '.is_whitespace());
 846     ///
 847     /// // line break
 848     /// assert!('\n'.is_whitespace());
 849     ///
 850     /// // a non-breaking space
 851     /// assert!('\u{A0}'.is_whitespace());
 852     ///
 853     /// assert!(!'越'.is_whitespace());
 854     /// ```
 855     #[must_use]
 856     #[stable(feature = "rust1", since = "1.0.0")]
 857     #[inline]
 858     pub fn is_whitespace(self) -> bool {
 859         match self {
 860             '\x09'..='\x0d' | ' ' => true,
 861             // Every other ASCII character is answered without calling `unicode::White_Space`.
 862             '\0'..='\x7f' => false,
 863             _ => unicode::White_Space(self),
 864         }
 865     }
 866
 867     /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
 868     ///
 869     /// [`is_alphabetic()`]: #method.is_alphabetic
 870     /// [`is_numeric()`]: #method.is_numeric
 871     ///
 872     /// # Examples
 873     ///
 874     /// Basic usage:
 875     ///
 876     /// ```
 877     /// assert!('٣'.is_alphanumeric());
 878     /// assert!('7'.is_alphanumeric());
 879     /// assert!('৬'.is_alphanumeric());
 880     /// assert!('¾'.is_alphanumeric());
 881     /// assert!('①'.is_alphanumeric());
 882     /// assert!('K'.is_alphanumeric());
 883     /// assert!('و'.is_alphanumeric());
 884     /// assert!('藏'.is_alphanumeric());
 885     /// ```
 886     #[must_use]
 887     #[stable(feature = "rust1", since = "1.0.0")]
 888     #[inline]
 889     pub fn is_alphanumeric(self) -> bool {
 890         self.is_alphabetic() || self.is_numeric()
 891     }
 892
 893     /// Returns `true` if this `char` has the general category for control codes.
 894     ///
 895     /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
 896     /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
 897     /// Database][ucd] [`UnicodeData.txt`].
 898     ///
 899     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 900     /// [ucd]: https://www.unicode.org/reports/tr44/
 901     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 902     ///
 903     /// # Examples
 904     ///
 905     /// Basic usage:
 906     ///
 907     /// ```
 908     /// // U+009C, STRING TERMINATOR
 909     /// assert!('\9c'.is_control());
 910     /// assert!(!'q'.is_control());
 911     /// ```
 912     #[must_use]
 913     #[stable(feature = "rust1", since = "1.0.0")]
 914     #[inline]
 915     pub fn is_control(self) -> bool {
 916         unicode::Cc(self)
 917     }
 918
 919     /// Returns `true` if this `char` has the `Grapheme_Extend` property.
 920     ///
 921     /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
 922     /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
 923     /// [`DerivedCoreProperties.txt`].
 924     ///
 925     /// [uax29]: https://www.unicode.org/reports/tr29/
 926     /// [ucd]: https://www.unicode.org/reports/tr44/
 927     /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
 928     #[must_use]
 929     #[inline]
 930     pub(crate) fn is_grapheme_extended(self) -> bool {
 931         unicode::Grapheme_Extend(self)
 932     }
 933
 934     /// Returns `true` if this `char` has one of the general categories for numbers.
 935     ///
 936     /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
 937     /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
 938     /// Database][ucd] [`UnicodeData.txt`].
 939     ///
 940     /// This method doesn't cover everything that could be considered a number, e.g. ideographic numbers like '三'.
 941     /// If you want everything including characters with overlapping purposes then you might want to use
 942     /// a unicode or language-processing library that exposes the appropriate character properties instead
 943     /// of looking at the unicode categories.
 944     ///
 945     /// If you want to parse ASCII decimal digits (0-9) or ASCII base-N, use
 946     /// `is_ascii_digit` or `is_digit` instead.
 947     ///
 948     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
 949     /// [ucd]: https://www.unicode.org/reports/tr44/
 950     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 951     ///
 952     /// # Examples
 953     ///
 954     /// Basic usage:
 955     ///
 956     /// ```
 957     /// assert!('٣'.is_numeric());
 958     /// assert!('7'.is_numeric());
 959     /// assert!('৬'.is_numeric());
 960     /// assert!('¾'.is_numeric());
 961     /// assert!('①'.is_numeric());
 962     /// assert!(!'K'.is_numeric());
 963     /// assert!(!'و'.is_numeric());
 964     /// assert!(!'藏'.is_numeric());
 965     /// assert!(!'三'.is_numeric());
 966     /// ```
 967     #[must_use]
 968     #[stable(feature = "rust1", since = "1.0.0")]
 969     #[inline]
 970     pub fn is_numeric(self) -> bool {
 971         match self {
 972             '0'..='9' => true,
 973             c => c > '\x7f' && unicode::N(c),
 974         }
 975     }
 976
 977     /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
 978     /// `char`s.
 979     ///
 980     /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
 981     ///
 982     /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
 983     /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
 984     ///
 985     /// [ucd]: https://www.unicode.org/reports/tr44/
 986     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
 987     ///
 988     /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
 989     /// the `char`(s) given by [`SpecialCasing.txt`].
 990     ///
 991     /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
 992     ///
 993     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
 994     /// is independent of context and language.
 995     ///
 996     /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
 997     /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
 998     ///
 999     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1000     ///
1001     /// # Examples
1002     ///
1003     /// As an iterator:
1004     ///
1005     /// ```
1006     /// for c in 'İ'.to_lowercase() {
1007     ///     print!("{c}");
1008     /// }
1009     /// println!();
1010     /// ```
1011     ///
1012     /// Using `println!` directly:
1013     ///
1014     /// ```
1015     /// println!("{}", 'İ'.to_lowercase());
1016     /// ```
1017     ///
1018     /// Both are equivalent to:
1019     ///
1020     /// ```
1021     /// println!("i\u{307}");
1022     /// ```
1023     ///
1024     /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1025     ///
1026     /// ```
1027     /// assert_eq!('C'.to_lowercase().to_string(), "c");
1028     ///
1029     /// // Sometimes the result is more than one character:
1030     /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
1031     ///
1032     /// // Characters that do not have both uppercase and lowercase
1033     /// // convert into themselves.
1034     /// assert_eq!('山'.to_lowercase().to_string(), "山");
1035     /// ```
1036     #[must_use = "this returns the lowercase character as a new iterator, \
1037                   without modifying the original"]
1038     #[stable(feature = "rust1", since = "1.0.0")]
1039     #[inline]
1040     pub fn to_lowercase(self) -> ToLowercase {
1041         ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
1042     }
1043
1044     /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
1045     /// `char`s.
1046     ///
1047     /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`.
1048     ///
1049     /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
1050     /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1051     ///
1052     /// [ucd]: https://www.unicode.org/reports/tr44/
1053     /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1054     ///
1055     /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
1056     /// the `char`(s) given by [`SpecialCasing.txt`].
1057     ///
1058     /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1059     ///
1060     /// This operation performs an unconditional mapping without tailoring. That is, the conversion
1061     /// is independent of context and language.
1062     ///
1063     /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
1064     /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
1065     ///
1066     /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1067     ///
1068     /// # Examples
1069     ///
1070     /// As an iterator:
1071     ///
1072     /// ```
1073     /// for c in 'ß'.to_uppercase() {
1074     ///     print!("{c}");
1075     /// }
1076     /// println!();
1077     /// ```
1078     ///
1079     /// Using `println!` directly:
1080     ///
1081     /// ```
1082     /// println!("{}", 'ß'.to_uppercase());
1083     /// ```
1084     ///
1085     /// Both are equivalent to:
1086     ///
1087     /// ```
1088     /// println!("SS");
1089     /// ```
1090     ///
1091     /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string):
1092     ///
1093     /// ```
1094     /// assert_eq!('c'.to_uppercase().to_string(), "C");
1095     ///
1096     /// // Sometimes the result is more than one character:
1097     /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
1098     ///
1099     /// // Characters that do not have both uppercase and lowercase
1100     /// // convert into themselves.
1101     /// assert_eq!('山'.to_uppercase().to_string(), "山");
1102     /// ```
1103     ///
1104     /// # Note on locale
1105     ///
1106     /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
1107     ///
1108     /// * 'Dotless': I / ı, sometimes written ï
1109     /// * 'Dotted': İ / i
1110     ///
1111     /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
1112     ///
1113     /// ```
1114     /// let upper_i = 'i'.to_uppercase().to_string();
1115     /// ```
1116     ///
1117     /// The value of `upper_i` here relies on the language of the text: if we're
1118     /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
1119     /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
1120     ///
1121     /// ```
1122     /// let upper_i = 'i'.to_uppercase().to_string();
1123     ///
1124     /// assert_eq!(upper_i, "I");
1125     /// ```
1126     ///
1127     /// holds across languages.
1128     #[must_use = "this returns the uppercase character as a new iterator, \
1129                   without modifying the original"]
1130     #[stable(feature = "rust1", since = "1.0.0")]
1131     #[inline]
1132     pub fn to_uppercase(self) -> ToUppercase {
1133         ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
1134     }
1135
1136     /// Checks if the value is within the ASCII range.
1137     ///
1138     /// # Examples
1139     ///
1140     /// ```
1141     /// let ascii = 'a';
1142     /// let non_ascii = '❤';
1143     ///
1144     /// assert!(ascii.is_ascii());
1145     /// assert!(!non_ascii.is_ascii());
1146     /// ```
1147     #[must_use]
1148     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1149     #[rustc_const_stable(feature = "const_char_is_ascii", since = "1.32.0")]
1150     #[inline]
1151     pub const fn is_ascii(&self) -> bool {
1152         *self as u32 <= 0x7F
1153     }
1154
1155     /// Returns `Some` if the value is within the ASCII range,
1156     /// or `None` if it's not.
1157     ///
1158     /// This is preferred to [`Self::is_ascii`] when you're passing the value
1159     /// along to something else that can take [`ascii::Char`] rather than
1160     /// needing to check again for itself whether the value is in ASCII.
1161     #[must_use]
1162     #[unstable(feature = "ascii_char", issue = "110998")]
1163     #[inline]
1164     pub const fn as_ascii(&self) -> Option<ascii::Char> {
1165         if self.is_ascii() {
1166             // SAFETY: Just checked that this is ASCII.
1167             Some(unsafe { ascii::Char::from_u8_unchecked(*self as u8) })
1168         } else {
1169             None
1170         }
1171     }
1172
1173     /// Makes a copy of the value in its ASCII upper case equivalent.
1174     ///
1175     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1176     /// but non-ASCII letters are unchanged.
1177     ///
1178     /// To uppercase the value in-place, use [`make_ascii_uppercase()`].
1179     ///
1180     /// To uppercase ASCII characters in addition to non-ASCII characters, use
1181     /// [`to_uppercase()`].
1182     ///
1183     /// # Examples
1184     ///
1185     /// ```
1186     /// let ascii = 'a';
1187     /// let non_ascii = '❤';
1188     ///
1189     /// assert_eq!('A', ascii.to_ascii_uppercase());
1190     /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
1191     /// ```
1192     ///
1193     /// [`make_ascii_uppercase()`]: #method.make_ascii_uppercase
1194     /// [`to_uppercase()`]: #method.to_uppercase
1195     #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"]
1196     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1197     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1198     #[inline]
1199     pub const fn to_ascii_uppercase(&self) -> char {
1200         if self.is_ascii_lowercase() {
1201             (*self as u8).ascii_change_case_unchecked() as char
1202         } else {
1203             *self
1204         }
1205     }
1206
1207     /// Makes a copy of the value in its ASCII lower case equivalent.
1208     ///
1209     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1210     /// but non-ASCII letters are unchanged.
1211     ///
1212     /// To lowercase the value in-place, use [`make_ascii_lowercase()`].
1213     ///
1214     /// To lowercase ASCII characters in addition to non-ASCII characters, use
1215     /// [`to_lowercase()`].
1216     ///
1217     /// # Examples
1218     ///
1219     /// ```
1220     /// let ascii = 'A';
1221     /// let non_ascii = '❤';
1222     ///
1223     /// assert_eq!('a', ascii.to_ascii_lowercase());
1224     /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
1225     /// ```
1226     ///
1227     /// [`make_ascii_lowercase()`]: #method.make_ascii_lowercase
1228     /// [`to_lowercase()`]: #method.to_lowercase
1229     #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"]
1230     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1231     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1232     #[inline]
1233     pub const fn to_ascii_lowercase(&self) -> char {
1234         if self.is_ascii_uppercase() {
1235             (*self as u8).ascii_change_case_unchecked() as char
1236         } else {
1237             *self
1238         }
1239     }
1240
1241     /// Checks that two values are an ASCII case-insensitive match.
1242     ///
1243     /// Equivalent to <code>[to_ascii_lowercase]\(a) == [to_ascii_lowercase]\(b)</code>.
1244     ///
1245     /// # Examples
1246     ///
1247     /// ```
1248     /// let upper_a = 'A';
1249     /// let lower_a = 'a';
1250     /// let lower_z = 'z';
1251     ///
1252     /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1253     /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1254     /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1255     /// ```
1256     ///
1257     /// [to_ascii_lowercase]: #method.to_ascii_lowercase
1258     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1259     #[rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.52.0")]
1260     #[inline]
1261     pub const fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1262         self.to_ascii_lowercase() == other.to_ascii_lowercase()
1263     }
1264
1265     /// Converts this type to its ASCII upper case equivalent in-place.
1266     ///
1267     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1268     /// but non-ASCII letters are unchanged.
1269     ///
1270     /// To return a new uppercased value without modifying the existing one, use
1271     /// [`to_ascii_uppercase()`].
1272     ///
1273     /// # Examples
1274     ///
1275     /// ```
1276     /// let mut ascii = 'a';
1277     ///
1278     /// ascii.make_ascii_uppercase();
1279     ///
1280     /// assert_eq!('A', ascii);
1281     /// ```
1282     ///
1283     /// [`to_ascii_uppercase()`]: #method.to_ascii_uppercase
1284     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1285     #[inline]
1286     pub fn make_ascii_uppercase(&mut self) {
1287         *self = self.to_ascii_uppercase();
1288     }
1289
1290     /// Converts this type to its ASCII lower case equivalent in-place.
1291     ///
1292     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1293     /// but non-ASCII letters are unchanged.
1294     ///
1295     /// To return a new lowercased value without modifying the existing one, use
1296     /// [`to_ascii_lowercase()`].
1297     ///
1298     /// # Examples
1299     ///
1300     /// ```
1301     /// let mut ascii = 'A';
1302     ///
1303     /// ascii.make_ascii_lowercase();
1304     ///
1305     /// assert_eq!('a', ascii);
1306     /// ```
1307     ///
1308     /// [`to_ascii_lowercase()`]: #method.to_ascii_lowercase
1309     #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
1310     #[inline]
1311     pub fn make_ascii_lowercase(&mut self) {
1312         *self = self.to_ascii_lowercase();
1313     }
1314
1315     /// Checks if the value is an ASCII alphabetic character:
1316     ///
1317     /// - U+0041 'A' ..= U+005A 'Z', or
1318     /// - U+0061 'a' ..= U+007A 'z'.
1319     ///
1320     /// # Examples
1321     ///
1322     /// ```
1323     /// let uppercase_a = 'A';
1324     /// let uppercase_g = 'G';
1325     /// let a = 'a';
1326     /// let g = 'g';
1327     /// let zero = '0';
1328     /// let percent = '%';
1329     /// let space = ' ';
1330     /// let lf = '\n';
1331     /// let esc = '\x1b';
1332     ///
1333     /// assert!(uppercase_a.is_ascii_alphabetic());
1334     /// assert!(uppercase_g.is_ascii_alphabetic());
1335     /// assert!(a.is_ascii_alphabetic());
1336     /// assert!(g.is_ascii_alphabetic());
1337     /// assert!(!zero.is_ascii_alphabetic());
1338     /// assert!(!percent.is_ascii_alphabetic());
1339     /// assert!(!space.is_ascii_alphabetic());
1340     /// assert!(!lf.is_ascii_alphabetic());
1341     /// assert!(!esc.is_ascii_alphabetic());
1342     /// ```
1343     #[must_use]
1344     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1345     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1346     #[inline]
1347     pub const fn is_ascii_alphabetic(&self) -> bool {
1348         matches!(*self, 'A'..='Z' | 'a'..='z')
1349     }
1350
1351     /// Checks if the value is an ASCII uppercase character:
1352     /// U+0041 'A' ..= U+005A 'Z'.
1353     ///
1354     /// # Examples
1355     ///
1356     /// ```
1357     /// let uppercase_a = 'A';
1358     /// let uppercase_g = 'G';
1359     /// let a = 'a';
1360     /// let g = 'g';
1361     /// let zero = '0';
1362     /// let percent = '%';
1363     /// let space = ' ';
1364     /// let lf = '\n';
1365     /// let esc = '\x1b';
1366     ///
1367     /// assert!(uppercase_a.is_ascii_uppercase());
1368     /// assert!(uppercase_g.is_ascii_uppercase());
1369     /// assert!(!a.is_ascii_uppercase());
1370     /// assert!(!g.is_ascii_uppercase());
1371     /// assert!(!zero.is_ascii_uppercase());
1372     /// assert!(!percent.is_ascii_uppercase());
1373     /// assert!(!space.is_ascii_uppercase());
1374     /// assert!(!lf.is_ascii_uppercase());
1375     /// assert!(!esc.is_ascii_uppercase());
1376     /// ```
1377     #[must_use]
1378     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1379     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1380     #[inline]
1381     pub const fn is_ascii_uppercase(&self) -> bool {
1382         matches!(*self, 'A'..='Z')
1383     }
1384
1385     /// Checks if the value is an ASCII lowercase character:
1386     /// U+0061 'a' ..= U+007A 'z'.
1387     ///
1388     /// # Examples
1389     ///
1390     /// ```
1391     /// let uppercase_a = 'A';
1392     /// let uppercase_g = 'G';
1393     /// let a = 'a';
1394     /// let g = 'g';
1395     /// let zero = '0';
1396     /// let percent = '%';
1397     /// let space = ' ';
1398     /// let lf = '\n';
1399     /// let esc = '\x1b';
1400     ///
1401     /// assert!(!uppercase_a.is_ascii_lowercase());
1402     /// assert!(!uppercase_g.is_ascii_lowercase());
1403     /// assert!(a.is_ascii_lowercase());
1404     /// assert!(g.is_ascii_lowercase());
1405     /// assert!(!zero.is_ascii_lowercase());
1406     /// assert!(!percent.is_ascii_lowercase());
1407     /// assert!(!space.is_ascii_lowercase());
1408     /// assert!(!lf.is_ascii_lowercase());
1409     /// assert!(!esc.is_ascii_lowercase());
1410     /// ```
1411     #[must_use]
1412     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1413     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1414     #[inline]
1415     pub const fn is_ascii_lowercase(&self) -> bool {
1416         matches!(*self, 'a'..='z')
1417     }
1418
1419     /// Checks if the value is an ASCII alphanumeric character:
1420     ///
1421     /// - U+0041 'A' ..= U+005A 'Z', or
1422     /// - U+0061 'a' ..= U+007A 'z', or
1423     /// - U+0030 '0' ..= U+0039 '9'.
1424     ///
1425     /// # Examples
1426     ///
1427     /// ```
1428     /// let uppercase_a = 'A';
1429     /// let uppercase_g = 'G';
1430     /// let a = 'a';
1431     /// let g = 'g';
1432     /// let zero = '0';
1433     /// let percent = '%';
1434     /// let space = ' ';
1435     /// let lf = '\n';
1436     /// let esc = '\x1b';
1437     ///
1438     /// assert!(uppercase_a.is_ascii_alphanumeric());
1439     /// assert!(uppercase_g.is_ascii_alphanumeric());
1440     /// assert!(a.is_ascii_alphanumeric());
1441     /// assert!(g.is_ascii_alphanumeric());
1442     /// assert!(zero.is_ascii_alphanumeric());
1443     /// assert!(!percent.is_ascii_alphanumeric());
1444     /// assert!(!space.is_ascii_alphanumeric());
1445     /// assert!(!lf.is_ascii_alphanumeric());
1446     /// assert!(!esc.is_ascii_alphanumeric());
1447     /// ```
1448     #[must_use]
1449     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1450     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1451     #[inline]
1452     pub const fn is_ascii_alphanumeric(&self) -> bool {
1453         matches!(*self, '0'..='9') | matches!(*self, 'A'..='Z') | matches!(*self, 'a'..='z')
1454     }
1455
1456     /// Checks if the value is an ASCII decimal digit:
1457     /// U+0030 '0' ..= U+0039 '9'.
1458     ///
1459     /// # Examples
1460     ///
1461     /// ```
1462     /// let uppercase_a = 'A';
1463     /// let uppercase_g = 'G';
1464     /// let a = 'a';
1465     /// let g = 'g';
1466     /// let zero = '0';
1467     /// let percent = '%';
1468     /// let space = ' ';
1469     /// let lf = '\n';
1470     /// let esc = '\x1b';
1471     ///
1472     /// assert!(!uppercase_a.is_ascii_digit());
1473     /// assert!(!uppercase_g.is_ascii_digit());
1474     /// assert!(!a.is_ascii_digit());
1475     /// assert!(!g.is_ascii_digit());
1476     /// assert!(zero.is_ascii_digit());
1477     /// assert!(!percent.is_ascii_digit());
1478     /// assert!(!space.is_ascii_digit());
1479     /// assert!(!lf.is_ascii_digit());
1480     /// assert!(!esc.is_ascii_digit());
1481     /// ```
1482     #[must_use]
1483     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1484     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1485     #[inline]
1486     pub const fn is_ascii_digit(&self) -> bool {
1487         matches!(*self, '0'..='9')
1488     }
1489
1490     /// Checks if the value is an ASCII octal digit:
1491     /// U+0030 '0' ..= U+0037 '7'.
1492     ///
1493     /// # Examples
1494     ///
1495     /// ```
1496     /// #![feature(is_ascii_octdigit)]
1497     ///
1498     /// let uppercase_a = 'A';
1499     /// let a = 'a';
1500     /// let zero = '0';
1501     /// let seven = '7';
1502     /// let nine = '9';
1503     /// let percent = '%';
1504     /// let lf = '\n';
1505     ///
1506     /// assert!(!uppercase_a.is_ascii_octdigit());
1507     /// assert!(!a.is_ascii_octdigit());
1508     /// assert!(zero.is_ascii_octdigit());
1509     /// assert!(seven.is_ascii_octdigit());
1510     /// assert!(!nine.is_ascii_octdigit());
1511     /// assert!(!percent.is_ascii_octdigit());
1512     /// assert!(!lf.is_ascii_octdigit());
1513     /// ```
1514     #[must_use]
1515     #[unstable(feature = "is_ascii_octdigit", issue = "101288")]
1516     #[rustc_const_unstable(feature = "is_ascii_octdigit", issue = "101288")]
1517     #[inline]
1518     pub const fn is_ascii_octdigit(&self) -> bool {
1519         matches!(*self, '0'..='7')
1520     }
1521
1522     /// Checks if the value is an ASCII hexadecimal digit:
1523     ///
1524     /// - U+0030 '0' ..= U+0039 '9', or
1525     /// - U+0041 'A' ..= U+0046 'F', or
1526     /// - U+0061 'a' ..= U+0066 'f'.
1527     ///
1528     /// # Examples
1529     ///
1530     /// ```
1531     /// let uppercase_a = 'A';
1532     /// let uppercase_g = 'G';
1533     /// let a = 'a';
1534     /// let g = 'g';
1535     /// let zero = '0';
1536     /// let percent = '%';
1537     /// let space = ' ';
1538     /// let lf = '\n';
1539     /// let esc = '\x1b';
1540     ///
1541     /// assert!(uppercase_a.is_ascii_hexdigit());
1542     /// assert!(!uppercase_g.is_ascii_hexdigit());
1543     /// assert!(a.is_ascii_hexdigit());
1544     /// assert!(!g.is_ascii_hexdigit());
1545     /// assert!(zero.is_ascii_hexdigit());
1546     /// assert!(!percent.is_ascii_hexdigit());
1547     /// assert!(!space.is_ascii_hexdigit());
1548     /// assert!(!lf.is_ascii_hexdigit());
1549     /// assert!(!esc.is_ascii_hexdigit());
1550     /// ```
1551     #[must_use]
1552     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1553     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1554     #[inline]
1555     pub const fn is_ascii_hexdigit(&self) -> bool {
1556         matches!(*self, '0'..='9') | matches!(*self, 'A'..='F') | matches!(*self, 'a'..='f')
1557     }
1558
1559     /// Checks if the value is an ASCII punctuation character:
1560     ///
1561     /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
1562     /// - U+003A ..= U+0040 `: ; < = > ? @`, or
1563     /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1564     /// - U+007B ..= U+007E `{ | } ~`
1565     ///
1566     /// # Examples
1567     ///
1568     /// ```
1569     /// let uppercase_a = 'A';
1570     /// let uppercase_g = 'G';
1571     /// let a = 'a';
1572     /// let g = 'g';
1573     /// let zero = '0';
1574     /// let percent = '%';
1575     /// let space = ' ';
1576     /// let lf = '\n';
1577     /// let esc = '\x1b';
1578     ///
1579     /// assert!(!uppercase_a.is_ascii_punctuation());
1580     /// assert!(!uppercase_g.is_ascii_punctuation());
1581     /// assert!(!a.is_ascii_punctuation());
1582     /// assert!(!g.is_ascii_punctuation());
1583     /// assert!(!zero.is_ascii_punctuation());
1584     /// assert!(percent.is_ascii_punctuation());
1585     /// assert!(!space.is_ascii_punctuation());
1586     /// assert!(!lf.is_ascii_punctuation());
1587     /// assert!(!esc.is_ascii_punctuation());
1588     /// ```
1589     #[must_use]
1590     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1591     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1592     #[inline]
1593     pub const fn is_ascii_punctuation(&self) -> bool {
1594         matches!(*self, '!'..='/')
1595             | matches!(*self, ':'..='@')
1596             | matches!(*self, '['..='`')
1597             | matches!(*self, '{'..='~')
1598     }
1599
1600     /// Checks if the value is an ASCII graphic character:
1601     /// U+0021 '!' ..= U+007E '~'.
1602     ///
1603     /// # Examples
1604     ///
1605     /// ```
1606     /// let uppercase_a = 'A';
1607     /// let uppercase_g = 'G';
1608     /// let a = 'a';
1609     /// let g = 'g';
1610     /// let zero = '0';
1611     /// let percent = '%';
1612     /// let space = ' ';
1613     /// let lf = '\n';
1614     /// let esc = '\x1b';
1615     ///
1616     /// assert!(uppercase_a.is_ascii_graphic());
1617     /// assert!(uppercase_g.is_ascii_graphic());
1618     /// assert!(a.is_ascii_graphic());
1619     /// assert!(g.is_ascii_graphic());
1620     /// assert!(zero.is_ascii_graphic());
1621     /// assert!(percent.is_ascii_graphic());
1622     /// assert!(!space.is_ascii_graphic());
1623     /// assert!(!lf.is_ascii_graphic());
1624     /// assert!(!esc.is_ascii_graphic());
1625     /// ```
1626     #[must_use]
1627     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1628     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1629     #[inline]
1630     pub const fn is_ascii_graphic(&self) -> bool {
1631         matches!(*self, '!'..='~')
1632     }
1633
1634     /// Checks if the value is an ASCII whitespace character:
1635     /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1636     /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1637     ///
1638     /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1639     /// whitespace][infra-aw]. There are several other definitions in
1640     /// wide use. For instance, [the POSIX locale][pct] includes
1641     /// U+000B VERTICAL TAB as well as all the above characters,
1642     /// but—from the very same specification—[the default rule for
1643     /// "field splitting" in the Bourne shell][bfs] considers *only*
1644     /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1645     ///
1646     /// If you are writing a program that will process an existing
1647     /// file format, check what that format's definition of whitespace is
1648     /// before using this function.
1649     ///
1650     /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1651     /// [pct]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1652     /// [bfs]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1653     ///
1654     /// # Examples
1655     ///
1656     /// ```
1657     /// let uppercase_a = 'A';
1658     /// let uppercase_g = 'G';
1659     /// let a = 'a';
1660     /// let g = 'g';
1661     /// let zero = '0';
1662     /// let percent = '%';
1663     /// let space = ' ';
1664     /// let lf = '\n';
1665     /// let esc = '\x1b';
1666     ///
1667     /// assert!(!uppercase_a.is_ascii_whitespace());
1668     /// assert!(!uppercase_g.is_ascii_whitespace());
1669     /// assert!(!a.is_ascii_whitespace());
1670     /// assert!(!g.is_ascii_whitespace());
1671     /// assert!(!zero.is_ascii_whitespace());
1672     /// assert!(!percent.is_ascii_whitespace());
1673     /// assert!(space.is_ascii_whitespace());
1674     /// assert!(lf.is_ascii_whitespace());
1675     /// assert!(!esc.is_ascii_whitespace());
1676     /// ```
1677     #[must_use]
1678     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1679     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1680     #[inline]
1681     pub const fn is_ascii_whitespace(&self) -> bool {
1682         matches!(*self, '\t' | '\n' | '\x0C' | '\r' | ' ')
1683     }
1684
1685     /// Checks if the value is an ASCII control character:
1686     /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
1687     /// Note that most ASCII whitespace characters are control
1688     /// characters, but SPACE is not.
1689     ///
1690     /// # Examples
1691     ///
1692     /// ```
1693     /// let uppercase_a = 'A';
1694     /// let uppercase_g = 'G';
1695     /// let a = 'a';
1696     /// let g = 'g';
1697     /// let zero = '0';
1698     /// let percent = '%';
1699     /// let space = ' ';
1700     /// let lf = '\n';
1701     /// let esc = '\x1b';
1702     ///
1703     /// assert!(!uppercase_a.is_ascii_control());
1704     /// assert!(!uppercase_g.is_ascii_control());
1705     /// assert!(!a.is_ascii_control());
1706     /// assert!(!g.is_ascii_control());
1707     /// assert!(!zero.is_ascii_control());
1708     /// assert!(!percent.is_ascii_control());
1709     /// assert!(!space.is_ascii_control());
1710     /// assert!(lf.is_ascii_control());
1711     /// assert!(esc.is_ascii_control());
1712     /// ```
1713     #[must_use]
1714     #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
1715     #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")]
1716     #[inline]
1717     pub const fn is_ascii_control(&self) -> bool {
1718         matches!(*self, '\0'..='\x1F' | '\x7F')
1719     }
1720 }
1721
/// A bundle of independent on/off switches, each selecting one optional
/// category of characters to escape.
///
/// NOTE(review): judging by the name, this is presumably the argument type of
/// an `escape_debug_ext` routine defined outside this part of the file —
/// confirm against its caller.
pub(crate) struct EscapeDebugExtArgs {
    /// When `true`, requests escaping of extended-grapheme code points.
    pub(crate) escape_grapheme_extended: bool,

    /// When `true`, requests escaping of the single quote character.
    pub(crate) escape_single_quote: bool,

    /// When `true`, requests escaping of the double quote character.
    pub(crate) escape_double_quote: bool,
}
1732
impl EscapeDebugExtArgs {
    /// Preset with every switch turned on: grapheme-extended code points,
    /// single quotes and double quotes are all requested to be escaped.
    pub(crate) const ESCAPE_ALL: Self = Self {
        escape_grapheme_extended: true,
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
1740
1741 #[inline]
1742 const fn len_utf8(code: u32) -> usize {
1743     if code < MAX_ONE_B {
1744         1
1745     } else if code < MAX_TWO_B {
1746         2
1747     } else if code < MAX_THREE_B {
1748         3
1749     } else {
1750         4
1751     }
1752 }
1753
1754 /// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
1755 /// and then returns the subslice of the buffer that contains the encoded character.
1756 ///
1757 /// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
1758 /// (Creating a `char` in the surrogate range is UB.)
1759 /// The result is valid [generalized UTF-8] but not valid UTF-8.
1760 ///
1761 /// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
1762 ///
1763 /// # Panics
1764 ///
1765 /// Panics if the buffer is not large enough.
1766 /// A buffer of length four is large enough to encode any `char`.
1767 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1768 #[doc(hidden)]
1769 #[inline]
1770 pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
1771     let len = len_utf8(code);
1772     match (len, &mut dst[..]) {
1773         (1, [a, ..]) => {
1774             *a = code as u8;
1775         }
1776         (2, [a, b, ..]) => {
1777             *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
1778             *b = (code & 0x3F) as u8 | TAG_CONT;
1779         }
1780         (3, [a, b, c, ..]) => {
1781             *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
1782             *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1783             *c = (code & 0x3F) as u8 | TAG_CONT;
1784         }
1785         (4, [a, b, c, d, ..]) => {
1786             *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
1787             *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
1788             *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
1789             *d = (code & 0x3F) as u8 | TAG_CONT;
1790         }
1791         _ => panic!(
1792             "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
1793             len,
1794             code,
1795             dst.len(),
1796         ),
1797     };
1798     &mut dst[..len]
1799 }
1800
1801 /// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
1802 /// and then returns the subslice of the buffer that contains the encoded character.
1803 ///
1804 /// Unlike `char::encode_utf16`, this method also handles codepoints in the surrogate range.
1805 /// (Creating a `char` in the surrogate range is UB.)
1806 ///
1807 /// # Panics
1808 ///
1809 /// Panics if the buffer is not large enough.
1810 /// A buffer of length 2 is large enough to encode any `char`.
1811 #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
1812 #[doc(hidden)]
1813 #[inline]
pub fn encode_utf16_raw(mut code: u32, dst: &mut [u16]) -> &mut [u16] {
    // Number of UTF-16 code units required: one if the value fits in 16 bits
    // (the BMP, surrogate code points included), two otherwise.
    //
    // This is computed arithmetically on purpose. Going through `char` (e.g.
    // `char::from_u32_unchecked(code).len_utf16()`) would be undefined
    // behaviour for the surrogate values this function is meant to accept.
    let len = if (code & 0xFFFF) == code { 1 } else { 2 };

    // The slice patterns double as the capacity check: an arm is only taken
    // when `dst` holds at least as many units as that arm writes.
    match (len, &mut dst[..]) {
        (1, [a, ..]) => {
            // A 16-bit value is stored as-is.
            *a = code as u16;
        }
        (2, [a, b, ..]) => {
            // Supplementary planes break into a high/low surrogate pair.
            code -= 0x1_0000;
            *a = 0xD800 | ((code >> 10) as u16);
            *b = 0xDC00 | ((code as u16) & 0x3FF);
        }
        _ => panic!(
            "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
            len,
            code,
            dst.len(),
        ),
    };

    // SAFETY: the match above either diverged or matched a pattern proving
    // `dst.len() >= len`, and `as_mut_ptr` on a `&mut [u16]` yields a pointer
    // valid for that many elements. `dst` is not used again after this point.
    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
}