]> git.proxmox.com Git - rustc.git/blob - src/libstd_unicode/char.rs
New upstream version 1.23.0+dfsg1
[rustc.git] / src / libstd_unicode / char.rs
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! A character type.
12 //!
13 //! The `char` type represents a single character. More specifically, since
14 //! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
15 //! scalar value]', which is similar to, but not the same as, a '[Unicode code
16 //! point]'.
17 //!
18 //! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
19 //! [Unicode code point]: http://www.unicode.org/glossary/#code_point
20 //!
21 //! This module exists for technical reasons; the primary documentation for
22 //! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
23 //! itself.
24 //!
25 //! This module is the home of the iterator implementations for the iterators
26 //! implemented on `char`, as well as some useful constants and conversion
27 //! functions that convert various types to `char`.
28
29 #![stable(feature = "rust1", since = "1.0.0")]
30
31 use core::char::CharExt as C;
32 use core::iter::FusedIterator;
33 use core::fmt::{self, Write};
34 use tables::{conversions, derived_property, general_category, property};
35
36 // stable reexports
37 #[stable(feature = "rust1", since = "1.0.0")]
38 pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked};
39 #[stable(feature = "rust1", since = "1.0.0")]
40 pub use core::char::{EscapeDebug, EscapeDefault, EscapeUnicode};
41 #[stable(feature = "char_from_str", since = "1.20.0")]
42 pub use core::char::ParseCharError;
43
44 // unstable reexports
45 #[unstable(feature = "try_from", issue = "33417")]
46 pub use core::char::CharTryFromError;
47 #[unstable(feature = "decode_utf8", issue = "33906")]
48 pub use core::char::{DecodeUtf8, decode_utf8};
49 #[unstable(feature = "unicode", issue = "27783")]
50 pub use tables::{UnicodeVersion, UNICODE_VERSION};
51
52 /// Returns an iterator that yields the lowercase equivalent of a `char`.
53 ///
54 /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
55 /// its documentation for more.
56 ///
57 /// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
58 /// [`char`]: ../../std/primitive.char.html
59 #[stable(feature = "rust1", since = "1.0.0")]
60 pub struct ToLowercase(CaseMappingIter);
61
62 #[stable(feature = "rust1", since = "1.0.0")]
63 impl Iterator for ToLowercase {
64 type Item = char;
65 fn next(&mut self) -> Option<char> {
66 self.0.next()
67 }
68 }
69
70 #[unstable(feature = "fused", issue = "35602")]
71 impl FusedIterator for ToLowercase {}
72
73 /// Returns an iterator that yields the uppercase equivalent of a `char`.
74 ///
75 /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
76 /// its documentation for more.
77 ///
78 /// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
79 /// [`char`]: ../../std/primitive.char.html
80 #[stable(feature = "rust1", since = "1.0.0")]
81 pub struct ToUppercase(CaseMappingIter);
82
83 #[stable(feature = "rust1", since = "1.0.0")]
84 impl Iterator for ToUppercase {
85 type Item = char;
86 fn next(&mut self) -> Option<char> {
87 self.0.next()
88 }
89 }
90
91 #[unstable(feature = "fused", issue = "35602")]
92 impl FusedIterator for ToUppercase {}
93
/// State machine shared by the case-mapping iterators.
///
/// Each variant holds, in order, the `char`s that have not been yielded yet.
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl CaseMappingIter {
    /// Builds the iterator from a three-slot mapping, treating a `'\0'` in a
    /// trailing slot as "no character here".
    fn new(chars: [char; 3]) -> CaseMappingIter {
        let (first, second, third) = (chars[0], chars[1], chars[2]);

        // The last slot decides first: when it is occupied all three slots
        // are yielded, whatever the middle one holds.
        if third != '\0' {
            return CaseMappingIter::Three(first, second, third);
        }
        if second != '\0' {
            return CaseMappingIter::Two(first, second);
        }
        // Even a `'\0'` in the first slot is yielded as a single character.
        CaseMappingIter::One(first)
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    /// Yields the next pending `char` and shrinks the state by one.
    fn next(&mut self) -> Option<char> {
        let (head, remaining) = match *self {
            CaseMappingIter::Three(a, b, c) => (a, CaseMappingIter::Two(b, c)),
            CaseMappingIter::Two(b, c) => (b, CaseMappingIter::One(c)),
            CaseMappingIter::One(c) => (c, CaseMappingIter::Zero),
            CaseMappingIter::Zero => return None,
        };
        *self = remaining;
        Some(head)
    }
}

impl fmt::Display for CaseMappingIter {
    /// Writes every `char` that has not been yielded yet, in order, stopping
    /// at the first write error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Copy the pending chars into a fixed buffer; only the first `count`
        // entries are meaningful.
        let (pending, count) = match *self {
            CaseMappingIter::Three(a, b, c) => ([a, b, c], 3),
            CaseMappingIter::Two(b, c) => ([b, c, '\0'], 2),
            CaseMappingIter::One(c) => ([c, '\0', '\0'], 1),
            CaseMappingIter::Zero => (['\0'; 3], 0),
        };
        for &ch in &pending[..count] {
            f.write_char(ch)?;
        }
        Ok(())
    }
}
155
156 #[stable(feature = "char_struct_display", since = "1.16.0")]
157 impl fmt::Display for ToLowercase {
158 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
159 fmt::Display::fmt(&self.0, f)
160 }
161 }
162
163 #[stable(feature = "char_struct_display", since = "1.16.0")]
164 impl fmt::Display for ToUppercase {
165 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
166 fmt::Display::fmt(&self.0, f)
167 }
168 }
169
170 #[lang = "char"]
171 impl char {
172 /// Checks if a `char` is a digit in the given radix.
173 ///
174 /// A 'radix' here is sometimes also called a 'base'. A radix of two
175 /// indicates a binary number, a radix of ten, decimal, and a radix of
176 /// sixteen, hexadecimal, to give some common values. Arbitrary
177 /// radices are supported.
178 ///
179 /// Compared to `is_numeric()`, this function only recognizes the characters
180 /// `0-9`, `a-z` and `A-Z`.
181 ///
182 /// 'Digit' is defined to be only the following characters:
183 ///
184 /// * `0-9`
185 /// * `a-z`
186 /// * `A-Z`
187 ///
188 /// For a more comprehensive understanding of 'digit', see [`is_numeric`][is_numeric].
189 ///
190 /// [is_numeric]: #method.is_numeric
191 ///
192 /// # Panics
193 ///
194 /// Panics if given a radix larger than 36.
195 ///
196 /// # Examples
197 ///
198 /// Basic usage:
199 ///
200 /// ```
201 /// assert!('1'.is_digit(10));
202 /// assert!('f'.is_digit(16));
203 /// assert!(!'f'.is_digit(10));
204 /// ```
205 ///
206 /// Passing a large radix, causing a panic:
207 ///
208 /// ```
209 /// use std::thread;
210 ///
211 /// let result = thread::spawn(|| {
212 /// // this panics
213 /// '1'.is_digit(37);
214 /// }).join();
215 ///
216 /// assert!(result.is_err());
217 /// ```
218 #[stable(feature = "rust1", since = "1.0.0")]
219 #[inline]
220 pub fn is_digit(self, radix: u32) -> bool {
221 C::is_digit(self, radix)
222 }
223
224 /// Converts a `char` to a digit in the given radix.
225 ///
226 /// A 'radix' here is sometimes also called a 'base'. A radix of two
227 /// indicates a binary number, a radix of ten, decimal, and a radix of
228 /// sixteen, hexadecimal, to give some common values. Arbitrary
229 /// radices are supported.
230 ///
231 /// 'Digit' is defined to be only the following characters:
232 ///
233 /// * `0-9`
234 /// * `a-z`
235 /// * `A-Z`
236 ///
237 /// # Errors
238 ///
239 /// Returns `None` if the `char` does not refer to a digit in the given radix.
240 ///
241 /// # Panics
242 ///
243 /// Panics if given a radix larger than 36.
244 ///
245 /// # Examples
246 ///
247 /// Basic usage:
248 ///
249 /// ```
250 /// assert_eq!('1'.to_digit(10), Some(1));
251 /// assert_eq!('f'.to_digit(16), Some(15));
252 /// ```
253 ///
254 /// Passing a non-digit results in failure:
255 ///
256 /// ```
257 /// assert_eq!('f'.to_digit(10), None);
258 /// assert_eq!('z'.to_digit(16), None);
259 /// ```
260 ///
261 /// Passing a large radix, causing a panic:
262 ///
263 /// ```
264 /// use std::thread;
265 ///
266 /// let result = thread::spawn(|| {
267 /// '1'.to_digit(37);
268 /// }).join();
269 ///
270 /// assert!(result.is_err());
271 /// ```
272 #[stable(feature = "rust1", since = "1.0.0")]
273 #[inline]
274 pub fn to_digit(self, radix: u32) -> Option<u32> {
275 C::to_digit(self, radix)
276 }
277
278 /// Returns an iterator that yields the hexadecimal Unicode escape of a
279 /// character as `char`s.
280 ///
281 /// This will escape characters with the Rust syntax of the form
282 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
283 ///
284 /// # Examples
285 ///
286 /// As an iterator:
287 ///
288 /// ```
289 /// for c in '❤'.escape_unicode() {
290 /// print!("{}", c);
291 /// }
292 /// println!();
293 /// ```
294 ///
295 /// Using `println!` directly:
296 ///
297 /// ```
298 /// println!("{}", '❤'.escape_unicode());
299 /// ```
300 ///
301 /// Both are equivalent to:
302 ///
303 /// ```
304 /// println!("\\u{{2764}}");
305 /// ```
306 ///
307 /// Using `to_string`:
308 ///
309 /// ```
310 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
311 /// ```
312 #[stable(feature = "rust1", since = "1.0.0")]
313 #[inline]
314 pub fn escape_unicode(self) -> EscapeUnicode {
315 C::escape_unicode(self)
316 }
317
318 /// Returns an iterator that yields the literal escape code of a character
319 /// as `char`s.
320 ///
321 /// This will escape the characters similar to the `Debug` implementations
322 /// of `str` or `char`.
323 ///
324 /// # Examples
325 ///
326 /// As an iterator:
327 ///
328 /// ```
329 /// for c in '\n'.escape_debug() {
330 /// print!("{}", c);
331 /// }
332 /// println!();
333 /// ```
334 ///
335 /// Using `println!` directly:
336 ///
337 /// ```
338 /// println!("{}", '\n'.escape_debug());
339 /// ```
340 ///
341 /// Both are equivalent to:
342 ///
343 /// ```
344 /// println!("\\n");
345 /// ```
346 ///
347 /// Using `to_string`:
348 ///
349 /// ```
350 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
351 /// ```
352 #[stable(feature = "char_escape_debug", since = "1.20.0")]
353 #[inline]
354 pub fn escape_debug(self) -> EscapeDebug {
355 C::escape_debug(self)
356 }
357
358 /// Returns an iterator that yields the literal escape code of a character
359 /// as `char`s.
360 ///
361 /// The default is chosen with a bias toward producing literals that are
362 /// legal in a variety of languages, including C++11 and similar C-family
363 /// languages. The exact rules are:
364 ///
365 /// * Tab is escaped as `\t`.
366 /// * Carriage return is escaped as `\r`.
367 /// * Line feed is escaped as `\n`.
368 /// * Single quote is escaped as `\'`.
369 /// * Double quote is escaped as `\"`.
370 /// * Backslash is escaped as `\\`.
371 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
372 /// inclusive is not escaped.
373 /// * All other characters are given hexadecimal Unicode escapes; see
374 /// [`escape_unicode`][escape_unicode].
375 ///
376 /// [escape_unicode]: #method.escape_unicode
377 ///
378 /// # Examples
379 ///
380 /// As an iterator:
381 ///
382 /// ```
383 /// for c in '"'.escape_default() {
384 /// print!("{}", c);
385 /// }
386 /// println!();
387 /// ```
388 ///
389 /// Using `println!` directly:
390 ///
391 /// ```
392 /// println!("{}", '"'.escape_default());
393 /// ```
394 ///
395 ///
396 /// Both are equivalent to:
397 ///
398 /// ```
399 /// println!("\\\"");
400 /// ```
401 ///
402 /// Using `to_string`:
403 ///
404 /// ```
405 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
406 /// ```
407 #[stable(feature = "rust1", since = "1.0.0")]
408 #[inline]
409 pub fn escape_default(self) -> EscapeDefault {
410 C::escape_default(self)
411 }
412
413 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
414 ///
415 /// That number of bytes is always between 1 and 4, inclusive.
416 ///
417 /// # Examples
418 ///
419 /// Basic usage:
420 ///
421 /// ```
422 /// let len = 'A'.len_utf8();
423 /// assert_eq!(len, 1);
424 ///
425 /// let len = 'ß'.len_utf8();
426 /// assert_eq!(len, 2);
427 ///
428 /// let len = 'ℝ'.len_utf8();
429 /// assert_eq!(len, 3);
430 ///
431 /// let len = '💣'.len_utf8();
432 /// assert_eq!(len, 4);
433 /// ```
434 ///
435 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
436 /// would take if each code point was represented as a `char` vs in the `&str` itself:
437 ///
438 /// ```
439 /// // as chars
440 /// let eastern = '東';
441 /// let capitol = '京';
442 ///
443 /// // both can be represented as three bytes
444 /// assert_eq!(3, eastern.len_utf8());
445 /// assert_eq!(3, capitol.len_utf8());
446 ///
447 /// // as a &str, these two are encoded in UTF-8
448 /// let tokyo = "東京";
449 ///
450 /// let len = eastern.len_utf8() + capitol.len_utf8();
451 ///
452 /// // we can see that they take six bytes total...
453 /// assert_eq!(6, len);
454 ///
455 /// // ... just like the &str
456 /// assert_eq!(len, tokyo.len());
457 /// ```
458 #[stable(feature = "rust1", since = "1.0.0")]
459 #[inline]
460 pub fn len_utf8(self) -> usize {
461 C::len_utf8(self)
462 }
463
464 /// Returns the number of 16-bit code units this `char` would need if
465 /// encoded in UTF-16.
466 ///
467 /// See the documentation for [`len_utf8`] for more explanation of this
468 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
469 ///
470 /// [`len_utf8`]: #method.len_utf8
471 ///
472 /// # Examples
473 ///
474 /// Basic usage:
475 ///
476 /// ```
477 /// let n = 'ß'.len_utf16();
478 /// assert_eq!(n, 1);
479 ///
480 /// let len = '💣'.len_utf16();
481 /// assert_eq!(len, 2);
482 /// ```
483 #[stable(feature = "rust1", since = "1.0.0")]
484 #[inline]
485 pub fn len_utf16(self) -> usize {
486 C::len_utf16(self)
487 }
488
489 /// Encodes this character as UTF-8 into the provided byte buffer,
490 /// and then returns the subslice of the buffer that contains the encoded character.
491 ///
492 /// # Panics
493 ///
494 /// Panics if the buffer is not large enough.
495 /// A buffer of length four is large enough to encode any `char`.
496 ///
497 /// # Examples
498 ///
499 /// In both of these examples, 'ß' takes two bytes to encode.
500 ///
501 /// ```
502 /// let mut b = [0; 2];
503 ///
504 /// let result = 'ß'.encode_utf8(&mut b);
505 ///
506 /// assert_eq!(result, "ß");
507 ///
508 /// assert_eq!(result.len(), 2);
509 /// ```
510 ///
511 /// A buffer that's too small:
512 ///
513 /// ```
514 /// use std::thread;
515 ///
516 /// let result = thread::spawn(|| {
517 /// let mut b = [0; 1];
518 ///
519 /// // this panics
520 /// 'ß'.encode_utf8(&mut b);
521 /// }).join();
522 ///
523 /// assert!(result.is_err());
524 /// ```
525 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
526 #[inline]
527 pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
528 C::encode_utf8(self, dst)
529 }
530
531 /// Encodes this character as UTF-16 into the provided `u16` buffer,
532 /// and then returns the subslice of the buffer that contains the encoded character.
533 ///
534 /// # Panics
535 ///
536 /// Panics if the buffer is not large enough.
537 /// A buffer of length 2 is large enough to encode any `char`.
538 ///
539 /// # Examples
540 ///
541 /// In both of these examples, '𝕊' takes two `u16`s to encode.
542 ///
543 /// ```
544 /// let mut b = [0; 2];
545 ///
546 /// let result = '𝕊'.encode_utf16(&mut b);
547 ///
548 /// assert_eq!(result.len(), 2);
549 /// ```
550 ///
551 /// A buffer that's too small:
552 ///
553 /// ```
554 /// use std::thread;
555 ///
556 /// let result = thread::spawn(|| {
557 /// let mut b = [0; 1];
558 ///
559 /// // this panics
560 /// '𝕊'.encode_utf16(&mut b);
561 /// }).join();
562 ///
563 /// assert!(result.is_err());
564 /// ```
565 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
566 #[inline]
567 pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
568 C::encode_utf16(self, dst)
569 }
570
571 /// Returns true if this `char` is an alphabetic code point, and false if not.
572 ///
573 /// # Examples
574 ///
575 /// Basic usage:
576 ///
577 /// ```
578 /// assert!('a'.is_alphabetic());
579 /// assert!('京'.is_alphabetic());
580 ///
581 /// let c = '💝';
582 /// // love is many things, but it is not alphabetic
583 /// assert!(!c.is_alphabetic());
584 /// ```
585 #[stable(feature = "rust1", since = "1.0.0")]
586 #[inline]
587 pub fn is_alphabetic(self) -> bool {
588 match self {
589 'a'...'z' | 'A'...'Z' => true,
590 c if c > '\x7f' => derived_property::Alphabetic(c),
591 _ => false,
592 }
593 }
594
595 /// Returns true if this `char` satisfies the 'XID_Start' Unicode property, and false
596 /// otherwise.
597 ///
598 /// 'XID_Start' is a Unicode Derived Property specified in
599 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
600 /// mostly similar to `ID_Start` but modified for closure under `NFKx`.
601 #[unstable(feature = "rustc_private",
602 reason = "mainly needed for compiler internals",
603 issue = "27812")]
604 #[inline]
605 pub fn is_xid_start(self) -> bool {
606 derived_property::XID_Start(self)
607 }
608
609 /// Returns true if this `char` satisfies the 'XID_Continue' Unicode property, and false
610 /// otherwise.
611 ///
612 /// 'XID_Continue' is a Unicode Derived Property specified in
613 /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications),
614 /// mostly similar to 'ID_Continue' but modified for closure under NFKx.
615 #[unstable(feature = "rustc_private",
616 reason = "mainly needed for compiler internals",
617 issue = "27812")]
618 #[inline]
619 pub fn is_xid_continue(self) -> bool {
620 derived_property::XID_Continue(self)
621 }
622
623 /// Returns true if this `char` is lowercase, and false otherwise.
624 ///
625 /// 'Lowercase' is defined according to the terms of the Unicode Derived Core
626 /// Property `Lowercase`.
627 ///
628 /// # Examples
629 ///
630 /// Basic usage:
631 ///
632 /// ```
633 /// assert!('a'.is_lowercase());
634 /// assert!('δ'.is_lowercase());
635 /// assert!(!'A'.is_lowercase());
636 /// assert!(!'Δ'.is_lowercase());
637 ///
638 /// // The various Chinese scripts do not have case, and so:
639 /// assert!(!'中'.is_lowercase());
640 /// ```
641 #[stable(feature = "rust1", since = "1.0.0")]
642 #[inline]
643 pub fn is_lowercase(self) -> bool {
644 match self {
645 'a'...'z' => true,
646 c if c > '\x7f' => derived_property::Lowercase(c),
647 _ => false,
648 }
649 }
650
651 /// Returns true if this `char` is uppercase, and false otherwise.
652 ///
653 /// 'Uppercase' is defined according to the terms of the Unicode Derived Core
654 /// Property `Uppercase`.
655 ///
656 /// # Examples
657 ///
658 /// Basic usage:
659 ///
660 /// ```
661 /// assert!(!'a'.is_uppercase());
662 /// assert!(!'δ'.is_uppercase());
663 /// assert!('A'.is_uppercase());
664 /// assert!('Δ'.is_uppercase());
665 ///
666 /// // The various Chinese scripts do not have case, and so:
667 /// assert!(!'中'.is_uppercase());
668 /// ```
669 #[stable(feature = "rust1", since = "1.0.0")]
670 #[inline]
671 pub fn is_uppercase(self) -> bool {
672 match self {
673 'A'...'Z' => true,
674 c if c > '\x7f' => derived_property::Uppercase(c),
675 _ => false,
676 }
677 }
678
679 /// Returns true if this `char` is whitespace, and false otherwise.
680 ///
681 /// 'Whitespace' is defined according to the terms of the Unicode Derived Core
682 /// Property `White_Space`.
683 ///
684 /// # Examples
685 ///
686 /// Basic usage:
687 ///
688 /// ```
689 /// assert!(' '.is_whitespace());
690 ///
691 /// // a non-breaking space
692 /// assert!('\u{A0}'.is_whitespace());
693 ///
694 /// assert!(!'越'.is_whitespace());
695 /// ```
696 #[stable(feature = "rust1", since = "1.0.0")]
697 #[inline]
698 pub fn is_whitespace(self) -> bool {
699 match self {
700 ' ' | '\x09'...'\x0d' => true,
701 c if c > '\x7f' => property::White_Space(c),
702 _ => false,
703 }
704 }
705
706 /// Returns true if this `char` is alphanumeric, and false otherwise.
707 ///
708 /// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
709 /// 'Nd', 'Nl', 'No' and the Derived Core Property 'Alphabetic'.
710 ///
711 /// # Examples
712 ///
713 /// Basic usage:
714 ///
715 /// ```
716 /// assert!('٣'.is_alphanumeric());
717 /// assert!('7'.is_alphanumeric());
718 /// assert!('৬'.is_alphanumeric());
719 /// assert!('K'.is_alphanumeric());
720 /// assert!('و'.is_alphanumeric());
721 /// assert!('藏'.is_alphanumeric());
722 /// assert!('¾'.is_alphanumeric());
723 /// assert!('①'.is_alphanumeric());
724 /// ```
725 #[stable(feature = "rust1", since = "1.0.0")]
726 #[inline]
727 pub fn is_alphanumeric(self) -> bool {
728 self.is_alphabetic() || self.is_numeric()
729 }
730
731 /// Returns true if this `char` is a control code point, and false otherwise.
732 ///
733 /// 'Control code point' is defined in terms of the Unicode General
734 /// Category `Cc`.
735 ///
736 /// # Examples
737 ///
738 /// Basic usage:
739 ///
740 /// ```
741 /// // U+009C, STRING TERMINATOR
742 /// assert!('\u{009c}'.is_control());
743 /// assert!(!'q'.is_control());
744 /// ```
745 #[stable(feature = "rust1", since = "1.0.0")]
746 #[inline]
747 pub fn is_control(self) -> bool {
748 general_category::Cc(self)
749 }
750
751 /// Returns true if this `char` is numeric, and false otherwise.
752 ///
753 /// 'Numeric'-ness is defined in terms of the Unicode General Categories
754 /// 'Nd', 'Nl', 'No'.
755 ///
756 /// # Examples
757 ///
758 /// Basic usage:
759 ///
760 /// ```
761 /// assert!('٣'.is_numeric());
762 /// assert!('7'.is_numeric());
763 /// assert!('৬'.is_numeric());
764 /// assert!(!'K'.is_numeric());
765 /// assert!(!'و'.is_numeric());
766 /// assert!(!'藏'.is_numeric());
767 /// assert!('¾'.is_numeric());
768 /// assert!('①'.is_numeric());
769 /// ```
770 #[stable(feature = "rust1", since = "1.0.0")]
771 #[inline]
772 pub fn is_numeric(self) -> bool {
773 match self {
774 '0'...'9' => true,
775 c if c > '\x7f' => general_category::N(c),
776 _ => false,
777 }
778 }
779
780 /// Returns an iterator that yields the lowercase equivalent of a `char`
781 /// as one or more `char`s.
782 ///
783 /// If a character does not have a lowercase equivalent, the same character
784 /// will be returned back by the iterator.
785 ///
786 /// This performs complex unconditional mappings with no tailoring: it maps
787 /// one Unicode character to its lowercase equivalent according to the
788 /// [Unicode database] and the additional complex mappings
789 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
790 /// language) are not considered here.
791 ///
792 /// For a full reference, see [here][reference].
793 ///
794 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
795 ///
796 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
797 ///
798 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
799 ///
800 /// # Examples
801 ///
802 /// As an iterator:
803 ///
804 /// ```
805 /// for c in 'İ'.to_lowercase() {
806 /// print!("{}", c);
807 /// }
808 /// println!();
809 /// ```
810 ///
811 /// Using `println!` directly:
812 ///
813 /// ```
814 /// println!("{}", 'İ'.to_lowercase());
815 /// ```
816 ///
817 /// Both are equivalent to:
818 ///
819 /// ```
820 /// println!("i\u{307}");
821 /// ```
822 ///
823 /// Using `to_string`:
824 ///
825 /// ```
826 /// assert_eq!('C'.to_lowercase().to_string(), "c");
827 ///
828 /// // Sometimes the result is more than one character:
829 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
830 ///
831 /// // Characters that do not have both uppercase and lowercase
832 /// // convert into themselves.
833 /// assert_eq!('山'.to_lowercase().to_string(), "山");
834 /// ```
835 #[stable(feature = "rust1", since = "1.0.0")]
836 #[inline]
837 pub fn to_lowercase(self) -> ToLowercase {
838 ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
839 }
840
841 /// Returns an iterator that yields the uppercase equivalent of a `char`
842 /// as one or more `char`s.
843 ///
844 /// If a character does not have an uppercase equivalent, the same character
845 /// will be returned back by the iterator.
846 ///
847 /// This performs complex unconditional mappings with no tailoring: it maps
848 /// one Unicode character to its uppercase equivalent according to the
849 /// [Unicode database] and the additional complex mappings
850 /// [`SpecialCasing.txt`]. Conditional mappings (based on context or
851 /// language) are not considered here.
852 ///
853 /// For a full reference, see [here][reference].
854 ///
855 /// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
856 ///
857 /// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
858 ///
859 /// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
860 ///
861 /// # Examples
862 ///
863 /// As an iterator:
864 ///
865 /// ```
866 /// for c in 'ß'.to_uppercase() {
867 /// print!("{}", c);
868 /// }
869 /// println!();
870 /// ```
871 ///
872 /// Using `println!` directly:
873 ///
874 /// ```
875 /// println!("{}", 'ß'.to_uppercase());
876 /// ```
877 ///
878 /// Both are equivalent to:
879 ///
880 /// ```
881 /// println!("SS");
882 /// ```
883 ///
884 /// Using `to_string`:
885 ///
886 /// ```
887 /// assert_eq!('c'.to_uppercase().to_string(), "C");
888 ///
889 /// // Sometimes the result is more than one character:
890 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
891 ///
892 /// // Characters that do not have both uppercase and lowercase
893 /// // convert into themselves.
894 /// assert_eq!('山'.to_uppercase().to_string(), "山");
895 /// ```
896 ///
897 /// # Note on locale
898 ///
899 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
900 ///
901 /// * 'Dotless': I / ı, sometimes written ï
902 /// * 'Dotted': İ / i
903 ///
904 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
905 ///
906 /// ```
907 /// let upper_i = 'i'.to_uppercase().to_string();
908 /// ```
909 ///
910 /// The value of `upper_i` here relies on the language of the text: if we're
911 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
912 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
913 ///
914 /// ```
915 /// let upper_i = 'i'.to_uppercase().to_string();
916 ///
917 /// assert_eq!(upper_i, "I");
918 /// ```
919 ///
920 /// holds across languages.
921 #[stable(feature = "rust1", since = "1.0.0")]
922 #[inline]
923 pub fn to_uppercase(self) -> ToUppercase {
924 ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
925 }
926
927 /// Checks if the value is within the ASCII range.
928 ///
929 /// # Examples
930 ///
931 /// ```
932 /// let ascii = 'a';
933 /// let non_ascii = '❤';
934 ///
935 /// assert!(ascii.is_ascii());
936 /// assert!(!non_ascii.is_ascii());
937 /// ```
938 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
939 #[inline]
940 pub fn is_ascii(&self) -> bool {
941 *self as u32 <= 0x7F
942 }
943
944 /// Makes a copy of the value in its ASCII upper case equivalent.
945 ///
946 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
947 /// but non-ASCII letters are unchanged.
948 ///
949 /// To uppercase the value in-place, use [`make_ascii_uppercase`].
950 ///
951 /// To uppercase ASCII characters in addition to non-ASCII characters, use
952 /// [`to_uppercase`].
953 ///
954 /// # Examples
955 ///
956 /// ```
957 /// let ascii = 'a';
958 /// let non_ascii = '❤';
959 ///
960 /// assert_eq!('A', ascii.to_ascii_uppercase());
961 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
962 /// ```
963 ///
964 /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
965 /// [`to_uppercase`]: #method.to_uppercase
966 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
967 #[inline]
968 pub fn to_ascii_uppercase(&self) -> char {
969 if self.is_ascii() {
970 (*self as u8).to_ascii_uppercase() as char
971 } else {
972 *self
973 }
974 }
975
976 /// Makes a copy of the value in its ASCII lower case equivalent.
977 ///
978 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
979 /// but non-ASCII letters are unchanged.
980 ///
981 /// To lowercase the value in-place, use [`make_ascii_lowercase`].
982 ///
983 /// To lowercase ASCII characters in addition to non-ASCII characters, use
984 /// [`to_lowercase`].
985 ///
986 /// # Examples
987 ///
988 /// ```
989 /// let ascii = 'A';
990 /// let non_ascii = '❤';
991 ///
992 /// assert_eq!('a', ascii.to_ascii_lowercase());
993 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
994 /// ```
995 ///
996 /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
997 /// [`to_lowercase`]: #method.to_lowercase
998 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
999 #[inline]
1000 pub fn to_ascii_lowercase(&self) -> char {
1001 if self.is_ascii() {
1002 (*self as u8).to_ascii_lowercase() as char
1003 } else {
1004 *self
1005 }
1006 }
1007
1008 /// Checks that two values are an ASCII case-insensitive match.
1009 ///
1010 /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
1011 ///
1012 /// # Examples
1013 ///
1014 /// ```
1015 /// let upper_a = 'A';
1016 /// let lower_a = 'a';
1017 /// let lower_z = 'z';
1018 ///
1019 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
1020 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
1021 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
1022 /// ```
1023 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1024 #[inline]
1025 pub fn eq_ignore_ascii_case(&self, other: &char) -> bool {
1026 self.to_ascii_lowercase() == other.to_ascii_lowercase()
1027 }
1028
1029 /// Converts this type to its ASCII upper case equivalent in-place.
1030 ///
1031 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1032 /// but non-ASCII letters are unchanged.
1033 ///
1034 /// To return a new uppercased value without modifying the existing one, use
1035 /// [`to_ascii_uppercase`].
1036 ///
1037 /// # Examples
1038 ///
1039 /// ```
1040 /// let mut ascii = 'a';
1041 ///
1042 /// ascii.make_ascii_uppercase();
1043 ///
1044 /// assert_eq!('A', ascii);
1045 /// ```
1046 ///
1047 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
1048 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1049 #[inline]
1050 pub fn make_ascii_uppercase(&mut self) {
1051 *self = self.to_ascii_uppercase();
1052 }
1053
1054 /// Converts this type to its ASCII lower case equivalent in-place.
1055 ///
1056 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1057 /// but non-ASCII letters are unchanged.
1058 ///
1059 /// To return a new lowercased value without modifying the existing one, use
1060 /// [`to_ascii_lowercase`].
1061 ///
1062 /// # Examples
1063 ///
1064 /// ```
1065 /// let mut ascii = 'A';
1066 ///
1067 /// ascii.make_ascii_lowercase();
1068 ///
1069 /// assert_eq!('a', ascii);
1070 /// ```
1071 ///
1072 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
1073 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.21.0")]
1074 #[inline]
1075 pub fn make_ascii_lowercase(&mut self) {
1076 *self = self.to_ascii_lowercase();
1077 }
1078
1079 /// Checks if the value is an ASCII alphabetic character:
1080 ///
1081 /// - U+0041 'A' ... U+005A 'Z', or
1082 /// - U+0061 'a' ... U+007A 'z'.
1083 ///
1084 /// # Examples
1085 ///
1086 /// ```
1087 /// #![feature(ascii_ctype)]
1088 ///
1089 /// let uppercase_a = 'A';
1090 /// let uppercase_g = 'G';
1091 /// let a = 'a';
1092 /// let g = 'g';
1093 /// let zero = '0';
1094 /// let percent = '%';
1095 /// let space = ' ';
1096 /// let lf = '\n';
1097 /// let esc: char = 0x1b_u8.into();
1098 ///
1099 /// assert!(uppercase_a.is_ascii_alphabetic());
1100 /// assert!(uppercase_g.is_ascii_alphabetic());
1101 /// assert!(a.is_ascii_alphabetic());
1102 /// assert!(g.is_ascii_alphabetic());
1103 /// assert!(!zero.is_ascii_alphabetic());
1104 /// assert!(!percent.is_ascii_alphabetic());
1105 /// assert!(!space.is_ascii_alphabetic());
1106 /// assert!(!lf.is_ascii_alphabetic());
1107 /// assert!(!esc.is_ascii_alphabetic());
1108 /// ```
1109 #[unstable(feature = "ascii_ctype", issue = "39658")]
1110 #[inline]
1111 pub fn is_ascii_alphabetic(&self) -> bool {
1112 self.is_ascii() && (*self as u8).is_ascii_alphabetic()
1113 }
1114
1115 /// Checks if the value is an ASCII uppercase character:
1116 /// U+0041 'A' ... U+005A 'Z'.
1117 ///
1118 /// # Examples
1119 ///
1120 /// ```
1121 /// #![feature(ascii_ctype)]
1122 ///
1123 /// let uppercase_a = 'A';
1124 /// let uppercase_g = 'G';
1125 /// let a = 'a';
1126 /// let g = 'g';
1127 /// let zero = '0';
1128 /// let percent = '%';
1129 /// let space = ' ';
1130 /// let lf = '\n';
1131 /// let esc: char = 0x1b_u8.into();
1132 ///
1133 /// assert!(uppercase_a.is_ascii_uppercase());
1134 /// assert!(uppercase_g.is_ascii_uppercase());
1135 /// assert!(!a.is_ascii_uppercase());
1136 /// assert!(!g.is_ascii_uppercase());
1137 /// assert!(!zero.is_ascii_uppercase());
1138 /// assert!(!percent.is_ascii_uppercase());
1139 /// assert!(!space.is_ascii_uppercase());
1140 /// assert!(!lf.is_ascii_uppercase());
1141 /// assert!(!esc.is_ascii_uppercase());
1142 /// ```
1143 #[unstable(feature = "ascii_ctype", issue = "39658")]
1144 #[inline]
1145 pub fn is_ascii_uppercase(&self) -> bool {
1146 self.is_ascii() && (*self as u8).is_ascii_uppercase()
1147 }
1148
1149 /// Checks if the value is an ASCII lowercase character:
1150 /// U+0061 'a' ... U+007A 'z'.
1151 ///
1152 /// # Examples
1153 ///
1154 /// ```
1155 /// #![feature(ascii_ctype)]
1156 ///
1157 /// let uppercase_a = 'A';
1158 /// let uppercase_g = 'G';
1159 /// let a = 'a';
1160 /// let g = 'g';
1161 /// let zero = '0';
1162 /// let percent = '%';
1163 /// let space = ' ';
1164 /// let lf = '\n';
1165 /// let esc: char = 0x1b_u8.into();
1166 ///
1167 /// assert!(!uppercase_a.is_ascii_lowercase());
1168 /// assert!(!uppercase_g.is_ascii_lowercase());
1169 /// assert!(a.is_ascii_lowercase());
1170 /// assert!(g.is_ascii_lowercase());
1171 /// assert!(!zero.is_ascii_lowercase());
1172 /// assert!(!percent.is_ascii_lowercase());
1173 /// assert!(!space.is_ascii_lowercase());
1174 /// assert!(!lf.is_ascii_lowercase());
1175 /// assert!(!esc.is_ascii_lowercase());
1176 /// ```
1177 #[unstable(feature = "ascii_ctype", issue = "39658")]
1178 #[inline]
1179 pub fn is_ascii_lowercase(&self) -> bool {
1180 self.is_ascii() && (*self as u8).is_ascii_lowercase()
1181 }
1182
1183 /// Checks if the value is an ASCII alphanumeric character:
1184 ///
1185 /// - U+0041 'A' ... U+005A 'Z', or
1186 /// - U+0061 'a' ... U+007A 'z', or
1187 /// - U+0030 '0' ... U+0039 '9'.
1188 ///
1189 /// # Examples
1190 ///
1191 /// ```
1192 /// #![feature(ascii_ctype)]
1193 ///
1194 /// let uppercase_a = 'A';
1195 /// let uppercase_g = 'G';
1196 /// let a = 'a';
1197 /// let g = 'g';
1198 /// let zero = '0';
1199 /// let percent = '%';
1200 /// let space = ' ';
1201 /// let lf = '\n';
1202 /// let esc: char = 0x1b_u8.into();
1203 ///
1204 /// assert!(uppercase_a.is_ascii_alphanumeric());
1205 /// assert!(uppercase_g.is_ascii_alphanumeric());
1206 /// assert!(a.is_ascii_alphanumeric());
1207 /// assert!(g.is_ascii_alphanumeric());
1208 /// assert!(zero.is_ascii_alphanumeric());
1209 /// assert!(!percent.is_ascii_alphanumeric());
1210 /// assert!(!space.is_ascii_alphanumeric());
1211 /// assert!(!lf.is_ascii_alphanumeric());
1212 /// assert!(!esc.is_ascii_alphanumeric());
1213 /// ```
1214 #[unstable(feature = "ascii_ctype", issue = "39658")]
1215 #[inline]
1216 pub fn is_ascii_alphanumeric(&self) -> bool {
1217 self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
1218 }
1219
1220 /// Checks if the value is an ASCII decimal digit:
1221 /// U+0030 '0' ... U+0039 '9'.
1222 ///
1223 /// # Examples
1224 ///
1225 /// ```
1226 /// #![feature(ascii_ctype)]
1227 ///
1228 /// let uppercase_a = 'A';
1229 /// let uppercase_g = 'G';
1230 /// let a = 'a';
1231 /// let g = 'g';
1232 /// let zero = '0';
1233 /// let percent = '%';
1234 /// let space = ' ';
1235 /// let lf = '\n';
1236 /// let esc: char = 0x1b_u8.into();
1237 ///
1238 /// assert!(!uppercase_a.is_ascii_digit());
1239 /// assert!(!uppercase_g.is_ascii_digit());
1240 /// assert!(!a.is_ascii_digit());
1241 /// assert!(!g.is_ascii_digit());
1242 /// assert!(zero.is_ascii_digit());
1243 /// assert!(!percent.is_ascii_digit());
1244 /// assert!(!space.is_ascii_digit());
1245 /// assert!(!lf.is_ascii_digit());
1246 /// assert!(!esc.is_ascii_digit());
1247 /// ```
1248 #[unstable(feature = "ascii_ctype", issue = "39658")]
1249 #[inline]
1250 pub fn is_ascii_digit(&self) -> bool {
1251 self.is_ascii() && (*self as u8).is_ascii_digit()
1252 }
1253
1254 /// Checks if the value is an ASCII hexadecimal digit:
1255 ///
1256 /// - U+0030 '0' ... U+0039 '9', or
1257 /// - U+0041 'A' ... U+0046 'F', or
1258 /// - U+0061 'a' ... U+0066 'f'.
1259 ///
1260 /// # Examples
1261 ///
1262 /// ```
1263 /// #![feature(ascii_ctype)]
1264 ///
1265 /// let uppercase_a = 'A';
1266 /// let uppercase_g = 'G';
1267 /// let a = 'a';
1268 /// let g = 'g';
1269 /// let zero = '0';
1270 /// let percent = '%';
1271 /// let space = ' ';
1272 /// let lf = '\n';
1273 /// let esc: char = 0x1b_u8.into();
1274 ///
1275 /// assert!(uppercase_a.is_ascii_hexdigit());
1276 /// assert!(!uppercase_g.is_ascii_hexdigit());
1277 /// assert!(a.is_ascii_hexdigit());
1278 /// assert!(!g.is_ascii_hexdigit());
1279 /// assert!(zero.is_ascii_hexdigit());
1280 /// assert!(!percent.is_ascii_hexdigit());
1281 /// assert!(!space.is_ascii_hexdigit());
1282 /// assert!(!lf.is_ascii_hexdigit());
1283 /// assert!(!esc.is_ascii_hexdigit());
1284 /// ```
1285 #[unstable(feature = "ascii_ctype", issue = "39658")]
1286 #[inline]
1287 pub fn is_ascii_hexdigit(&self) -> bool {
1288 self.is_ascii() && (*self as u8).is_ascii_hexdigit()
1289 }
1290
1291 /// Checks if the value is an ASCII punctuation character:
1292 ///
1293 /// - U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`, or
1294 /// - U+003A ... U+0040 `: ; < = > ? @`, or
1295 /// - U+005B ... U+0060 ``[ \ ] ^ _ ` ``, or
1296 /// - U+007B ... U+007E `{ | } ~`
1297 ///
1298 /// # Examples
1299 ///
1300 /// ```
1301 /// #![feature(ascii_ctype)]
1302 ///
1303 /// let uppercase_a = 'A';
1304 /// let uppercase_g = 'G';
1305 /// let a = 'a';
1306 /// let g = 'g';
1307 /// let zero = '0';
1308 /// let percent = '%';
1309 /// let space = ' ';
1310 /// let lf = '\n';
1311 /// let esc: char = 0x1b_u8.into();
1312 ///
1313 /// assert!(!uppercase_a.is_ascii_punctuation());
1314 /// assert!(!uppercase_g.is_ascii_punctuation());
1315 /// assert!(!a.is_ascii_punctuation());
1316 /// assert!(!g.is_ascii_punctuation());
1317 /// assert!(!zero.is_ascii_punctuation());
1318 /// assert!(percent.is_ascii_punctuation());
1319 /// assert!(!space.is_ascii_punctuation());
1320 /// assert!(!lf.is_ascii_punctuation());
1321 /// assert!(!esc.is_ascii_punctuation());
1322 /// ```
1323 #[unstable(feature = "ascii_ctype", issue = "39658")]
1324 #[inline]
1325 pub fn is_ascii_punctuation(&self) -> bool {
1326 self.is_ascii() && (*self as u8).is_ascii_punctuation()
1327 }
1328
1329 /// Checks if the value is an ASCII graphic character:
1330 /// U+0021 '@' ... U+007E '~'.
1331 ///
1332 /// # Examples
1333 ///
1334 /// ```
1335 /// #![feature(ascii_ctype)]
1336 ///
1337 /// let uppercase_a = 'A';
1338 /// let uppercase_g = 'G';
1339 /// let a = 'a';
1340 /// let g = 'g';
1341 /// let zero = '0';
1342 /// let percent = '%';
1343 /// let space = ' ';
1344 /// let lf = '\n';
1345 /// let esc: char = 0x1b_u8.into();
1346 ///
1347 /// assert!(uppercase_a.is_ascii_graphic());
1348 /// assert!(uppercase_g.is_ascii_graphic());
1349 /// assert!(a.is_ascii_graphic());
1350 /// assert!(g.is_ascii_graphic());
1351 /// assert!(zero.is_ascii_graphic());
1352 /// assert!(percent.is_ascii_graphic());
1353 /// assert!(!space.is_ascii_graphic());
1354 /// assert!(!lf.is_ascii_graphic());
1355 /// assert!(!esc.is_ascii_graphic());
1356 /// ```
1357 #[unstable(feature = "ascii_ctype", issue = "39658")]
1358 #[inline]
1359 pub fn is_ascii_graphic(&self) -> bool {
1360 self.is_ascii() && (*self as u8).is_ascii_graphic()
1361 }
1362
1363 /// Checks if the value is an ASCII whitespace character:
1364 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1365 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1366 ///
1367 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1368 /// whitespace][infra-aw]. There are several other definitions in
1369 /// wide use. For instance, [the POSIX locale][pct] includes
1370 /// U+000B VERTICAL TAB as well as all the above characters,
1371 /// but—from the very same specification—[the default rule for
1372 /// "field splitting" in the Bourne shell][bfs] considers *only*
1373 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1374 ///
1375 /// If you are writing a program that will process an existing
1376 /// file format, check what that format's definition of whitespace is
1377 /// before using this function.
1378 ///
1379 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1380 /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1381 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1382 ///
1383 /// # Examples
1384 ///
1385 /// ```
1386 /// #![feature(ascii_ctype)]
1387 ///
1388 /// let uppercase_a = 'A';
1389 /// let uppercase_g = 'G';
1390 /// let a = 'a';
1391 /// let g = 'g';
1392 /// let zero = '0';
1393 /// let percent = '%';
1394 /// let space = ' ';
1395 /// let lf = '\n';
1396 /// let esc: char = 0x1b_u8.into();
1397 ///
1398 /// assert!(!uppercase_a.is_ascii_whitespace());
1399 /// assert!(!uppercase_g.is_ascii_whitespace());
1400 /// assert!(!a.is_ascii_whitespace());
1401 /// assert!(!g.is_ascii_whitespace());
1402 /// assert!(!zero.is_ascii_whitespace());
1403 /// assert!(!percent.is_ascii_whitespace());
1404 /// assert!(space.is_ascii_whitespace());
1405 /// assert!(lf.is_ascii_whitespace());
1406 /// assert!(!esc.is_ascii_whitespace());
1407 /// ```
1408 #[unstable(feature = "ascii_ctype", issue = "39658")]
1409 #[inline]
1410 pub fn is_ascii_whitespace(&self) -> bool {
1411 self.is_ascii() && (*self as u8).is_ascii_whitespace()
1412 }
1413
1414 /// Checks if the value is an ASCII control character:
1415 /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE.
1416 /// Note that most ASCII whitespace characters are control
1417 /// characters, but SPACE is not.
1418 ///
1419 /// # Examples
1420 ///
1421 /// ```
1422 /// #![feature(ascii_ctype)]
1423 ///
1424 /// let uppercase_a = 'A';
1425 /// let uppercase_g = 'G';
1426 /// let a = 'a';
1427 /// let g = 'g';
1428 /// let zero = '0';
1429 /// let percent = '%';
1430 /// let space = ' ';
1431 /// let lf = '\n';
1432 /// let esc: char = 0x1b_u8.into();
1433 ///
1434 /// assert!(!uppercase_a.is_ascii_control());
1435 /// assert!(!uppercase_g.is_ascii_control());
1436 /// assert!(!a.is_ascii_control());
1437 /// assert!(!g.is_ascii_control());
1438 /// assert!(!zero.is_ascii_control());
1439 /// assert!(!percent.is_ascii_control());
1440 /// assert!(!space.is_ascii_control());
1441 /// assert!(lf.is_ascii_control());
1442 /// assert!(esc.is_ascii_control());
1443 /// ```
1444 #[unstable(feature = "ascii_ctype", issue = "39658")]
1445 #[inline]
1446 pub fn is_ascii_control(&self) -> bool {
1447 self.is_ascii() && (*self as u8).is_ascii_control()
1448 }
1449 }
1450
1451 /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
1452 #[stable(feature = "decode_utf16", since = "1.9.0")]
1453 #[derive(Clone)]
1454 pub struct DecodeUtf16<I>
1455 where I: Iterator<Item = u16>
1456 {
1457 iter: I,
1458 buf: Option<u16>,
1459 }
1460
1461 /// An iterator that decodes UTF-16 encoded code points from an iterator of `u16`s.
1462 #[stable(feature = "decode_utf16", since = "1.9.0")]
1463 #[derive(Debug, Clone, Eq, PartialEq)]
1464 pub struct DecodeUtf16Error {
1465 code: u16,
1466 }
1467
1468 /// Create an iterator over the UTF-16 encoded code points in `iter`,
1469 /// returning unpaired surrogates as `Err`s.
1470 ///
1471 /// # Examples
1472 ///
1473 /// Basic usage:
1474 ///
1475 /// ```
1476 /// use std::char::decode_utf16;
1477 ///
1478 /// fn main() {
1479 /// // 𝄞mus<invalid>ic<invalid>
1480 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
1481 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
1482 /// 0xD834];
1483 ///
1484 /// assert_eq!(decode_utf16(v.iter().cloned())
1485 /// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
1486 /// .collect::<Vec<_>>(),
1487 /// vec![Ok('𝄞'),
1488 /// Ok('m'), Ok('u'), Ok('s'),
1489 /// Err(0xDD1E),
1490 /// Ok('i'), Ok('c'),
1491 /// Err(0xD834)]);
1492 /// }
1493 /// ```
1494 ///
1495 /// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
1496 ///
1497 /// ```
1498 /// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
1499 ///
1500 /// fn main() {
1501 /// // 𝄞mus<invalid>ic<invalid>
1502 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
1503 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
1504 /// 0xD834];
1505 ///
1506 /// assert_eq!(decode_utf16(v.iter().cloned())
1507 /// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
1508 /// .collect::<String>(),
1509 /// "𝄞mus�ic�");
1510 /// }
1511 /// ```
1512 #[stable(feature = "decode_utf16", since = "1.9.0")]
1513 #[inline]
1514 pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
1515 DecodeUtf16 {
1516 iter: iter.into_iter(),
1517 buf: None,
1518 }
1519 }
1520
1521 #[stable(feature = "decode_utf16", since = "1.9.0")]
1522 impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> {
1523 type Item = Result<char, DecodeUtf16Error>;
1524
1525 fn next(&mut self) -> Option<Result<char, DecodeUtf16Error>> {
1526 let u = match self.buf.take() {
1527 Some(buf) => buf,
1528 None => {
1529 match self.iter.next() {
1530 Some(u) => u,
1531 None => return None,
1532 }
1533 }
1534 };
1535
1536 if u < 0xD800 || 0xDFFF < u {
1537 // not a surrogate
1538 Some(Ok(unsafe { from_u32_unchecked(u as u32) }))
1539 } else if u >= 0xDC00 {
1540 // a trailing surrogate
1541 Some(Err(DecodeUtf16Error { code: u }))
1542 } else {
1543 let u2 = match self.iter.next() {
1544 Some(u2) => u2,
1545 // eof
1546 None => return Some(Err(DecodeUtf16Error { code: u })),
1547 };
1548 if u2 < 0xDC00 || u2 > 0xDFFF {
1549 // not a trailing surrogate so we're not a valid
1550 // surrogate pair, so rewind to redecode u2 next time.
1551 self.buf = Some(u2);
1552 return Some(Err(DecodeUtf16Error { code: u }));
1553 }
1554
1555 // all ok, so lets decode it.
1556 let c = (((u - 0xD800) as u32) << 10 | (u2 - 0xDC00) as u32) + 0x1_0000;
1557 Some(Ok(unsafe { from_u32_unchecked(c) }))
1558 }
1559 }
1560
1561 #[inline]
1562 fn size_hint(&self) -> (usize, Option<usize>) {
1563 let (low, high) = self.iter.size_hint();
1564 // we could be entirely valid surrogates (2 elements per
1565 // char), or entirely non-surrogates (1 element per char)
1566 (low / 2, high)
1567 }
1568 }
1569
1570 impl DecodeUtf16Error {
1571 /// Returns the unpaired surrogate which caused this error.
1572 #[stable(feature = "decode_utf16", since = "1.9.0")]
1573 pub fn unpaired_surrogate(&self) -> u16 {
1574 self.code
1575 }
1576 }
1577
1578 #[stable(feature = "decode_utf16", since = "1.9.0")]
1579 impl fmt::Display for DecodeUtf16Error {
1580 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1581 write!(f, "unpaired surrogate found: {:x}", self.code)
1582 }
1583 }
1584
1585 /// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
1586 /// decoding error.
1587 ///
1588 /// It can occur, for example, when giving ill-formed UTF-8 bytes to
1589 /// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
1590 #[stable(feature = "decode_utf16", since = "1.9.0")]
1591 pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';