]> git.proxmox.com Git - rustc.git/blame - src/libcore/char/methods.rs
New upstream version 1.41.1+dfsg1
[rustc.git] / src / libcore / char / methods.rs
CommitLineData
83c7162d 1//! impl char {}
c34b1796 2
48663c56
XL
3use crate::slice;
4use crate::str::from_utf8_unchecked_mut;
5use crate::unicode::printable::is_printable;
6use crate::unicode::tables::{conversions, derived_property, general_category, property};
7
83c7162d 8use super::*;
32a655c1 9
c34b1796
AL
10#[lang = "char"]
11impl char {
b039eaaf
SL
12 /// Checks if a `char` is a digit in the given radix.
13 ///
14 /// A 'radix' here is sometimes also called a 'base'. A radix of two
15 /// indicates a binary number, a radix of ten, decimal, and a radix of
9cc50fc6 16 /// sixteen, hexadecimal, to give some common values. Arbitrary
476ff2be 17 /// radices are supported.
1a4d82fc
JJ
18 ///
19 /// Compared to `is_numeric()`, this function only recognizes the characters
20 /// `0-9`, `a-z` and `A-Z`.
21 ///
b039eaaf 22 /// 'Digit' is defined to be only the following characters:
1a4d82fc 23 ///
b039eaaf
SL
24 /// * `0-9`
25 /// * `a-z`
26 /// * `A-Z`
27 ///
cc61c64b 28 /// For a more comprehensive understanding of 'digit', see [`is_numeric`][is_numeric].
b039eaaf
SL
29 ///
30 /// [is_numeric]: #method.is_numeric
1a4d82fc
JJ
31 ///
32 /// # Panics
33 ///
b039eaaf 34 /// Panics if given a radix larger than 36.
c34b1796
AL
35 ///
36 /// # Examples
37 ///
b039eaaf
SL
38 /// Basic usage:
39 ///
c34b1796 40 /// ```
54a0048b
SL
41 /// assert!('1'.is_digit(10));
42 /// assert!('f'.is_digit(16));
43 /// assert!(!'f'.is_digit(10));
b039eaaf
SL
44 /// ```
45 ///
46 /// Passing a large radix, causing a panic:
47 ///
48 /// ```
49 /// use std::thread;
50 ///
51 /// let result = thread::spawn(|| {
b039eaaf 52 /// // this panics
54a0048b 53 /// '1'.is_digit(37);
b039eaaf
SL
54 /// }).join();
55 ///
56 /// assert!(result.is_err());
c34b1796
AL
57 /// ```
58 #[stable(feature = "rust1", since = "1.0.0")]
59 #[inline]
b039eaaf 60 pub fn is_digit(self, radix: u32) -> bool {
83c7162d 61 self.to_digit(radix).is_some()
b039eaaf 62 }
1a4d82fc 63
b039eaaf
SL
64 /// Converts a `char` to a digit in the given radix.
65 ///
66 /// A 'radix' here is sometimes also called a 'base'. A radix of two
67 /// indicates a binary number, a radix of ten, decimal, and a radix of
9cc50fc6 68 /// sixteen, hexadecimal, to give some common values. Arbitrary
476ff2be 69 /// radices are supported.
b039eaaf
SL
70 ///
71 /// 'Digit' is defined to be only the following characters:
72 ///
73 /// * `0-9`
74 /// * `a-z`
75 /// * `A-Z`
1a4d82fc 76 ///
7453a54e 77 /// # Errors
1a4d82fc 78 ///
b039eaaf 79 /// Returns `None` if the `char` does not refer to a digit in the given radix.
1a4d82fc
JJ
80 ///
81 /// # Panics
82 ///
b039eaaf 83 /// Panics if given a radix larger than 36.
c34b1796
AL
84 ///
85 /// # Examples
86 ///
b039eaaf
SL
87 /// Basic usage:
88 ///
89 /// ```
54a0048b
SL
90 /// assert_eq!('1'.to_digit(10), Some(1));
91 /// assert_eq!('f'.to_digit(16), Some(15));
b039eaaf
SL
92 /// ```
93 ///
94 /// Passing a non-digit results in failure:
95 ///
96 /// ```
54a0048b
SL
97 /// assert_eq!('f'.to_digit(10), None);
98 /// assert_eq!('z'.to_digit(16), None);
b039eaaf
SL
99 /// ```
100 ///
101 /// Passing a large radix, causing a panic:
102 ///
c34b1796 103 /// ```
b039eaaf 104 /// use std::thread;
c34b1796 105 ///
b039eaaf 106 /// let result = thread::spawn(|| {
54a0048b 107 /// '1'.to_digit(37);
b039eaaf
SL
108 /// }).join();
109 ///
110 /// assert!(result.is_err());
c34b1796
AL
111 /// ```
112 #[stable(feature = "rust1", since = "1.0.0")]
62682a34 113 #[inline]
b039eaaf 114 pub fn to_digit(self, radix: u32) -> Option<u32> {
a1dfa0c6
XL
115 assert!(radix <= 36, "to_digit: radix is too high (maximum 36)");
116
117 // the code is split up here to improve execution speed for cases where
118 // the `radix` is constant and 10 or smaller
e74abb32 119 let val = if radix <= 10 {
a1dfa0c6 120 match self {
e74abb32 121 '0'..='9' => self as u32 - '0' as u32,
a1dfa0c6
XL
122 _ => return None,
123 }
124 } else {
125 match self {
126 '0'..='9' => self as u32 - '0' as u32,
127 'a'..='z' => self as u32 - 'a' as u32 + 10,
128 'A'..='Z' => self as u32 - 'A' as u32 + 10,
129 _ => return None,
130 }
83c7162d 131 };
a1dfa0c6 132
60c5eb7d 133 if val < radix { Some(val) } else { None }
b039eaaf 134 }
1a4d82fc 135
c34b1796 136 /// Returns an iterator that yields the hexadecimal Unicode escape of a
32a655c1 137 /// character as `char`s.
1a4d82fc 138 ///
32a655c1
SL
139 /// This will escape characters with the Rust syntax of the form
140 /// `\u{NNNNNN}` where `NNNNNN` is a hexadecimal representation.
c34b1796
AL
141 ///
142 /// # Examples
143 ///
32a655c1 144 /// As an iterator:
92a42be0 145 ///
c34b1796 146 /// ```
62682a34
SL
147 /// for c in '❤'.escape_unicode() {
148 /// print!("{}", c);
c34b1796 149 /// }
32a655c1 150 /// println!();
c34b1796
AL
151 /// ```
152 ///
32a655c1 153 /// Using `println!` directly:
c34b1796 154 ///
32a655c1
SL
155 /// ```
156 /// println!("{}", '❤'.escape_unicode());
c34b1796
AL
157 /// ```
158 ///
32a655c1 159 /// Both are equivalent to:
c34b1796
AL
160 ///
161 /// ```
32a655c1
SL
162 /// println!("\\u{{2764}}");
163 /// ```
c34b1796 164 ///
32a655c1
SL
165 /// Using `to_string`:
166 ///
167 /// ```
168 /// assert_eq!('❤'.escape_unicode().to_string(), "\\u{2764}");
c34b1796 169 /// ```
85aaf69f 170 #[stable(feature = "rust1", since = "1.0.0")]
62682a34 171 #[inline]
b039eaaf 172 pub fn escape_unicode(self) -> EscapeUnicode {
83c7162d
XL
173 let c = self as u32;
174
175 // or-ing 1 ensures that for c==0 the code computes that one
176 // digit should be printed and (which is the same) avoids the
177 // (31 - 32) underflow
178 let msb = 31 - (c | 1).leading_zeros();
179
180 // the index of the most significant hex digit
181 let ms_hex_digit = msb / 4;
182 EscapeUnicode {
183 c: self,
184 state: EscapeUnicodeState::Backslash,
185 hex_digit_idx: ms_hex_digit as usize,
186 }
b039eaaf 187 }
1a4d82fc 188
94b46f34
XL
189 /// An extended version of `escape_debug` that optionally permits escaping
190 /// Extended Grapheme codepoints. This allows us to format characters like
191 /// nonspacing marks better when they're at the start of a string.
94b46f34 192 #[inline]
9fa01778 193 pub(crate) fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug {
94b46f34
XL
194 let init_state = match self {
195 '\t' => EscapeDefaultState::Backslash('t'),
196 '\r' => EscapeDefaultState::Backslash('r'),
197 '\n' => EscapeDefaultState::Backslash('n'),
198 '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
199 _ if escape_grapheme_extended && self.is_grapheme_extended() => {
200 EscapeDefaultState::Unicode(self.escape_unicode())
201 }
202 _ if is_printable(self) => EscapeDefaultState::Char(self),
203 _ => EscapeDefaultState::Unicode(self.escape_unicode()),
204 };
205 EscapeDebug(EscapeDefault { state: init_state })
206 }
207
32a655c1
SL
208 /// Returns an iterator that yields the literal escape code of a character
209 /// as `char`s.
5bcae85e
SL
210 ///
211 /// This will escape the characters similar to the `Debug` implementations
212 /// of `str` or `char`.
213 ///
214 /// # Examples
215 ///
32a655c1 216 /// As an iterator:
5bcae85e
SL
217 ///
218 /// ```
32a655c1
SL
219 /// for c in '\n'.escape_debug() {
220 /// print!("{}", c);
5bcae85e 221 /// }
32a655c1 222 /// println!();
5bcae85e
SL
223 /// ```
224 ///
32a655c1 225 /// Using `println!` directly:
5bcae85e 226 ///
32a655c1 227 /// ```
32a655c1 228 /// println!("{}", '\n'.escape_debug());
5bcae85e
SL
229 /// ```
230 ///
32a655c1 231 /// Both are equivalent to:
5bcae85e
SL
232 ///
233 /// ```
32a655c1
SL
234 /// println!("\\n");
235 /// ```
5bcae85e 236 ///
32a655c1
SL
237 /// Using `to_string`:
238 ///
239 /// ```
32a655c1 240 /// assert_eq!('\n'.escape_debug().to_string(), "\\n");
5bcae85e 241 /// ```
041b39d2 242 #[stable(feature = "char_escape_debug", since = "1.20.0")]
5bcae85e
SL
243 #[inline]
244 pub fn escape_debug(self) -> EscapeDebug {
94b46f34 245 self.escape_debug_ext(true)
5bcae85e
SL
246 }
247
32a655c1
SL
248 /// Returns an iterator that yields the literal escape code of a character
249 /// as `char`s.
1a4d82fc
JJ
250 ///
251 /// The default is chosen with a bias toward producing literals that are
252 /// legal in a variety of languages, including C++11 and similar C-family
253 /// languages. The exact rules are:
254 ///
b039eaaf
SL
255 /// * Tab is escaped as `\t`.
256 /// * Carriage return is escaped as `\r`.
257 /// * Line feed is escaped as `\n`.
258 /// * Single quote is escaped as `\'`.
259 /// * Double quote is escaped as `\"`.
260 /// * Backslash is escaped as `\\`.
261 /// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
262 /// inclusive is not escaped.
263 /// * All other characters are given hexadecimal Unicode escapes; see
264 /// [`escape_unicode`][escape_unicode].
265 ///
266 /// [escape_unicode]: #method.escape_unicode
c34b1796
AL
267 ///
268 /// # Examples
269 ///
32a655c1 270 /// As an iterator:
b039eaaf 271 ///
c34b1796 272 /// ```
32a655c1
SL
273 /// for c in '"'.escape_default() {
274 /// print!("{}", c);
c34b1796 275 /// }
32a655c1 276 /// println!();
c34b1796
AL
277 /// ```
278 ///
32a655c1 279 /// Using `println!` directly:
c34b1796 280 ///
c34b1796 281 /// ```
32a655c1
SL
282 /// println!("{}", '"'.escape_default());
283 /// ```
284 ///
c34b1796 285 ///
32a655c1 286 /// Both are equivalent to:
c34b1796
AL
287 ///
288 /// ```
32a655c1
SL
289 /// println!("\\\"");
290 /// ```
291 ///
292 /// Using `to_string`:
c34b1796 293 ///
32a655c1
SL
294 /// ```
295 /// assert_eq!('"'.escape_default().to_string(), "\\\"");
c34b1796 296 /// ```
85aaf69f 297 #[stable(feature = "rust1", since = "1.0.0")]
62682a34 298 #[inline]
b039eaaf 299 pub fn escape_default(self) -> EscapeDefault {
83c7162d
XL
300 let init_state = match self {
301 '\t' => EscapeDefaultState::Backslash('t'),
302 '\r' => EscapeDefaultState::Backslash('r'),
303 '\n' => EscapeDefaultState::Backslash('n'),
304 '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
e74abb32
XL
305 '\x20'..='\x7e' => EscapeDefaultState::Char(self),
306 _ => EscapeDefaultState::Unicode(self.escape_unicode()),
83c7162d
XL
307 };
308 EscapeDefault { state: init_state }
b039eaaf 309 }
1a4d82fc 310
b039eaaf
SL
311 /// Returns the number of bytes this `char` would need if encoded in UTF-8.
312 ///
313 /// That number of bytes is always between 1 and 4, inclusive.
c34b1796
AL
314 ///
315 /// # Examples
316 ///
b039eaaf
SL
317 /// Basic usage:
318 ///
c34b1796 319 /// ```
b039eaaf
SL
320 /// let len = 'A'.len_utf8();
321 /// assert_eq!(len, 1);
322 ///
323 /// let len = 'ß'.len_utf8();
324 /// assert_eq!(len, 2);
325 ///
326 /// let len = 'ℝ'.len_utf8();
327 /// assert_eq!(len, 3);
c34b1796 328 ///
b039eaaf
SL
329 /// let len = '💣'.len_utf8();
330 /// assert_eq!(len, 4);
331 /// ```
332 ///
333 /// The `&str` type guarantees that its contents are UTF-8, and so we can compare the length it
334 /// would take if each code point was represented as a `char` vs in the `&str` itself:
335 ///
336 /// ```
337 /// // as chars
338 /// let eastern = '東';
dc9dc135 339 /// let capital = '京';
b039eaaf
SL
340 ///
341 /// // both can be represented as three bytes
342 /// assert_eq!(3, eastern.len_utf8());
dc9dc135 343 /// assert_eq!(3, capital.len_utf8());
b039eaaf
SL
344 ///
345 /// // as a &str, these two are encoded in UTF-8
346 /// let tokyo = "東京";
347 ///
dc9dc135 348 /// let len = eastern.len_utf8() + capital.len_utf8();
b039eaaf
SL
349 ///
350 /// // we can see that they take six bytes total...
351 /// assert_eq!(6, tokyo.len());
352 ///
353 /// // ... just like the &str
354 /// assert_eq!(len, tokyo.len());
c34b1796 355 /// ```
85aaf69f 356 #[stable(feature = "rust1", since = "1.0.0")]
62682a34 357 #[inline]
b039eaaf 358 pub fn len_utf8(self) -> usize {
83c7162d
XL
359 let code = self as u32;
360 if code < MAX_ONE_B {
361 1
362 } else if code < MAX_TWO_B {
363 2
364 } else if code < MAX_THREE_B {
365 3
366 } else {
367 4
368 }
b039eaaf 369 }
1a4d82fc 370
b039eaaf 371 /// Returns the number of 16-bit code units this `char` would need if
c34b1796
AL
372 /// encoded in UTF-16.
373 ///
cc61c64b 374 /// See the documentation for [`len_utf8`] for more explanation of this
92a42be0
SL
375 /// concept. This function is a mirror, but for UTF-16 instead of UTF-8.
376 ///
cc61c64b 377 /// [`len_utf8`]: #method.len_utf8
b039eaaf 378 ///
c34b1796
AL
379 /// # Examples
380 ///
92a42be0
SL
381 /// Basic usage:
382 ///
c34b1796
AL
383 /// ```
384 /// let n = 'ß'.len_utf16();
c34b1796 385 /// assert_eq!(n, 1);
b039eaaf
SL
386 ///
387 /// let len = '💣'.len_utf16();
388 /// assert_eq!(len, 2);
c34b1796 389 /// ```
85aaf69f 390 #[stable(feature = "rust1", since = "1.0.0")]
62682a34 391 #[inline]
b039eaaf 392 pub fn len_utf16(self) -> usize {
83c7162d
XL
393 let ch = self as u32;
394 if (ch & 0xFFFF) == ch { 1 } else { 2 }
b039eaaf 395 }
1a4d82fc 396
c30ab7b3
SL
397 /// Encodes this character as UTF-8 into the provided byte buffer,
398 /// and then returns the subslice of the buffer that contains the encoded character.
c34b1796 399 ///
c30ab7b3
SL
400 /// # Panics
401 ///
402 /// Panics if the buffer is not large enough.
403 /// A buffer of length four is large enough to encode any `char`.
c34b1796
AL
404 ///
405 /// # Examples
406 ///
c30ab7b3
SL
407 /// In both of these examples, 'ß' takes two bytes to encode.
408 ///
c34b1796 409 /// ```
c30ab7b3 410 /// let mut b = [0; 2];
c34b1796 411 ///
c30ab7b3
SL
412 /// let result = 'ß'.encode_utf8(&mut b);
413 ///
414 /// assert_eq!(result, "ß");
415 ///
416 /// assert_eq!(result.len(), 2);
417 /// ```
418 ///
419 /// A buffer that's too small:
420 ///
421 /// ```
c30ab7b3
SL
422 /// use std::thread;
423 ///
424 /// let result = thread::spawn(|| {
425 /// let mut b = [0; 1];
426 ///
427 /// // this panics
428 /// 'ß'.encode_utf8(&mut b);
429 /// }).join();
430 ///
431 /// assert!(result.is_err());
c34b1796 432 /// ```
476ff2be 433 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
62682a34 434 #[inline]
c30ab7b3 435 pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
83c7162d 436 let code = self as u32;
60c5eb7d 437 // SAFETY: each arm checks the size of the slice and only uses `get_unchecked` unsafe ops
83c7162d 438 unsafe {
e74abb32 439 let len = if code < MAX_ONE_B && !dst.is_empty() {
83c7162d
XL
440 *dst.get_unchecked_mut(0) = code as u8;
441 1
442 } else if code < MAX_TWO_B && dst.len() >= 2 {
443 *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
444 *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
445 2
e74abb32 446 } else if code < MAX_THREE_B && dst.len() >= 3 {
83c7162d 447 *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
e74abb32 448 *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
83c7162d
XL
449 *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
450 3
451 } else if dst.len() >= 4 {
452 *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
453 *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
e74abb32 454 *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
83c7162d
XL
455 *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
456 4
457 } else {
e74abb32
XL
458 panic!(
459 "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
83c7162d
XL
460 from_u32_unchecked(code).len_utf8(),
461 code,
e74abb32
XL
462 dst.len(),
463 )
83c7162d
XL
464 };
465 from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
466 }
62682a34 467 }
1a4d82fc 468
c30ab7b3
SL
469 /// Encodes this character as UTF-16 into the provided `u16` buffer,
470 /// and then returns the subslice of the buffer that contains the encoded character.
c34b1796 471 ///
c30ab7b3
SL
472 /// # Panics
473 ///
474 /// Panics if the buffer is not large enough.
475 /// A buffer of length 2 is large enough to encode any `char`.
c34b1796
AL
476 ///
477 /// # Examples
478 ///
c30ab7b3
SL
479 /// In both of these examples, '𝕊' takes two `u16`s to encode.
480 ///
c34b1796 481 /// ```
c30ab7b3 482 /// let mut b = [0; 2];
c34b1796 483 ///
c30ab7b3
SL
484 /// let result = '𝕊'.encode_utf16(&mut b);
485 ///
486 /// assert_eq!(result.len(), 2);
487 /// ```
488 ///
489 /// A buffer that's too small:
490 ///
491 /// ```
c30ab7b3
SL
492 /// use std::thread;
493 ///
494 /// let result = thread::spawn(|| {
495 /// let mut b = [0; 1];
496 ///
497 /// // this panics
498 /// '𝕊'.encode_utf16(&mut b);
499 /// }).join();
500 ///
501 /// assert!(result.is_err());
c34b1796 502 /// ```
476ff2be 503 #[stable(feature = "unicode_encode_char", since = "1.15.0")]
62682a34 504 #[inline]
c30ab7b3 505 pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
83c7162d 506 let mut code = self as u32;
60c5eb7d 507 // SAFETY: each arm checks whether there are enough bits to write into
83c7162d
XL
508 unsafe {
509 if (code & 0xFFFF) == code && !dst.is_empty() {
510 // The BMP falls through (assuming non-surrogate, as it should)
511 *dst.get_unchecked_mut(0) = code as u16;
512 slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
513 } else if dst.len() >= 2 {
514 // Supplementary planes break into surrogates.
515 code -= 0x1_0000;
516 *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
517 *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
518 slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
519 } else {
e74abb32
XL
520 panic!(
521 "encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
83c7162d
XL
522 from_u32_unchecked(code).len_utf16(),
523 code,
e74abb32
XL
524 dst.len(),
525 )
83c7162d
XL
526 }
527 }
62682a34 528 }
1a4d82fc 529
e74abb32
XL
530 /// Returns `true` if this `char` has the `Alphabetic` property.
531 ///
532 /// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
533 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
534 ///
535 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
536 /// [ucd]: https://www.unicode.org/reports/tr44/
537 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
b039eaaf
SL
538 ///
539 /// # Examples
540 ///
541 /// Basic usage:
542 ///
543 /// ```
54a0048b
SL
544 /// assert!('a'.is_alphabetic());
545 /// assert!('京'.is_alphabetic());
b039eaaf
SL
546 ///
547 /// let c = '💝';
548 /// // love is many things, but it is not alphabetic
549 /// assert!(!c.is_alphabetic());
550 /// ```
85aaf69f 551 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796
AL
552 #[inline]
553 pub fn is_alphabetic(self) -> bool {
554 match self {
8faf50e0 555 'a'..='z' | 'A'..='Z' => true,
60c5eb7d 556 c => c > '\x7f' && derived_property::Alphabetic(c),
c34b1796
AL
557 }
558 }
1a4d82fc 559
e74abb32 560 /// Returns `true` if this `char` has the `Lowercase` property.
1a4d82fc 561 ///
e74abb32
XL
562 /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
563 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
564 ///
565 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
566 /// [ucd]: https://www.unicode.org/reports/tr44/
567 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
b039eaaf
SL
568 ///
569 /// # Examples
570 ///
571 /// Basic usage:
572 ///
573 /// ```
54a0048b
SL
574 /// assert!('a'.is_lowercase());
575 /// assert!('δ'.is_lowercase());
576 /// assert!(!'A'.is_lowercase());
577 /// assert!(!'Δ'.is_lowercase());
b039eaaf
SL
578 ///
579 /// // The various Chinese scripts do not have case, and so:
54a0048b 580 /// assert!(!'中'.is_lowercase());
b039eaaf 581 /// ```
85aaf69f 582 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796
AL
583 #[inline]
584 pub fn is_lowercase(self) -> bool {
585 match self {
8faf50e0 586 'a'..='z' => true,
60c5eb7d 587 c => c > '\x7f' && derived_property::Lowercase(c),
c34b1796
AL
588 }
589 }
1a4d82fc 590
e74abb32
XL
591 /// Returns `true` if this `char` has the `Uppercase` property.
592 ///
593 /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
594 /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
1a4d82fc 595 ///
e74abb32
XL
596 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
597 /// [ucd]: https://www.unicode.org/reports/tr44/
598 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
b039eaaf
SL
599 ///
600 /// # Examples
601 ///
602 /// Basic usage:
603 ///
604 /// ```
54a0048b
SL
605 /// assert!(!'a'.is_uppercase());
606 /// assert!(!'δ'.is_uppercase());
607 /// assert!('A'.is_uppercase());
608 /// assert!('Δ'.is_uppercase());
b039eaaf
SL
609 ///
610 /// // The various Chinese scripts do not have case, and so:
54a0048b 611 /// assert!(!'中'.is_uppercase());
b039eaaf 612 /// ```
85aaf69f 613 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796
AL
614 #[inline]
615 pub fn is_uppercase(self) -> bool {
616 match self {
8faf50e0 617 'A'..='Z' => true,
60c5eb7d 618 c => c > '\x7f' && derived_property::Uppercase(c),
c34b1796
AL
619 }
620 }
1a4d82fc 621
e74abb32 622 /// Returns `true` if this `char` has the `White_Space` property.
b039eaaf 623 ///
e74abb32
XL
624 /// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
625 ///
626 /// [ucd]: https://www.unicode.org/reports/tr44/
627 /// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
b039eaaf
SL
628 ///
629 /// # Examples
1a4d82fc 630 ///
b039eaaf
SL
631 /// Basic usage:
632 ///
633 /// ```
54a0048b 634 /// assert!(' '.is_whitespace());
b039eaaf
SL
635 ///
636 /// // a non-breaking space
54a0048b 637 /// assert!('\u{A0}'.is_whitespace());
b039eaaf 638 ///
54a0048b 639 /// assert!(!'越'.is_whitespace());
b039eaaf 640 /// ```
85aaf69f 641 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796
AL
642 #[inline]
643 pub fn is_whitespace(self) -> bool {
644 match self {
8faf50e0 645 ' ' | '\x09'..='\x0d' => true,
60c5eb7d 646 c => c > '\x7f' && property::White_Space(c),
c34b1796
AL
647 }
648 }
1a4d82fc 649
e74abb32 650 /// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
1a4d82fc 651 ///
e74abb32
XL
652 /// [`is_alphabetic()`]: #method.is_alphabetic
653 /// [`is_numeric()`]: #method.is_numeric
b039eaaf
SL
654 ///
655 /// # Examples
656 ///
657 /// Basic usage:
658 ///
659 /// ```
54a0048b
SL
660 /// assert!('٣'.is_alphanumeric());
661 /// assert!('7'.is_alphanumeric());
662 /// assert!('৬'.is_alphanumeric());
b7449926
XL
663 /// assert!('¾'.is_alphanumeric());
664 /// assert!('①'.is_alphanumeric());
54a0048b
SL
665 /// assert!('K'.is_alphanumeric());
666 /// assert!('و'.is_alphanumeric());
667 /// assert!('藏'.is_alphanumeric());
b039eaaf 668 /// ```
85aaf69f 669 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796
AL
670 #[inline]
671 pub fn is_alphanumeric(self) -> bool {
672 self.is_alphabetic() || self.is_numeric()
673 }
1a4d82fc 674
e74abb32
XL
675 /// Returns `true` if this `char` has the general category for control codes.
676 ///
677 /// Control codes (code points with the general category of `Cc`) are described in Chapter 4
678 /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
679 /// Database][ucd] [`UnicodeData.txt`].
1a4d82fc 680 ///
e74abb32
XL
681 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
682 /// [ucd]: https://www.unicode.org/reports/tr44/
683 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
b039eaaf
SL
684 ///
685 /// # Examples
686 ///
687 /// Basic usage:
688 ///
689 /// ```
690 /// // U+009C, STRING TERMINATOR
54a0048b
SL
691 /// assert!('\9c'.is_control());
692 /// assert!(!'q'.is_control());
b039eaaf 693 /// ```
85aaf69f 694 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796 695 #[inline]
b039eaaf
SL
696 pub fn is_control(self) -> bool {
697 general_category::Cc(self)
698 }
1a4d82fc 699
e74abb32 700 /// Returns `true` if this `char` has the `Grapheme_Extend` property.
94b46f34 701 ///
e74abb32
XL
702 /// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
703 /// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
704 /// [`DerivedCoreProperties.txt`].
705 ///
706 /// [uax29]: https://www.unicode.org/reports/tr29/
707 /// [ucd]: https://www.unicode.org/reports/tr44/
708 /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
94b46f34
XL
709 #[inline]
710 pub(crate) fn is_grapheme_extended(self) -> bool {
711 derived_property::Grapheme_Extend(self)
712 }
713
e74abb32
XL
714 /// Returns `true` if this `char` has one of the general categories for numbers.
715 ///
716 /// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
717 /// characters, and `No` for other numeric characters) are specified in the [Unicode Character
718 /// Database][ucd] [`UnicodeData.txt`].
b039eaaf 719 ///
e74abb32
XL
720 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
721 /// [ucd]: https://www.unicode.org/reports/tr44/
722 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
b039eaaf
SL
723 ///
724 /// # Examples
725 ///
726 /// Basic usage:
727 ///
728 /// ```
54a0048b
SL
729 /// assert!('٣'.is_numeric());
730 /// assert!('7'.is_numeric());
731 /// assert!('৬'.is_numeric());
b7449926
XL
732 /// assert!('¾'.is_numeric());
733 /// assert!('①'.is_numeric());
54a0048b
SL
734 /// assert!(!'K'.is_numeric());
735 /// assert!(!'و'.is_numeric());
736 /// assert!(!'藏'.is_numeric());
b039eaaf 737 /// ```
85aaf69f 738 #[stable(feature = "rust1", since = "1.0.0")]
c34b1796
AL
739 #[inline]
740 pub fn is_numeric(self) -> bool {
741 match self {
8faf50e0 742 '0'..='9' => true,
60c5eb7d 743 c => c > '\x7f' && general_category::N(c),
c34b1796
AL
744 }
745 }
1a4d82fc 746
e74abb32
XL
747 /// Returns an iterator that yields the lowercase mapping of this `char` as one or more
748 /// `char`s.
1a4d82fc 749 ///
e74abb32 750 /// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
1a4d82fc 751 ///
e74abb32
XL
752 /// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
753 /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1a4d82fc 754 ///
e74abb32
XL
755 /// [ucd]: https://www.unicode.org/reports/tr44/
756 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
b039eaaf 757 ///
e74abb32
XL
758 /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
759 /// the `char`(s) given by [`SpecialCasing.txt`].
b039eaaf 760 ///
e74abb32 761 /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
b039eaaf 762 ///
e74abb32
XL
763 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
764 /// is independent of context and language.
765 ///
766 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
767 /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
768 ///
769 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
62682a34
SL
770 ///
771 /// # Examples
772 ///
32a655c1 773 /// As an iterator:
b039eaaf 774 ///
62682a34 775 /// ```
32a655c1
SL
776 /// for c in 'İ'.to_lowercase() {
777 /// print!("{}", c);
778 /// }
779 /// println!();
780 /// ```
781 ///
782 /// Using `println!` directly:
783 ///
784 /// ```
785 /// println!("{}", 'İ'.to_lowercase());
786 /// ```
787 ///
788 /// Both are equivalent to:
789 ///
790 /// ```
791 /// println!("i\u{307}");
792 /// ```
793 ///
794 /// Using `to_string`:
795 ///
796 /// ```
797 /// assert_eq!('C'.to_lowercase().to_string(), "c");
3157f602
XL
798 ///
799 /// // Sometimes the result is more than one character:
32a655c1 800 /// assert_eq!('İ'.to_lowercase().to_string(), "i\u{307}");
b039eaaf 801 ///
cc61c64b
XL
802 /// // Characters that do not have both uppercase and lowercase
803 /// // convert into themselves.
32a655c1 804 /// assert_eq!('山'.to_lowercase().to_string(), "山");
62682a34 805 /// ```
c34b1796
AL
806 #[stable(feature = "rust1", since = "1.0.0")]
807 #[inline]
808 pub fn to_lowercase(self) -> ToLowercase {
62682a34
SL
809 ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
810 }
811
e74abb32
XL
812 /// Returns an iterator that yields the uppercase mapping of this `char` as one or more
813 /// `char`s.
814 ///
815 /// If this `char` does not have a uppercase mapping, the iterator yields the same `char`.
816 ///
817 /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
818 /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
1a4d82fc 819 ///
e74abb32
XL
820 /// [ucd]: https://www.unicode.org/reports/tr44/
821 /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
1a4d82fc 822 ///
e74abb32
XL
823 /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
824 /// the `char`(s) given by [`SpecialCasing.txt`].
1a4d82fc 825 ///
e74abb32 826 /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
1a4d82fc 827 ///
e74abb32
XL
828 /// This operation performs an unconditional mapping without tailoring. That is, the conversion
829 /// is independent of context and language.
1a4d82fc 830 ///
e74abb32
XL
831 /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
832 /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
62682a34 833 ///
e74abb32 834 /// [Unicode Standard]: https://www.unicode.org/versions/latest/
1a4d82fc 835 ///
62682a34
SL
836 /// # Examples
837 ///
32a655c1
SL
838 /// As an iterator:
839 ///
840 /// ```
841 /// for c in 'ß'.to_uppercase() {
842 /// print!("{}", c);
843 /// }
844 /// println!();
845 /// ```
846 ///
847 /// Using `println!` directly:
848 ///
849 /// ```
850 /// println!("{}", 'ß'.to_uppercase());
851 /// ```
852 ///
853 /// Both are equivalent to:
854 ///
855 /// ```
856 /// println!("SS");
857 /// ```
858 ///
859 /// Using `to_string`:
b039eaaf
SL
860 ///
861 /// ```
32a655c1 862 /// assert_eq!('c'.to_uppercase().to_string(), "C");
3157f602
XL
863 ///
864 /// // Sometimes the result is more than one character:
32a655c1 865 /// assert_eq!('ß'.to_uppercase().to_string(), "SS");
b039eaaf 866 ///
cc61c64b
XL
867 /// // Characters that do not have both uppercase and lowercase
868 /// // convert into themselves.
32a655c1 869 /// assert_eq!('山'.to_uppercase().to_string(), "山");
b039eaaf
SL
870 /// ```
871 ///
32a655c1
SL
872 /// # Note on locale
873 ///
b039eaaf
SL
874 /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two:
875 ///
876 /// * 'Dotless': I / ı, sometimes written ï
877 /// * 'Dotted': İ / i
878 ///
879 /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore:
880 ///
881 /// ```
32a655c1 882 /// let upper_i = 'i'.to_uppercase().to_string();
b039eaaf
SL
883 /// ```
884 ///
885 /// The value of `upper_i` here relies on the language of the text: if we're
3157f602
XL
886 /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should
887 /// be `"İ"`. `to_uppercase()` does not take this into account, and so:
b039eaaf 888 ///
62682a34 889 /// ```
32a655c1 890 /// let upper_i = 'i'.to_uppercase().to_string();
b039eaaf 891 ///
3157f602 892 /// assert_eq!(upper_i, "I");
62682a34 893 /// ```
b039eaaf
SL
894 ///
895 /// holds across languages.
c34b1796
AL
896 #[stable(feature = "rust1", since = "1.0.0")]
897 #[inline]
898 pub fn to_uppercase(self) -> ToUppercase {
62682a34 899 ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
c34b1796 900 }
abe05a73
XL
901
902 /// Checks if the value is within the ASCII range.
903 ///
904 /// # Examples
905 ///
906 /// ```
907 /// let ascii = 'a';
908 /// let non_ascii = '❤';
909 ///
910 /// assert!(ascii.is_ascii());
911 /// assert!(!non_ascii.is_ascii());
912 /// ```
ff7c6d11 913 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
60c5eb7d
XL
914 #[cfg_attr(
915 not(bootstrap),
916 rustc_const_stable(feature = "const_ascii_methods_on_intrinsics", since = "1.32.0"),
917 )]
abe05a73 918 #[inline]
a1dfa0c6 919 pub const fn is_ascii(&self) -> bool {
abe05a73
XL
920 *self as u32 <= 0x7F
921 }
922
923 /// Makes a copy of the value in its ASCII upper case equivalent.
924 ///
925 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
926 /// but non-ASCII letters are unchanged.
927 ///
928 /// To uppercase the value in-place, use [`make_ascii_uppercase`].
929 ///
930 /// To uppercase ASCII characters in addition to non-ASCII characters, use
931 /// [`to_uppercase`].
932 ///
933 /// # Examples
934 ///
935 /// ```
936 /// let ascii = 'a';
937 /// let non_ascii = '❤';
938 ///
939 /// assert_eq!('A', ascii.to_ascii_uppercase());
940 /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
941 /// ```
942 ///
943 /// [`make_ascii_uppercase`]: #method.make_ascii_uppercase
944 /// [`to_uppercase`]: #method.to_uppercase
ff7c6d11 945 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
abe05a73
XL
946 #[inline]
947 pub fn to_ascii_uppercase(&self) -> char {
60c5eb7d 948 if self.is_ascii() { (*self as u8).to_ascii_uppercase() as char } else { *self }
abe05a73
XL
949 }
950
951 /// Makes a copy of the value in its ASCII lower case equivalent.
952 ///
953 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
954 /// but non-ASCII letters are unchanged.
955 ///
956 /// To lowercase the value in-place, use [`make_ascii_lowercase`].
957 ///
958 /// To lowercase ASCII characters in addition to non-ASCII characters, use
959 /// [`to_lowercase`].
960 ///
961 /// # Examples
962 ///
963 /// ```
964 /// let ascii = 'A';
965 /// let non_ascii = '❤';
966 ///
967 /// assert_eq!('a', ascii.to_ascii_lowercase());
968 /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
969 /// ```
970 ///
971 /// [`make_ascii_lowercase`]: #method.make_ascii_lowercase
972 /// [`to_lowercase`]: #method.to_lowercase
ff7c6d11 973 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
abe05a73
XL
974 #[inline]
975 pub fn to_ascii_lowercase(&self) -> char {
60c5eb7d 976 if self.is_ascii() { (*self as u8).to_ascii_lowercase() as char } else { *self }
abe05a73
XL
977 }
978
979 /// Checks that two values are an ASCII case-insensitive match.
980 ///
981 /// Equivalent to `to_ascii_lowercase(a) == to_ascii_lowercase(b)`.
982 ///
983 /// # Examples
984 ///
985 /// ```
986 /// let upper_a = 'A';
987 /// let lower_a = 'a';
988 /// let lower_z = 'z';
989 ///
990 /// assert!(upper_a.eq_ignore_ascii_case(&lower_a));
991 /// assert!(upper_a.eq_ignore_ascii_case(&upper_a));
992 /// assert!(!upper_a.eq_ignore_ascii_case(&lower_z));
993 /// ```
ff7c6d11 994 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
abe05a73
XL
995 #[inline]
996 pub fn eq_ignore_ascii_case(&self, other: &char) -> bool {
997 self.to_ascii_lowercase() == other.to_ascii_lowercase()
998 }
999
1000 /// Converts this type to its ASCII upper case equivalent in-place.
1001 ///
1002 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
1003 /// but non-ASCII letters are unchanged.
1004 ///
1005 /// To return a new uppercased value without modifying the existing one, use
1006 /// [`to_ascii_uppercase`].
1007 ///
1008 /// # Examples
1009 ///
1010 /// ```
1011 /// let mut ascii = 'a';
1012 ///
1013 /// ascii.make_ascii_uppercase();
1014 ///
1015 /// assert_eq!('A', ascii);
1016 /// ```
1017 ///
1018 /// [`to_ascii_uppercase`]: #method.to_ascii_uppercase
ff7c6d11 1019 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
abe05a73
XL
1020 #[inline]
1021 pub fn make_ascii_uppercase(&mut self) {
1022 *self = self.to_ascii_uppercase();
1023 }
1024
1025 /// Converts this type to its ASCII lower case equivalent in-place.
1026 ///
1027 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
1028 /// but non-ASCII letters are unchanged.
1029 ///
1030 /// To return a new lowercased value without modifying the existing one, use
1031 /// [`to_ascii_lowercase`].
1032 ///
1033 /// # Examples
1034 ///
1035 /// ```
1036 /// let mut ascii = 'A';
1037 ///
1038 /// ascii.make_ascii_lowercase();
1039 ///
1040 /// assert_eq!('a', ascii);
1041 /// ```
1042 ///
1043 /// [`to_ascii_lowercase`]: #method.to_ascii_lowercase
ff7c6d11 1044 #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
abe05a73
XL
1045 #[inline]
1046 pub fn make_ascii_lowercase(&mut self) {
1047 *self = self.to_ascii_lowercase();
1048 }
1049
1050 /// Checks if the value is an ASCII alphabetic character:
1051 ///
dc9dc135
XL
1052 /// - U+0041 'A' ..= U+005A 'Z', or
1053 /// - U+0061 'a' ..= U+007A 'z'.
abe05a73
XL
1054 ///
1055 /// # Examples
1056 ///
1057 /// ```
abe05a73
XL
1058 /// let uppercase_a = 'A';
1059 /// let uppercase_g = 'G';
1060 /// let a = 'a';
1061 /// let g = 'g';
1062 /// let zero = '0';
1063 /// let percent = '%';
1064 /// let space = ' ';
1065 /// let lf = '\n';
1066 /// let esc: char = 0x1b_u8.into();
1067 ///
1068 /// assert!(uppercase_a.is_ascii_alphabetic());
1069 /// assert!(uppercase_g.is_ascii_alphabetic());
1070 /// assert!(a.is_ascii_alphabetic());
1071 /// assert!(g.is_ascii_alphabetic());
1072 /// assert!(!zero.is_ascii_alphabetic());
1073 /// assert!(!percent.is_ascii_alphabetic());
1074 /// assert!(!space.is_ascii_alphabetic());
1075 /// assert!(!lf.is_ascii_alphabetic());
1076 /// assert!(!esc.is_ascii_alphabetic());
1077 /// ```
ff7c6d11 1078 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1079 #[inline]
1080 pub fn is_ascii_alphabetic(&self) -> bool {
1081 self.is_ascii() && (*self as u8).is_ascii_alphabetic()
1082 }
1083
1084 /// Checks if the value is an ASCII uppercase character:
dc9dc135 1085 /// U+0041 'A' ..= U+005A 'Z'.
abe05a73
XL
1086 ///
1087 /// # Examples
1088 ///
1089 /// ```
abe05a73
XL
1090 /// let uppercase_a = 'A';
1091 /// let uppercase_g = 'G';
1092 /// let a = 'a';
1093 /// let g = 'g';
1094 /// let zero = '0';
1095 /// let percent = '%';
1096 /// let space = ' ';
1097 /// let lf = '\n';
1098 /// let esc: char = 0x1b_u8.into();
1099 ///
1100 /// assert!(uppercase_a.is_ascii_uppercase());
1101 /// assert!(uppercase_g.is_ascii_uppercase());
1102 /// assert!(!a.is_ascii_uppercase());
1103 /// assert!(!g.is_ascii_uppercase());
1104 /// assert!(!zero.is_ascii_uppercase());
1105 /// assert!(!percent.is_ascii_uppercase());
1106 /// assert!(!space.is_ascii_uppercase());
1107 /// assert!(!lf.is_ascii_uppercase());
1108 /// assert!(!esc.is_ascii_uppercase());
1109 /// ```
ff7c6d11 1110 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1111 #[inline]
1112 pub fn is_ascii_uppercase(&self) -> bool {
1113 self.is_ascii() && (*self as u8).is_ascii_uppercase()
1114 }
1115
1116 /// Checks if the value is an ASCII lowercase character:
dc9dc135 1117 /// U+0061 'a' ..= U+007A 'z'.
abe05a73
XL
1118 ///
1119 /// # Examples
1120 ///
1121 /// ```
abe05a73
XL
1122 /// let uppercase_a = 'A';
1123 /// let uppercase_g = 'G';
1124 /// let a = 'a';
1125 /// let g = 'g';
1126 /// let zero = '0';
1127 /// let percent = '%';
1128 /// let space = ' ';
1129 /// let lf = '\n';
1130 /// let esc: char = 0x1b_u8.into();
1131 ///
1132 /// assert!(!uppercase_a.is_ascii_lowercase());
1133 /// assert!(!uppercase_g.is_ascii_lowercase());
1134 /// assert!(a.is_ascii_lowercase());
1135 /// assert!(g.is_ascii_lowercase());
1136 /// assert!(!zero.is_ascii_lowercase());
1137 /// assert!(!percent.is_ascii_lowercase());
1138 /// assert!(!space.is_ascii_lowercase());
1139 /// assert!(!lf.is_ascii_lowercase());
1140 /// assert!(!esc.is_ascii_lowercase());
1141 /// ```
ff7c6d11 1142 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1143 #[inline]
1144 pub fn is_ascii_lowercase(&self) -> bool {
1145 self.is_ascii() && (*self as u8).is_ascii_lowercase()
1146 }
1147
1148 /// Checks if the value is an ASCII alphanumeric character:
1149 ///
dc9dc135
XL
1150 /// - U+0041 'A' ..= U+005A 'Z', or
1151 /// - U+0061 'a' ..= U+007A 'z', or
1152 /// - U+0030 '0' ..= U+0039 '9'.
abe05a73
XL
1153 ///
1154 /// # Examples
1155 ///
1156 /// ```
abe05a73
XL
1157 /// let uppercase_a = 'A';
1158 /// let uppercase_g = 'G';
1159 /// let a = 'a';
1160 /// let g = 'g';
1161 /// let zero = '0';
1162 /// let percent = '%';
1163 /// let space = ' ';
1164 /// let lf = '\n';
1165 /// let esc: char = 0x1b_u8.into();
1166 ///
1167 /// assert!(uppercase_a.is_ascii_alphanumeric());
1168 /// assert!(uppercase_g.is_ascii_alphanumeric());
1169 /// assert!(a.is_ascii_alphanumeric());
1170 /// assert!(g.is_ascii_alphanumeric());
1171 /// assert!(zero.is_ascii_alphanumeric());
1172 /// assert!(!percent.is_ascii_alphanumeric());
1173 /// assert!(!space.is_ascii_alphanumeric());
1174 /// assert!(!lf.is_ascii_alphanumeric());
1175 /// assert!(!esc.is_ascii_alphanumeric());
1176 /// ```
ff7c6d11 1177 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1178 #[inline]
1179 pub fn is_ascii_alphanumeric(&self) -> bool {
1180 self.is_ascii() && (*self as u8).is_ascii_alphanumeric()
1181 }
1182
1183 /// Checks if the value is an ASCII decimal digit:
dc9dc135 1184 /// U+0030 '0' ..= U+0039 '9'.
abe05a73
XL
1185 ///
1186 /// # Examples
1187 ///
1188 /// ```
abe05a73
XL
1189 /// let uppercase_a = 'A';
1190 /// let uppercase_g = 'G';
1191 /// let a = 'a';
1192 /// let g = 'g';
1193 /// let zero = '0';
1194 /// let percent = '%';
1195 /// let space = ' ';
1196 /// let lf = '\n';
1197 /// let esc: char = 0x1b_u8.into();
1198 ///
1199 /// assert!(!uppercase_a.is_ascii_digit());
1200 /// assert!(!uppercase_g.is_ascii_digit());
1201 /// assert!(!a.is_ascii_digit());
1202 /// assert!(!g.is_ascii_digit());
1203 /// assert!(zero.is_ascii_digit());
1204 /// assert!(!percent.is_ascii_digit());
1205 /// assert!(!space.is_ascii_digit());
1206 /// assert!(!lf.is_ascii_digit());
1207 /// assert!(!esc.is_ascii_digit());
1208 /// ```
ff7c6d11 1209 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1210 #[inline]
1211 pub fn is_ascii_digit(&self) -> bool {
1212 self.is_ascii() && (*self as u8).is_ascii_digit()
1213 }
1214
1215 /// Checks if the value is an ASCII hexadecimal digit:
1216 ///
dc9dc135
XL
1217 /// - U+0030 '0' ..= U+0039 '9', or
1218 /// - U+0041 'A' ..= U+0046 'F', or
1219 /// - U+0061 'a' ..= U+0066 'f'.
abe05a73
XL
1220 ///
1221 /// # Examples
1222 ///
1223 /// ```
abe05a73
XL
1224 /// let uppercase_a = 'A';
1225 /// let uppercase_g = 'G';
1226 /// let a = 'a';
1227 /// let g = 'g';
1228 /// let zero = '0';
1229 /// let percent = '%';
1230 /// let space = ' ';
1231 /// let lf = '\n';
1232 /// let esc: char = 0x1b_u8.into();
1233 ///
1234 /// assert!(uppercase_a.is_ascii_hexdigit());
1235 /// assert!(!uppercase_g.is_ascii_hexdigit());
1236 /// assert!(a.is_ascii_hexdigit());
1237 /// assert!(!g.is_ascii_hexdigit());
1238 /// assert!(zero.is_ascii_hexdigit());
1239 /// assert!(!percent.is_ascii_hexdigit());
1240 /// assert!(!space.is_ascii_hexdigit());
1241 /// assert!(!lf.is_ascii_hexdigit());
1242 /// assert!(!esc.is_ascii_hexdigit());
1243 /// ```
ff7c6d11 1244 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1245 #[inline]
1246 pub fn is_ascii_hexdigit(&self) -> bool {
1247 self.is_ascii() && (*self as u8).is_ascii_hexdigit()
1248 }
1249
1250 /// Checks if the value is an ASCII punctuation character:
1251 ///
dc9dc135
XL
1252 /// - U+0021 ..= U+002F `! " # $ % & ' ( ) * + , - . /`, or
1253 /// - U+003A ..= U+0040 `: ; < = > ? @`, or
1254 /// - U+005B ..= U+0060 ``[ \ ] ^ _ ` ``, or
1255 /// - U+007B ..= U+007E `{ | } ~`
abe05a73
XL
1256 ///
1257 /// # Examples
1258 ///
1259 /// ```
abe05a73
XL
1260 /// let uppercase_a = 'A';
1261 /// let uppercase_g = 'G';
1262 /// let a = 'a';
1263 /// let g = 'g';
1264 /// let zero = '0';
1265 /// let percent = '%';
1266 /// let space = ' ';
1267 /// let lf = '\n';
1268 /// let esc: char = 0x1b_u8.into();
1269 ///
1270 /// assert!(!uppercase_a.is_ascii_punctuation());
1271 /// assert!(!uppercase_g.is_ascii_punctuation());
1272 /// assert!(!a.is_ascii_punctuation());
1273 /// assert!(!g.is_ascii_punctuation());
1274 /// assert!(!zero.is_ascii_punctuation());
1275 /// assert!(percent.is_ascii_punctuation());
1276 /// assert!(!space.is_ascii_punctuation());
1277 /// assert!(!lf.is_ascii_punctuation());
1278 /// assert!(!esc.is_ascii_punctuation());
1279 /// ```
ff7c6d11 1280 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1281 #[inline]
1282 pub fn is_ascii_punctuation(&self) -> bool {
1283 self.is_ascii() && (*self as u8).is_ascii_punctuation()
1284 }
1285
1286 /// Checks if the value is an ASCII graphic character:
dc9dc135 1287 /// U+0021 '!' ..= U+007E '~'.
abe05a73
XL
1288 ///
1289 /// # Examples
1290 ///
1291 /// ```
abe05a73
XL
1292 /// let uppercase_a = 'A';
1293 /// let uppercase_g = 'G';
1294 /// let a = 'a';
1295 /// let g = 'g';
1296 /// let zero = '0';
1297 /// let percent = '%';
1298 /// let space = ' ';
1299 /// let lf = '\n';
1300 /// let esc: char = 0x1b_u8.into();
1301 ///
1302 /// assert!(uppercase_a.is_ascii_graphic());
1303 /// assert!(uppercase_g.is_ascii_graphic());
1304 /// assert!(a.is_ascii_graphic());
1305 /// assert!(g.is_ascii_graphic());
1306 /// assert!(zero.is_ascii_graphic());
1307 /// assert!(percent.is_ascii_graphic());
1308 /// assert!(!space.is_ascii_graphic());
1309 /// assert!(!lf.is_ascii_graphic());
1310 /// assert!(!esc.is_ascii_graphic());
1311 /// ```
ff7c6d11 1312 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1313 #[inline]
1314 pub fn is_ascii_graphic(&self) -> bool {
1315 self.is_ascii() && (*self as u8).is_ascii_graphic()
1316 }
1317
1318 /// Checks if the value is an ASCII whitespace character:
1319 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
1320 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
1321 ///
1322 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
1323 /// whitespace][infra-aw]. There are several other definitions in
1324 /// wide use. For instance, [the POSIX locale][pct] includes
1325 /// U+000B VERTICAL TAB as well as all the above characters,
1326 /// but—from the very same specification—[the default rule for
1327 /// "field splitting" in the Bourne shell][bfs] considers *only*
1328 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
1329 ///
1330 /// If you are writing a program that will process an existing
1331 /// file format, check what that format's definition of whitespace is
1332 /// before using this function.
1333 ///
1334 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
1335 /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
1336 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
1337 ///
1338 /// # Examples
1339 ///
1340 /// ```
abe05a73
XL
1341 /// let uppercase_a = 'A';
1342 /// let uppercase_g = 'G';
1343 /// let a = 'a';
1344 /// let g = 'g';
1345 /// let zero = '0';
1346 /// let percent = '%';
1347 /// let space = ' ';
1348 /// let lf = '\n';
1349 /// let esc: char = 0x1b_u8.into();
1350 ///
1351 /// assert!(!uppercase_a.is_ascii_whitespace());
1352 /// assert!(!uppercase_g.is_ascii_whitespace());
1353 /// assert!(!a.is_ascii_whitespace());
1354 /// assert!(!g.is_ascii_whitespace());
1355 /// assert!(!zero.is_ascii_whitespace());
1356 /// assert!(!percent.is_ascii_whitespace());
1357 /// assert!(space.is_ascii_whitespace());
1358 /// assert!(lf.is_ascii_whitespace());
1359 /// assert!(!esc.is_ascii_whitespace());
1360 /// ```
ff7c6d11 1361 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1362 #[inline]
1363 pub fn is_ascii_whitespace(&self) -> bool {
1364 self.is_ascii() && (*self as u8).is_ascii_whitespace()
1365 }
1366
1367 /// Checks if the value is an ASCII control character:
dc9dc135 1368 /// U+0000 NUL ..= U+001F UNIT SEPARATOR, or U+007F DELETE.
abe05a73
XL
1369 /// Note that most ASCII whitespace characters are control
1370 /// characters, but SPACE is not.
1371 ///
1372 /// # Examples
1373 ///
1374 /// ```
abe05a73
XL
1375 /// let uppercase_a = 'A';
1376 /// let uppercase_g = 'G';
1377 /// let a = 'a';
1378 /// let g = 'g';
1379 /// let zero = '0';
1380 /// let percent = '%';
1381 /// let space = ' ';
1382 /// let lf = '\n';
1383 /// let esc: char = 0x1b_u8.into();
1384 ///
1385 /// assert!(!uppercase_a.is_ascii_control());
1386 /// assert!(!uppercase_g.is_ascii_control());
1387 /// assert!(!a.is_ascii_control());
1388 /// assert!(!g.is_ascii_control());
1389 /// assert!(!zero.is_ascii_control());
1390 /// assert!(!percent.is_ascii_control());
1391 /// assert!(!space.is_ascii_control());
1392 /// assert!(lf.is_ascii_control());
1393 /// assert!(esc.is_ascii_control());
1394 /// ```
ff7c6d11 1395 #[stable(feature = "ascii_ctype_on_intrinsics", since = "1.24.0")]
abe05a73
XL
1396 #[inline]
1397 pub fn is_ascii_control(&self) -> bool {
1398 self.is_ascii() && (*self as u8).is_ascii_control()
1399 }
e9174d1e 1400}