1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters.
13 #![stable(feature = "rust1", since = "1.0.0")]
17 use iter
::FusedIterator
;
19 /// Extension methods for ASCII-subset only operations on string slices.
21 /// Be aware that operations on seemingly non-ASCII characters can sometimes
22 /// have unexpected results. Consider this example:
25 /// use std::ascii::AsciiExt;
27 /// assert_eq!("café".to_ascii_uppercase(), "CAFÉ");
28 /// assert_eq!("café".to_ascii_uppercase(), "CAFé");
31 /// In the first example, the lowercased string is represented `"cafe\u{301}"`
32 /// (the last character is an acute accent [combining character]). Unlike the
33 /// other characters in the string, the combining character will not get mapped
34 /// to an uppercase variant, resulting in `"CAFE\u{301}"`. In the second
35 /// example, the lowercased string is represented `"caf\u{e9}"` (the last
36 /// character is a single Unicode character representing an 'e' with an acute
37 /// accent). Since the last character is defined outside the scope of ASCII,
38 /// it will not get mapped to an uppercase variant, resulting in `"CAF\u{e9}"`.
40 /// [combining character]: https://en.wikipedia.org/wiki/Combining_character
41 #[stable(feature = "rust1", since = "1.0.0")]
43 /// Container type for copied ASCII characters.
44 #[stable(feature = "rust1", since = "1.0.0")]
47 /// Checks if the value is within the ASCII range.
52 /// use std::ascii::AsciiExt;
57 /// assert!(ascii.is_ascii());
58 /// assert!(!utf8.is_ascii());
60 #[stable(feature = "rust1", since = "1.0.0")]
61 fn is_ascii(&self) -> bool
;
63 /// Makes a copy of the string in ASCII upper case.
65 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
66 /// but non-ASCII letters are unchanged.
71 /// use std::ascii::AsciiExt;
76 /// assert_eq!('A', ascii.to_ascii_uppercase());
77 /// assert_eq!('❤', utf8.to_ascii_uppercase());
79 #[stable(feature = "rust1", since = "1.0.0")]
80 fn to_ascii_uppercase(&self) -> Self::Owned
;
82 /// Makes a copy of the string in ASCII lower case.
84 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
85 /// but non-ASCII letters are unchanged.
90 /// use std::ascii::AsciiExt;
95 /// assert_eq!('a', ascii.to_ascii_lowercase());
96 /// assert_eq!('❤', utf8.to_ascii_lowercase());
98 #[stable(feature = "rust1", since = "1.0.0")]
99 fn to_ascii_lowercase(&self) -> Self::Owned
;
101 /// Checks that two strings are an ASCII case-insensitive match.
103 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
104 /// but without allocating and copying temporary strings.
109 /// use std::ascii::AsciiExt;
111 /// let ascii1 = 'A';
112 /// let ascii2 = 'a';
113 /// let ascii3 = 'A';
114 /// let ascii4 = 'z';
116 /// assert!(ascii1.eq_ignore_ascii_case(&ascii2));
117 /// assert!(ascii1.eq_ignore_ascii_case(&ascii3));
118 /// assert!(!ascii1.eq_ignore_ascii_case(&ascii4));
120 #[stable(feature = "rust1", since = "1.0.0")]
121 fn eq_ignore_ascii_case(&self, other
: &Self) -> bool
;
123 /// Converts this type to its ASCII upper case equivalent in-place.
125 /// See `to_ascii_uppercase` for more information.
130 /// use std::ascii::AsciiExt;
132 /// let mut ascii = 'a';
134 /// ascii.make_ascii_uppercase();
136 /// assert_eq!('A', ascii);
138 #[stable(feature = "ascii", since = "1.9.0")]
139 fn make_ascii_uppercase(&mut self);
141 /// Converts this type to its ASCII lower case equivalent in-place.
143 /// See `to_ascii_lowercase` for more information.
148 /// use std::ascii::AsciiExt;
150 /// let mut ascii = 'A';
152 /// ascii.make_ascii_lowercase();
154 /// assert_eq!('a', ascii);
156 #[stable(feature = "ascii", since = "1.9.0")]
157 fn make_ascii_lowercase(&mut self);
160 #[stable(feature = "rust1", since = "1.0.0")]
161 impl AsciiExt
for str {
165 fn is_ascii(&self) -> bool
{
166 self.bytes().all(|b
| b
.is_ascii())
170 fn to_ascii_uppercase(&self) -> String
{
171 let mut bytes
= self.as_bytes().to_vec();
172 bytes
.make_ascii_uppercase();
173 // make_ascii_uppercase() preserves the UTF-8 invariant.
174 unsafe { String::from_utf8_unchecked(bytes) }
178 fn to_ascii_lowercase(&self) -> String
{
179 let mut bytes
= self.as_bytes().to_vec();
180 bytes
.make_ascii_lowercase();
181 // make_ascii_uppercase() preserves the UTF-8 invariant.
182 unsafe { String::from_utf8_unchecked(bytes) }
186 fn eq_ignore_ascii_case(&self, other
: &str) -> bool
{
187 self.as_bytes().eq_ignore_ascii_case(other
.as_bytes())
190 fn make_ascii_uppercase(&mut self) {
191 let me
: &mut [u8] = unsafe { mem::transmute(self) }
;
192 me
.make_ascii_uppercase()
195 fn make_ascii_lowercase(&mut self) {
196 let me
: &mut [u8] = unsafe { mem::transmute(self) }
;
197 me
.make_ascii_lowercase()
201 #[stable(feature = "rust1", since = "1.0.0")]
202 impl AsciiExt
for [u8] {
203 type Owned
= Vec
<u8>;
205 fn is_ascii(&self) -> bool
{
206 self.iter().all(|b
| b
.is_ascii())
210 fn to_ascii_uppercase(&self) -> Vec
<u8> {
211 let mut me
= self.to_vec();
212 me
.make_ascii_uppercase();
217 fn to_ascii_lowercase(&self) -> Vec
<u8> {
218 let mut me
= self.to_vec();
219 me
.make_ascii_lowercase();
224 fn eq_ignore_ascii_case(&self, other
: &[u8]) -> bool
{
225 self.len() == other
.len() &&
226 self.iter().zip(other
).all(|(a
, b
)| {
227 a
.eq_ignore_ascii_case(b
)
231 fn make_ascii_uppercase(&mut self) {
233 byte
.make_ascii_uppercase();
237 fn make_ascii_lowercase(&mut self) {
239 byte
.make_ascii_lowercase();
244 #[stable(feature = "rust1", since = "1.0.0")]
245 impl AsciiExt
for u8 {
248 fn is_ascii(&self) -> bool { *self & 128 == 0 }
250 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
252 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
254 fn eq_ignore_ascii_case(&self, other
: &u8) -> bool
{
255 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
258 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
260 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
263 #[stable(feature = "rust1", since = "1.0.0")]
264 impl AsciiExt
for char {
267 fn is_ascii(&self) -> bool
{
272 fn to_ascii_uppercase(&self) -> char {
274 (*self as u8).to_ascii_uppercase() as char
281 fn to_ascii_lowercase(&self) -> char {
283 (*self as u8).to_ascii_lowercase() as char
290 fn eq_ignore_ascii_case(&self, other
: &char) -> bool
{
291 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
295 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
297 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
300 /// An iterator over the escaped version of a byte, constructed via
301 /// `std::ascii::escape_default`.
302 #[stable(feature = "rust1", since = "1.0.0")]
303 pub struct EscapeDefault
{
308 /// Returns an iterator that produces an escaped version of a `u8`.
310 /// The default is chosen with a bias toward producing literals that are
311 /// legal in a variety of languages, including C++11 and similar C-family
312 /// languages. The exact rules are:
314 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
315 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
316 /// - Any other chars in the range [0x20,0x7e] are not escaped.
317 /// - Any other chars are given hex escapes of the form '\xNN'.
318 /// - Unicode escapes are never generated by this function.
325 /// let escaped = ascii::escape_default(b'0').next().unwrap();
326 /// assert_eq!(b'0', escaped);
328 /// let mut escaped = ascii::escape_default(b'\t');
330 /// assert_eq!(b'\\', escaped.next().unwrap());
331 /// assert_eq!(b't', escaped.next().unwrap());
333 #[stable(feature = "rust1", since = "1.0.0")]
334 pub fn escape_default(c
: u8) -> EscapeDefault
{
335 let (data
, len
) = match c
{
336 b'
\t'
=> ([b'
\\'
, b't'
, 0, 0], 2),
337 b'
\r'
=> ([b'
\\'
, b'r'
, 0, 0], 2),
338 b'
\n'
=> ([b'
\\'
, b'n'
, 0, 0], 2),
339 b'
\\'
=> ([b'
\\'
, b'
\\'
, 0, 0], 2),
340 b'
\''
=> ([b'
\\'
, b'
\''
, 0, 0], 2),
341 b'
"' => ([b'\\', b'"'
, 0, 0], 2),
342 b'
\x20'
... b'
\x7e'
=> ([c
, 0, 0, 0], 1),
343 _
=> ([b'
\\'
, b'x'
, hexify(c
>> 4), hexify(c
& 0xf)], 4),
346 return EscapeDefault { range: (0.. len), data: data }
;
348 fn hexify(b
: u8) -> u8 {
356 #[stable(feature = "rust1", since = "1.0.0")]
357 impl Iterator
for EscapeDefault
{
359 fn next(&mut self) -> Option
<u8> { self.range.next().map(|i| self.data[i]) }
360 fn size_hint(&self) -> (usize, Option
<usize>) { self.range.size_hint() }
362 #[stable(feature = "rust1", since = "1.0.0")]
363 impl DoubleEndedIterator
for EscapeDefault
{
364 fn next_back(&mut self) -> Option
<u8> {
365 self.range
.next_back().map(|i
| self.data
[i
])
// Marker impl with no methods of its own: the remaining length comes from
// `size_hint`, which `EscapeDefault` forwards to its inner index range.
#[stable(feature = "rust1", since = "1.0.0")]
impl ExactSizeIterator for EscapeDefault {}
// Marker impl with no methods of its own; still unstable, tracked under the
// `fused` feature (issue 35602).
#[unstable(feature = "fused", issue = "35602")]
impl FusedIterator for EscapeDefault {}
// Lookup table indexed by byte value: entry `i` is the ASCII lower-case form
// of byte `i`. Only b'A'..b'Z' differ from their index; every other byte,
// including all bytes >= 0x80, maps to itself.
static ASCII_LOWERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    // 0x40..0x5f: the upper-case letters are folded onto lower case here.
    b'@', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'[', b'\\', b']', b'^', b'_',
    b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// Lookup table indexed by byte value: entry `i` is the ASCII upper-case form
// of byte `i`. Only b'a'..b'z' differ from their index; every other byte,
// including all bytes >= 0x80, maps to itself.
static ASCII_UPPERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
    // 0x60..0x7f: the lower-case letters are folded onto upper case here.
    b'`', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// Checks `is_ascii` on byte strings, string slices, individual bytes and
// individual chars, for empty, pure-ASCII and non-ASCII inputs.
#[test]
fn test_is_ascii() {
    assert!(b"".is_ascii());
    assert!(b"banana\0\x7F".is_ascii());
    assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
    assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
    assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
    assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));

    assert!("".is_ascii());
    assert!("banana\0\u{7F}".is_ascii());
    assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
    assert!(!"ประเทศไทย中华Vi\u{1ec7}t Nam".chars().all(|c| c.is_ascii()));
    // The tail is U+1EC7 followed by a no-break space (U+00A0), written as
    // escapes so that no ASCII look-alike can sneak in: this string must not
    // contain a single ASCII character.
    assert!(!"ประเทศไทย中华\u{1ec7}\u{a0}".chars().any(|c| c.is_ascii()));
}
// Checks `to_ascii_uppercase` on strings: ASCII letters are upper-cased while
// non-ASCII letters and look-alikes are left untouched.
#[test]
fn test_to_ascii_uppercase() {
    // U+00FC (u with diaeresis) must survive unchanged.
    assert_eq!("url()URL()uRl()\u{fc}rl".to_ascii_uppercase(), "URL()URL()URL()\u{fc}RL");
    // Dotless i (U+0131), Kelvin sign (U+212A), sharp s (U+00DF).
    assert_eq!("h\u{131}\u{212a}\u{df}".to_ascii_uppercase(), "H\u{131}\u{212a}\u{df}");

    // Sweep all of ASCII plus a stretch of non-ASCII code points.
    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// Checks `to_ascii_lowercase` on strings: ASCII letters are lower-cased while
// non-ASCII letters and look-alikes are left untouched.
#[test]
fn test_to_ascii_lowercase() {
    // U+00DC (U with diaeresis) must survive unchanged.
    assert_eq!("url()URL()uRl()\u{dc}rl".to_ascii_lowercase(), "url()url()url()\u{dc}rl");
    // Dotted capital I (U+0130), Kelvin sign (U+212A), Sharp S (U+00DF).
    assert_eq!("H\u{130}\u{212a}\u{df}".to_ascii_lowercase(), "h\u{130}\u{212a}\u{df}");

    // Sweep all of ASCII plus a stretch of non-ASCII code points.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
// Checks the in-place `make_ascii_lowercase` on bytes, chars, byte vectors
// and strings.
#[test]
fn test_make_ascii_lower_case() {
    // Lower-cases a mutable copy of `$from` in place and compares it to `$to`.
    macro_rules! test {
        ($from: expr, $to: expr) => {
            {
                let mut x = $from;
                x.make_ascii_lowercase();
                assert_eq!(x, $to);
            }
        }
    }
    test!(b'A', b'a');
    test!(b'a', b'a');
    test!(b'!', b'!');
    test!('A', 'a');
    test!('\u{c0}', '\u{c0}');
    test!('a', 'a');
    test!('!', '!');
    test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
    // Dotted capital I (U+0130), Kelvin sign (U+212A), Sharp S (U+00DF).
    test!("H\u{130}\u{212a}\u{df}".to_string(), "h\u{130}\u{212a}\u{df}");
}
// Checks the in-place `make_ascii_uppercase` on bytes, chars, byte vectors,
// strings and a mutable sub-slice of a `String`.
#[test]
fn test_make_ascii_upper_case() {
    // Upper-cases a mutable copy of `$from` in place and compares it to `$to`.
    macro_rules! test {
        ($from: expr, $to: expr) => {
            {
                let mut x = $from;
                x.make_ascii_uppercase();
                assert_eq!(x, $to);
            }
        }
    }
    test!(b'a', b'A');
    test!(b'A', b'A');
    test!(b'!', b'!');
    test!('a', 'A');
    test!('\u{e0}', '\u{e0}');
    test!('A', 'A');
    test!('!', '!');
    test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
    // Dotless i (U+0131), Kelvin sign (U+212A), sharp s (U+00DF).
    test!("h\u{131}\u{212a}\u{df}".to_string(), "H\u{131}\u{212a}\u{df}");

    let mut x = "Hello".to_string();
    x[..3].make_ascii_uppercase(); // Test IndexMut on String.
    assert_eq!(x, "HELlo")
}
// Checks `eq_ignore_ascii_case` on strings: only ASCII case differences are
// ignored; non-ASCII letters and look-alikes must match exactly.
#[test]
fn test_eq_ignore_ascii_case() {
    assert!("url()URL()uRl()\u{dc}rl".eq_ignore_ascii_case("url()url()url()\u{dc}rl"));
    // U+00DC and U+00FC differ only by non-ASCII case, so they must not match.
    assert!(!"\u{dc}rl".eq_ignore_ascii_case("\u{fc}rl"));
    // Dotted capital I (U+0130), Kelvin sign (U+212A), Sharp S (U+00DF).
    assert!("H\u{130}\u{212a}\u{df}".eq_ignore_ascii_case("h\u{130}\u{212a}\u{df}"));
    assert!(!"\u{130}".eq_ignore_ascii_case("i"));
    assert!(!"\u{212a}".eq_ignore_ascii_case("k"));
    assert!(!"\u{df}".eq_ignore_ascii_case("s"));

    // Sweep all of ASCII plus a stretch of non-ASCII code points.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
                &from_u32(lower).unwrap().to_string()));
    }
}
// Makes sure a `String` receiver and a `&str` argument type-check together
// for `eq_ignore_ascii_case`, and that the comparison actually holds.
#[test]
fn inference_works() {
    let x = "a".to_string();
    assert!(x.eq_ignore_ascii_case("A"));
}