1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters.
13 #![stable(feature = "rust1", since = "1.0.0")]
20 /// Extension methods for ASCII-subset only operations on string slices.
22 /// Be aware that operations on seemingly non-ASCII characters can sometimes
23 /// have unexpected results. Consider this example:
26 /// use std::ascii::AsciiExt;
28 /// assert_eq!("café".to_ascii_uppercase(), "CAFÉ");
29 /// assert_eq!("café".to_ascii_uppercase(), "CAFé");
32 /// In the first example, the lowercased string is represented `"cafe\u{301}"`
33 /// (the last character is an acute accent [combining character]). Unlike the
34 /// other characters in the string, the combining character will not get mapped
35 /// to an uppercase variant, resulting in `"CAFE\u{301}"`. In the second
36 /// example, the lowercased string is represented `"caf\u{e9}"` (the last
37 /// character is a single Unicode character representing an 'e' with an acute
38 /// accent). Since the last character is defined outside the scope of ASCII,
39 /// it will not get mapped to an uppercase variant, resulting in `"CAF\u{e9}"`.
41 /// [combining character]: https://en.wikipedia.org/wiki/Combining_character
42 #[stable(feature = "rust1", since = "1.0.0")]
44 /// Container type for copied ASCII characters.
45 #[stable(feature = "rust1", since = "1.0.0")]
48 /// Checks if the value is within the ASCII range.
53 /// use std::ascii::AsciiExt;
58 /// assert!(ascii.is_ascii());
59 /// assert!(!utf8.is_ascii());
61 #[stable(feature = "rust1", since = "1.0.0")]
62 fn is_ascii(&self) -> bool
;
64 /// Makes a copy of the string in ASCII upper case.
66 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
67 /// but non-ASCII letters are unchanged.
72 /// use std::ascii::AsciiExt;
77 /// assert_eq!('A', ascii.to_ascii_uppercase());
78 /// assert_eq!('❤', utf8.to_ascii_uppercase());
80 #[stable(feature = "rust1", since = "1.0.0")]
81 fn to_ascii_uppercase(&self) -> Self::Owned
;
83 /// Makes a copy of the string in ASCII lower case.
85 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
86 /// but non-ASCII letters are unchanged.
91 /// use std::ascii::AsciiExt;
96 /// assert_eq!('a', ascii.to_ascii_lowercase());
97 /// assert_eq!('❤', utf8.to_ascii_lowercase());
99 #[stable(feature = "rust1", since = "1.0.0")]
100 fn to_ascii_lowercase(&self) -> Self::Owned
;
102 /// Checks that two strings are an ASCII case-insensitive match.
104 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
105 /// but without allocating and copying temporary strings.
110 /// use std::ascii::AsciiExt;
112 /// let ascii1 = 'A';
113 /// let ascii2 = 'a';
114 /// let ascii3 = 'A';
115 /// let ascii4 = 'z';
117 /// assert!(ascii1.eq_ignore_ascii_case(&ascii2));
118 /// assert!(ascii1.eq_ignore_ascii_case(&ascii3));
119 /// assert!(!ascii1.eq_ignore_ascii_case(&ascii4));
121 #[stable(feature = "rust1", since = "1.0.0")]
122 fn eq_ignore_ascii_case(&self, other
: &Self) -> bool
;
124 /// Converts this type to its ASCII upper case equivalent in-place.
126 /// See `to_ascii_uppercase` for more information.
131 /// use std::ascii::AsciiExt;
133 /// let mut ascii = 'a';
135 /// ascii.make_ascii_uppercase();
137 /// assert_eq!('A', ascii);
139 #[stable(feature = "ascii", since = "1.9.0")]
140 fn make_ascii_uppercase(&mut self);
142 /// Converts this type to its ASCII lower case equivalent in-place.
144 /// See `to_ascii_lowercase` for more information.
149 /// use std::ascii::AsciiExt;
151 /// let mut ascii = 'A';
153 /// ascii.make_ascii_lowercase();
155 /// assert_eq!('a', ascii);
157 #[stable(feature = "ascii", since = "1.9.0")]
158 fn make_ascii_lowercase(&mut self);
161 #[stable(feature = "rust1", since = "1.0.0")]
162 impl AsciiExt
for str {
166 fn is_ascii(&self) -> bool
{
167 self.bytes().all(|b
| b
.is_ascii())
171 fn to_ascii_uppercase(&self) -> String
{
172 let mut bytes
= self.as_bytes().to_vec();
173 bytes
.make_ascii_uppercase();
174 // make_ascii_uppercase() preserves the UTF-8 invariant.
175 unsafe { String::from_utf8_unchecked(bytes) }
179 fn to_ascii_lowercase(&self) -> String
{
180 let mut bytes
= self.as_bytes().to_vec();
181 bytes
.make_ascii_lowercase();
182 // make_ascii_lowercase() preserves the UTF-8 invariant.
183 unsafe { String::from_utf8_unchecked(bytes) }
187 fn eq_ignore_ascii_case(&self, other
: &str) -> bool
{
188 self.as_bytes().eq_ignore_ascii_case(other
.as_bytes())
191 fn make_ascii_uppercase(&mut self) {
192 let me
: &mut [u8] = unsafe { mem::transmute(self) }
;
193 me
.make_ascii_uppercase()
196 fn make_ascii_lowercase(&mut self) {
197 let me
: &mut [u8] = unsafe { mem::transmute(self) }
;
198 me
.make_ascii_lowercase()
202 #[stable(feature = "rust1", since = "1.0.0")]
203 impl AsciiExt
for [u8] {
204 type Owned
= Vec
<u8>;
206 fn is_ascii(&self) -> bool
{
207 self.iter().all(|b
| b
.is_ascii())
211 fn to_ascii_uppercase(&self) -> Vec
<u8> {
212 let mut me
= self.to_vec();
213 me
.make_ascii_uppercase();
218 fn to_ascii_lowercase(&self) -> Vec
<u8> {
219 let mut me
= self.to_vec();
220 me
.make_ascii_lowercase();
225 fn eq_ignore_ascii_case(&self, other
: &[u8]) -> bool
{
226 self.len() == other
.len() &&
227 self.iter().zip(other
).all(|(a
, b
)| {
228 a
.eq_ignore_ascii_case(b
)
232 fn make_ascii_uppercase(&mut self) {
234 byte
.make_ascii_uppercase();
238 fn make_ascii_lowercase(&mut self) {
240 byte
.make_ascii_lowercase();
245 #[stable(feature = "rust1", since = "1.0.0")]
246 impl AsciiExt
for u8 {
// ASCII values are 0..=127, so a byte is ASCII exactly when bit 7 (value 128) is clear.
249 fn is_ascii(&self) -> bool { *self & 128 == 0 }
// Table lookup into ASCII_UPPERCASE_MAP (declared as [u8; 256]). A u8 cast to
// usize is at most 255, so the index can never be out of bounds.
251 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
// Table lookup into ASCII_LOWERCASE_MAP (declared as [u8; 256]). A u8 cast to
// usize is at most 255, so the index can never be out of bounds.
253 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
255 fn eq_ignore_ascii_case(&self, other
: &u8) -> bool
{
256 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
// In-place variant: overwrites the byte with the value returned by
// to_ascii_uppercase().
259 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
// In-place variant: overwrites the byte with the value returned by
// to_ascii_lowercase().
261 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
264 #[stable(feature = "rust1", since = "1.0.0")]
265 impl AsciiExt
for char {
268 fn is_ascii(&self) -> bool
{
273 fn to_ascii_uppercase(&self) -> char {
275 (*self as u8).to_ascii_uppercase() as char
282 fn to_ascii_lowercase(&self) -> char {
284 (*self as u8).to_ascii_lowercase() as char
291 fn eq_ignore_ascii_case(&self, other
: &char) -> bool
{
292 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
// In-place variant: replaces the char with the value returned by
// to_ascii_uppercase().
296 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
// In-place variant: replaces the char with the value returned by
// to_ascii_lowercase().
298 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
301 /// An iterator over the escaped version of a byte, constructed via
302 /// `std::ascii::escape_default`.
303 #[stable(feature = "rust1", since = "1.0.0")]
304 pub struct EscapeDefault
{
309 /// Returns an iterator that produces an escaped version of a `u8`.
311 /// The default is chosen with a bias toward producing literals that are
312 /// legal in a variety of languages, including C++11 and similar C-family
313 /// languages. The exact rules are:
315 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
316 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
317 /// - Any other chars in the range [0x20,0x7e] are not escaped.
318 /// - Any other chars are given hex escapes of the form '\xNN'.
319 /// - Unicode escapes are never generated by this function.
326 /// let escaped = ascii::escape_default(b'0').next().unwrap();
327 /// assert_eq!(b'0', escaped);
329 /// let mut escaped = ascii::escape_default(b'\t');
331 /// assert_eq!(b'\\', escaped.next().unwrap());
332 /// assert_eq!(b't', escaped.next().unwrap());
334 #[stable(feature = "rust1", since = "1.0.0")]
335 pub fn escape_default(c
: u8) -> EscapeDefault
{
336 let (data
, len
) = match c
{
337 b'
\t'
=> ([b'
\\'
, b't'
, 0, 0], 2),
338 b'
\r'
=> ([b'
\\'
, b'r'
, 0, 0], 2),
339 b'
\n'
=> ([b'
\\'
, b'n'
, 0, 0], 2),
340 b'
\\'
=> ([b'
\\'
, b'
\\'
, 0, 0], 2),
341 b'
\''
=> ([b'
\\'
, b'
\''
, 0, 0], 2),
342 b'
"' => ([b'\\', b'"'
, 0, 0], 2),
343 b'
\x20'
... b'
\x7e'
=> ([c
, 0, 0, 0], 1),
344 _
=> ([b'
\\'
, b'x'
, hexify(c
>> 4), hexify(c
& 0xf)], 4),
347 return EscapeDefault { range: (0.. len), data: data }
;
349 fn hexify(b
: u8) -> u8 {
357 #[stable(feature = "rust1", since = "1.0.0")]
358 impl Iterator
for EscapeDefault
{
// Takes the next index from the front of `range` and yields the byte stored at
// that position in `data`; yields None once `range` is exhausted.
360 fn next(&mut self) -> Option
<u8> { self.range.next().map(|i| self.data[i]) }
// Forwards to the size_hint of the underlying `range`, i.e. the number of
// indices that have not been consumed yet.
361 fn size_hint(&self) -> (usize, Option
<usize>) { self.range.size_hint() }
363 #[stable(feature = "rust1", since = "1.0.0")]
364 impl DoubleEndedIterator
for EscapeDefault
{
365 fn next_back(&mut self) -> Option
<u8> {
366 self.range
.next_back().map(|i
| self.data
[i
])
369 #[stable(feature = "rust1", since = "1.0.0")]
// Empty impl: no methods are overridden here, so the trait's provided defaults
// apply.
370 impl ExactSizeIterator
for EscapeDefault {}
372 static ASCII_LOWERCASE_MAP
: [u8; 256] = [
373 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
374 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
375 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
376 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
377 b' '
, b'
!'
, b'
"', b'#', b'$', b'%', b'&', b'\'',
378 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
379 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
380 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
383 b'a', b'b', b'c', b'd', b'e', b'f', b'g',
384 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
385 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
388 b'[', b'\\', b']', b'^', b'_',
389 b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
390 b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
391 b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
392 b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
393 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
394 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
395 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
396 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
397 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
398 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
399 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
400 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
401 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
402 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
403 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
404 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
405 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
406 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
407 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
408 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
411 static ASCII_UPPERCASE_MAP: [u8; 256] = [
412 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
413 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
414 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
415 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
416 b' ', b'!', b'"'
, b'
#', b'$', b'%', b'&', b'\'',
417 b'
('
, b'
)'
, b'
*'
, b'
+'
, b'
,'
, b'
-'
, b'
.'
, b'
/'
,
418 b'
0'
, b'
1'
, b'
2'
, b'
3'
, b'
4'
, b'
5'
, b'
6'
, b'
7'
,
419 b'
8'
, b'
9'
, b'
:'
, b'
;'
, b'
<'
, b'
='
, b'
>'
, b'?'
,
420 b'@'
, b'A'
, b'B'
, b'C'
, b'D'
, b'E'
, b'F'
, b'G'
,
421 b'H'
, b'I'
, b'J'
, b'K'
, b'L'
, b'M'
, b'N'
, b'O'
,
422 b'P'
, b'Q'
, b'R'
, b'S'
, b'T'
, b'U'
, b'V'
, b'W'
,
423 b'X'
, b'Y'
, b'Z'
, b'
['
, b'
\\'
, b'
]'
, b'
^', b'_'
,
426 b'A'
, b'B'
, b'C'
, b'D'
, b'E'
, b'F'
, b'G'
,
427 b'H'
, b'I'
, b'J'
, b'K'
, b'L'
, b'M'
, b'N'
, b'O'
,
428 b'P'
, b'Q'
, b'R'
, b'S'
, b'T'
, b'U'
, b'V'
, b'W'
,
431 b'{', b'|', b'}'
, b'
~'
, 0x7f,
432 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
433 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
434 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
435 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
436 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
437 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
438 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
439 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
440 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
441 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
442 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
443 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
444 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
445 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
446 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
447 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
459 assert
!(b
"".is_ascii());
460 assert
!(b
"banana\0\x7F".is_ascii());
461 assert
!(b
"banana\0\x7F".iter().all(|b
| b
.is_ascii()));
462 assert
!(!b
"Vi\xe1\xbb\x87t Nam".is_ascii());
463 assert
!(!b
"Vi\xe1\xbb\x87t Nam".iter().all(|b
| b
.is_ascii()));
464 assert
!(!b
"\xe1\xbb\x87".iter().any(|b
| b
.is_ascii()));
466 assert
!("".is_ascii());
467 assert
!("banana\0\u{7F}".is_ascii());
468 assert
!("banana\0\u{7F}".chars().all(|c
| c
.is_ascii()));
469 assert
!(!"ประเทศไทย中华Việt Nam".chars().all(|c
| c
.is_ascii()));
470 assert
!(!"ประเทศไทย中华ệ ".chars().any(|c
| c
.is_ascii()));
474 fn test_to_ascii_uppercase() {
475 assert_eq
!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
476 assert_eq
!("hıKß".to_ascii_uppercase(), "HıKß");
479 let upper
= if 'a'
as u32 <= i
&& i
<= 'z'
as u32 { i + 'A' as u32 - 'a' as u32 }
481 assert_eq
!((from_u32(i
).unwrap()).to_string().to_ascii_uppercase(),
482 (from_u32(upper
).unwrap()).to_string());
487 fn test_to_ascii_lowercase() {
488 assert_eq
!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
489 // Dotted capital I, Kelvin sign, Sharp S.
490 assert_eq
!("HİKß".to_ascii_lowercase(), "hİKß");
493 let lower
= if 'A'
as u32 <= i
&& i
<= 'Z'
as u32 { i + 'a' as u32 - 'A' as u32 }
495 assert_eq
!((from_u32(i
).unwrap()).to_string().to_ascii_lowercase(),
496 (from_u32(lower
).unwrap()).to_string());
501 fn test_make_ascii_lower_case() {
503 ($from
: expr
, $to
: expr
) => {
506 x
.make_ascii_lowercase();
518 test
!(b
"H\xc3\x89".to_vec(), b
"h\xc3\x89");
519 test
!("HİKß".to_string(), "hİKß");
524 fn test_make_ascii_upper_case() {
526 ($from
: expr
, $to
: expr
) => {
529 x
.make_ascii_uppercase();
541 test
!(b
"h\xc3\xa9".to_vec(), b
"H\xc3\xa9");
542 test
!("hıKß".to_string(), "HıKß");
544 let mut x
= "Hello".to_string();
545 x
[..3].make_ascii_uppercase(); // Test IndexMut on String.
546 assert_eq
!(x
, "HELlo")
550 fn test_eq_ignore_ascii_case() {
551 assert
!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
552 assert
!(!"Ürl".eq_ignore_ascii_case("ürl"));
553 // Dotted capital I, Kelvin sign, Sharp S.
554 assert
!("HİKß".eq_ignore_ascii_case("hİKß"));
555 assert
!(!"İ".eq_ignore_ascii_case("i"));
556 assert
!(!"K".eq_ignore_ascii_case("k"));
557 assert
!(!"ß".eq_ignore_ascii_case("s"));
560 let lower
= if 'A'
as u32 <= i
&& i
<= 'Z'
as u32 { i + 'a' as u32 - 'A' as u32 }
562 assert
!((from_u32(i
).unwrap()).to_string().eq_ignore_ascii_case(
563 &from_u32(lower
).unwrap().to_string()));
568 fn inference_works() {
569 let x
= "a".to_string();
570 x
.eq_ignore_ascii_case("A");