1 // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Operations on ASCII strings and characters
13 #![stable(feature = "rust1", since = "1.0.0")]
20 /// Extension methods for ASCII-subset only operations on owned strings
21 #[unstable(feature = "std_misc",
22 reason
= "would prefer to do this in a more general way")]
23 pub trait OwnedAsciiExt
{
24 /// Converts the string to ASCII upper case:
25 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
26 /// but non-ASCII letters are unchanged.
27 fn into_ascii_uppercase(self) -> Self;
29 /// Converts the string to ASCII lower case:
30 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
31 /// but non-ASCII letters are unchanged.
32 fn into_ascii_lowercase(self) -> Self;
35 /// Extension methods for ASCII-subset only operations on string slices.
36 #[stable(feature = "rust1", since = "1.0.0")]
38 /// Container type for copied ASCII characters.
39 #[stable(feature = "rust1", since = "1.0.0")]
42 /// Checks if within the ASCII range.
47 /// use std::ascii::AsciiExt;
52 /// assert_eq!(true, ascii.is_ascii());
53 /// assert_eq!(false, utf8.is_ascii())
55 #[stable(feature = "rust1", since = "1.0.0")]
56 fn is_ascii(&self) -> bool
;
58 /// Makes a copy of the string in ASCII upper case.
60 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
61 /// but non-ASCII letters are unchanged.
66 /// use std::ascii::AsciiExt;
71 /// assert_eq!('A', ascii.to_ascii_uppercase());
72 /// assert_eq!('❤', utf8.to_ascii_uppercase());
74 #[stable(feature = "rust1", since = "1.0.0")]
75 fn to_ascii_uppercase(&self) -> Self::Owned
;
77 /// Makes a copy of the string in ASCII lower case.
79 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
80 /// but non-ASCII letters are unchanged.
85 /// use std::ascii::AsciiExt;
90 /// assert_eq!('a', ascii.to_ascii_lowercase());
91 /// assert_eq!('❤', utf8.to_ascii_lowercase());
93 #[stable(feature = "rust1", since = "1.0.0")]
94 fn to_ascii_lowercase(&self) -> Self::Owned
;
96 /// Checks that two strings are an ASCII case-insensitive match.
98 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
99 /// but without allocating and copying temporary strings.
104 /// use std::ascii::AsciiExt;
106 /// let ascii1 = 'A';
107 /// let ascii2 = 'a';
108 /// let ascii3 = 'A';
109 /// let ascii4 = 'z';
111 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii2));
112 /// assert_eq!(true, ascii1.eq_ignore_ascii_case(&ascii3));
113 /// assert_eq!(false, ascii1.eq_ignore_ascii_case(&ascii4));
115 #[stable(feature = "rust1", since = "1.0.0")]
116 fn eq_ignore_ascii_case(&self, other
: &Self) -> bool
;
118 /// Converts this type to its ASCII upper case equivalent in-place.
120 /// See `to_ascii_uppercase` for more information.
125 /// # #![feature(ascii)]
126 /// use std::ascii::AsciiExt;
128 /// let mut ascii = 'a';
130 /// ascii.make_ascii_uppercase();
132 /// assert_eq!('A', ascii);
134 #[unstable(feature = "ascii")]
135 fn make_ascii_uppercase(&mut self);
137 /// Converts this type to its ASCII lower case equivalent in-place.
139 /// See `to_ascii_lowercase` for more information.
144 /// # #![feature(ascii)]
145 /// use std::ascii::AsciiExt;
147 /// let mut ascii = 'A';
149 /// ascii.make_ascii_lowercase();
151 /// assert_eq!('a', ascii);
153 #[unstable(feature = "ascii")]
154 fn make_ascii_lowercase(&mut self);
157 #[stable(feature = "rust1", since = "1.0.0")]
158 impl AsciiExt
for str {
162 fn is_ascii(&self) -> bool
{
163 self.bytes().all(|b
| b
.is_ascii())
167 fn to_ascii_uppercase(&self) -> String
{
168 self.to_string().into_ascii_uppercase()
172 fn to_ascii_lowercase(&self) -> String
{
173 self.to_string().into_ascii_lowercase()
177 fn eq_ignore_ascii_case(&self, other
: &str) -> bool
{
178 self.as_bytes().eq_ignore_ascii_case(other
.as_bytes())
181 fn make_ascii_uppercase(&mut self) {
182 let me
: &mut [u8] = unsafe { mem::transmute(self) }
;
183 me
.make_ascii_uppercase()
186 fn make_ascii_lowercase(&mut self) {
187 let me
: &mut [u8] = unsafe { mem::transmute(self) }
;
188 me
.make_ascii_lowercase()
192 #[unstable(feature = "std_misc",
193 reason
= "would prefer to do this in a more general way")]
194 impl OwnedAsciiExt
for String
{
196 fn into_ascii_uppercase(self) -> String
{
197 // Vec<u8>::into_ascii_uppercase() preserves the UTF-8 invariant.
198 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_uppercase()) }
202 fn into_ascii_lowercase(self) -> String
{
203 // Vec<u8>::into_ascii_lowercase() preserves the UTF-8 invariant.
204 unsafe { String::from_utf8_unchecked(self.into_bytes().into_ascii_lowercase()) }
208 #[stable(feature = "rust1", since = "1.0.0")]
209 impl AsciiExt
for [u8] {
210 type Owned
= Vec
<u8>;
212 fn is_ascii(&self) -> bool
{
213 self.iter().all(|b
| b
.is_ascii())
217 fn to_ascii_uppercase(&self) -> Vec
<u8> {
218 self.to_vec().into_ascii_uppercase()
222 fn to_ascii_lowercase(&self) -> Vec
<u8> {
223 self.to_vec().into_ascii_lowercase()
227 fn eq_ignore_ascii_case(&self, other
: &[u8]) -> bool
{
228 self.len() == other
.len() &&
229 self.iter().zip(other
.iter()).all(|(a
, b
)| {
230 a
.eq_ignore_ascii_case(b
)
234 fn make_ascii_uppercase(&mut self) {
236 byte
.make_ascii_uppercase();
240 fn make_ascii_lowercase(&mut self) {
242 byte
.make_ascii_lowercase();
247 #[unstable(feature = "std_misc",
248 reason
= "would prefer to do this in a more general way")]
249 impl OwnedAsciiExt
for Vec
<u8> {
251 fn into_ascii_uppercase(mut self) -> Vec
<u8> {
252 self.make_ascii_uppercase();
257 fn into_ascii_lowercase(mut self) -> Vec
<u8> {
258 self.make_ascii_lowercase();
263 #[stable(feature = "rust1", since = "1.0.0")]
264 impl AsciiExt
for u8 {
267 fn is_ascii(&self) -> bool { *self & 128 == 0 }
269 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
271 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
273 fn eq_ignore_ascii_case(&self, other
: &u8) -> bool
{
274 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
277 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
279 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
282 #[stable(feature = "rust1", since = "1.0.0")]
283 impl AsciiExt
for char {
286 fn is_ascii(&self) -> bool
{
291 fn to_ascii_uppercase(&self) -> char {
293 (*self as u8).to_ascii_uppercase() as char
300 fn to_ascii_lowercase(&self) -> char {
302 (*self as u8).to_ascii_lowercase() as char
309 fn eq_ignore_ascii_case(&self, other
: &char) -> bool
{
310 self.to_ascii_lowercase() == other
.to_ascii_lowercase()
314 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
316 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
319 /// An iterator over the escaped version of a byte, constructed via
320 /// `std::ascii::escape_default`.
321 #[stable(feature = "rust1", since = "1.0.0")]
322 pub struct EscapeDefault
{
327 /// Returns an iterator that produces an escaped version of a `u8`.
329 /// The default is chosen with a bias toward producing literals that are
330 /// legal in a variety of languages, including C++11 and similar C-family
331 /// languages. The exact rules are:
333 /// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
334 /// - Single-quote, double-quote and backslash chars are backslash-escaped.
335 /// - Any other chars in the range [0x20,0x7e] are not escaped.
336 /// - Any other chars are given hex escapes of the form '\xNN'.
337 /// - Unicode escapes are never generated by this function.
344 /// let escaped = ascii::escape_default(b'0').next().unwrap();
345 /// assert_eq!(b'0', escaped);
347 /// let mut escaped = ascii::escape_default(b'\t');
349 /// assert_eq!(b'\\', escaped.next().unwrap());
350 /// assert_eq!(b't', escaped.next().unwrap());
352 #[stable(feature = "rust1", since = "1.0.0")]
353 pub fn escape_default(c
: u8) -> EscapeDefault
{
354 let (data
, len
) = match c
{
355 b'
\t'
=> ([b'
\\'
, b't'
, 0, 0], 2),
356 b'
\r'
=> ([b'
\\'
, b'r'
, 0, 0], 2),
357 b'
\n'
=> ([b'
\\'
, b'n'
, 0, 0], 2),
358 b'
\\'
=> ([b'
\\'
, b'
\\'
, 0, 0], 2),
359 b'
\''
=> ([b'
\\'
, b'
\''
, 0, 0], 2),
360 b'
"' => ([b'\\', b'"'
, 0, 0], 2),
361 b'
\x20'
... b'
\x7e'
=> ([c
, 0, 0, 0], 1),
362 _
=> ([b'
\\'
, b'x'
, hexify(c
>> 4), hexify(c
& 0xf)], 4),
365 return EscapeDefault { range: (0.. len), data: data }
;
367 fn hexify(b
: u8) -> u8 {
375 #[stable(feature = "rust1", since = "1.0.0")]
376 impl Iterator
for EscapeDefault
{
378 fn next(&mut self) -> Option
<u8> { self.range.next().map(|i| self.data[i]) }
379 fn size_hint(&self) -> (usize, Option
<usize>) { self.range.size_hint() }
381 #[stable(feature = "rust1", since = "1.0.0")]
382 impl DoubleEndedIterator
for EscapeDefault
{
383 fn next_back(&mut self) -> Option
<u8> {
384 self.range
.next_back().map(|i
| self.data
[i
])
387 #[stable(feature = "rust1", since = "1.0.0")]
388 impl ExactSizeIterator
for EscapeDefault {}
// Lookup table for ASCII lower-casing, indexed by byte value.
//
// Every entry is the identity mapping except the rows for 0x41..=0x5a
// ('A'..='Z'), which hold the matching lower-case letters. Bytes >= 0x80 map
// to themselves.
static ASCII_LOWERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@',

          b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z',

                      b'[', b'\\', b']', b'^', b'_',
    b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
// Lookup table for ASCII upper-casing, indexed by byte value.
//
// Every entry is the identity mapping except the rows for 0x61..=0x7a
// ('a'..='z'), which hold the matching upper-case letters. Bytes >= 0x80 map
// to themselves.
static ASCII_UPPERCASE_MAP: [u8; 256] = [
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
    b'`',

          b'A', b'B', b'C', b'D', b'E', b'F', b'G',
    b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
    b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
    b'X', b'Y', b'Z',

                      b'{', b'|', b'}', b'~', 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
477 assert
!("banana".chars().all(|c
| c
.is_ascii()));
478 assert
!(!"ประเทศไทย中华Việt Nam".chars().all(|c
| c
.is_ascii()));
// `is_ascii` on string slices: the empty string and plain ASCII pass, a
// string holding U+2009 (a non-ASCII scalar) does not.
#[test]
fn test_ascii_vec() {
    assert!("".is_ascii());
    assert!("a".is_ascii());
    assert!(!"\u{2009}".is_ascii());
}
// `to_ascii_uppercase` on string slices: ASCII letters are upper-cased and
// non-ASCII letters are left alone.
#[test]
fn test_to_ascii_uppercase() {
    assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
    assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");

    // Compare against a hand-computed mapping for a run of scalar values.
    // NOTE(review): the loop header is not visible in this excerpt; 0..501
    // covers all of ASCII plus some non-ASCII scalars -- confirm the bound.
    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// `to_ascii_lowercase` on string slices: ASCII letters are lower-cased and
// non-ASCII letters are left alone.
#[test]
fn test_to_ascii_lowercase() {
    assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign, Sharp S.
    assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");

    // Compare against a hand-computed mapping for a run of scalar values.
    // NOTE(review): the loop header is not visible in this excerpt; 0..501
    // covers all of ASCII plus some non-ASCII scalars -- confirm the bound.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
// `into_ascii_uppercase` on owned strings: same expectations as the
// borrowing variant, but the input `String` is consumed.
#[test]
fn test_into_ascii_uppercase() {
    assert_eq!(("url()URL()uRl()ürl".to_string()).into_ascii_uppercase(),
               "URL()URL()URL()üRL".to_string());
    assert_eq!(("hıKß".to_string()).into_ascii_uppercase(), "HıKß");

    // Compare against a hand-computed mapping for a run of scalar values.
    // NOTE(review): the loop header is not visible in this excerpt; 0..501
    // covers all of ASCII plus some non-ASCII scalars -- confirm the bound.
    for i in 0..501 {
        let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_uppercase(),
                   (from_u32(upper).unwrap()).to_string());
    }
}
// `into_ascii_lowercase` on owned strings: same expectations as the
// borrowing variant, but the input `String` is consumed.
#[test]
fn test_into_ascii_lowercase() {
    assert_eq!(("url()URL()uRl()Ürl".to_string()).into_ascii_lowercase(),
               "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign, Sharp S.
    assert_eq!(("HİKß".to_string()).into_ascii_lowercase(), "hİKß");

    // Compare against a hand-computed mapping for a run of scalar values.
    // NOTE(review): the loop header is not visible in this excerpt; 0..501
    // covers all of ASCII plus some non-ASCII scalars -- confirm the bound.
    for i in 0..501 {
        let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
                    else { i };
        assert_eq!((from_u32(i).unwrap()).to_string().into_ascii_lowercase(),
                   (from_u32(lower).unwrap()).to_string());
    }
}
545 fn test_eq_ignore_ascii_case() {
546 assert
!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
547 assert
!(!"Ürl".eq_ignore_ascii_case("ürl"));
548 // Dotted capital I, Kelvin sign, Sharp S.
549 assert
!("HİKß".eq_ignore_ascii_case("hİKß"));
550 assert
!(!"İ".eq_ignore_ascii_case("i"));
551 assert
!(!"K".eq_ignore_ascii_case("k"));
552 assert
!(!"ß".eq_ignore_ascii_case("s"));
555 let lower
= if 'A'
as u32 <= i
&& i
<= 'Z'
as u32 { i + 'a' as u32 - 'A' as u32 }
557 assert
!((from_u32(i
).unwrap()).to_string().eq_ignore_ascii_case(
558 &from_u32(lower
).unwrap().to_string()));