]> git.proxmox.com Git - rustc.git/blame - src/libstd/ascii.rs
New upstream version 1.17.0+dfsg1
[rustc.git] / src / libstd / ascii.rs
CommitLineData
1a4d82fc
JJ
1// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
1a4d82fc 10
54a0048b 11//! Operations on ASCII strings and characters.
1a4d82fc 12
85aaf69f 13#![stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 14
32a655c1 15use fmt;
85aaf69f 16use mem;
e9174d1e 17use ops::Range;
9e0c209e 18use iter::FusedIterator;
1a4d82fc 19
c34b1796 20/// Extension methods for ASCII-subset only operations on string slices.
7453a54e
SL
21///
22/// Be aware that operations on seemingly non-ASCII characters can sometimes
23/// have unexpected results. Consider this example:
24///
25/// ```
26/// use std::ascii::AsciiExt;
27///
28/// assert_eq!("café".to_ascii_uppercase(), "CAFÉ");
29/// assert_eq!("café".to_ascii_uppercase(), "CAFé");
30/// ```
31///
32/// In the first example, the lowercased string is represented `"cafe\u{301}"`
33/// (the last character is an acute accent [combining character]). Unlike the
34/// other characters in the string, the combining character will not get mapped
35/// to an uppercase variant, resulting in `"CAFE\u{301}"`. In the second
36/// example, the lowercased string is represented `"caf\u{e9}"` (the last
37/// character is a single Unicode character representing an 'e' with an acute
38/// accent). Since the last character is defined outside the scope of ASCII,
39/// it will not get mapped to an uppercase variant, resulting in `"CAF\u{e9}"`.
40///
41/// [combining character]: https://en.wikipedia.org/wiki/Combining_character
85aaf69f
SL
42#[stable(feature = "rust1", since = "1.0.0")]
43pub trait AsciiExt {
c34b1796 44 /// Container type for copied ASCII characters.
85aaf69f
SL
45 #[stable(feature = "rust1", since = "1.0.0")]
46 type Owned;
47
7453a54e 48 /// Checks if the value is within the ASCII range.
c34b1796
AL
49 ///
50 /// # Examples
51 ///
52 /// ```
53 /// use std::ascii::AsciiExt;
54 ///
55 /// let ascii = 'a';
56 /// let utf8 = '❤';
57 ///
7453a54e
SL
58 /// assert!(ascii.is_ascii());
59 /// assert!(!utf8.is_ascii());
c34b1796 60 /// ```
85aaf69f 61 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc
JJ
62 fn is_ascii(&self) -> bool;
63
c34b1796
AL
64 /// Makes a copy of the string in ASCII upper case.
65 ///
1a4d82fc
JJ
66 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
67 /// but non-ASCII letters are unchanged.
c34b1796 68 ///
32a655c1
SL
69 /// To uppercase the string in-place, use [`make_ascii_uppercase`].
70 ///
71 /// To uppercase ASCII characters in addition to non-ASCII characters, use
72 /// [`str::to_uppercase`].
73 ///
c34b1796
AL
74 /// # Examples
75 ///
76 /// ```
77 /// use std::ascii::AsciiExt;
78 ///
79 /// let ascii = 'a';
80 /// let utf8 = '❤';
81 ///
82 /// assert_eq!('A', ascii.to_ascii_uppercase());
83 /// assert_eq!('❤', utf8.to_ascii_uppercase());
84 /// ```
32a655c1
SL
85 ///
86 /// [`make_ascii_uppercase`]: #tymethod.make_ascii_uppercase
87 /// [`str::to_uppercase`]: ../primitive.str.html#method.to_uppercase
85aaf69f
SL
88 #[stable(feature = "rust1", since = "1.0.0")]
89 fn to_ascii_uppercase(&self) -> Self::Owned;
1a4d82fc 90
c34b1796
AL
91 /// Makes a copy of the string in ASCII lower case.
92 ///
1a4d82fc
JJ
93 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
94 /// but non-ASCII letters are unchanged.
c34b1796 95 ///
32a655c1
SL
96 /// To lowercase the string in-place, use [`make_ascii_lowercase`].
97 ///
98 /// To lowercase ASCII characters in addition to non-ASCII characters, use
99 /// [`str::to_lowercase`].
100 ///
c34b1796
AL
101 /// # Examples
102 ///
103 /// ```
104 /// use std::ascii::AsciiExt;
105 ///
106 /// let ascii = 'A';
107 /// let utf8 = '❤';
108 ///
109 /// assert_eq!('a', ascii.to_ascii_lowercase());
110 /// assert_eq!('❤', utf8.to_ascii_lowercase());
111 /// ```
32a655c1
SL
112 ///
113 /// [`make_ascii_lowercase`]: #tymethod.make_ascii_lowercase
114 /// [`str::to_lowercase`]: ../primitive.str.html#method.to_lowercase
85aaf69f
SL
115 #[stable(feature = "rust1", since = "1.0.0")]
116 fn to_ascii_lowercase(&self) -> Self::Owned;
1a4d82fc 117
9346a6ac 118 /// Checks that two strings are an ASCII case-insensitive match.
c34b1796 119 ///
1a4d82fc
JJ
120 /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`,
121 /// but without allocating and copying temporary strings.
c34b1796
AL
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// use std::ascii::AsciiExt;
127 ///
128 /// let ascii1 = 'A';
129 /// let ascii2 = 'a';
130 /// let ascii3 = 'A';
131 /// let ascii4 = 'z';
132 ///
7453a54e
SL
133 /// assert!(ascii1.eq_ignore_ascii_case(&ascii2));
134 /// assert!(ascii1.eq_ignore_ascii_case(&ascii3));
135 /// assert!(!ascii1.eq_ignore_ascii_case(&ascii4));
c34b1796 136 /// ```
85aaf69f 137 #[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 138 fn eq_ignore_ascii_case(&self, other: &Self) -> bool;
85aaf69f 139
9346a6ac 140 /// Converts this type to its ASCII upper case equivalent in-place.
85aaf69f 141 ///
32a655c1
SL
142 /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
143 /// but non-ASCII letters are unchanged.
144 ///
145 /// To return a new uppercased string without modifying the existing one, use
146 /// [`to_ascii_uppercase`].
c34b1796
AL
147 ///
148 /// # Examples
149 ///
150 /// ```
c34b1796
AL
151 /// use std::ascii::AsciiExt;
152 ///
153 /// let mut ascii = 'a';
154 ///
155 /// ascii.make_ascii_uppercase();
156 ///
157 /// assert_eq!('A', ascii);
158 /// ```
32a655c1
SL
159 ///
160 /// [`to_ascii_uppercase`]: #tymethod.to_ascii_uppercase
54a0048b 161 #[stable(feature = "ascii", since = "1.9.0")]
85aaf69f
SL
162 fn make_ascii_uppercase(&mut self);
163
9346a6ac 164 /// Converts this type to its ASCII lower case equivalent in-place.
85aaf69f 165 ///
32a655c1
SL
166 /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
167 /// but non-ASCII letters are unchanged.
168 ///
169 /// To return a new lowercased string without modifying the existing one, use
170 /// [`to_ascii_lowercase`].
c34b1796
AL
171 ///
172 /// # Examples
173 ///
174 /// ```
c34b1796
AL
175 /// use std::ascii::AsciiExt;
176 ///
177 /// let mut ascii = 'A';
178 ///
179 /// ascii.make_ascii_lowercase();
180 ///
181 /// assert_eq!('a', ascii);
182 /// ```
32a655c1
SL
183 ///
184 /// [`to_ascii_lowercase`]: #tymethod.to_ascii_lowercase
54a0048b 185 #[stable(feature = "ascii", since = "1.9.0")]
85aaf69f 186 fn make_ascii_lowercase(&mut self);
8bb4bdeb
XL
187
188 /// Checks if the value is an ASCII alphabetic character:
189 /// U+0041 'A' ... U+005A 'Z' or U+0061 'a' ... U+007A 'z'.
190 /// For strings, true if all characters in the string are
191 /// ASCII alphabetic.
192 ///
193 /// # Examples
194 ///
195 /// ```
196 /// #![feature(ascii_ctype)]
197 /// # #![allow(non_snake_case)]
198 /// use std::ascii::AsciiExt;
199 /// let A = 'A';
200 /// let G = 'G';
201 /// let a = 'a';
202 /// let g = 'g';
203 /// let zero = '0';
204 /// let percent = '%';
205 /// let space = ' ';
206 /// let lf = '\n';
207 /// let esc = '\u{001b}';
208 ///
209 /// assert!(A.is_ascii_alphabetic());
210 /// assert!(G.is_ascii_alphabetic());
211 /// assert!(a.is_ascii_alphabetic());
212 /// assert!(g.is_ascii_alphabetic());
213 /// assert!(!zero.is_ascii_alphabetic());
214 /// assert!(!percent.is_ascii_alphabetic());
215 /// assert!(!space.is_ascii_alphabetic());
216 /// assert!(!lf.is_ascii_alphabetic());
217 /// assert!(!esc.is_ascii_alphabetic());
218 /// ```
219 #[unstable(feature = "ascii_ctype", issue = "39658")]
220 fn is_ascii_alphabetic(&self) -> bool { unimplemented!(); }
221
222 /// Checks if the value is an ASCII uppercase character:
223 /// U+0041 'A' ... U+005A 'Z'.
224 /// For strings, true if all characters in the string are
225 /// ASCII uppercase.
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// #![feature(ascii_ctype)]
231 /// # #![allow(non_snake_case)]
232 /// use std::ascii::AsciiExt;
233 /// let A = 'A';
234 /// let G = 'G';
235 /// let a = 'a';
236 /// let g = 'g';
237 /// let zero = '0';
238 /// let percent = '%';
239 /// let space = ' ';
240 /// let lf = '\n';
241 /// let esc = '\u{001b}';
242 ///
243 /// assert!(A.is_ascii_uppercase());
244 /// assert!(G.is_ascii_uppercase());
245 /// assert!(!a.is_ascii_uppercase());
246 /// assert!(!g.is_ascii_uppercase());
247 /// assert!(!zero.is_ascii_uppercase());
248 /// assert!(!percent.is_ascii_uppercase());
249 /// assert!(!space.is_ascii_uppercase());
250 /// assert!(!lf.is_ascii_uppercase());
251 /// assert!(!esc.is_ascii_uppercase());
252 /// ```
253 #[unstable(feature = "ascii_ctype", issue = "39658")]
254 fn is_ascii_uppercase(&self) -> bool { unimplemented!(); }
255
256 /// Checks if the value is an ASCII lowercase character:
257 /// U+0061 'a' ... U+007A 'z'.
258 /// For strings, true if all characters in the string are
259 /// ASCII lowercase.
260 ///
261 /// # Examples
262 ///
263 /// ```
264 /// #![feature(ascii_ctype)]
265 /// # #![allow(non_snake_case)]
266 /// use std::ascii::AsciiExt;
267 /// let A = 'A';
268 /// let G = 'G';
269 /// let a = 'a';
270 /// let g = 'g';
271 /// let zero = '0';
272 /// let percent = '%';
273 /// let space = ' ';
274 /// let lf = '\n';
275 /// let esc = '\u{001b}';
276 ///
277 /// assert!(!A.is_ascii_lowercase());
278 /// assert!(!G.is_ascii_lowercase());
279 /// assert!(a.is_ascii_lowercase());
280 /// assert!(g.is_ascii_lowercase());
281 /// assert!(!zero.is_ascii_lowercase());
282 /// assert!(!percent.is_ascii_lowercase());
283 /// assert!(!space.is_ascii_lowercase());
284 /// assert!(!lf.is_ascii_lowercase());
285 /// assert!(!esc.is_ascii_lowercase());
286 /// ```
287 #[unstable(feature = "ascii_ctype", issue = "39658")]
288 fn is_ascii_lowercase(&self) -> bool { unimplemented!(); }
289
290 /// Checks if the value is an ASCII alphanumeric character:
291 /// U+0041 'A' ... U+005A 'Z', U+0061 'a' ... U+007A 'z', or
292 /// U+0030 '0' ... U+0039 '9'.
293 /// For strings, true if all characters in the string are
294 /// ASCII alphanumeric.
295 ///
296 /// # Examples
297 ///
298 /// ```
299 /// #![feature(ascii_ctype)]
300 /// # #![allow(non_snake_case)]
301 /// use std::ascii::AsciiExt;
302 /// let A = 'A';
303 /// let G = 'G';
304 /// let a = 'a';
305 /// let g = 'g';
306 /// let zero = '0';
307 /// let percent = '%';
308 /// let space = ' ';
309 /// let lf = '\n';
310 /// let esc = '\u{001b}';
311 ///
312 /// assert!(A.is_ascii_alphanumeric());
313 /// assert!(G.is_ascii_alphanumeric());
314 /// assert!(a.is_ascii_alphanumeric());
315 /// assert!(g.is_ascii_alphanumeric());
316 /// assert!(zero.is_ascii_alphanumeric());
317 /// assert!(!percent.is_ascii_alphanumeric());
318 /// assert!(!space.is_ascii_alphanumeric());
319 /// assert!(!lf.is_ascii_alphanumeric());
320 /// assert!(!esc.is_ascii_alphanumeric());
321 /// ```
322 #[unstable(feature = "ascii_ctype", issue = "39658")]
323 fn is_ascii_alphanumeric(&self) -> bool { unimplemented!(); }
324
325 /// Checks if the value is an ASCII decimal digit:
326 /// U+0030 '0' ... U+0039 '9'.
327 /// For strings, true if all characters in the string are
328 /// ASCII digits.
329 ///
330 /// # Examples
331 ///
332 /// ```
333 /// #![feature(ascii_ctype)]
334 /// # #![allow(non_snake_case)]
335 /// use std::ascii::AsciiExt;
336 /// let A = 'A';
337 /// let G = 'G';
338 /// let a = 'a';
339 /// let g = 'g';
340 /// let zero = '0';
341 /// let percent = '%';
342 /// let space = ' ';
343 /// let lf = '\n';
344 /// let esc = '\u{001b}';
345 ///
346 /// assert!(!A.is_ascii_digit());
347 /// assert!(!G.is_ascii_digit());
348 /// assert!(!a.is_ascii_digit());
349 /// assert!(!g.is_ascii_digit());
350 /// assert!(zero.is_ascii_digit());
351 /// assert!(!percent.is_ascii_digit());
352 /// assert!(!space.is_ascii_digit());
353 /// assert!(!lf.is_ascii_digit());
354 /// assert!(!esc.is_ascii_digit());
355 /// ```
356 #[unstable(feature = "ascii_ctype", issue = "39658")]
357 fn is_ascii_digit(&self) -> bool { unimplemented!(); }
358
359 /// Checks if the value is an ASCII hexadecimal digit:
360 /// U+0030 '0' ... U+0039 '9', U+0041 'A' ... U+0046 'F', or
361 /// U+0061 'a' ... U+0066 'f'.
362 /// For strings, true if all characters in the string are
363 /// ASCII hex digits.
364 ///
365 /// # Examples
366 ///
367 /// ```
368 /// #![feature(ascii_ctype)]
369 /// # #![allow(non_snake_case)]
370 /// use std::ascii::AsciiExt;
371 /// let A = 'A';
372 /// let G = 'G';
373 /// let a = 'a';
374 /// let g = 'g';
375 /// let zero = '0';
376 /// let percent = '%';
377 /// let space = ' ';
378 /// let lf = '\n';
379 /// let esc = '\u{001b}';
380 ///
381 /// assert!(A.is_ascii_hexdigit());
382 /// assert!(!G.is_ascii_hexdigit());
383 /// assert!(a.is_ascii_hexdigit());
384 /// assert!(!g.is_ascii_hexdigit());
385 /// assert!(zero.is_ascii_hexdigit());
386 /// assert!(!percent.is_ascii_hexdigit());
387 /// assert!(!space.is_ascii_hexdigit());
388 /// assert!(!lf.is_ascii_hexdigit());
389 /// assert!(!esc.is_ascii_hexdigit());
390 /// ```
391 #[unstable(feature = "ascii_ctype", issue = "39658")]
392 fn is_ascii_hexdigit(&self) -> bool { unimplemented!(); }
393
394 /// Checks if the value is an ASCII punctuation character:
395 /// U+0021 ... U+002F `! " # $ % & ' ( ) * + , - . /`
396 /// U+003A ... U+0040 `: ; < = > ? @`
397 /// U+005B ... U+0060 `[ \\ ] ^ _ \``
398 /// U+007B ... U+007E `{ | } ~`
399 /// For strings, true if all characters in the string are
400 /// ASCII punctuation.
401 ///
402 /// # Examples
403 ///
404 /// ```
405 /// #![feature(ascii_ctype)]
406 /// # #![allow(non_snake_case)]
407 /// use std::ascii::AsciiExt;
408 /// let A = 'A';
409 /// let G = 'G';
410 /// let a = 'a';
411 /// let g = 'g';
412 /// let zero = '0';
413 /// let percent = '%';
414 /// let space = ' ';
415 /// let lf = '\n';
416 /// let esc = '\u{001b}';
417 ///
418 /// assert!(!A.is_ascii_punctuation());
419 /// assert!(!G.is_ascii_punctuation());
420 /// assert!(!a.is_ascii_punctuation());
421 /// assert!(!g.is_ascii_punctuation());
422 /// assert!(!zero.is_ascii_punctuation());
423 /// assert!(percent.is_ascii_punctuation());
424 /// assert!(!space.is_ascii_punctuation());
425 /// assert!(!lf.is_ascii_punctuation());
426 /// assert!(!esc.is_ascii_punctuation());
427 /// ```
428 #[unstable(feature = "ascii_ctype", issue = "39658")]
429 fn is_ascii_punctuation(&self) -> bool { unimplemented!(); }
430
431 /// Checks if the value is an ASCII graphic character:
432 /// U+0021 '@' ... U+007E '~'.
433 /// For strings, true if all characters in the string are
434 /// ASCII punctuation.
435 ///
436 /// # Examples
437 ///
438 /// ```
439 /// #![feature(ascii_ctype)]
440 /// # #![allow(non_snake_case)]
441 /// use std::ascii::AsciiExt;
442 /// let A = 'A';
443 /// let G = 'G';
444 /// let a = 'a';
445 /// let g = 'g';
446 /// let zero = '0';
447 /// let percent = '%';
448 /// let space = ' ';
449 /// let lf = '\n';
450 /// let esc = '\u{001b}';
451 ///
452 /// assert!(A.is_ascii_graphic());
453 /// assert!(G.is_ascii_graphic());
454 /// assert!(a.is_ascii_graphic());
455 /// assert!(g.is_ascii_graphic());
456 /// assert!(zero.is_ascii_graphic());
457 /// assert!(percent.is_ascii_graphic());
458 /// assert!(!space.is_ascii_graphic());
459 /// assert!(!lf.is_ascii_graphic());
460 /// assert!(!esc.is_ascii_graphic());
461 /// ```
462 #[unstable(feature = "ascii_ctype", issue = "39658")]
463 fn is_ascii_graphic(&self) -> bool { unimplemented!(); }
464
465 /// Checks if the value is an ASCII whitespace character:
466 /// U+0020 SPACE, U+0009 HORIZONTAL TAB, U+000A LINE FEED,
467 /// U+000C FORM FEED, or U+000D CARRIAGE RETURN.
468 /// For strings, true if all characters in the string are
469 /// ASCII whitespace.
470 ///
471 /// Rust uses the WhatWG Infra Standard's [definition of ASCII
472 /// whitespace][infra-aw]. There are several other definitions in
473 /// wide use. For instance, [the POSIX locale][pct] includes
474 /// U+000B VERTICAL TAB as well as all the above characters,
475 /// but—from the very same specification—[the default rule for
476 /// "field splitting" in the Bourne shell][bfs] considers *only*
477 /// SPACE, HORIZONTAL TAB, and LINE FEED as whitespace.
478 ///
479 /// If you are writing a program that will process an existing
480 /// file format, check what that format's definition of whitespace is
481 /// before using this function.
482 ///
483 /// [infra-aw]: https://infra.spec.whatwg.org/#ascii-whitespace
484 /// [pct]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap07.html#tag_07_03_01
485 /// [bfs]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05
486 ///
487 /// # Examples
488 ///
489 /// ```
490 /// #![feature(ascii_ctype)]
491 /// # #![allow(non_snake_case)]
492 /// use std::ascii::AsciiExt;
493 /// let A = 'A';
494 /// let G = 'G';
495 /// let a = 'a';
496 /// let g = 'g';
497 /// let zero = '0';
498 /// let percent = '%';
499 /// let space = ' ';
500 /// let lf = '\n';
501 /// let esc = '\u{001b}';
502 ///
503 /// assert!(!A.is_ascii_whitespace());
504 /// assert!(!G.is_ascii_whitespace());
505 /// assert!(!a.is_ascii_whitespace());
506 /// assert!(!g.is_ascii_whitespace());
507 /// assert!(!zero.is_ascii_whitespace());
508 /// assert!(!percent.is_ascii_whitespace());
509 /// assert!(space.is_ascii_whitespace());
510 /// assert!(lf.is_ascii_whitespace());
511 /// assert!(!esc.is_ascii_whitespace());
512 /// ```
513 #[unstable(feature = "ascii_ctype", issue = "39658")]
514 fn is_ascii_whitespace(&self) -> bool { unimplemented!(); }
515
516 /// Checks if the value is an ASCII control character:
517 /// U+0000 NUL ... U+001F UNIT SEPARATOR, or U+007F DELETE.
518 /// Note that most ASCII whitespace characters are control
519 /// characters, but SPACE is not.
520 ///
521 /// # Examples
522 ///
523 /// ```
524 /// #![feature(ascii_ctype)]
525 /// # #![allow(non_snake_case)]
526 /// use std::ascii::AsciiExt;
527 /// let A = 'A';
528 /// let G = 'G';
529 /// let a = 'a';
530 /// let g = 'g';
531 /// let zero = '0';
532 /// let percent = '%';
533 /// let space = ' ';
534 /// let lf = '\n';
535 /// let esc = '\u{001b}';
536 ///
537 /// assert!(!A.is_ascii_control());
538 /// assert!(!G.is_ascii_control());
539 /// assert!(!a.is_ascii_control());
540 /// assert!(!g.is_ascii_control());
541 /// assert!(!zero.is_ascii_control());
542 /// assert!(!percent.is_ascii_control());
543 /// assert!(!space.is_ascii_control());
544 /// assert!(lf.is_ascii_control());
545 /// assert!(esc.is_ascii_control());
546 /// ```
547 #[unstable(feature = "ascii_ctype", issue = "39658")]
548 fn is_ascii_control(&self) -> bool { unimplemented!(); }
1a4d82fc
JJ
549}
550
85aaf69f
SL
551#[stable(feature = "rust1", since = "1.0.0")]
552impl AsciiExt for str {
553 type Owned = String;
554
1a4d82fc
JJ
555 #[inline]
556 fn is_ascii(&self) -> bool {
557 self.bytes().all(|b| b.is_ascii())
558 }
559
560 #[inline]
561 fn to_ascii_uppercase(&self) -> String {
e9174d1e
SL
562 let mut bytes = self.as_bytes().to_vec();
563 bytes.make_ascii_uppercase();
564 // make_ascii_uppercase() preserves the UTF-8 invariant.
565 unsafe { String::from_utf8_unchecked(bytes) }
1a4d82fc
JJ
566 }
567
568 #[inline]
569 fn to_ascii_lowercase(&self) -> String {
e9174d1e
SL
570 let mut bytes = self.as_bytes().to_vec();
571 bytes.make_ascii_lowercase();
572 // make_ascii_uppercase() preserves the UTF-8 invariant.
573 unsafe { String::from_utf8_unchecked(bytes) }
1a4d82fc
JJ
574 }
575
576 #[inline]
577 fn eq_ignore_ascii_case(&self, other: &str) -> bool {
578 self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
579 }
85aaf69f
SL
580
581 fn make_ascii_uppercase(&mut self) {
582 let me: &mut [u8] = unsafe { mem::transmute(self) };
583 me.make_ascii_uppercase()
584 }
585
586 fn make_ascii_lowercase(&mut self) {
587 let me: &mut [u8] = unsafe { mem::transmute(self) };
588 me.make_ascii_lowercase()
589 }
8bb4bdeb
XL
590
591 #[inline]
592 fn is_ascii_alphabetic(&self) -> bool {
593 self.bytes().all(|b| b.is_ascii_alphabetic())
594 }
595
596 #[inline]
597 fn is_ascii_uppercase(&self) -> bool {
598 self.bytes().all(|b| b.is_ascii_uppercase())
599 }
600
601 #[inline]
602 fn is_ascii_lowercase(&self) -> bool {
603 self.bytes().all(|b| b.is_ascii_lowercase())
604 }
605
606 #[inline]
607 fn is_ascii_alphanumeric(&self) -> bool {
608 self.bytes().all(|b| b.is_ascii_alphanumeric())
609 }
610
611 #[inline]
612 fn is_ascii_digit(&self) -> bool {
613 self.bytes().all(|b| b.is_ascii_digit())
614 }
615
616 #[inline]
617 fn is_ascii_hexdigit(&self) -> bool {
618 self.bytes().all(|b| b.is_ascii_hexdigit())
619 }
620
621 #[inline]
622 fn is_ascii_punctuation(&self) -> bool {
623 self.bytes().all(|b| b.is_ascii_punctuation())
624 }
625
626 #[inline]
627 fn is_ascii_graphic(&self) -> bool {
628 self.bytes().all(|b| b.is_ascii_graphic())
629 }
630
631 #[inline]
632 fn is_ascii_whitespace(&self) -> bool {
633 self.bytes().all(|b| b.is_ascii_whitespace())
634 }
635
636 #[inline]
637 fn is_ascii_control(&self) -> bool {
638 self.bytes().all(|b| b.is_ascii_control())
639 }
1a4d82fc
JJ
640}
641
85aaf69f
SL
642#[stable(feature = "rust1", since = "1.0.0")]
643impl AsciiExt for [u8] {
644 type Owned = Vec<u8>;
1a4d82fc
JJ
645 #[inline]
646 fn is_ascii(&self) -> bool {
647 self.iter().all(|b| b.is_ascii())
648 }
649
650 #[inline]
651 fn to_ascii_uppercase(&self) -> Vec<u8> {
e9174d1e
SL
652 let mut me = self.to_vec();
653 me.make_ascii_uppercase();
654 return me
1a4d82fc
JJ
655 }
656
657 #[inline]
658 fn to_ascii_lowercase(&self) -> Vec<u8> {
e9174d1e
SL
659 let mut me = self.to_vec();
660 me.make_ascii_lowercase();
661 return me
1a4d82fc
JJ
662 }
663
664 #[inline]
665 fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
666 self.len() == other.len() &&
62682a34 667 self.iter().zip(other).all(|(a, b)| {
1a4d82fc
JJ
668 a.eq_ignore_ascii_case(b)
669 })
670 }
85aaf69f
SL
671
672 fn make_ascii_uppercase(&mut self) {
673 for byte in self {
674 byte.make_ascii_uppercase();
675 }
676 }
677
678 fn make_ascii_lowercase(&mut self) {
679 for byte in self {
680 byte.make_ascii_lowercase();
681 }
682 }
8bb4bdeb
XL
683
684 #[inline]
685 fn is_ascii_alphabetic(&self) -> bool {
686 self.iter().all(|b| b.is_ascii_alphabetic())
687 }
688
689 #[inline]
690 fn is_ascii_uppercase(&self) -> bool {
691 self.iter().all(|b| b.is_ascii_uppercase())
692 }
693
694 #[inline]
695 fn is_ascii_lowercase(&self) -> bool {
696 self.iter().all(|b| b.is_ascii_lowercase())
697 }
698
699 #[inline]
700 fn is_ascii_alphanumeric(&self) -> bool {
701 self.iter().all(|b| b.is_ascii_alphanumeric())
702 }
703
704 #[inline]
705 fn is_ascii_digit(&self) -> bool {
706 self.iter().all(|b| b.is_ascii_digit())
707 }
708
709 #[inline]
710 fn is_ascii_hexdigit(&self) -> bool {
711 self.iter().all(|b| b.is_ascii_hexdigit())
712 }
713
714 #[inline]
715 fn is_ascii_punctuation(&self) -> bool {
716 self.iter().all(|b| b.is_ascii_punctuation())
717 }
718
719 #[inline]
720 fn is_ascii_graphic(&self) -> bool {
721 self.iter().all(|b| b.is_ascii_graphic())
722 }
723
724 #[inline]
725 fn is_ascii_whitespace(&self) -> bool {
726 self.iter().all(|b| b.is_ascii_whitespace())
727 }
728
729 #[inline]
730 fn is_ascii_control(&self) -> bool {
731 self.iter().all(|b| b.is_ascii_control())
732 }
1a4d82fc
JJ
733}
734
85aaf69f 735#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 736impl AsciiExt for u8 {
85aaf69f 737 type Owned = u8;
1a4d82fc 738 #[inline]
c34b1796 739 fn is_ascii(&self) -> bool { *self & 128 == 0 }
1a4d82fc 740 #[inline]
85aaf69f 741 fn to_ascii_uppercase(&self) -> u8 { ASCII_UPPERCASE_MAP[*self as usize] }
1a4d82fc 742 #[inline]
85aaf69f 743 fn to_ascii_lowercase(&self) -> u8 { ASCII_LOWERCASE_MAP[*self as usize] }
1a4d82fc
JJ
744 #[inline]
745 fn eq_ignore_ascii_case(&self, other: &u8) -> bool {
746 self.to_ascii_lowercase() == other.to_ascii_lowercase()
747 }
85aaf69f
SL
748 #[inline]
749 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
750 #[inline]
751 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
8bb4bdeb
XL
752
753 #[inline]
754 fn is_ascii_alphabetic(&self) -> bool {
755 if *self >= 0x80 { return false; }
756 match ASCII_CHARACTER_CLASS[*self as usize] {
757 L|Lx|U|Ux => true,
758 _ => false
759 }
760 }
761
762 #[inline]
763 fn is_ascii_uppercase(&self) -> bool {
764 if *self >= 0x80 { return false }
765 match ASCII_CHARACTER_CLASS[*self as usize] {
766 U|Ux => true,
767 _ => false
768 }
769 }
770
771 #[inline]
772 fn is_ascii_lowercase(&self) -> bool {
773 if *self >= 0x80 { return false }
774 match ASCII_CHARACTER_CLASS[*self as usize] {
775 L|Lx => true,
776 _ => false
777 }
778 }
779
780 #[inline]
781 fn is_ascii_alphanumeric(&self) -> bool {
782 if *self >= 0x80 { return false }
783 match ASCII_CHARACTER_CLASS[*self as usize] {
784 D|L|Lx|U|Ux => true,
785 _ => false
786 }
787 }
788
789 #[inline]
790 fn is_ascii_digit(&self) -> bool {
791 if *self >= 0x80 { return false }
792 match ASCII_CHARACTER_CLASS[*self as usize] {
793 D => true,
794 _ => false
795 }
796 }
797
798 #[inline]
799 fn is_ascii_hexdigit(&self) -> bool {
800 if *self >= 0x80 { return false }
801 match ASCII_CHARACTER_CLASS[*self as usize] {
802 D|Lx|Ux => true,
803 _ => false
804 }
805 }
806
807 #[inline]
808 fn is_ascii_punctuation(&self) -> bool {
809 if *self >= 0x80 { return false }
810 match ASCII_CHARACTER_CLASS[*self as usize] {
811 P => true,
812 _ => false
813 }
814 }
815
816 #[inline]
817 fn is_ascii_graphic(&self) -> bool {
818 if *self >= 0x80 { return false; }
819 match ASCII_CHARACTER_CLASS[*self as usize] {
820 Ux|U|Lx|L|D|P => true,
821 _ => false
822 }
823 }
824
825 #[inline]
826 fn is_ascii_whitespace(&self) -> bool {
827 if *self >= 0x80 { return false; }
828 match ASCII_CHARACTER_CLASS[*self as usize] {
829 Cw|W => true,
830 _ => false
831 }
832 }
833
834 #[inline]
835 fn is_ascii_control(&self) -> bool {
836 if *self >= 0x80 { return false; }
837 match ASCII_CHARACTER_CLASS[*self as usize] {
838 C|Cw => true,
839 _ => false
840 }
841 }
1a4d82fc
JJ
842}
843
85aaf69f 844#[stable(feature = "rust1", since = "1.0.0")]
1a4d82fc 845impl AsciiExt for char {
85aaf69f 846 type Owned = char;
1a4d82fc
JJ
847 #[inline]
848 fn is_ascii(&self) -> bool {
849 *self as u32 <= 0x7F
850 }
851
852 #[inline]
853 fn to_ascii_uppercase(&self) -> char {
854 if self.is_ascii() {
855 (*self as u8).to_ascii_uppercase() as char
856 } else {
857 *self
858 }
859 }
860
861 #[inline]
862 fn to_ascii_lowercase(&self) -> char {
863 if self.is_ascii() {
864 (*self as u8).to_ascii_lowercase() as char
865 } else {
866 *self
867 }
868 }
869
870 #[inline]
871 fn eq_ignore_ascii_case(&self, other: &char) -> bool {
872 self.to_ascii_lowercase() == other.to_ascii_lowercase()
873 }
85aaf69f
SL
874
875 #[inline]
876 fn make_ascii_uppercase(&mut self) { *self = self.to_ascii_uppercase(); }
877 #[inline]
878 fn make_ascii_lowercase(&mut self) { *self = self.to_ascii_lowercase(); }
8bb4bdeb
XL
879
880 #[inline]
881 fn is_ascii_alphabetic(&self) -> bool {
882 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphabetic()
883 }
884
885 #[inline]
886 fn is_ascii_uppercase(&self) -> bool {
887 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_uppercase()
888 }
889
890 #[inline]
891 fn is_ascii_lowercase(&self) -> bool {
892 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_lowercase()
893 }
894
895 #[inline]
896 fn is_ascii_alphanumeric(&self) -> bool {
897 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_alphanumeric()
898 }
899
900 #[inline]
901 fn is_ascii_digit(&self) -> bool {
902 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_digit()
903 }
904
905 #[inline]
906 fn is_ascii_hexdigit(&self) -> bool {
907 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_hexdigit()
908 }
909
910 #[inline]
911 fn is_ascii_punctuation(&self) -> bool {
912 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_punctuation()
913 }
914
915 #[inline]
916 fn is_ascii_graphic(&self) -> bool {
917 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_graphic()
918 }
919
920 #[inline]
921 fn is_ascii_whitespace(&self) -> bool {
922 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_whitespace()
923 }
924
925 #[inline]
926 fn is_ascii_control(&self) -> bool {
927 (*self as u32 <= 0x7f) && (*self as u8).is_ascii_control()
928 }
85aaf69f
SL
929}
930
931/// An iterator over the escaped version of a byte, constructed via
932/// `std::ascii::escape_default`.
933#[stable(feature = "rust1", since = "1.0.0")]
934pub struct EscapeDefault {
935 range: Range<usize>,
936 data: [u8; 4],
1a4d82fc
JJ
937}
938
c34b1796 939/// Returns an iterator that produces an escaped version of a `u8`.
1a4d82fc
JJ
940///
941/// The default is chosen with a bias toward producing literals that are
942/// legal in a variety of languages, including C++11 and similar C-family
943/// languages. The exact rules are:
944///
945/// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
946/// - Single-quote, double-quote and backslash chars are backslash-escaped.
947/// - Any other chars in the range [0x20,0x7e] are not escaped.
85aaf69f 948/// - Any other chars are given hex escapes of the form '\xNN'.
1a4d82fc 949/// - Unicode escapes are never generated by this function.
c34b1796
AL
950///
951/// # Examples
952///
953/// ```
954/// use std::ascii;
955///
956/// let escaped = ascii::escape_default(b'0').next().unwrap();
957/// assert_eq!(b'0', escaped);
958///
959/// let mut escaped = ascii::escape_default(b'\t');
960///
961/// assert_eq!(b'\\', escaped.next().unwrap());
962/// assert_eq!(b't', escaped.next().unwrap());
963/// ```
85aaf69f
SL
964#[stable(feature = "rust1", since = "1.0.0")]
965pub fn escape_default(c: u8) -> EscapeDefault {
966 let (data, len) = match c {
967 b'\t' => ([b'\\', b't', 0, 0], 2),
968 b'\r' => ([b'\\', b'r', 0, 0], 2),
969 b'\n' => ([b'\\', b'n', 0, 0], 2),
970 b'\\' => ([b'\\', b'\\', 0, 0], 2),
971 b'\'' => ([b'\\', b'\'', 0, 0], 2),
972 b'"' => ([b'\\', b'"', 0, 0], 2),
973 b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
974 _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
975 };
976
c34b1796 977 return EscapeDefault { range: (0.. len), data: data };
85aaf69f
SL
978
979 fn hexify(b: u8) -> u8 {
980 match b {
981 0 ... 9 => b'0' + b,
982 _ => b'a' + b - 10,
1a4d82fc
JJ
983 }
984 }
985}
986
85aaf69f
SL
987#[stable(feature = "rust1", since = "1.0.0")]
988impl Iterator for EscapeDefault {
989 type Item = u8;
990 fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
991 fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
992}
993#[stable(feature = "rust1", since = "1.0.0")]
994impl DoubleEndedIterator for EscapeDefault {
995 fn next_back(&mut self) -> Option<u8> {
996 self.range.next_back().map(|i| self.data[i])
997 }
998}
999#[stable(feature = "rust1", since = "1.0.0")]
1000impl ExactSizeIterator for EscapeDefault {}
9e0c209e
SL
1001#[unstable(feature = "fused", issue = "35602")]
1002impl FusedIterator for EscapeDefault {}
1003
8bb4bdeb 1004#[stable(feature = "std_debug", since = "1.16.0")]
32a655c1
SL
1005impl fmt::Debug for EscapeDefault {
1006 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1007 f.pad("EscapeDefault { .. }")
1008 }
1009}
1010
85aaf69f 1011
1a4d82fc
JJ
// Lookup table used by `u8::to_ascii_lowercase`: entry `i` is the ASCII
// lowercase form of byte `i`. Only b'A'..=b'Z' map to a different value;
// every other byte, including 0x80..=0xff, maps to itself.
static ASCII_LOWERCASE_MAP: [u8; 256] = [
    // 0x00..=0x1f: control characters.
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    // 0x20..=0x3f: space, punctuation and digits.
    b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
    b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
    b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
    b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
    // 0x40..=0x5f: '@', then 'A'..='Z' folded to 'a'..='z', then punctuation.
    b'@', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'[', b'\\', b']', b'^', b'_',
    // 0x60..=0x7f: lowercase letters (unchanged), punctuation and DEL.
    b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
    b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
    b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
    b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
    // 0x80..=0xff: non-ASCII bytes, unchanged.
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];
1050
// Byte-indexed table with one entry per possible `u8` value. The slots for
// b'a'..b'z' (0x61-0x7a) hold the matching uppercase letter; every other
// slot holds its own index, so those bytes map to themselves.
// NOTE(review): presumably this backs the ASCII uppercasing routines defined
// earlier in the file -- confirm against the callers.
1051static ASCII_UPPERCASE_MAP: [u8; 256] = [
1052 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1053 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1054 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1055 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1056 b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
1057 b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
1058 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
1059 b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
1060 b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
1061 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
1062 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
1063 b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
1064 b'`',
1065
// Slots 0x61-0x7a: the lowercase letters, remapped to uppercase.
1066 b'A', b'B', b'C', b'D', b'E', b'F', b'G',
1067 b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
1068 b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
1069 b'X', b'Y', b'Z',
1070
// From 0x7b onwards every slot is the identity again.
1071 b'{', b'|', b'}', b'~', 0x7f,
1072 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
1073 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
1074 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
1075 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
1076 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
1077 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
1078 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
1079 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
1080 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
1081 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
1082 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
1083 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
1084 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
1085 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
1086 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
1087 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
1088];
1089
8bb4bdeb
XL
// Character classes assigned to the 128 ASCII code points by the
// `ASCII_CHARACTER_CLASS` table in this file. The variant names are kept
// short so that the table fits sixteen entries per row.
1090enum AsciiCharacterClass {
1091 C, // control character that the table does not also mark as whitespace
1092 Cw, // control whitespace (0x09, 0x0a, 0x0c and 0x0d in the table)
1093 W, // whitespace that is not a control character (0x20 in the table)
1094 D, // digit
1095 L, // lowercase letter that is not a hex digit (g-z in the table)
1096 Lx, // lowercase hex digit (a-f in the table)
1097 U, // uppercase letter that is not a hex digit (G-Z in the table)
1098 Ux, // uppercase hex digit (A-F in the table)
1099 P, // punctuation
1100}
1101use self::AsciiCharacterClass::*;
1102
// Class of each of the 128 ASCII code points, indexed by code point.
// The table is laid out sixteen entries per row: the trailing `// N_`
// comment gives the high nibble of the row, the header row gives the low
// nibble of each column.
1103static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 128] = [
1104// _0 _1 _2 _3 _4 _5 _6 _7 _8 _9 _a _b _c _d _e _f
1105 C, C, C, C, C, C, C, C, C, Cw,Cw,C, Cw,Cw,C, C, // 0_
1106 C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, // 1_
1107 W, P, P, P, P, P, P, P, P, P, P, P, P, P, P, P, // 2_
1108 D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, P, // 3_
1109 P, Ux,Ux,Ux,Ux,Ux,Ux,U, U, U, U, U, U, U, U, U, // 4_
1110 U, U, U, U, U, U, U, U, U, U, U, P, P, P, P, P, // 5_
1111 P, Lx,Lx,Lx,Lx,Lx,Lx,L, L, L, L, L, L, L, L, L, // 6_
1112 L, L, L, L, L, L, L, L, L, L, L, P, P, P, P, C, // 7_
1113];
1a4d82fc
JJ
1114
1115#[cfg(test)]
1116mod tests {
1a4d82fc
JJ
1117 use super::*;
1118 use char::from_u32;
1119
1120 #[test]
c1a9b12d
SL
1121 fn test_is_ascii() {
1122 assert!(b"".is_ascii());
1123 assert!(b"banana\0\x7F".is_ascii());
1124 assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
1125 assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
1126 assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
1127 assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
1a4d82fc 1128
1a4d82fc 1129 assert!("".is_ascii());
c1a9b12d
SL
1130 assert!("banana\0\u{7F}".is_ascii());
1131 assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
1132 assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
1133 assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
1a4d82fc
JJ
1134 }
1135
#[test]
fn test_to_ascii_uppercase() {
    // Only the ASCII letters change; non-ASCII characters pass through.
    let mixed = "url()URL()uRl()ürl".to_ascii_uppercase();
    assert_eq!(mixed, "URL()URL()URL()üRL");
    let non_ascii = "hıKß".to_ascii_uppercase();
    assert_eq!(non_ascii, "HıKß");

    // Every code point below 501, taken as a one-character string, must map
    // to its uppercase form if it is an ASCII lowercase letter and to itself
    // otherwise.
    let case_gap = 'a' as u32 - 'A' as u32;
    for code in 0..501 {
        let is_lower = 'a' as u32 <= code && code <= 'z' as u32;
        let expected = if is_lower { code - case_gap } else { code };
        let input = from_u32(code).unwrap().to_string();
        let want = from_u32(expected).unwrap().to_string();
        assert_eq!(input.to_ascii_uppercase(), want);
    }
}
1148
1149 #[test]
1150 fn test_to_ascii_lowercase() {
1151 assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
1152 // Dotted capital I, Kelvin sign, Sharp S.
1153 assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
1154
c34b1796 1155 for i in 0..501 {
1a4d82fc
JJ
1156 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
1157 else { i };
1158 assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
1159 (from_u32(lower).unwrap()).to_string());
1a4d82fc
JJ
1160 }
1161 }
1162
c1a9b12d
SL
1163 #[test]
1164 fn test_make_ascii_lower_case() {
1165 macro_rules! test {
1166 ($from: expr, $to: expr) => {
1167 {
1168 let mut x = $from;
1169 x.make_ascii_lowercase();
1170 assert_eq!(x, $to);
1171 }
1172 }
1173 }
1174 test!(b'A', b'a');
1175 test!(b'a', b'a');
1176 test!(b'!', b'!');
1177 test!('A', 'a');
1178 test!('À', 'À');
1179 test!('a', 'a');
1180 test!('!', '!');
1181 test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
1182 test!("HİKß".to_string(), "hİKß");
1183 }
1184
1185
#[test]
fn test_make_ascii_upper_case() {
    // Uppercases `$input` in place and compares it with `$expected`.
    // A macro is used because the inputs have several different types.
    macro_rules! check_raised {
        ($input:expr => $expected:expr) => {{
            let mut value = $input;
            value.make_ascii_uppercase();
            assert_eq!(value, $expected);
        }};
    }

    // Single bytes.
    check_raised!(b'a' => b'A');
    check_raised!(b'A' => b'A');
    check_raised!(b'!' => b'!');
    // Single chars; the non-ASCII lowercase letter must stay untouched.
    check_raised!('a' => 'A');
    check_raised!('à' => 'à');
    check_raised!('A' => 'A');
    check_raised!('!' => '!');
    // Owned byte and string buffers.
    check_raised!(b"h\xc3\xa9".to_vec() => b"H\xc3\xa9");
    check_raised!("hıKß".to_string() => "HıKß");

    // Test IndexMut on String: only the sliced prefix is converted.
    let mut greeting = String::from("Hello");
    greeting[..3].make_ascii_uppercase();
    assert_eq!(greeting, "HELlo");
}
1211
1a4d82fc
JJ
1212 #[test]
1213 fn test_eq_ignore_ascii_case() {
1214 assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
1215 assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
1216 // Dotted capital I, Kelvin sign, Sharp S.
1217 assert!("HİKß".eq_ignore_ascii_case("hİKß"));
1218 assert!(!"İ".eq_ignore_ascii_case("i"));
1219 assert!(!"K".eq_ignore_ascii_case("k"));
1220 assert!(!"ß".eq_ignore_ascii_case("s"));
1221
c34b1796 1222 for i in 0..501 {
85aaf69f
SL
1223 let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
1224 else { i };
1a4d82fc 1225 assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
85aaf69f 1226 &from_u32(lower).unwrap().to_string()));
1a4d82fc
JJ
1227 }
1228 }
54a0048b
SL
1229
#[test]
fn inference_works() {
    // The comparison result is discarded; judging by the test name this only
    // needs the call on an owned `String` with a `&str` argument to compile.
    let owned = String::from("a");
    owned.eq_ignore_ascii_case("A");
}
8bb4bdeb
XL
1235
1236 // Shorthands used by the is_ascii_* tests.
// Asserts that the predicate method `$what` holds for every `char` and every
// byte of each given string, and for each string taken as a whole (both as
// `str` and as `[u8]`). On the first offending char or byte it panics with
// the predicate name and the offender.
macro_rules! assert_all {
    ($what:ident, $($str:tt),+) => {{
        $(
            for c in $str.chars() {
                if !c.$what() {
                    panic!("expected {}({}) but it isn't",
                           stringify!($what), c);
                }
            }
            for b in $str.as_bytes().iter() {
                if !b.$what() {
                    // Bytes are reported in hex since they need not be printable.
                    panic!("expected {}(0x{:02x}) but it isn't",
                           stringify!($what), b);
                }
            }
            assert!($str.$what());
            assert!($str.as_bytes().$what());
        )+
    }};
    // Same as above, tolerating a trailing comma after the last string.
    ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+))
}
// Asserts that the predicate method `$what` holds for no `char` and no byte
// of any of the given strings. On the first offending char or byte it panics
// with the predicate name and the offender. Unlike `assert_all!`, the strings
// are not checked as a whole.
macro_rules! assert_none {
    ($what:ident, $($str:tt),+) => {{
        $(
            for c in $str.chars() {
                if c.$what() {
                    panic!("expected not-{}({}) but it is",
                           stringify!($what), c);
                }
            }
            for b in $str.as_bytes().iter() {
                if b.$what() {
                    // Bytes are reported in hex since they need not be printable.
                    panic!("expected not-{}(0x{:02x}) but it is",
                           stringify!($what), b);
                }
            }
        )+
    }};
    // Same as above, tolerating a trailing comma after the last string.
    ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+))
}
1277
// Checks `is_ascii_alphabetic`: the empty string and the letters of both
// cases must be accepted; digits, punctuation, whitespace and control
// characters must be rejected.
// NOTE: the uppercase literal lists 'Q' before 'P'; all 26 letters are
// still present.
1278 #[test]
1279 fn test_is_ascii_alphabetic() {
1280 assert_all!(is_ascii_alphabetic,
1281 "",
1282 "abcdefghijklmnopqrstuvwxyz",
1283 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1284 );
1285 assert_none!(is_ascii_alphabetic,
1286 "0123456789",
1287 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1288 " \t\n\x0c\r",
1289 "\x00\x01\x02\x03\x04\x05\x06\x07",
1290 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1291 "\x10\x11\x12\x13\x14\x15\x16\x17",
1292 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1293 "\x7f",
1294 );
1295 }
1296
// Checks `is_ascii_uppercase`: the empty string and the uppercase letters
// must be accepted; lowercase letters, digits, punctuation, whitespace and
// control characters must be rejected.
// NOTE: the uppercase literal lists 'Q' before 'P'; all 26 letters are
// still present.
1297 #[test]
1298 fn test_is_ascii_uppercase() {
1299 assert_all!(is_ascii_uppercase,
1300 "",
1301 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1302 );
1303 assert_none!(is_ascii_uppercase,
1304 "abcdefghijklmnopqrstuvwxyz",
1305 "0123456789",
1306 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1307 " \t\n\x0c\r",
1308 "\x00\x01\x02\x03\x04\x05\x06\x07",
1309 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1310 "\x10\x11\x12\x13\x14\x15\x16\x17",
1311 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1312 "\x7f",
1313 );
1314 }
1315
// Checks `is_ascii_lowercase`: the lowercase letters must be accepted;
// uppercase letters, digits, punctuation, whitespace and control characters
// must be rejected. Unlike the sibling tests, the empty string is not part
// of the accepted list here.
1316 #[test]
1317 fn test_is_ascii_lowercase() {
1318 assert_all!(is_ascii_lowercase,
1319 "abcdefghijklmnopqrstuvwxyz",
1320 );
1321 assert_none!(is_ascii_lowercase,
1322 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1323 "0123456789",
1324 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1325 " \t\n\x0c\r",
1326 "\x00\x01\x02\x03\x04\x05\x06\x07",
1327 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1328 "\x10\x11\x12\x13\x14\x15\x16\x17",
1329 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1330 "\x7f",
1331 );
1332 }
1333
// Checks `is_ascii_alphanumeric`: the empty string, the letters of both
// cases and the decimal digits must be accepted; punctuation, whitespace and
// control characters must be rejected.
1334 #[test]
1335 fn test_is_ascii_alphanumeric() {
1336 assert_all!(is_ascii_alphanumeric,
1337 "",
1338 "abcdefghijklmnopqrstuvwxyz",
1339 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1340 "0123456789",
1341 );
1342 assert_none!(is_ascii_alphanumeric,
1343 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1344 " \t\n\x0c\r",
1345 "\x00\x01\x02\x03\x04\x05\x06\x07",
1346 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1347 "\x10\x11\x12\x13\x14\x15\x16\x17",
1348 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1349 "\x7f",
1350 );
1351 }
1352
// Checks `is_ascii_digit`: the empty string and the decimal digits must be
// accepted; letters of both cases, punctuation, whitespace and control
// characters must be rejected.
1353 #[test]
1354 fn test_is_ascii_digit() {
1355 assert_all!(is_ascii_digit,
1356 "",
1357 "0123456789",
1358 );
1359 assert_none!(is_ascii_digit,
1360 "abcdefghijklmnopqrstuvwxyz",
1361 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1362 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1363 " \t\n\x0c\r",
1364 "\x00\x01\x02\x03\x04\x05\x06\x07",
1365 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1366 "\x10\x11\x12\x13\x14\x15\x16\x17",
1367 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1368 "\x7f",
1369 );
1370 }
1371
// Checks `is_ascii_hexdigit`: the empty string, the decimal digits and the
// letters a-f / A-F must be accepted; the remaining letters (g-z, G-Z),
// punctuation, whitespace and control characters must be rejected.
1372 #[test]
1373 fn test_is_ascii_hexdigit() {
1374 assert_all!(is_ascii_hexdigit,
1375 "",
1376 "0123456789",
1377 "abcdefABCDEF",
1378 );
1379 assert_none!(is_ascii_hexdigit,
1380 "ghijklmnopqrstuvwxyz",
1381 "GHIJKLMNOQPRSTUVWXYZ",
1382 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1383 " \t\n\x0c\r",
1384 "\x00\x01\x02\x03\x04\x05\x06\x07",
1385 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1386 "\x10\x11\x12\x13\x14\x15\x16\x17",
1387 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1388 "\x7f",
1389 );
1390 }
1391
// Checks `is_ascii_punctuation`: the empty string and the listed punctuation
// characters must be accepted; letters, digits, whitespace and control
// characters must be rejected.
1392 #[test]
1393 fn test_is_ascii_punctuation() {
1394 assert_all!(is_ascii_punctuation,
1395 "",
1396 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1397 );
1398 assert_none!(is_ascii_punctuation,
1399 "abcdefghijklmnopqrstuvwxyz",
1400 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1401 "0123456789",
1402 " \t\n\x0c\r",
1403 "\x00\x01\x02\x03\x04\x05\x06\x07",
1404 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1405 "\x10\x11\x12\x13\x14\x15\x16\x17",
1406 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1407 "\x7f",
1408 );
1409 }
1410
// Checks `is_ascii_graphic`: the empty string, letters, digits and
// punctuation must be accepted; whitespace (including the plain space) and
// control characters must be rejected.
1411 #[test]
1412 fn test_is_ascii_graphic() {
1413 assert_all!(is_ascii_graphic,
1414 "",
1415 "abcdefghijklmnopqrstuvwxyz",
1416 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1417 "0123456789",
1418 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1419 );
1420 assert_none!(is_ascii_graphic,
1421 " \t\n\x0c\r",
1422 "\x00\x01\x02\x03\x04\x05\x06\x07",
1423 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1424 "\x10\x11\x12\x13\x14\x15\x16\x17",
1425 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1426 "\x7f",
1427 );
1428 }
1429
// Checks `is_ascii_whitespace`: the empty string plus space, tab, line feed,
// form feed and carriage return must be accepted; letters, digits,
// punctuation and the remaining control characters must be rejected.
// The 0x08-0x0f row lists only 0x08, 0x0b, 0x0e and 0x0f, so the vertical
// tab (0x0b) is expected NOT to count as whitespace.
1430 #[test]
1431 fn test_is_ascii_whitespace() {
1432 assert_all!(is_ascii_whitespace,
1433 "",
1434 " \t\n\x0c\r",
1435 );
1436 assert_none!(is_ascii_whitespace,
1437 "abcdefghijklmnopqrstuvwxyz",
1438 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1439 "0123456789",
1440 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1441 "\x00\x01\x02\x03\x04\x05\x06\x07",
1442 "\x08\x0b\x0e\x0f",
1443 "\x10\x11\x12\x13\x14\x15\x16\x17",
1444 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1445 "\x7f",
1446 );
1447 }
1448
// Checks `is_ascii_control`: the empty string, every byte in 0x00-0x1f and
// 0x7f must be accepted; letters, digits, punctuation and the plain space
// must be rejected.
1449 #[test]
1450 fn test_is_ascii_control() {
1451 assert_all!(is_ascii_control,
1452 "",
1453 "\x00\x01\x02\x03\x04\x05\x06\x07",
1454 "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
1455 "\x10\x11\x12\x13\x14\x15\x16\x17",
1456 "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
1457 "\x7f",
1458 );
1459 assert_none!(is_ascii_control,
1460 "abcdefghijklmnopqrstuvwxyz",
1461 "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
1462 "0123456789",
1463 "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
1464 " ",
1465 );
1466 }
1a4d82fc 1467}