]> git.proxmox.com Git - rustc.git/blob - library/core/tests/ascii.rs
New upstream version 1.48.0~beta.8+dfsg1
[rustc.git] / library / core / tests / ascii.rs
1 use core::char::from_u32;
2
// Checks `is_ascii` on byte slices, `str`s, single bytes and single chars,
// using inputs that are entirely ASCII, partly ASCII and entirely non-ASCII.
#[test]
fn test_is_ascii() {
    // Byte slices, as a whole and byte by byte. NUL and DEL (0x7F) are the two
    // ends of the ASCII range.
    assert!(b"".is_ascii());
    assert!(b"banana\0\x7F".is_ascii());
    assert!(b"banana\0\x7F".iter().all(u8::is_ascii));
    assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
    assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(u8::is_ascii));
    // The three bytes of the UTF-8 encoding of U+1EC7: none of them is ASCII.
    assert!(!b"\xe1\xbb\x87".iter().any(u8::is_ascii));

    // The same checks on `str` and `char`.
    assert!("".is_ascii());
    assert!("banana\0\u{7F}".is_ascii());
    assert!("banana\0\u{7F}".chars().all(|ch| ch.is_ascii()));
    assert!(!"ประเทศไทย中华Việt Nam".chars().all(|ch| ch.is_ascii()));
    // Not a single char of this string may be ASCII. The last two chars are
    // U+1EC7 and a no-break space (U+00A0); they are written as escapes so
    // that they cannot be mistaken for, or silently converted into, an ASCII
    // `e` or an ASCII space.
    assert!(!"ประเทศไทย中华\u{1ec7}\u{a0}".chars().any(|ch| ch.is_ascii()));
}
18
// `to_ascii_uppercase` must map ASCII `a..=z` to `A..=Z` and leave every other
// char, including non-ASCII letters that have an uppercase form, untouched.
#[test]
fn test_to_ascii_uppercase() {
    assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
    // Dotless small i, Kelvin sign, Sharp S.
    // The Kelvin sign (U+212A) looks exactly like ASCII `K` and is turned into
    // it by Unicode normalization, so it is spelled as an escape.
    assert_eq!("hı\u{212A}ß".to_ascii_uppercase(), "Hı\u{212A}ß");

    // Exhaustively compare against the arithmetic definition for the first
    // 501 code points (all of ASCII plus a stretch of non-ASCII chars).
    for code in 0..501 {
        let original = from_u32(code).unwrap();
        let expected = match original {
            'a'..='z' => from_u32(code - ('a' as u32 - 'A' as u32)).unwrap(),
            _ => original,
        };
        assert_eq!(original.to_string().to_ascii_uppercase(), expected.to_string());
    }
}
33
// `to_ascii_lowercase` must map ASCII `A..=Z` to `a..=z` and leave every other
// char, including non-ASCII letters that have a lowercase form, untouched.
#[test]
fn test_to_ascii_lowercase() {
    assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
    // Dotted capital I, Kelvin sign, Sharp S.
    // The Kelvin sign (U+212A) looks exactly like ASCII `K` and is turned into
    // it by Unicode normalization; with a real `K` this assertion would fail,
    // so the char is spelled as an escape.
    assert_eq!("Hİ\u{212A}ß".to_ascii_lowercase(), "hİ\u{212A}ß");

    // Exhaustively compare against the arithmetic definition for the first
    // 501 code points (all of ASCII plus a stretch of non-ASCII chars).
    for code in 0..501 {
        let original = from_u32(code).unwrap();
        let expected = match original {
            'A'..='Z' => from_u32(code + ('a' as u32 - 'A' as u32)).unwrap(),
            _ => original,
        };
        assert_eq!(original.to_string().to_ascii_lowercase(), expected.to_string());
    }
}
49
// `make_ascii_lowercase` is the in-place variant; it is exercised here on
// `u8`, `char`, `Vec<u8>` and `String`.
#[test]
fn test_make_ascii_lower_case() {
    // Lowercases `$from` in place and compares the result with `$to`.
    macro_rules! test {
        ($from:expr, $to:expr) => {{
            let mut value = $from;
            value.make_ascii_lowercase();
            assert_eq!(value, $to);
        }};
    }

    // Single bytes.
    test!(b'A', b'a');
    test!(b'a', b'a');
    test!(b'!', b'!');

    // Single chars; the non-ASCII capital must not change.
    test!('A', 'a');
    test!('À', 'À');
    test!('a', 'a');
    test!('!', '!');

    // Byte buffer: `\xc3\x89` is the UTF-8 encoding of a non-ASCII capital and
    // must be left alone.
    test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");

    // Dotted capital I, Kelvin sign, Sharp S. The Kelvin sign (U+212A) is
    // escaped because it is indistinguishable from ASCII `K`, which *would*
    // be lowercased and make this case fail.
    test!("Hİ\u{212A}ß".to_string(), "hİ\u{212A}ß");
}
69
// `make_ascii_uppercase` is the in-place variant; it is exercised here on
// `u8`, `char`, `Vec<u8>`, `String` and a mutable sub-slice of a `String`.
#[test]
fn test_make_ascii_upper_case() {
    // Uppercases `$from` in place and compares the result with `$to`.
    macro_rules! test {
        ($from:expr, $to:expr) => {{
            let mut value = $from;
            value.make_ascii_uppercase();
            assert_eq!(value, $to);
        }};
    }

    // Single bytes.
    test!(b'a', b'A');
    test!(b'A', b'A');
    test!(b'!', b'!');

    // Single chars; the non-ASCII small letter must not change.
    test!('a', 'A');
    test!('à', 'à');
    test!('A', 'A');
    test!('!', '!');

    // Byte buffer: `\xc3\xa9` is the UTF-8 encoding of a non-ASCII small
    // letter and must be left alone.
    test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");

    // Dotless small i, Kelvin sign, Sharp S. The Kelvin sign (U+212A) is
    // escaped because it is indistinguishable from ASCII `K`.
    test!("hı\u{212A}ß".to_string(), "Hı\u{212A}ß");

    // Only the indexed range may be modified.
    let mut greeting = "Hello".to_string();
    greeting[..3].make_ascii_uppercase(); // Test IndexMut on String.
    assert_eq!(greeting, "HELlo");
}
93
// `eq_ignore_ascii_case` must treat ASCII letters case-insensitively while
// comparing every non-ASCII char exactly.
#[test]
fn test_eq_ignore_ascii_case() {
    assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
    assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
    // Dotted capital I, Kelvin sign, Sharp S.
    // The Kelvin sign (U+212A) is escaped because it is indistinguishable from
    // ASCII `K`; with a real `K` the third negative check below would fail.
    assert!("Hİ\u{212A}ß".eq_ignore_ascii_case("hİ\u{212A}ß"));
    assert!(!"İ".eq_ignore_ascii_case("i"));
    assert!(!"\u{212A}".eq_ignore_ascii_case("k"));
    assert!(!"ß".eq_ignore_ascii_case("s"));

    // Every one of the first 501 code points must compare equal to its
    // ASCII-lowercased counterpart (which is the char itself outside `A..=Z`).
    for code in 0..501 {
        let original = from_u32(code).unwrap();
        let lowered = match original {
            'A'..='Z' => from_u32(code + ('a' as u32 - 'A' as u32)).unwrap(),
            _ => original,
        };
        assert!(original.to_string().eq_ignore_ascii_case(&lowered.to_string()));
    }
}
114
// Calling `eq_ignore_ascii_case` on a `String` with a `&str` argument must
// type-check without annotations. The result is asserted as well, so the
// comparison is not silently discarded.
#[test]
fn inference_works() {
    let x = "a".to_string();
    assert!(x.eq_ignore_ascii_case("A"));
}
120
// Shorthand used by the is_ascii_* tests.
//
// `assert_all!(predicate, "str", ...)` checks that the method `predicate`
// returns `true` for every `char` and for every byte of each given string, and
// panics on the first value for which it does not. A trailing comma is
// accepted. Chars are reported with `{:?}` so that control characters stay
// readable in the failure message.
macro_rules! assert_all {
    ($what:ident, $($str:tt),+ $(,)?) => {{
        $(
            for c in $str.chars() {
                if !c.$what() {
                    panic!("expected {}({:?}) but it isn't", stringify!($what), c);
                }
            }
            for b in $str.as_bytes().iter() {
                if !b.$what() {
                    panic!("expected {}(0x{:02x}) but it isn't", stringify!($what), b);
                }
            }
        )+
    }};
}
// Shorthand used by the is_ascii_* tests.
//
// `assert_none!(predicate, "str", ...)` checks that the method `predicate`
// returns `false` for every `char` and for every byte of each given string,
// and panics on the first value for which it returns `true`. A trailing comma
// is accepted. Chars are reported with `{:?}` so that control characters stay
// readable in the failure message.
macro_rules! assert_none {
    ($what:ident, $($str:tt),+ $(,)?) => {{
        $(
            for c in $str.chars() {
                if c.$what() {
                    panic!("expected not-{}({:?}) but it is", stringify!($what), c);
                }
            }
            for b in $str.as_bytes().iter() {
                if b.$what() {
                    panic!("expected not-{}(0x{:02x}) but it is", stringify!($what), b);
                }
            }
        )+
    }};
}
160
// `is_ascii_alphabetic` must accept the ASCII letters and reject everything
// else. Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_alphabetic() {
    assert_all!(
        is_ascii_alphabetic,
        "",
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    );
    assert_none!(
        is_ascii_alphabetic,
        "0123456789",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
181
// `is_ascii_uppercase` must accept `A..=Z` and reject everything else.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_uppercase() {
    assert_all!(is_ascii_uppercase, "", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",);
    assert_none!(
        is_ascii_uppercase,
        "abcdefghijklmnopqrstuvwxyz",
        "0123456789",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
198
// `is_ascii_lowercase` must accept `a..=z` and reject everything else.
// Together the two lists below cover the whole ASCII range. The empty string
// is included, as in the sibling tests, to exercise the macro on empty input.
#[test]
fn test_is_ascii_lowercase() {
    assert_all!(is_ascii_lowercase, "", "abcdefghijklmnopqrstuvwxyz",);
    assert_none!(
        is_ascii_lowercase,
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
215
// `is_ascii_alphanumeric` must accept the ASCII letters and digits and reject
// everything else. Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_alphanumeric() {
    assert_all!(
        is_ascii_alphanumeric,
        "",
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
    );
    assert_none!(
        is_ascii_alphanumeric,
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
236
// `is_ascii_digit` must accept `0..=9` and reject everything else.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_digit() {
    assert_all!(is_ascii_digit, "", "0123456789",);
    assert_none!(
        is_ascii_digit,
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
253
// `is_ascii_hexdigit` must accept `0..=9`, `a..=f` and `A..=F` and reject
// everything else, in particular the letters from `g`/`G` onwards.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_hexdigit() {
    assert_all!(is_ascii_hexdigit, "", "0123456789", "abcdefABCDEF",);
    assert_none!(
        is_ascii_hexdigit,
        "ghijklmnopqrstuvwxyz",
        "GHIJKLMNOPQRSTUVWXYZ",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
270
// `is_ascii_punctuation` must accept the ASCII punctuation characters listed
// below and reject letters, digits, whitespace and control characters.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_punctuation() {
    assert_all!(is_ascii_punctuation, "", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",);
    assert_none!(
        is_ascii_punctuation,
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
287
// `is_ascii_graphic` must accept letters, digits and punctuation and reject
// whitespace (including the plain space) and control characters.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_graphic() {
    assert_all!(
        is_ascii_graphic,
        "",
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
    );
    assert_none!(
        is_ascii_graphic,
        " \t\n\x0c\r",
        // Control characters 0x00..=0x1f, then DEL.
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
308
// `is_ascii_whitespace` must accept space, tab, line feed, form feed and
// carriage return, and reject everything else.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_whitespace() {
    assert_all!(is_ascii_whitespace, "", " \t\n\x0c\r",);
    assert_none!(
        is_ascii_whitespace,
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        // 0x08..=0x0f minus the whitespace characters 0x09, 0x0a, 0x0c and
        // 0x0d. Vertical tab (0x0b) stays in: it must NOT count as whitespace.
        "\x08\x0b\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
}
325
// `is_ascii_control` must accept 0x00..=0x1f and DEL (0x7f) and reject
// letters, digits, punctuation and the plain space.
// Together the two lists below cover the whole ASCII range.
#[test]
fn test_is_ascii_control() {
    assert_all!(
        is_ascii_control,
        "",
        "\x00\x01\x02\x03\x04\x05\x06\x07",
        "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
        "\x10\x11\x12\x13\x14\x15\x16\x17",
        "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
        "\x7f",
    );
    assert_none!(
        is_ascii_control,
        "abcdefghijklmnopqrstuvwxyz",
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789",
        "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
        // The space is the only whitespace character that is not a control
        // character; tab, LF, FF and CR are covered by the list above.
        " ",
    );
}
346
// The slice `is_ascii` implementation manipulates pointers and computes
// alignment-dependent offsets, all of which it sanity-checks with
// `debug_assert!`s. To hit those checks, many lengths and many start/end
// offsets are tried, and each answer is compared with a byte-by-byte
// reference that is obviously correct.
#[test]
fn test_is_ascii_align_size_thoroughly() {
    // Reference implementation: ASCII iff no byte is non-ASCII.
    fn is_ascii_baseline(s: &[u8]) -> bool {
        !s.iter().any(|byte| !byte.is_ascii())
    }

    // Builds `l` copies of `b0` immediately followed by `l` copies of `b1`.
    fn repeat_concat(b0: u8, b1: u8, l: usize) -> Vec<u8> {
        [vec![b0; l], vec![b1; l]].concat()
    }

    // Miri is too slow for the full range of lengths.
    let max_len = if cfg!(miri) { 20 } else { 100 };

    for len in 0..max_len {
        // All-ASCII buffers, all-non-ASCII buffers, and buffers that switch
        // from one to the other halfway through.
        #[cfg(not(miri))]
        let cases = [
            vec![b'a'; len],
            vec![b'\0'; len],
            vec![b'\x7f'; len],
            vec![b'\x80'; len],
            vec![b'\xff'; len],
            repeat_concat(b'a', 0x80u8, len),
            repeat_concat(0x80u8, b'a', len),
        ];

        // Reduced set under Miri, again for speed.
        #[cfg(miri)]
        let cases = [vec![b'a'; len], vec![b'\x80'; len], repeat_concat(b'a', 0x80u8, len)];

        for case in cases.iter() {
            let total = case.len();
            for offset in 0..=total {
                // Start of the slice is potentially misaligned.
                let from_offset = &case[offset..];
                assert_eq!(is_ascii_baseline(from_offset), from_offset.is_ascii());

                // End of the slice is potentially misaligned.
                let up_to_offset = &case[..total - offset];
                assert_eq!(is_ascii_baseline(up_to_offset), up_to_offset.is_ascii());

                // Both the start and the end are potentially misaligned.
                let trim = offset / 2;
                let middle = &case[trim..total - trim];
                assert_eq!(is_ascii_baseline(middle), middle.is_ascii());
            }
        }
    }
}
400
// `char::is_ascii` and `u8::is_ascii` must be usable in a const context: each
// is evaluated in a `const` initializer and the result is then checked.
#[test]
fn ascii_const() {
    const ASCII_CHAR: bool = 'a'.is_ascii();
    // `b'a'` is the byte 97.
    const ASCII_BYTE: bool = b'a'.is_ascii();

    assert!(ASCII_CHAR);
    assert!(ASCII_BYTE);
}