]>
Commit | Line | Data |
---|---|---|
9346a6ac | 1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
c34b1796 AL |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | use std::cmp::Ordering::{Equal, Greater, Less}; | |
62682a34 | 12 | use std::str::from_utf8; |
c34b1796 AL |
13 | |
/// `PartialOrd`/`PartialEq` comparisons on string slices.
#[test]
fn test_le() {
    assert!("" <= "");
    assert!("" <= "foo");
    assert!("foo" <= "foo");
    assert!("foo" != "bar");
}
21 | ||
/// `len` counts UTF-8 bytes, `chars().count()` counts scalar values.
/// NOTE(review): `str::width` was a 2015-era unstable API (since moved to the
/// `unicode-width` crate); those calls are kept as-is to preserve behavior.
#[allow(deprecated)]
#[test]
fn test_len() {
    // Byte lengths: 1-4 bytes per scalar value depending on code point.
    assert_eq!("".len(), 0);
    assert_eq!("hello world".len(), 11);
    assert_eq!("\x63".len(), 1);
    assert_eq!("\u{a2}".len(), 2);
    assert_eq!("\u{3c0}".len(), 2);
    assert_eq!("\u{2620}".len(), 3);
    assert_eq!("\u{1d11e}".len(), 4);

    // Each scalar value counts once, regardless of its encoded size.
    assert_eq!("".chars().count(), 0);
    assert_eq!("hello world".chars().count(), 11);
    assert_eq!("\x63".chars().count(), 1);
    assert_eq!("\u{a2}".chars().count(), 1);
    assert_eq!("\u{3c0}".chars().count(), 1);
    assert_eq!("\u{2620}".chars().count(), 1);
    assert_eq!("\u{1d11e}".chars().count(), 1);
    assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19);

    assert_eq!("hello".width(false), 10);
    assert_eq!("hello".width(true), 10);
    assert_eq!("\0\0\0\0\0".width(false), 0);
    assert_eq!("\0\0\0\0\0".width(true), 0);
    assert_eq!("".width(false), 0);
    assert_eq!("".width(true), 0);
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4);
    assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8);
}
51 | ||
/// Forward search returns the byte index of the first match.
#[test]
fn test_find() {
    assert_eq!("hello".find('l'), Some(2));
    assert_eq!("hello".find(|c: char| c == 'o'), Some(4));
    assert!("hello".find('x').is_none());
    assert!("hello".find(|c: char| c == 'x').is_none());
    assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30));
    assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30));
}

/// Reverse search still reports a forward byte index (of the last match).
#[test]
fn test_rfind() {
    assert_eq!("hello".rfind('l'), Some(3));
    assert_eq!("hello".rfind(|c: char| c == 'o'), Some(4));
    assert!("hello".rfind('x').is_none());
    assert!("hello".rfind(|c: char| c == 'x').is_none());
    assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30));
    assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30));
}
71 | ||
/// `chars().collect::<String>()` reproduces the source string exactly.
#[test]
fn test_collect() {
    let empty = String::from("");
    let rebuilt: String = empty.chars().collect();
    assert_eq!(empty, rebuilt);
    let data = String::from("ประเทศไทย中");
    let rebuilt: String = data.chars().collect();
    assert_eq!(data, rebuilt);
}

/// `into_bytes` exposes the underlying UTF-8 buffer.
#[test]
fn test_into_bytes() {
    let data = String::from("asdf");
    let buf = data.into_bytes();
    assert_eq!(buf, b"asdf");
}
88 | ||
/// Substring search: offsets are byte positions relative to the slice searched.
#[test]
fn test_find_str() {
    // byte positions
    assert_eq!("".find(""), Some(0));
    assert!("banana".find("apple pie").is_none());

    let data = "abcabc";
    assert_eq!(data[0..6].find("ab"), Some(0));
    // Relative to the subslice start, not the original string.
    assert_eq!(data[2..6].find("ab"), Some(3 - 2));
    assert!(data[2..4].find("ab").is_none());

    let string = "ประเทศไทย中华Việt Nam";
    let mut data = String::from(string);
    data.push_str(string);
    assert!(data.find("ไท华").is_none());
    assert_eq!(data[0..43].find(""), Some(0));
    assert_eq!(data[6..43].find(""), Some(6 - 6));

    assert_eq!(data[0..43].find("ประ"), Some(0));
    assert_eq!(data[0..43].find("ทศไ"), Some(12));
    assert_eq!(data[0..43].find("ย中"), Some(24));
    assert_eq!(data[0..43].find("iệt"), Some(34));
    assert_eq!(data[0..43].find("Nam"), Some(40));

    // Second copy of the string: same offsets, shifted by 43 bytes.
    assert_eq!(data[43..86].find("ประ"), Some(43 - 43));
    assert_eq!(data[43..86].find("ทศไ"), Some(55 - 43));
    assert_eq!(data[43..86].find("ย中"), Some(67 - 43));
    assert_eq!(data[43..86].find("iệt"), Some(77 - 43));
    assert_eq!(data[43..86].find("Nam"), Some(83 - 43));
}
119 | ||
/// `slice_chars` (2015-era unstable API, since removed) slices by char index
/// rather than byte index. Kept as-is to preserve behavior.
#[test]
fn test_slice_chars() {
    // Check that chars `start..start+len(b)` of `a` spell out `b`.
    fn t(a: &str, b: &str, start: usize) {
        assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
    }
    t("", "", 0);
    t("hello", "llo", 2);
    t("hello", "el", 1);
    t("αβλ", "β", 1);
    t("αβλ", "", 3);
    assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
}
132 | ||
/// Shorthand: owned `String` from a literal.
fn s(x: &str) -> String { x.to_string() }

/// Assert that concatenating `$string` yields `$expected`.
macro_rules! test_concat {
    ($expected: expr, $string: expr) => {
        {
            let actual: String = $string.concat();
            assert_eq!($expected, actual);
        }
    }
}

/// `concat` works over both `Vec<String>` and `Vec<&str>`.
#[test]
fn test_concat_for_different_types() {
    test_concat!("ab", vec![s("a"), s("b")]);
    test_concat!("ab", vec!["a", "b"]);
    test_concat!("ab", vec!["a", "b"]);
    test_concat!("ab", vec![s("a"), s("b")]);
}

/// `concat` handles empty, single-element, and mixed-length inputs.
#[test]
fn test_concat_for_different_lengths() {
    let empty: &[&str] = &[];
    test_concat!("", empty);
    test_concat!("a", ["a"]);
    test_concat!("ab", ["a", "b"]);
    test_concat!("abc", ["", "a", "bc"]);
}
160 | ||
/// Assert that joining `$string` with `$delim` yields `$expected`.
macro_rules! test_join {
    ($expected: expr, $string: expr, $delim: expr) => {
        {
            let joined = $string.join($delim);
            assert_eq!($expected, joined);
        }
    }
}

/// `join` accepts arrays, vecs, and slices of both `&str` and `String`,
/// with either a literal or a borrowed `String` separator.
#[test]
fn test_join_for_different_types() {
    test_join!("a-b", ["a", "b"], "-");
    let hyphen = "-".to_string();
    test_join!("a-b", [s("a"), s("b")], &*hyphen);
    test_join!("a-b", vec!["a", "b"], &*hyphen);
    test_join!("a-b", &*vec!["a", "b"], "-");
    test_join!("a-b", vec![s("a"), s("b")], "-");
}

/// `join` handles empty, single-element, and empty-piece inputs.
#[test]
fn test_join_for_different_lengths() {
    let empty: &[&str] = &[];
    test_join!("", empty, "-");
    test_join!("a", ["a"], "-");
    test_join!("a-b", ["a", "b"], "-");
    test_join!("-a-bc", ["", "a", "bc"], "-");
}
188 | ||
/// `slice_unchecked` (2015-era API, since replaced by `get_unchecked`)
/// slices without bounds or char-boundary checks.
#[test]
fn test_unsafe_slice() {
    assert_eq!("ab", unsafe { "abc".slice_unchecked(0, 2) });
    assert_eq!("bc", unsafe { "abc".slice_unchecked(1, 3) });
    assert_eq!("", unsafe { "abc".slice_unchecked(1, 1) });
    // Build a 1,000,000-char string of 'a's.
    fn a_million_letter_a() -> String {
        let mut rs = String::new();
        for _ in 0..100000 {
            rs.push_str("aaaaaaaaaa");
        }
        rs
    }
    // Build the 500,000-char expected prefix.
    fn half_a_million_letter_a() -> String {
        let mut rs = String::new();
        for _ in 0..100000 {
            rs.push_str("aaaaa");
        }
        rs
    }
    let letters = a_million_letter_a();
    assert!(half_a_million_letter_a() ==
            unsafe { String::from(letters.slice_unchecked(0, 500000)) });
}
218 | ||
/// `starts_with` on empty, ASCII, and multibyte prefixes.
#[test]
fn test_starts_with() {
    assert!("".starts_with(""));
    assert!("abc".starts_with(""));
    assert!("abc".starts_with("a"));
    assert!(!"a".starts_with("abc"));
    assert!(!"".starts_with("abc"));
    assert!(!"ödd".starts_with("-"));
    assert!("ödd".starts_with("öd"));
}

/// `ends_with` on empty, ASCII, and multibyte suffixes.
#[test]
fn test_ends_with() {
    assert!("".ends_with(""));
    assert!("abc".ends_with(""));
    assert!("abc".ends_with("c"));
    assert!(!"a".ends_with("abc"));
    assert!(!"".ends_with("abc"));
    assert!(!"ddö".ends_with("-"));
    assert!("ddö".ends_with("dö"));
}

/// Only the empty string is empty.
#[test]
fn test_is_empty() {
    assert!("".is_empty());
    assert!(!"a".is_empty());
}
246 | ||
/// `replace` substitutes every non-overlapping occurrence of the pattern.
/// NOTE(review): the blame-dump renderer collapsed runs of spaces; the
/// three-space literal below is reconstructed from what removing "test"
/// from " test test " must yield.
#[test]
fn test_replace() {
    let a = "a";
    assert_eq!("".replace(a, "b"), String::from(""));
    assert_eq!("a".replace(a, "b"), String::from("b"));
    assert_eq!("ab".replace(a, "b"), String::from("bb"));
    let test = "test";
    assert!(" test test ".replace(test, "toast") ==
            String::from(" toast toast "));
    assert_eq!(" test test ".replace(test, ""), String::from("   "));
}

/// Replacement at the start of a multibyte string.
#[test]
fn test_replace_2a() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    let a = "ประเ";
    let a2 = "دولة الكويتทศไทย中华";
    assert_eq!(data.replace(a, repl), a2);
}

/// Replacement in the middle of a multibyte string.
#[test]
fn test_replace_2b() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    let b = "ะเ";
    let b2 = "ปรدولة الكويتทศไทย中华";
    assert_eq!(data.replace(b, repl), b2);
}

/// Replacement at the end of a multibyte string.
#[test]
fn test_replace_2c() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    let c = "中华";
    let c2 = "ประเทศไทยدولة الكويت";
    assert_eq!(data.replace(c, repl), c2);
}

/// A pattern that never occurs leaves the string unchanged.
#[test]
fn test_replace_2d() {
    let data = "ประเทศไทย中华";
    let repl = "دولة الكويت";

    let d = "ไท华";
    assert_eq!(data.replace(d, repl), data);
}
297 | ||
/// Byte-range indexing on ASCII and multibyte strings.
#[test]
fn test_slice() {
    assert_eq!("ab", &"abc"[0..2]);
    assert_eq!("bc", &"abc"[1..3]);
    assert_eq!("", &"abc"[1..1]);
    assert_eq!("\u{65e5}", &"\u{65e5}\u{672c}"[0..3]);

    let data = "ประเทศไทย中华";
    // Thai and CJK chars are 3 bytes each in UTF-8.
    assert_eq!("ป", &data[0..3]);
    assert_eq!("ร", &data[3..6]);
    assert_eq!("", &data[3..3]);
    assert_eq!("华", &data[30..33]);

    // Build a 1,000,000-char CJK string.
    fn a_million_letter_x() -> String {
        let mut rs = String::new();
        for _ in 0..100000 {
            rs.push_str("华华华华华华华华华华");
        }
        rs
    }
    // Its expected 500,000-char (1,500,000-byte) prefix.
    fn half_a_million_letter_x() -> String {
        let mut rs = String::new();
        for _ in 0..100000 {
            rs.push_str("华华华华华");
        }
        rs
    }
    let letters = a_million_letter_x();
    assert!(half_a_million_letter_x() ==
            String::from(&letters[0..3 * 500000]));
}

/// More byte-range indexing; the table below maps byte offset to char.
#[test]
fn test_slice_2() {
    let ss = "中华Việt Nam";

    assert_eq!("华", &ss[3..6]);
    assert_eq!("Việt Nam", &ss[6..16]);

    assert_eq!("ab", &"abc"[0..2]);
    assert_eq!("bc", &"abc"[1..3]);
    assert_eq!("", &"abc"[1..1]);

    assert_eq!("中", &ss[0..3]);
    assert_eq!("华V", &ss[3..7]);
    assert_eq!("", &ss[3..3]);
    /*0: 中
      3: 华
      6: V
      7: i
      8: ệ
     11: t
     12:
     13: N
     14: a
     15: m */
}

/// Slicing through the middle of a multibyte char panics.
#[test]
#[should_panic]
fn test_slice_fail() {
    &"中华Việt Nam"[0..2];
}

/// Open-ended ranges from a start offset.
#[test]
fn test_slice_from() {
    assert_eq!(&"abcd"[0..], "abcd");
    assert_eq!(&"abcd"[2..], "cd");
    assert_eq!(&"abcd"[4..], "");
}

/// Open-ended ranges up to an end offset.
#[test]
fn test_slice_to() {
    assert_eq!(&"abcd"[..0], "");
    assert_eq!(&"abcd"[..2], "ab");
    assert_eq!(&"abcd"[..4], "abcd");
}
378 | ||
/// `trim_left_matches` with empty char lists, char slices, single chars,
/// and closures.
#[test]
fn test_trim_left_matches() {
    let v: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
    let chars: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
    assert_eq!(" *** *** ".trim_left_matches(chars), "");
    assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");

    assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
    let chars: &[char] = &['1', '2'];
    assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
    assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
}

/// Mirror of the above, trimming from the right edge only.
#[test]
fn test_trim_right_matches() {
    let v: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
    let chars: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
    assert_eq!(" *** *** ".trim_right_matches(chars), "");
    assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");

    assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
    let chars: &[char] = &['1', '2'];
    assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
    assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
}

/// `trim_matches` trims both edges.
#[test]
fn test_trim_matches() {
    let v: &[char] = &[];
    assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
    let chars: &[char] = &['*', ' '];
    assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
    assert_eq!(" *** *** ".trim_matches(chars), "");
    assert_eq!("foo".trim_matches(chars), "foo");

    assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
    let chars: &[char] = &['1', '2'];
    assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
    assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
}

/// `trim_left` removes leading whitespace, including U+3000.
#[test]
fn test_trim_left() {
    assert_eq!("".trim_left(), "");
    assert_eq!("a".trim_left(), "a");
    assert_eq!("    ".trim_left(), "");
    assert_eq!("     blah".trim_left(), "blah");
    assert_eq!("   \u{3000}  wut".trim_left(), "wut");
    assert_eq!("hey ".trim_left(), "hey ");
}

/// `trim_right` removes trailing whitespace, including U+3000.
#[test]
fn test_trim_right() {
    assert_eq!("".trim_right(), "");
    assert_eq!("a".trim_right(), "a");
    assert_eq!("    ".trim_right(), "");
    assert_eq!("blah     ".trim_right(), "blah");
    assert_eq!("wut   \u{3000}  ".trim_right(), "wut");
    assert_eq!(" hey".trim_right(), " hey");
}

/// `trim` removes whitespace from both edges.
#[test]
fn test_trim() {
    assert_eq!("".trim(), "");
    assert_eq!("a".trim(), "a");
    assert_eq!("    ".trim(), "");
    assert_eq!("    blah     ".trim(), "blah");
    assert_eq!("\nwut   \u{3000}  ".trim(), "wut");
    assert_eq!(" hey dude ".trim(), "hey dude");
}
453 | ||
/// `char::is_whitespace` over every char of assorted strings; an empty
/// string vacuously satisfies `all`.
#[test]
fn test_is_whitespace() {
    assert!("".chars().all(|c| c.is_whitespace()));
    assert!(" ".chars().all(|c| c.is_whitespace()));
    assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
    assert!("  \n\t   ".chars().all(|c| c.is_whitespace()));
    assert!(!"   _   ".chars().all(|c| c.is_whitespace()));
}
462 | ||
/// `slice_shift_char` (2015-era unstable API, since removed) splits off the
/// first char and the remainder; it returns `None` on an empty string.
#[test]
fn test_slice_shift_char() {
    let data = "ประเทศไทย中";
    assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
}

#[test]
fn test_slice_shift_char_2() {
    let empty = "";
    assert_eq!(empty.slice_shift_char(), None);
}
474 | ||
/// `from_utf8` rejects overlong encodings and surrogate code points, and
/// accepts well-formed boundary sequences.
#[test]
fn test_is_utf8() {
    // deny overlong encodings
    assert!(from_utf8(&[0xc0, 0x80]).is_err());
    assert!(from_utf8(&[0xc0, 0xae]).is_err());
    assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
    assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
    assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
    assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
    assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());

    // deny surrogates
    assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
    assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());

    // accept valid boundary sequences for each encoded length
    assert!(from_utf8(&[0xC2, 0x80]).is_ok());
    assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
    assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
    assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
    assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
    assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
    assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
    assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
}
499 | ||
/// `is_utf16` (from the 2015-era `rustc_unicode` crate, since removed)
/// validates UTF-16 code-unit sequences, including surrogate pairing.
/// Kept as-is to preserve behavior.
#[test]
fn test_is_utf16() {
    use rustc_unicode::str::is_utf16;

    macro_rules! pos {
        ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
    }

    // non-surrogates
    pos!(&[0x0000],
         &[0x0001, 0x0002],
         &[0xD7FF],
         &[0xE000]);

    // surrogate pairs (randomly generated with Python 3's
    // .encode('utf-16be'))
    pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
         &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
         &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

    // mixtures (also random)
    pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
         &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
         &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

    // negative tests
    macro_rules! neg {
        ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
    }

    neg!(
        // surrogate + regular unit
        &[0xdb45, 0x0000],
        // surrogate + lead surrogate
        &[0xd900, 0xd900],
        // unterminated surrogate
        &[0xd8ff],
        // trail surrogate without a lead
        &[0xddb7]);

    // random byte sequences that Python 3's .decode('utf-16be')
    // failed on
    neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
         &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
         &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
         &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
         &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
         &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
         &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
         &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
         &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
         &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
         &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
         &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
         &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
         &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
         &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
         &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
         &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
         &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
         &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
         &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
         &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
}
564 | ||
/// `as_bytes` exposes the raw UTF-8 encoding.
#[test]
fn test_as_bytes() {
    // UTF-8 bytes of "ศไทย中华Việt Nam" (no interior NULs).
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let b: &[u8] = &[];
    assert_eq!("".as_bytes(), b);
    assert_eq!("abc".as_bytes(), b"abc");
    assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
}
578 | ||
/// Regression test: panicking while a borrowed byte view of a `String` is
/// alive must not double-free.
#[test]
#[should_panic]
fn test_as_bytes_fail() {
    // Don't double free. (I'm not sure if this exercises the
    // original problem code path anymore.)
    let s = String::from("");
    let _bytes = s.as_bytes();
    panic!();
}
588 | ||
/// `as_ptr` points at the first byte of the UTF-8 data.
#[test]
fn test_as_ptr() {
    let buf = "hello".as_ptr();
    // SAFETY: offsets 0..5 are within the 5-byte "hello" allocation.
    unsafe {
        assert_eq!(*buf.offset(0), b'h');
        assert_eq!(*buf.offset(1), b'e');
        assert_eq!(*buf.offset(2), b'l');
        assert_eq!(*buf.offset(3), b'l');
        assert_eq!(*buf.offset(4), b'o');
    }
}
600 | ||
/// `subslice_offset` (2015-era unstable API, since removed) returns the byte
/// offset of a subslice within its parent; it panics for unrelated slices.
#[test]
fn test_subslice_offset() {
    let a = "kernelsprite";
    let b = &a[7..a.len()];
    let c = &a[0..a.len() - 6];
    assert_eq!(a.subslice_offset(b), 7);
    assert_eq!(a.subslice_offset(c), 0);

    let string = "a\nb\nc";
    let lines: Vec<&str> = string.lines().collect();
    assert_eq!(string.subslice_offset(lines[0]), 0);
    assert_eq!(string.subslice_offset(lines[1]), 2);
    assert_eq!(string.subslice_offset(lines[2]), 4);
}

#[test]
#[should_panic]
fn test_subslice_offset_2() {
    let a = "alchemiter";
    let b = "cruxtruder";
    a.subslice_offset(b);
}
623 | ||
/// Round-trip String -> bytes -> &str -> String preserves every byte.
/// NOTE(review): `debug!` is not a std macro; it is provided by logging
/// machinery elsewhere in this crate — kept to preserve behavior.
#[test]
fn vec_str_conversions() {
    let s1: String = String::from("All mimsy were the borogoves");

    let v: Vec<u8> = s1.as_bytes().to_vec();
    let s2: String = String::from(from_utf8(&v).unwrap());
    let n1 = s1.len();
    let n2 = v.len();
    assert_eq!(n1, n2);
    // Compare byte-by-byte, logging each pair as the original did.
    let mut i = 0;
    while i < n1 {
        let a: u8 = s1.as_bytes()[i];
        let b: u8 = s2.as_bytes()[i];
        debug!("{}", a);
        debug!("{}", b);
        assert_eq!(a, b);
        i += 1;
    }
}
643 | ||
/// `contains` with string patterns, including the empty pattern and
/// multibyte needles.
#[test]
fn test_contains() {
    assert!("abcde".contains("bcd"));
    assert!("abcde".contains("abcd"));
    assert!("abcde".contains("bcde"));
    assert!("abcde".contains(""));
    assert!("".contains(""));
    assert!(!"abcde".contains("def"));
    assert!(!"".contains("a"));

    let data = "ประเทศไทย中华Việt Nam";
    assert!(data.contains("ประเ"));
    assert!(data.contains("ะเ"));
    assert!(data.contains("中华"));
    assert!(!data.contains("ไท华"));
}

/// `contains` with a `char` pattern.
#[test]
fn test_contains_char() {
    assert!("abc".contains('b'));
    assert!("a".contains('a'));
    assert!(!"abc".contains('d'));
    assert!(!"".contains('a'));
}
668 | ||
/// `char_at` / `char_at_reverse` (2015-era unstable APIs, since removed)
/// read the char starting at / ending before a byte offset.
#[test]
fn test_char_at() {
    let s = "ศไทย中华Việt Nam";
    let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
    let mut pos = 0;
    for ch in &v {
        assert!(s.char_at(pos) == *ch);
        pos += ch.to_string().len();
    }
}

#[test]
fn test_char_at_reverse() {
    let s = "ศไทย中华Việt Nam";
    let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
    let mut pos = s.len();
    for ch in v.iter().rev() {
        assert!(s.char_at_reverse(pos) == *ch);
        pos -= ch.to_string().len();
    }
}
690 | ||
62682a34 SL |
/// `split_at` at every char boundary partitions the string losslessly.
#[test]
fn test_split_at() {
    let s = "ศไทย中华Việt Nam";
    for (index, _) in s.char_indices() {
        let (a, b) = s.split_at(index);
        assert_eq!(&s[..a.len()], a);
        assert_eq!(&s[a.len()..], b);
    }
    // Splitting at the very end yields (whole, "").
    let (a, b) = s.split_at(s.len());
    assert_eq!(a, s);
    assert_eq!(b, "");
}

/// `split_at_mut` yields two independent mutable halves.
#[test]
fn test_split_at_mut() {
    use std::ascii::AsciiExt;
    let mut s = "Hello World".to_string();
    {
        let (a, b) = s.split_at_mut(5);
        a.make_ascii_uppercase();
        b.make_ascii_lowercase();
    }
    assert_eq!(s, "HELLO world");
}

/// Splitting inside a multibyte char panics.
#[test]
#[should_panic]
fn test_split_at_boundscheck() {
    let s = "ศไทย中华Việt Nam";
    s.split_at(1);
}
722 | ||
c34b1796 AL |
/// `escape_unicode` / `escape_default` as of 2015 returned `String` (today
/// they return iterators); comparisons kept as-is to preserve behavior.
#[test]
fn test_escape_unicode() {
    assert_eq!("abc".escape_unicode(),
               String::from("\\u{61}\\u{62}\\u{63}"));
    assert_eq!("a c".escape_unicode(),
               String::from("\\u{61}\\u{20}\\u{63}"));
    assert_eq!("\r\n\t".escape_unicode(),
               String::from("\\u{d}\\u{a}\\u{9}"));
    assert_eq!("'\"\\".escape_unicode(),
               String::from("\\u{27}\\u{22}\\u{5c}"));
    assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
               String::from("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
    assert_eq!("\u{100}\u{ffff}".escape_unicode(),
               String::from("\\u{100}\\u{ffff}"));
    assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
               String::from("\\u{10000}\\u{10ffff}"));
    assert_eq!("ab\u{fb00}".escape_unicode(),
               String::from("\\u{61}\\u{62}\\u{fb00}"));
    assert_eq!("\u{1d4ea}\r".escape_unicode(),
               String::from("\\u{1d4ea}\\u{d}"));
}

/// `escape_default` leaves printable ASCII intact and escapes the rest.
#[test]
fn test_escape_default() {
    assert_eq!("abc".escape_default(), String::from("abc"));
    assert_eq!("a c".escape_default(), String::from("a c"));
    assert_eq!("\r\n\t".escape_default(), String::from("\\r\\n\\t"));
    assert_eq!("'\"\\".escape_default(), String::from("\\'\\\"\\\\"));
    assert_eq!("\u{100}\u{ffff}".escape_default(),
               String::from("\\u{100}\\u{ffff}"));
    assert_eq!("\u{10000}\u{10ffff}".escape_default(),
               String::from("\\u{10000}\\u{10ffff}"));
    assert_eq!("ab\u{fb00}".escape_default(),
               String::from("ab\\u{fb00}"));
    assert_eq!("\u{1d4ea}\r".escape_default(),
               String::from("\\u{1d4ea}\\r"));
}
760 | ||
/// `Ord::cmp` on `str` is lexicographic over bytes.
///
/// Bug fix: the original computed each comparison as a bare expression
/// statement (`"1234".cmp("123") == Greater;`) and discarded the boolean,
/// so the test could never fail. Every comparison is now asserted.
#[test]
fn test_total_ord() {
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    assert_eq!("12345555".cmp("123456"), Less);
    assert_eq!("22".cmp("1234"), Greater);
}
769 | ||
/// `char_range_at` / `char_range_at_reverse` (2015-era unstable APIs, since
/// removed) decode the char at a byte offset; the reverse variant saturates
/// at offset 0.
#[test]
fn test_char_range_at() {
    let data = "b¢€𤭢𤭢€¢b";
    assert_eq!('b', data.char_range_at(0).ch);
    assert_eq!('¢', data.char_range_at(1).ch);
    assert_eq!('€', data.char_range_at(3).ch);
    assert_eq!('𤭢', data.char_range_at(6).ch);
    assert_eq!('𤭢', data.char_range_at(10).ch);
    assert_eq!('€', data.char_range_at(14).ch);
    assert_eq!('¢', data.char_range_at(17).ch);
    assert_eq!('b', data.char_range_at(19).ch);
}

#[test]
fn test_char_range_at_reverse_underflow() {
    assert_eq!("abc".char_range_at_reverse(0).next, 0);
}
787 | ||
/// `chars()` yields scalar values in order.
#[test]
fn test_iterator() {
    let s = "ศไทย中华Việt Nam";
    let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];

    let mut pos = 0;
    for c in s.chars() {
        assert_eq!(c, v[pos]);
        pos += 1;
    }
    assert_eq!(pos, v.len());
}

/// `chars().rev()` yields the same scalar values in reverse order.
#[test]
fn test_rev_iterator() {
    let s = "ศไทย中华Việt Nam";
    let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];

    let mut pos = 0;
    for c in s.chars().rev() {
        assert_eq!(c, v[pos]);
        pos += 1;
    }
    assert_eq!(pos, v.len());
}
817 | ||
/// Every Unicode scalar value round-trips through UTF-8 encoding + `chars()`.
/// Uses the 2015-era `encode_utf8(&mut [u8]) -> Option<usize>` signature.
#[test]
fn test_chars_decoding() {
    let mut bytes = [0; 4];
    for c in (0..0x110000).filter_map(::std::char::from_u32) {
        let len = c.encode_utf8(&mut bytes).unwrap_or(0);
        let s = ::std::str::from_utf8(&bytes[..len]).unwrap();
        if Some(c) != s.chars().next() {
            panic!("character {:x}={} does not decode correctly", c as u32, c);
        }
    }
}

/// Same round-trip via the reverse char iterator.
#[test]
fn test_chars_rev_decoding() {
    let mut bytes = [0; 4];
    for c in (0..0x110000).filter_map(::std::char::from_u32) {
        let len = c.encode_utf8(&mut bytes).unwrap_or(0);
        let s = ::std::str::from_utf8(&bytes[..len]).unwrap();
        if Some(c) != s.chars().rev().next() {
            panic!("character {:x}={} does not decode correctly", c as u32, c);
        }
    }
}
841 | ||
/// A cloned `Chars` iterator continues from the same position.
#[test]
fn test_iterator_clone() {
    let s = "ศไทย中华Việt Nam";
    let mut it = s.chars();
    it.next();
    assert!(it.clone().zip(it).all(|(x, y)| x == y));
}

/// `bytes()` yields the raw UTF-8 bytes in order.
#[test]
fn test_bytesator() {
    let s = "ศไทย中华Việt Nam";
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let mut pos = 0;

    for b in s.bytes() {
        assert_eq!(b, v[pos]);
        pos += 1;
    }
}
865 | ||
/// `bytes().rev()` yields the UTF-8 bytes back to front.
#[test]
fn test_bytes_revator() {
    let s = "ศไทย中华Việt Nam";
    let v = [
        224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
        184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
        109
    ];
    let mut pos = v.len();

    for b in s.bytes().rev() {
        pos -= 1;
        assert_eq!(b, v[pos]);
    }
}

/// `char_indices()` pairs each char with its starting byte offset.
#[test]
fn test_char_indicesator() {
    let s = "ศไทย中华Việt Nam";
    let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
    let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];

    let mut pos = 0;
    for c in s.char_indices() {
        assert_eq!(c, (p[pos], v[pos]));
        pos += 1;
    }
    assert_eq!(pos, v.len());
    assert_eq!(pos, p.len());
}

/// `char_indices().rev()` yields the same pairs in reverse order.
#[test]
fn test_char_indices_revator() {
    let s = "ศไทย中华Việt Nam";
    let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
    let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];

    let mut pos = 0;
    for c in s.char_indices().rev() {
        assert_eq!(c, (p[pos], v[pos]));
        pos += 1;
    }
    assert_eq!(pos, v.len());
    assert_eq!(pos, p.len());
}
915 | ||
/// `splitn` must cap the number of fields, leaving the remainder of the
/// string (separators included) in the final field — for both a literal
/// `char` pattern and an equivalent closure pattern.
#[test]
fn test_splitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Literal char pattern.
    let pieces: Vec<&str> = data.splitn(4, ' ').collect();
    assert_eq!(pieces, ["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);

    // Closure pattern must behave identically to the char above.
    let pieces: Vec<&str> = data.splitn(4, |c: char| c == ' ').collect();
    assert_eq!(pieces, ["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);

    // Same again with a multi-byte (non-ASCII) separator.
    let pieces: Vec<&str> = data.splitn(4, 'ä').collect();
    assert_eq!(pieces, ["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);

    let pieces: Vec<&str> = data.splitn(4, |c: char| c == 'ä').collect();
    assert_eq!(pieces, ["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
}
933 | ||
/// `split` keeps the empty field produced by a trailing separator, while
/// `split_terminator` drops it; everything before it is identical.
#[test]
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Trailing '\n' yields a final empty field with plain `split`.
    let with_tail: Vec<&str> = data.split('\n').collect();
    assert_eq!(with_tail, ["", "Märy häd ä little lämb", "Little lämb", ""]);

    // `split_terminator` treats the final '\n' as a terminator, not a separator.
    let without_tail: Vec<&str> = data.split_terminator('\n').collect();
    assert_eq!(without_tail, ["", "Märy häd ä little lämb", "Little lämb"]);
}
944 | ||
/// `rsplit` must yield fields from the end of the string toward the
/// front, for char, literal-substring, and closure patterns alike.
#[test]
fn test_rsplit() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Char pattern.
    let fields: Vec<&str> = data.rsplit(' ').collect();
    assert_eq!(fields, ["lämb\n", "lämb\nLittle", "little", "ä", "häd", "\nMäry"]);

    // Substring pattern.
    let fields: Vec<&str> = data.rsplit("lämb").collect();
    assert_eq!(fields, ["\n", "\nLittle ", "\nMäry häd ä little "]);

    // Closure pattern.
    let fields: Vec<&str> = data.rsplit(|c: char| c == 'ä').collect();
    assert_eq!(fields, ["mb\n", "mb\nLittle l", " little l", "d ", "ry h", "\nM"]);
}
958 | ||
/// `rsplitn` must cap the number of fields while splitting from the end;
/// the final field holds everything left of the last accepted separator.
#[test]
fn test_rsplitn() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Char pattern.
    let pieces: Vec<&str> = data.rsplitn(2, ' ').collect();
    assert_eq!(pieces, ["lämb\n", "\nMäry häd ä little lämb\nLittle"]);

    // Substring pattern.
    let pieces: Vec<&str> = data.rsplitn(2, "lämb").collect();
    assert_eq!(pieces, ["\n", "\nMäry häd ä little lämb\nLittle "]);

    // Closure pattern.
    let pieces: Vec<&str> = data.rsplitn(2, |c: char| c == 'ä').collect();
    assert_eq!(pieces, ["mb\n", "\nMäry häd ä little lämb\nLittle l"]);
}
972 | ||
/// `split_whitespace` must treat runs of mixed whitespace (spaces, tabs,
/// newlines) as single separators and emit no empty fields.
#[test]
fn test_split_whitespace() {
    let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
    let words: Vec<&str> = data.split_whitespace().collect();
    assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}
979 | ||
/// NFD (canonical decomposition) test vectors.
/// Uses the deprecated in-std normalization API of this era.
#[allow(deprecated)]
#[test]
fn test_nfd_chars() {
    // Assert that NFD-decomposing `input` yields exactly `expected`.
    fn check(input: &str, expected: &str) {
        assert_eq!(input.nfd_chars().collect::<String>(), expected);
    }
    check("abc", "abc");
    check("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
    check("\u{2026}", "\u{2026}");
    check("\u{2126}", "\u{3a9}");
    check("\u{1e0b}\u{323}", "d\u{323}\u{307}");
    check("\u{1e0d}\u{307}", "d\u{323}\u{307}");
    check("a\u{301}", "a\u{301}");
    check("\u{301}a", "\u{301}a");
    check("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
    check("\u{ac1c}", "\u{1100}\u{1162}");
}
999 | ||
/// NFKD (compatibility decomposition) test vectors — like NFD but
/// compatibility characters (e.g. "\u{2026}") also decompose.
/// Uses the deprecated in-std normalization API of this era.
#[allow(deprecated)]
#[test]
fn test_nfkd_chars() {
    // Assert that NFKD-decomposing `input` yields exactly `expected`.
    fn check(input: &str, expected: &str) {
        assert_eq!(input.nfkd_chars().collect::<String>(), expected);
    }
    check("abc", "abc");
    check("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
    check("\u{2026}", "...");
    check("\u{2126}", "\u{3a9}");
    check("\u{1e0b}\u{323}", "d\u{323}\u{307}");
    check("\u{1e0d}\u{307}", "d\u{323}\u{307}");
    check("a\u{301}", "a\u{301}");
    check("\u{301}a", "\u{301}a");
    check("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
    check("\u{ac1c}", "\u{1100}\u{1162}");
}
1019 | ||
/// NFC (canonical composition) test vectors.
/// Uses the deprecated in-std normalization API of this era.
#[allow(deprecated)]
#[test]
fn test_nfc_chars() {
    // Assert that NFC-normalizing `input` yields exactly `expected`.
    fn check(input: &str, expected: &str) {
        assert_eq!(input.nfc_chars().collect::<String>(), expected);
    }
    check("abc", "abc");
    check("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
    check("\u{2026}", "\u{2026}");
    check("\u{2126}", "\u{3a9}");
    check("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
    check("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
    check("a\u{301}", "\u{e1}");
    check("\u{301}a", "\u{301}a");
    check("\u{d4db}", "\u{d4db}");
    check("\u{ac1c}", "\u{ac1c}");
    check("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}
1040 | ||
/// NFKC (compatibility composition) test vectors.
/// Uses the deprecated in-std normalization API of this era.
#[allow(deprecated)]
#[test]
fn test_nfkc_chars() {
    // Assert that NFKC-normalizing `input` yields exactly `expected`.
    fn check(input: &str, expected: &str) {
        assert_eq!(input.nfkc_chars().collect::<String>(), expected);
    }
    check("abc", "abc");
    check("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
    check("\u{2026}", "...");
    check("\u{2126}", "\u{3a9}");
    check("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
    check("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
    check("a\u{301}", "\u{e1}");
    check("\u{301}a", "\u{301}a");
    check("\u{d4db}", "\u{d4db}");
    check("\u{ac1c}", "\u{ac1c}");
    check("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
}
1061 | ||
/// `str::lines` must split on '\n', preserving interior empty lines, and
/// produce the same output with or without a trailing newline.
#[test]
fn test_lines() {
    fn check(data: &str, expected: &[&str]) {
        let got: Vec<&str> = data.lines().collect();
        assert_eq!(got, expected);
    }

    // With trailing newline: no trailing empty line is produced.
    check("\nMäry häd ä little lämb\n\nLittle lämb\n",
          &["", "Märy häd ä little lämb", "", "Little lämb"]);
    // Without trailing newline: identical result.
    check("\nMäry häd ä little lämb\n\nLittle lämb",
          &["", "Märy häd ä little lämb", "", "Little lämb"]);
}
1072 | ||
/// Grapheme-cluster segmentation against the official Unicode test data
/// (UAX #29 GraphemeBreakTest). Exercises the deprecated in-std
/// `graphemes`/`grapheme_indices` API of this era, in both "extended"
/// (`true`) and "legacy" (`false`) modes, forward and reverse.
#[allow(deprecated)]
#[test]
fn test_graphemes() {
    use std::iter::order;

    // Cases where extended and legacy segmentation agree.
    // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
    let test_same: [(_, &[_]); 325] = [
        ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
        ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
        ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
        ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
        ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
        ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
        ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
        ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
        ("\u{20}\u{300}", &["\u{20}\u{300}"]),
        ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
        ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
        ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
        ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
        ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
        ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
        ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
        ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
        ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
        ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
        ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
        ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
        ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
        ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
        ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
        ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
        ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
        ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
        ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
        ("\u{D}\u{A}", &["\u{D}\u{A}"]),
        ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
        ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
        ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
        ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
        ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
        ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
        ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
        ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
        ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
        ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
        ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
        ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
        ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
        ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
        ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
        ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
        ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
        ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
        ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
        ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
        ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
        ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
        ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
        ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
        ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
        ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
        ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
        ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
        ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
        ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
        ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
        ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
        ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
        ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
        ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
        ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
        ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
        ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
        ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
        ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
        ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
        ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
        ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
        ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
        ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
        ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
        ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
        ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
        ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
        ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
        ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
        ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
        ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
        ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
        ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
        ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
        ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
        ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
        ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
        ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
        ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
        ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
        ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
        ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
        ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
        ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
        ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
        ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
        ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
        ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
        ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
        ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
        ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
        ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
        ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
        ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
        ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
        ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
        ("\u{300}\u{300}", &["\u{300}\u{300}"]),
        ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
        ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
        ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
        ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
        ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
        ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
        ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
        ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
        ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
        ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
        ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
        ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
        ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
        ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
        ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
        ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
        ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
        ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
        ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
        ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
        ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
        ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
        ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
        ("\u{903}\u{300}", &["\u{903}\u{300}"]),
        ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
        ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
        ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
        ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
        ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
        ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
        ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
        ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
        ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
        ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
        ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
        ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
        ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
        ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
        ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
        ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
        ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
        ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
        ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
        ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
        ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
        ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
        ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
        ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
        ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
        ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
        ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
        ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
        ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
        ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
        ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
        ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
        ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
        ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
        ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
        ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
        ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
        ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
        ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
        ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
        ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
        ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
        ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
        ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
        ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
        ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
        ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
        ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
        ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
        ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
        ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
        ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
        ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
        ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
        ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
        ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
        ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
        ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
        ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
        ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
        ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
        ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
        ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
        ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
        ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
        ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
        ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
        ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
        ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
        ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
        ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
        ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
        ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
        ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
        ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
        ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
        ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
        ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
        ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
        ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
        ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
        ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
        ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
        ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
        ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
        ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
        ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
        ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
        ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
        ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
        ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
        ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
        ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
        ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
        ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
        ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
        ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
        ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
        ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
        ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
        ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
        ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
        ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
        ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
        ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
        ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
        ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
        ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
        ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
        ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
        ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
        ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
        ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
        ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
        ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
        ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
        ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
        ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
        ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
        ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
        ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
        ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
        ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
        ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
        ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
        ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
        ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
        ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
        ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
        ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
        ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
        ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
        ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
        ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
        ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
        ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
        ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
        ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
        ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
        ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
        ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
        ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
        ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
        ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
        ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
        ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
        ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
        ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
        ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
        ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
        ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
        ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
        ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
        ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
        ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
        ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
        ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
        ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
        ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
        ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
        ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
        ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
        ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
        ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
        ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
        ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
        ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
        ("\u{378}\u{300}", &["\u{378}\u{300}"]),
        ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
        ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
        ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
        ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
        ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
        ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
        ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
        ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
        ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
        ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
        ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
        ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
        ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
        ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
        ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
        ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
        ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
        ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
        ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
         &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
        ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
         &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
        ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
        ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
         "\u{1F1E7}\u{1F1E8}"]),
        ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
         &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
        ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
        ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
    ];

    // Cases where extended (middle column) and legacy (right column)
    // segmentation disagree — all involve spacing mark \u{903}.
    let test_diff: [(_, &[_], &[_]); 23] = [
        ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]),
        ("\u{20}\u{308}\u{903}", &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]),
        ("\u{D}\u{308}\u{903}", &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]),
        ("\u{A}\u{308}\u{903}", &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]),
        ("\u{1}\u{308}\u{903}", &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]),
        ("\u{300}\u{903}", &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]),
        ("\u{300}\u{308}\u{903}", &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]),
        ("\u{903}\u{903}", &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]),
        ("\u{903}\u{308}\u{903}", &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]),
        ("\u{1100}\u{903}", &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]),
        ("\u{1100}\u{308}\u{903}", &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]),
        ("\u{1160}\u{903}", &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]),
        ("\u{1160}\u{308}\u{903}", &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]),
        ("\u{11A8}\u{903}", &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]),
        ("\u{11A8}\u{308}\u{903}", &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]),
        ("\u{AC00}\u{903}", &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]),
        ("\u{AC00}\u{308}\u{903}", &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]),
        ("\u{AC01}\u{903}", &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]),
        ("\u{AC01}\u{308}\u{903}", &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]),
        ("\u{1F1E6}\u{903}", &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]),
        ("\u{1F1E6}\u{308}\u{903}", &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]),
        ("\u{378}\u{903}", &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]),
        ("\u{378}\u{308}\u{903}", &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
    ];

    // Extended and legacy modes must agree on these, forward and reverse.
    for &(input, clusters) in &test_same[..] {
        assert!(order::equals(input.graphemes(true), clusters.iter().cloned()));
        assert!(order::equals(input.graphemes(false), clusters.iter().cloned()));
        assert!(order::equals(input.graphemes(true).rev(), clusters.iter().rev().cloned()));
        assert!(order::equals(input.graphemes(false).rev(), clusters.iter().rev().cloned()));
    }

    // Extended and legacy modes each match their own expected column.
    for &(input, extended, legacy) in &test_diff {
        assert!(order::equals(input.graphemes(true), extended.iter().cloned()));
        assert!(order::equals(input.graphemes(false), legacy.iter().cloned()));
        assert!(order::equals(input.graphemes(true).rev(), extended.iter().rev().cloned()));
        assert!(order::equals(input.graphemes(false).rev(), legacy.iter().rev().cloned()));
    }

    // Byte offsets from `grapheme_indices`, forward and reverse.
    let s = "a̐éö̲\r\n";
    let forward = s.grapheme_indices(true).collect::<Vec<(usize, &str)>>();
    let expected_fwd: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
    assert_eq!(forward, expected_fwd);
    let backward = s.grapheme_indices(true).rev().collect::<Vec<(usize, &str)>>();
    let expected_rev: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")];
    assert_eq!(backward, expected_rev);

    // size_hint before and after exhausting the iterator.
    let mut indices = s.grapheme_indices(true);
    {
        let by_ref = indices.by_ref();
        assert_eq!(by_ref.size_hint(), (1, Some(13)));
        assert_eq!(by_ref.count(), 4);
    }
    assert_eq!(indices.size_hint(), (0, Some(0)));

    // Reverse iteration must handle "\n" at the beginning of the string
    // (i.e. not pair it into a spurious "\r\n" cluster).
    let s = "\n\r\n\r";
    let reversed = s.graphemes(true).rev().collect::<Vec<&str>>();
    let expected: &[_] = &["\r", "\r\n", "\n"];
    assert_eq!(reversed, expected);
}
1483 | ||
#[test]
fn test_splitator() {
    // Each case is (haystack, separator, expected pieces). `str::split`
    // with a &str separator keeps empty leading/trailing fields, so a
    // separator at either end of the haystack yields a "".
    let cases: &[(&str, &str, &[&str])] = &[
        ("--1233345--", "12345", &["--1233345--"]),
        ("abc::hello::there", "::", &["abc", "hello", "there"]),
        ("::hello::there", "::", &["", "hello", "there"]),
        ("hello::there::", "::", &["hello", "there", ""]),
        ("::hello::there::", "::", &["", "hello", "there", ""]),
        ("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]),
        ("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]),
        ("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]),
        (".XXX.YYY.", ".", &["", "XXX", "YYY", ""]),
        ("", ".", &[""]),
        ("zz", "zz", &["", ""]),
        ("ok", "z", &["ok"]),
        ("zzz", "zz", &["", "z"]),
        ("zzzzz", "zz", &["", "", "z"]),
    ];
    for &(haystack, sep, expected) in cases {
        let got: Vec<&str> = haystack.split(sep).collect();
        assert_eq!(got, expected);
    }
}
1505 | ||
#[test]
fn test_str_default() {
    use std::default::Default;

    // Both `&str` and `String` default to the empty string; check via a
    // single generic driver so the two impls are exercised identically.
    fn check<S: Default + AsRef<str>>() {
        let value: S = Default::default();
        assert_eq!(value.as_ref(), "");
    }

    check::<&str>();
    check::<String>();
}
1518 | ||
#[test]
fn test_str_container() {
    // Total byte length across a slice of string slices.
    fn total_len(v: &[&str]) -> usize {
        v.iter().fold(0, |acc, x| acc + x.len())
    }

    let s = String::from("01234");
    assert_eq!(5, total_len(&["012", "", "34"]));
    // `&String` deref-coerces to `&str`, so owned strings are accepted too.
    assert_eq!(5,
               total_len(&[&String::from("01"),
                           &String::from("2"),
                           &String::from("34"),
                           &String::from("")]));
    assert_eq!(5, total_len(&[&s]));
}
1533 | ||
#[test]
fn test_str_from_utf8() {
    // Plain ASCII round-trips.
    let ascii = b"hello";
    assert_eq!(from_utf8(ascii), Ok("hello"));

    // Multi-byte UTF-8 round-trips as well.
    let multibyte = "ศไทย中华Việt Nam".as_bytes();
    assert_eq!(from_utf8(multibyte), Ok("ศไทย中华Việt Nam"));

    // 0xFF can never occur in well-formed UTF-8, so decoding must fail.
    let invalid = b"hello\xFF";
    assert!(from_utf8(invalid).is_err());
}
1545 | ||
#[test]
fn test_pattern_deref_forward() {
    // &str, &&str and &String are all usable as patterns thanks to the
    // deref-forwarding Pattern impls.
    let haystack = "aabcdaa";
    assert!(haystack.contains("bcd"));
    assert!(haystack.contains(&"bcd"));
    assert!(haystack.contains(&"bcd".to_string()));
}
1553 | ||
1554 | #[test] | |
1555 | fn test_empty_match_indices() { | |
1556 | let data = "aä中!"; | |
1557 | let vec: Vec<_> = data.match_indices("").collect(); | |
1558 | assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); | |
1559 | } | |
1560 | ||
#[test]
fn test_bool_from_str() {
    // Only the exact tokens "true" / "false" parse as bool.
    assert_eq!("true".parse::<bool>().ok(), Some(true));
    assert_eq!("false".parse::<bool>().ok(), Some(false));
    assert_eq!("not even a boolean".parse::<bool>().ok(), None);
}
1567 | ||
// Assert that `s` contains every substring of itself, including "".
// NOTE(review): slicing is byte-indexed; both call sites in this file pass
// ASCII-only haystacks, so every index is a char boundary.
fn check_contains_all_substrings(s: &str) {
    assert!(s.contains(""));
    let len = s.len();
    for start in 0..len {
        for end in start + 1..len + 1 {
            assert!(s.contains(&s[start..end]));
        }
    }
}
1576 | ||
#[test]
fn strslice_issue_16589() {
    assert!("bananas".contains("nana"));

    // Regression check for #16589: contains("abcdabcd") used to return
    // false for haystacks like this; exercise every substring to be safe.
    let haystack = "012345678901234567890123456789bcdabcdabcd";
    check_contains_all_substrings(haystack);
}
1585 | ||
#[test]
fn strslice_issue_16878() {
    // Regression checks for #16878: partial matches that fail near the end
    // must not be reported as full matches.
    let first = "1234567ah012345678901ah";
    assert!(!first.contains("hah"));
    let second = "00abc01234567890123456789abc";
    assert!(!second.contains("bcabc"));
}
1591 | ||
1592 | ||
#[test]
fn test_strslice_contains() {
    // ASCII prose haystack; verify contains() finds every substring.
    let quote = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'";
    check_contains_all_substrings(quote);
}
1598 | ||
#[test]
fn test_rsplitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // rsplitn yields fields back-to-front, so the expected lists below are
    // written in reverse document order (the original reversed the Vec
    // before comparing; asserting the raw order is equivalent).
    let split: Vec<&str> = data.rsplitn(4, ' ').collect();
    assert_eq!(split, ["lämb\n", "lämb\nLittle", "little", "\nMäry häd ä"]);

    let split: Vec<&str> = data.rsplitn(4, |c: char| c == ' ').collect();
    assert_eq!(split, ["lämb\n", "lämb\nLittle", "little", "\nMäry häd ä"]);

    // Unicode separator.
    let split: Vec<&str> = data.rsplitn(4, 'ä').collect();
    assert_eq!(split, ["mb\n", "mb\nLittle l", " little l", "\nMäry häd "]);

    let split: Vec<&str> = data.rsplitn(4, |c: char| c == 'ä').collect();
    assert_eq!(split, ["mb\n", "mb\nLittle l", " little l", "\nMäry häd "]);
}
1620 | ||
#[test]
fn test_split_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // split() is double-ended: iterating backwards and reversing must give
    // the same fields as iterating forwards.
    fn check(forward: Vec<&str>, mut backward: Vec<&str>, expected: &[&str]) {
        assert_eq!(forward, expected);
        backward.reverse();
        assert_eq!(backward, expected);
    }

    let by_space: &[&str] = &["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"];
    check(data.split(' ').collect(),
          data.split(' ').rev().collect(),
          by_space);
    check(data.split(|c: char| c == ' ').collect(),
          data.split(|c: char| c == ' ').rev().collect(),
          by_space);

    // Unicode separator.
    let by_umlaut: &[&str] = &["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"];
    check(data.split('ä').collect(),
          data.split('ä').rev().collect(),
          by_umlaut);
    check(data.split(|c: char| c == 'ä').collect(),
          data.split(|c: char| c == 'ä').rev().collect(),
          by_umlaut);
}
1654 | ||
#[test]
fn test_rev_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Reverse-collected, so the expected pieces are listed last-to-first.
    let pieces: Vec<&str> = data.split('\n').rev().collect();
    assert_eq!(pieces, ["", "Little lämb", "Märy häd ä little lämb", ""]);

    // split_terminator drops the empty field after a trailing separator,
    // but keeps the empty field produced by the leading '\n'.
    let pieces: Vec<&str> = data.split_terminator('\n').rev().collect();
    assert_eq!(pieces, ["Little lämb", "Märy häd ä little lämb", ""]);
}
1667 | ||
#[test]
fn test_utf16_code_units() {
    // Utf16Encoder (from the in-tree `rustc_unicode` crate) turns a char
    // iterator into UTF-16 code units: 'é' (U+00E9) fits in a single unit,
    // while U+1F4A9 is outside the BMP and becomes the surrogate pair
    // 0xD83D 0xDCA9.
    use rustc_unicode::str::Utf16Encoder;
    assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
               [0xE9, 0xD83D, 0xDCA9])
}
1674 | ||
#[test]
fn starts_with_in_unicode() {
    // A multi-byte leading char must not be mistaken for an ASCII prefix.
    let line = "├── Cargo.toml";
    assert!(!line.starts_with("# "));
}
1679 | ||
#[test]
fn starts_short_long() {
    // A needle longer than the haystack can never be a prefix, with or
    // without multi-byte chars involved.
    assert!(!"".starts_with("##"));
    assert!(!"##".starts_with("####"));
    assert!("####".starts_with("##"));
    assert!(!"##ä".starts_with("####"));
    assert!("####ä".starts_with("##"));
    assert!(!"##".starts_with("####ä"));
    assert!("##ä##".starts_with("##ä"));

    // The empty string is a prefix of every string, including itself.
    for haystack in &["", "ä", "#ä", "##ä", "ä###", "#ä##", "##ä#"] {
        assert!(haystack.starts_with(""));
    }
}
1698 | ||
#[test]
fn contains_weird_cases() {
    let haystack = "* \t";
    assert!(haystack.contains(' '));
    assert!(!haystack.contains('?'));
    // An astral-plane char is wider than the whole ASCII haystack.
    assert!(!haystack.contains('\u{1F4A9}'));
}
1705 | ||
#[test]
fn trim_ws() {
    // Shared predicate; a fn item can be reused freely as a char pattern.
    fn ws(c: char) -> bool { c.is_whitespace() }

    // Non-whitespace in the middle: only the matching side is trimmed.
    assert_eq!(" \t a \t ".trim_left_matches(ws), "a \t ");
    assert_eq!(" \t a \t ".trim_right_matches(ws), " \t a");
    assert_eq!(" \t a \t ".trim_matches(ws), "a");

    // An all-whitespace string trims to empty from either side.
    assert_eq!(" \t \t ".trim_left_matches(ws), "");
    assert_eq!(" \t \t ".trim_right_matches(ws), "");
    assert_eq!(" \t \t ".trim_matches(ws), "");
}
1721 | ||
62682a34 SL |
#[test]
fn to_lowercase() {
    assert_eq!("".to_lowercase(), "");
    assert_eq!("AÉDžaé ".to_lowercase(), "aédžaé ");

    // Final-sigma handling (https://github.com/rust-lang/rust/issues/26035):
    // Σ lowercases to ς at the end of a word and σ elsewhere; apostrophes
    // are word-internal and must not break a word.
    let cases = [
        ("ΑΣ", "ας"),
        ("Α'Σ", "α'ς"),
        ("Α''Σ", "α''ς"),
        ("ΑΣ Α", "ας α"),
        ("Α'Σ Α", "α'ς α"),
        ("Α''Σ Α", "α''ς α"),
        ("ΑΣ' Α", "ας' α"),
        ("ΑΣ'' Α", "ας'' α"),
        ("Α'Σ' Α", "α'ς' α"),
        ("Α''Σ'' Α", "α''ς'' α"),
        ("Α Σ", "α σ"),
        ("Α 'Σ", "α 'σ"),
        ("Α ''Σ", "α ''σ"),
        ("Σ", "σ"),
        ("'Σ", "'σ"),
        ("''Σ", "''σ"),
        ("ΑΣΑ", "ασα"),
        ("ΑΣ'Α", "ασ'α"),
        ("ΑΣ''Α", "ασ''α"),
    ];
    for &(upper, lower) in &cases {
        assert_eq!(upper.to_lowercase(), lower);
    }
}
1754 | ||
#[test]
fn to_uppercase() {
    assert_eq!("".to_uppercase(), "");
    // Covers one-to-many mappings (ß → SS, fi → FI, ᾀ → ἈΙ) and the
    // titlecase letter Dž uppercasing to DŽ.
    let input = "aéDžßfiᾀ";
    assert_eq!(input.to_uppercase(), "AÉDŽSSFIἈΙ");
}
1760 | ||
c1a9b12d SL |
1761 | #[test] |
1762 | fn test_into_string() { | |
1763 | // The only way to acquire a Box<str> in the first place is through a String, so just | |
1764 | // test that we can round-trip between Box<str> and String. | |
1765 | let string = String::from("Some text goes here"); | |
1766 | assert_eq!(string.clone().into_boxed_slice().into_string(), string); | |
1767 | } | |
1768 | ||
1769 | #[test] | |
1770 | fn test_box_slice_clone() { | |
1771 | let data = String::from("hello HELLO hello HELLO yes YES 5 中ä华!!!"); | |
1772 | let data2 = data.clone().into_boxed_slice().clone().into_string(); | |
1773 | ||
1774 | assert_eq!(data, data2); | |
1775 | } | |
1776 | ||
9346a6ac AL |
// Step-by-step validation of the (unstable) string Pattern/Searcher API:
// each test drives a searcher over a haystack and compares the exact
// sequence of Match/Reject spans against a hand-written expectation.
mod pattern {
    use std::str::pattern::Pattern;
    use std::str::pattern::{Searcher, ReverseSearcher};
    use std::str::pattern::SearchStep::{self, Match, Reject, Done};

    // Expands to a module with `fwd`/`bwd` tests that run the same
    // pattern/haystack/expected-steps triple forwards and backwards.
    macro_rules! make_test {
        ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
            #[allow(unused_imports)]
            mod $name {
                use std::str::pattern::SearchStep::{Match, Reject};
                use super::{cmp_search_to_vec};
                #[test]
                fn fwd() {
                    cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
                }
                #[test]
                fn bwd() {
                    cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
                }
            }
        }
    }

    // Drives `pat`'s searcher over `haystack` (back-to-front when `rev`)
    // and asserts the emitted steps equal `right`. Also validates that
    // `right` itself is well-formed: contiguous, non-overlapping spans
    // that cover the whole haystack.
    fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
                                             right: Vec<SearchStep>)
    where P::Searcher: ReverseSearcher<'a>
    {
        let mut searcher = pat.into_searcher(haystack);
        let mut v = vec![];
        loop {
            // Pull steps from the chosen end until the searcher reports Done.
            match if !rev {searcher.next()} else {searcher.next_back()} {
                Match(a, b) => v.push(Match(a, b)),
                Reject(a, b) => v.push(Reject(a, b)),
                Done => break,
            }
        }
        if rev {
            // Reverse searching yields spans back-to-front; normalize so
            // both directions compare against the same expected order.
            v.reverse();
        }

        let mut first_index = 0;
        let mut err = None;

        // Sanity-check the expectation: each span must start exactly where
        // the previous one ended (no gaps, no overlaps, no a > b).
        for (i, e) in right.iter().enumerate() {
            match *e {
                Match(a, b) | Reject(a, b)
                    if a <= b && a == first_index => {
                    first_index = b;
                }
                _ => {
                    err = Some(i);
                    break;
                }
            }
        }

        if let Some(err) = err {
            panic!("Input skipped range at {}", err);
        }

        // The expected spans must extend to the end of the haystack.
        if first_index != haystack.len() {
            panic!("Did not cover whole input");
        }

        assert_eq!(v, right);
    }

    // &str needle inside an ASCII haystack.
    make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
        Reject(0, 1),
        Match (1, 3),
        Reject(3, 4),
        Match (4, 6),
        Reject(6, 7),
    ]);
    // Back-to-back matches must be reported as separate, non-overlapping spans.
    make_test!(str_searcher_ascii_haystack_seq, "bb", "abbcbbbbd", [
        Reject(0, 1),
        Match (1, 3),
        Reject(3, 4),
        Match (4, 6),
        Match (6, 8),
        Reject(8, 9),
    ]);
    // The empty needle matches at every char boundary, including both ends.
    make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
        Match (0, 0),
        Reject(0, 1),
        Match (1, 1),
        Reject(1, 2),
        Match (2, 2),
        Reject(2, 3),
        Match (3, 3),
        Reject(3, 4),
        Match (4, 4),
        Reject(4, 5),
        Match (5, 5),
        Reject(5, 6),
        Match (6, 6),
        Reject(6, 7),
        Match (7, 7),
    ]);
    // Rejected spans must follow the 3-byte char boundaries of the haystack.
    make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
        Reject(0, 3),
        Reject(3, 6),
        Reject(6, 9),
    ]);
    make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
        Match (0, 0),
        Reject(0, 3),
        Match (3, 3),
        Reject(3, 6),
        Match (6, 6),
        Reject(6, 9),
        Match (9, 9),
    ]);
    make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
        Match(0, 0),
    ]);
    // A non-empty needle in an empty haystack produces no steps at all.
    make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
    ]);
    // char needle: adjacent single-char matches are reported individually.
    make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
        Reject(0, 1),
        Match (1, 2),
        Match (2, 3),
        Reject(3, 4),
        Match (4, 5),
        Match (5, 6),
        Reject(6, 7),
    ]);
    make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
        Reject(0, 3),
        Reject(3, 6),
        Reject(6, 9),
    ]);
    make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
        Reject(0, 1),
        Reject(1, 2),
        Reject(2, 3),
    ]);

}
1916 | ||
// Expands each `(args...) -> [expected...];` case into a #[test]. The
// two-expression form (`with $fwd, $bwd;`) additionally checks that the
// reverse constructor yields the same items as the forward one, just in
// reverse order; the one-expression form only checks the forward
// direction (for iterators that are not double-ended).
macro_rules! generate_iterator_test {
    {
        $name:ident {
            $(
                ($($arg:expr),*) -> [$($t:tt)*];
            )*
        }
        with $fwd:expr, $bwd:expr;
    } => {
        #[test]
        fn $name() {
            $(
                {
                    let res = vec![$($t)*];

                    // Forward iterator must yield exactly the expected items.
                    let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect();
                    assert_eq!(fwd_vec, res);

                    // Backward iterator must yield the same items, reversed.
                    let mut bwd_vec: Vec<_> = ($bwd)($($arg),*).collect();
                    bwd_vec.reverse();
                    assert_eq!(bwd_vec, res);
                }
            )*
        }
    };
    {
        $name:ident {
            $(
                ($($arg:expr),*) -> [$($t:tt)*];
            )*
        }
        with $fwd:expr;
    } => {
        #[test]
        fn $name() {
            $(
                {
                    let res = vec![$($t)*];

                    let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect();
                    assert_eq!(fwd_vec, res);
                }
            )*
        }
    }
}
1963 | ||
// split/rsplit must agree: rsplit is split traversed from the other end.
generate_iterator_test! {
    double_ended_split {
        ("foo.bar.baz", '.') -> ["foo", "bar", "baz"];
        ("foo::bar::baz", "::") -> ["foo", "bar", "baz"];
    }
    with str::split, str::rsplit;
}

generate_iterator_test! {
    double_ended_split_terminator {
        ("foo;bar;baz;", ';') -> ["foo", "bar", "baz"];
    }
    with str::split_terminator, str::rsplit_terminator;
}

generate_iterator_test! {
    double_ended_matches {
        ("a1b2c3", char::is_numeric) -> ["1", "2", "3"];
    }
    with str::matches, str::rmatches;
}

generate_iterator_test! {
    double_ended_match_indices {
        ("a1b2c3", char::is_numeric) -> [(1, 2), (3, 4), (5, 6)];
    }
    with str::match_indices, str::rmatch_indices;
}

// splitn/rsplitn cap the number of pieces, so they are not double-ended;
// only the forward direction of each is checked.
generate_iterator_test! {
    not_double_ended_splitn {
        ("foo::bar::baz", 2, "::") -> ["foo", "bar::baz"];
    }
    with str::splitn;
}

generate_iterator_test! {
    not_double_ended_rsplitn {
        ("foo::bar::baz", 2, "::") -> ["baz", "foo::bar"];
    }
    with str::rsplitn;
}
2006 | ||
// Benchmarks for str iteration, splitting, searching and joining, using
// the unstable in-tree `test` crate's Bencher. `black_box` defeats
// constant folding so the measured work is not optimized away.
mod bench {
    use test::{Bencher, black_box};

    #[bench]
    fn char_iterator(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().count());
    }

    #[bench]
    fn char_iterator_for(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| {
            for ch in s.chars() { black_box(ch); }
        });
    }

    #[bench]
    fn char_iterator_ascii(b: &mut Bencher) {
        // Pure-ASCII haystack: every char decodes from a single byte.
        let s = "Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().count());
    }

    #[bench]
    fn char_iterator_rev(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().rev().count());
    }

    #[bench]
    fn char_iterator_rev_for(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| {
            for ch in s.chars().rev() { black_box(ch); }
        });
    }

    #[bench]
    fn char_indicesator(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let len = s.chars().count();

        b.iter(|| assert_eq!(s.char_indices().count(), len));
    }

    #[bench]
    fn char_indicesator_rev(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let len = s.chars().count();

        b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
    }

    #[bench]
    fn split_unicode_ascii(b: &mut Bencher) {
        let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

        b.iter(|| assert_eq!(s.split('V').count(), 3));
    }

    #[bench]
    fn split_ascii(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(' ').count(), len));
    }

    // Same split, but with a plain fn as the predicate pattern.
    #[bench]
    fn split_extern_fn(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();
        fn pred(c: char) -> bool { c == ' ' }

        b.iter(|| assert_eq!(s.split(pred).count(), len));
    }

    // Same split, but with a closure as the predicate pattern.
    #[bench]
    fn split_closure(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
    }

    // Same split, but with a &[char] set as the pattern.
    #[bench]
    fn split_slice(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        let c: &[char] = &[' '];
        b.iter(|| assert_eq!(s.split(c).count(), len));
    }

    #[bench]
    fn bench_join(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let sep = "→";
        let v = vec![s, s, s, s, s, s, s, s, s, s];
        b.iter(|| {
            // 10 copies of `s` joined by 9 separators.
            assert_eq!(v.join(sep).len(), s.len() * 10 + sep.len() * 9);
        })
    }

    #[bench]
    fn bench_contains_short_short(b: &mut Bencher) {
        let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
        let needle = "sit";

        b.iter(|| {
            assert!(haystack.contains(needle));
        })
    }

    #[bench]
    fn bench_contains_short_long(b: &mut Bencher) {
        // Multi-paragraph haystack; the `\` continuations splice the long
        // lines into the literal without embedding the line breaks.
        let haystack = "\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.

In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.

Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.

Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.

Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum.";
        let needle = "english";

        b.iter(|| {
            assert!(!haystack.contains(needle));
        })
    }

    // Worst case for a naive substring search: long run of 'a' with a
    // needle that almost matches everywhere.
    #[bench]
    fn bench_contains_bad_naive(b: &mut Bencher) {
        let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let needle = "aaaaaaaab";

        b.iter(|| {
            assert!(!haystack.contains(needle));
        })
    }

    #[bench]
    fn bench_contains_equal(b: &mut Bencher) {
        let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
        let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

        b.iter(|| {
            assert!(haystack.contains(needle));
        })
    }

    // Expands to one #[bench] fn binding `$str` to `$s` and timing `$code`.
    macro_rules! make_test_inner {
        ($s:ident, $code:expr, $name:ident, $str:expr) => {
            #[bench]
            fn $name(bencher: &mut Bencher) {
                let mut $s = $str;
                black_box(&mut $s);
                bencher.iter(|| $code);
            }
        }
    }

    // Expands to a module benchmarking `$code` against four haystacks:
    // short ASCII, short mixed, short 4-byte chars, and a long ASCII text.
    macro_rules! make_test {
        ($name:ident, $s:ident, $code:expr) => {
            mod $name {
                use test::Bencher;
                use test::black_box;

                // Short strings: 65 bytes each
                make_test_inner!($s, $code, short_ascii,
                    "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!");
                make_test_inner!($s, $code, short_mixed,
                    "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!");
                make_test_inner!($s, $code, short_pile_of_poo,
                    "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!");
                make_test_inner!($s, $code, long_lorem_ipsum,"\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.

In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.

Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.

Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.

Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum!");
            }
        }
    }

    make_test!(chars_count, s, s.chars().count());

    make_test!(contains_bang_str, s, s.contains("!"));
    make_test!(contains_bang_char, s, s.contains('!'));

    make_test!(match_indices_a_str, s, s.match_indices("a").count());

    make_test!(split_a_str, s, s.split("a").count());

    make_test!(trim_ascii_char, s, {
        use std::ascii::AsciiExt;
        s.trim_matches(|c: char| c.is_ascii())
    });
    make_test!(trim_left_ascii_char, s, {
        use std::ascii::AsciiExt;
        s.trim_left_matches(|c: char| c.is_ascii())
    });
    make_test!(trim_right_ascii_char, s, {
        use std::ascii::AsciiExt;
        s.trim_right_matches(|c: char| c.is_ascii())
    });

    make_test!(find_underscore_char, s, s.find('_'));
    make_test!(rfind_underscore_char, s, s.rfind('_'));
    make_test!(find_underscore_str, s, s.find("_"));

    make_test!(find_zzz_char, s, s.find('\u{1F4A4}'));
    make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}'));
    make_test!(find_zzz_str, s, s.find("\u{1F4A4}"));

    make_test!(split_space_char, s, s.split(' ').count());
    make_test!(split_terminator_space_char, s, s.split_terminator(' ').count());

    make_test!(splitn_space_char, s, s.splitn(10, ' ').count());
    make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());

    make_test!(split_space_str, s, s.split(" ").count());
    make_test!(split_ad_str, s, s.split("ad").count());
}