]>
Commit | Line | Data |
---|---|---|
9346a6ac | 1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
c34b1796 AL |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | use std::cmp::Ordering::{Equal, Greater, Less}; | |
62682a34 | 12 | use std::str::from_utf8; |
c34b1796 AL |
13 | |
14 | #[test] | |
15 | fn test_le() { | |
16 | assert!("" <= ""); | |
17 | assert!("" <= "foo"); | |
18 | assert!("foo" <= "foo"); | |
19 | assert!("foo" != "bar"); | |
20 | } | |
21 | ||
d9579d0f | 22 | #[allow(deprecated)] |
c34b1796 AL |
23 | #[test] |
24 | fn test_len() { | |
25 | assert_eq!("".len(), 0); | |
26 | assert_eq!("hello world".len(), 11); | |
27 | assert_eq!("\x63".len(), 1); | |
28 | assert_eq!("\u{a2}".len(), 2); | |
29 | assert_eq!("\u{3c0}".len(), 2); | |
30 | assert_eq!("\u{2620}".len(), 3); | |
31 | assert_eq!("\u{1d11e}".len(), 4); | |
32 | ||
33 | assert_eq!("".chars().count(), 0); | |
34 | assert_eq!("hello world".chars().count(), 11); | |
35 | assert_eq!("\x63".chars().count(), 1); | |
36 | assert_eq!("\u{a2}".chars().count(), 1); | |
37 | assert_eq!("\u{3c0}".chars().count(), 1); | |
38 | assert_eq!("\u{2620}".chars().count(), 1); | |
39 | assert_eq!("\u{1d11e}".chars().count(), 1); | |
40 | assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19); | |
41 | ||
42 | assert_eq!("hello".width(false), 10); | |
43 | assert_eq!("hello".width(true), 10); | |
44 | assert_eq!("\0\0\0\0\0".width(false), 0); | |
45 | assert_eq!("\0\0\0\0\0".width(true), 0); | |
46 | assert_eq!("".width(false), 0); | |
47 | assert_eq!("".width(true), 0); | |
48 | assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4); | |
49 | assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8); | |
50 | } | |
51 | ||
52 | #[test] | |
53 | fn test_find() { | |
54 | assert_eq!("hello".find('l'), Some(2)); | |
55 | assert_eq!("hello".find(|c:char| c == 'o'), Some(4)); | |
56 | assert!("hello".find('x').is_none()); | |
57 | assert!("hello".find(|c:char| c == 'x').is_none()); | |
58 | assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30)); | |
59 | assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30)); | |
60 | } | |
61 | ||
62 | #[test] | |
63 | fn test_rfind() { | |
64 | assert_eq!("hello".rfind('l'), Some(3)); | |
65 | assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4)); | |
66 | assert!("hello".rfind('x').is_none()); | |
67 | assert!("hello".rfind(|c:char| c == 'x').is_none()); | |
68 | assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30)); | |
69 | assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30)); | |
70 | } | |
71 | ||
72 | #[test] | |
73 | fn test_collect() { | |
62682a34 | 74 | let empty = String::from(""); |
c34b1796 AL |
75 | let s: String = empty.chars().collect(); |
76 | assert_eq!(empty, s); | |
62682a34 | 77 | let data = String::from("ประเทศไทย中"); |
c34b1796 AL |
78 | let s: String = data.chars().collect(); |
79 | assert_eq!(data, s); | |
80 | } | |
81 | ||
82 | #[test] | |
83 | fn test_into_bytes() { | |
62682a34 | 84 | let data = String::from("asdf"); |
c34b1796 AL |
85 | let buf = data.into_bytes(); |
86 | assert_eq!(buf, b"asdf"); | |
87 | } | |
88 | ||
89 | #[test] | |
90 | fn test_find_str() { | |
91 | // byte positions | |
92 | assert_eq!("".find(""), Some(0)); | |
93 | assert!("banana".find("apple pie").is_none()); | |
94 | ||
95 | let data = "abcabc"; | |
96 | assert_eq!(data[0..6].find("ab"), Some(0)); | |
97 | assert_eq!(data[2..6].find("ab"), Some(3 - 2)); | |
98 | assert!(data[2..4].find("ab").is_none()); | |
99 | ||
100 | let string = "ประเทศไทย中华Việt Nam"; | |
62682a34 | 101 | let mut data = String::from(string); |
c34b1796 AL |
102 | data.push_str(string); |
103 | assert!(data.find("ไท华").is_none()); | |
104 | assert_eq!(data[0..43].find(""), Some(0)); | |
105 | assert_eq!(data[6..43].find(""), Some(6 - 6)); | |
106 | ||
107 | assert_eq!(data[0..43].find("ประ"), Some( 0)); | |
108 | assert_eq!(data[0..43].find("ทศไ"), Some(12)); | |
109 | assert_eq!(data[0..43].find("ย中"), Some(24)); | |
110 | assert_eq!(data[0..43].find("iệt"), Some(34)); | |
111 | assert_eq!(data[0..43].find("Nam"), Some(40)); | |
112 | ||
113 | assert_eq!(data[43..86].find("ประ"), Some(43 - 43)); | |
114 | assert_eq!(data[43..86].find("ทศไ"), Some(55 - 43)); | |
115 | assert_eq!(data[43..86].find("ย中"), Some(67 - 43)); | |
116 | assert_eq!(data[43..86].find("iệt"), Some(77 - 43)); | |
117 | assert_eq!(data[43..86].find("Nam"), Some(83 - 43)); | |
118 | } | |
119 | ||
120 | #[test] | |
121 | fn test_slice_chars() { | |
122 | fn t(a: &str, b: &str, start: usize) { | |
123 | assert_eq!(a.slice_chars(start, start + b.chars().count()), b); | |
124 | } | |
125 | t("", "", 0); | |
126 | t("hello", "llo", 2); | |
127 | t("hello", "el", 1); | |
128 | t("αβλ", "β", 1); | |
129 | t("αβλ", "", 3); | |
130 | assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8)); | |
131 | } | |
132 | ||
133 | fn s(x: &str) -> String { x.to_string() } | |
134 | ||
135 | macro_rules! test_concat { | |
136 | ($expected: expr, $string: expr) => { | |
137 | { | |
138 | let s: String = $string.concat(); | |
139 | assert_eq!($expected, s); | |
140 | } | |
141 | } | |
142 | } | |
143 | ||
144 | #[test] | |
145 | fn test_concat_for_different_types() { | |
146 | test_concat!("ab", vec![s("a"), s("b")]); | |
147 | test_concat!("ab", vec!["a", "b"]); | |
148 | test_concat!("ab", vec!["a", "b"]); | |
149 | test_concat!("ab", vec![s("a"), s("b")]); | |
150 | } | |
151 | ||
152 | #[test] | |
153 | fn test_concat_for_different_lengths() { | |
154 | let empty: &[&str] = &[]; | |
155 | test_concat!("", empty); | |
156 | test_concat!("a", ["a"]); | |
157 | test_concat!("ab", ["a", "b"]); | |
158 | test_concat!("abc", ["", "a", "bc"]); | |
159 | } | |
160 | ||
161 | macro_rules! test_connect { | |
162 | ($expected: expr, $string: expr, $delim: expr) => { | |
163 | { | |
164 | let s = $string.connect($delim); | |
165 | assert_eq!($expected, s); | |
166 | } | |
167 | } | |
168 | } | |
169 | ||
170 | #[test] | |
171 | fn test_connect_for_different_types() { | |
172 | test_connect!("a-b", ["a", "b"], "-"); | |
173 | let hyphen = "-".to_string(); | |
174 | test_connect!("a-b", [s("a"), s("b")], &*hyphen); | |
175 | test_connect!("a-b", vec!["a", "b"], &*hyphen); | |
176 | test_connect!("a-b", &*vec!["a", "b"], "-"); | |
177 | test_connect!("a-b", vec![s("a"), s("b")], "-"); | |
178 | } | |
179 | ||
180 | #[test] | |
181 | fn test_connect_for_different_lengths() { | |
182 | let empty: &[&str] = &[]; | |
183 | test_connect!("", empty, "-"); | |
184 | test_connect!("a", ["a"], "-"); | |
185 | test_connect!("a-b", ["a", "b"], "-"); | |
186 | test_connect!("-a-bc", ["", "a", "bc"], "-"); | |
187 | } | |
188 | ||
189 | #[test] | |
190 | fn test_unsafe_slice() { | |
191 | assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)}); | |
192 | assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)}); | |
193 | assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)}); | |
194 | fn a_million_letter_a() -> String { | |
195 | let mut i = 0; | |
196 | let mut rs = String::new(); | |
197 | while i < 100000 { | |
198 | rs.push_str("aaaaaaaaaa"); | |
199 | i += 1; | |
200 | } | |
201 | rs | |
202 | } | |
203 | fn half_a_million_letter_a() -> String { | |
204 | let mut i = 0; | |
205 | let mut rs = String::new(); | |
206 | while i < 100000 { | |
207 | rs.push_str("aaaaa"); | |
208 | i += 1; | |
209 | } | |
210 | rs | |
211 | } | |
212 | let letters = a_million_letter_a(); | |
213 | assert!(half_a_million_letter_a() == | |
62682a34 | 214 | unsafe {String::from(letters.slice_unchecked( |
c34b1796 AL |
215 | 0, |
216 | 500000))}); | |
217 | } | |
218 | ||
219 | #[test] | |
220 | fn test_starts_with() { | |
221 | assert!(("".starts_with(""))); | |
222 | assert!(("abc".starts_with(""))); | |
223 | assert!(("abc".starts_with("a"))); | |
224 | assert!((!"a".starts_with("abc"))); | |
225 | assert!((!"".starts_with("abc"))); | |
226 | assert!((!"ödd".starts_with("-"))); | |
227 | assert!(("ödd".starts_with("öd"))); | |
228 | } | |
229 | ||
230 | #[test] | |
231 | fn test_ends_with() { | |
232 | assert!(("".ends_with(""))); | |
233 | assert!(("abc".ends_with(""))); | |
234 | assert!(("abc".ends_with("c"))); | |
235 | assert!((!"a".ends_with("abc"))); | |
236 | assert!((!"".ends_with("abc"))); | |
237 | assert!((!"ddö".ends_with("-"))); | |
238 | assert!(("ddö".ends_with("dö"))); | |
239 | } | |
240 | ||
241 | #[test] | |
242 | fn test_is_empty() { | |
243 | assert!("".is_empty()); | |
244 | assert!(!"a".is_empty()); | |
245 | } | |
246 | ||
247 | #[test] | |
248 | fn test_replace() { | |
249 | let a = "a"; | |
62682a34 SL |
250 | assert_eq!("".replace(a, "b"), String::from("")); |
251 | assert_eq!("a".replace(a, "b"), String::from("b")); | |
252 | assert_eq!("ab".replace(a, "b"), String::from("bb")); | |
c34b1796 AL |
253 | let test = "test"; |
254 | assert!(" test test ".replace(test, "toast") == | |
62682a34 SL |
255 | String::from(" toast toast ")); |
256 | assert_eq!(" test test ".replace(test, ""), String::from(" ")); | |
c34b1796 AL |
257 | } |
258 | ||
259 | #[test] | |
260 | fn test_replace_2a() { | |
261 | let data = "ประเทศไทย中华"; | |
262 | let repl = "دولة الكويت"; | |
263 | ||
264 | let a = "ประเ"; | |
265 | let a2 = "دولة الكويتทศไทย中华"; | |
266 | assert_eq!(data.replace(a, repl), a2); | |
267 | } | |
268 | ||
269 | #[test] | |
270 | fn test_replace_2b() { | |
271 | let data = "ประเทศไทย中华"; | |
272 | let repl = "دولة الكويت"; | |
273 | ||
274 | let b = "ะเ"; | |
275 | let b2 = "ปรدولة الكويتทศไทย中华"; | |
276 | assert_eq!(data.replace(b, repl), b2); | |
277 | } | |
278 | ||
279 | #[test] | |
280 | fn test_replace_2c() { | |
281 | let data = "ประเทศไทย中华"; | |
282 | let repl = "دولة الكويت"; | |
283 | ||
284 | let c = "中华"; | |
285 | let c2 = "ประเทศไทยدولة الكويت"; | |
286 | assert_eq!(data.replace(c, repl), c2); | |
287 | } | |
288 | ||
289 | #[test] | |
290 | fn test_replace_2d() { | |
291 | let data = "ประเทศไทย中华"; | |
292 | let repl = "دولة الكويت"; | |
293 | ||
294 | let d = "ไท华"; | |
295 | assert_eq!(data.replace(d, repl), data); | |
296 | } | |
297 | ||
298 | #[test] | |
299 | fn test_slice() { | |
300 | assert_eq!("ab", &"abc"[0..2]); | |
301 | assert_eq!("bc", &"abc"[1..3]); | |
302 | assert_eq!("", &"abc"[1..1]); | |
303 | assert_eq!("\u{65e5}", &"\u{65e5}\u{672c}"[0..3]); | |
304 | ||
305 | let data = "ประเทศไทย中华"; | |
306 | assert_eq!("ป", &data[0..3]); | |
307 | assert_eq!("ร", &data[3..6]); | |
308 | assert_eq!("", &data[3..3]); | |
309 | assert_eq!("华", &data[30..33]); | |
310 | ||
311 | fn a_million_letter_x() -> String { | |
312 | let mut i = 0; | |
313 | let mut rs = String::new(); | |
314 | while i < 100000 { | |
315 | rs.push_str("华华华华华华华华华华"); | |
316 | i += 1; | |
317 | } | |
318 | rs | |
319 | } | |
320 | fn half_a_million_letter_x() -> String { | |
321 | let mut i = 0; | |
322 | let mut rs = String::new(); | |
323 | while i < 100000 { | |
324 | rs.push_str("华华华华华"); | |
325 | i += 1; | |
326 | } | |
327 | rs | |
328 | } | |
329 | let letters = a_million_letter_x(); | |
330 | assert!(half_a_million_letter_x() == | |
62682a34 | 331 | String::from(&letters[0..3 * 500000])); |
c34b1796 AL |
332 | } |
333 | ||
334 | #[test] | |
335 | fn test_slice_2() { | |
336 | let ss = "中华Việt Nam"; | |
337 | ||
338 | assert_eq!("华", &ss[3..6]); | |
339 | assert_eq!("Việt Nam", &ss[6..16]); | |
340 | ||
341 | assert_eq!("ab", &"abc"[0..2]); | |
342 | assert_eq!("bc", &"abc"[1..3]); | |
343 | assert_eq!("", &"abc"[1..1]); | |
344 | ||
345 | assert_eq!("中", &ss[0..3]); | |
346 | assert_eq!("华V", &ss[3..7]); | |
347 | assert_eq!("", &ss[3..3]); | |
348 | /*0: 中 | |
349 | 3: 华 | |
350 | 6: V | |
351 | 7: i | |
352 | 8: ệ | |
353 | 11: t | |
354 | 12: | |
355 | 13: N | |
356 | 14: a | |
357 | 15: m */ | |
358 | } | |
359 | ||
360 | #[test] | |
361 | #[should_panic] | |
362 | fn test_slice_fail() { | |
363 | &"中华Việt Nam"[0..2]; | |
364 | } | |
365 | ||
366 | #[test] | |
367 | fn test_slice_from() { | |
368 | assert_eq!(&"abcd"[0..], "abcd"); | |
369 | assert_eq!(&"abcd"[2..], "cd"); | |
370 | assert_eq!(&"abcd"[4..], ""); | |
371 | } | |
372 | #[test] | |
373 | fn test_slice_to() { | |
374 | assert_eq!(&"abcd"[..0], ""); | |
375 | assert_eq!(&"abcd"[..2], "ab"); | |
376 | assert_eq!(&"abcd"[..4], "abcd"); | |
377 | } | |
378 | ||
379 | #[test] | |
380 | fn test_trim_left_matches() { | |
381 | let v: &[char] = &[]; | |
382 | assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** "); | |
383 | let chars: &[char] = &['*', ' ']; | |
384 | assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** "); | |
385 | assert_eq!(" *** *** ".trim_left_matches(chars), ""); | |
386 | assert_eq!("foo *** ".trim_left_matches(chars), "foo *** "); | |
387 | ||
388 | assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); | |
389 | let chars: &[char] = &['1', '2']; | |
390 | assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12"); | |
391 | assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123"); | |
392 | } | |
393 | ||
394 | #[test] | |
395 | fn test_trim_right_matches() { | |
396 | let v: &[char] = &[]; | |
397 | assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** "); | |
398 | let chars: &[char] = &['*', ' ']; | |
399 | assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo"); | |
400 | assert_eq!(" *** *** ".trim_right_matches(chars), ""); | |
401 | assert_eq!(" *** foo".trim_right_matches(chars), " *** foo"); | |
402 | ||
403 | assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar"); | |
404 | let chars: &[char] = &['1', '2']; | |
405 | assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar"); | |
406 | assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar"); | |
407 | } | |
408 | ||
409 | #[test] | |
410 | fn test_trim_matches() { | |
411 | let v: &[char] = &[]; | |
412 | assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** "); | |
413 | let chars: &[char] = &['*', ' ']; | |
414 | assert_eq!(" *** foo *** ".trim_matches(chars), "foo"); | |
415 | assert_eq!(" *** *** ".trim_matches(chars), ""); | |
416 | assert_eq!("foo".trim_matches(chars), "foo"); | |
417 | ||
418 | assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar"); | |
419 | let chars: &[char] = &['1', '2']; | |
420 | assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar"); | |
421 | assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar"); | |
422 | } | |
423 | ||
424 | #[test] | |
425 | fn test_trim_left() { | |
426 | assert_eq!("".trim_left(), ""); | |
427 | assert_eq!("a".trim_left(), "a"); | |
428 | assert_eq!(" ".trim_left(), ""); | |
429 | assert_eq!(" blah".trim_left(), "blah"); | |
430 | assert_eq!(" \u{3000} wut".trim_left(), "wut"); | |
431 | assert_eq!("hey ".trim_left(), "hey "); | |
432 | } | |
433 | ||
434 | #[test] | |
435 | fn test_trim_right() { | |
436 | assert_eq!("".trim_right(), ""); | |
437 | assert_eq!("a".trim_right(), "a"); | |
438 | assert_eq!(" ".trim_right(), ""); | |
439 | assert_eq!("blah ".trim_right(), "blah"); | |
440 | assert_eq!("wut \u{3000} ".trim_right(), "wut"); | |
441 | assert_eq!(" hey".trim_right(), " hey"); | |
442 | } | |
443 | ||
444 | #[test] | |
445 | fn test_trim() { | |
446 | assert_eq!("".trim(), ""); | |
447 | assert_eq!("a".trim(), "a"); | |
448 | assert_eq!(" ".trim(), ""); | |
449 | assert_eq!(" blah ".trim(), "blah"); | |
450 | assert_eq!("\nwut \u{3000} ".trim(), "wut"); | |
451 | assert_eq!(" hey dude ".trim(), "hey dude"); | |
452 | } | |
453 | ||
454 | #[test] | |
455 | fn test_is_whitespace() { | |
456 | assert!("".chars().all(|c| c.is_whitespace())); | |
457 | assert!(" ".chars().all(|c| c.is_whitespace())); | |
458 | assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space | |
459 | assert!(" \n\t ".chars().all(|c| c.is_whitespace())); | |
460 | assert!(!" _ ".chars().all(|c| c.is_whitespace())); | |
461 | } | |
462 | ||
463 | #[test] | |
464 | fn test_slice_shift_char() { | |
465 | let data = "ประเทศไทย中"; | |
466 | assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中"))); | |
467 | } | |
468 | ||
469 | #[test] | |
470 | fn test_slice_shift_char_2() { | |
471 | let empty = ""; | |
472 | assert_eq!(empty.slice_shift_char(), None); | |
473 | } | |
474 | ||
475 | #[test] | |
476 | fn test_is_utf8() { | |
477 | // deny overlong encodings | |
478 | assert!(from_utf8(&[0xc0, 0x80]).is_err()); | |
479 | assert!(from_utf8(&[0xc0, 0xae]).is_err()); | |
480 | assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err()); | |
481 | assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err()); | |
482 | assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err()); | |
483 | assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err()); | |
484 | assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err()); | |
485 | ||
486 | // deny surrogates | |
487 | assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err()); | |
488 | assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err()); | |
489 | ||
490 | assert!(from_utf8(&[0xC2, 0x80]).is_ok()); | |
491 | assert!(from_utf8(&[0xDF, 0xBF]).is_ok()); | |
492 | assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok()); | |
493 | assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok()); | |
494 | assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok()); | |
495 | assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok()); | |
496 | assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok()); | |
497 | assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok()); | |
498 | } | |
499 | ||
500 | #[test] | |
501 | fn test_is_utf16() { | |
d9579d0f | 502 | use rustc_unicode::str::is_utf16; |
c34b1796 AL |
503 | |
504 | macro_rules! pos { | |
505 | ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } } | |
506 | } | |
507 | ||
508 | // non-surrogates | |
509 | pos!(&[0x0000], | |
510 | &[0x0001, 0x0002], | |
511 | &[0xD7FF], | |
512 | &[0xE000]); | |
513 | ||
514 | // surrogate pairs (randomly generated with Python 3's | |
515 | // .encode('utf-16be')) | |
516 | pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45], | |
517 | &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14], | |
518 | &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]); | |
519 | ||
520 | // mixtures (also random) | |
521 | pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65], | |
522 | &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006], | |
523 | &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]); | |
524 | ||
525 | // negative tests | |
526 | macro_rules! neg { | |
527 | ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } } | |
528 | } | |
529 | ||
530 | neg!( | |
531 | // surrogate + regular unit | |
532 | &[0xdb45, 0x0000], | |
533 | // surrogate + lead surrogate | |
534 | &[0xd900, 0xd900], | |
535 | // unterminated surrogate | |
536 | &[0xd8ff], | |
537 | // trail surrogate without a lead | |
538 | &[0xddb7]); | |
539 | ||
540 | // random byte sequences that Python 3's .decode('utf-16be') | |
541 | // failed on | |
542 | neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7], | |
543 | &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3], | |
544 | &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca], | |
545 | &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278], | |
546 | &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e], | |
547 | &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5], | |
548 | &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee], | |
549 | &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7], | |
550 | &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a], | |
551 | &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a], | |
552 | &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe], | |
553 | &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf], | |
554 | &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e], | |
555 | &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5], | |
556 | &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f], | |
557 | &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b], | |
558 | &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7], | |
559 | &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9], | |
560 | &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8], | |
561 | &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282], | |
562 | &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]); | |
563 | } | |
564 | ||
565 | #[test] | |
566 | fn test_as_bytes() { | |
567 | // no null | |
568 | let v = [ | |
569 | 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228, | |
570 | 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97, | |
571 | 109 | |
572 | ]; | |
573 | let b: &[u8] = &[]; | |
574 | assert_eq!("".as_bytes(), b); | |
575 | assert_eq!("abc".as_bytes(), b"abc"); | |
576 | assert_eq!("ศไทย中华Việt Nam".as_bytes(), v); | |
577 | } | |
578 | ||
579 | #[test] | |
580 | #[should_panic] | |
581 | fn test_as_bytes_fail() { | |
582 | // Don't double free. (I'm not sure if this exercises the | |
583 | // original problem code path anymore.) | |
62682a34 | 584 | let s = String::from(""); |
c34b1796 AL |
585 | let _bytes = s.as_bytes(); |
586 | panic!(); | |
587 | } | |
588 | ||
589 | #[test] | |
590 | fn test_as_ptr() { | |
591 | let buf = "hello".as_ptr(); | |
592 | unsafe { | |
593 | assert_eq!(*buf.offset(0), b'h'); | |
594 | assert_eq!(*buf.offset(1), b'e'); | |
595 | assert_eq!(*buf.offset(2), b'l'); | |
596 | assert_eq!(*buf.offset(3), b'l'); | |
597 | assert_eq!(*buf.offset(4), b'o'); | |
598 | } | |
599 | } | |
600 | ||
601 | #[test] | |
602 | fn test_subslice_offset() { | |
603 | let a = "kernelsprite"; | |
604 | let b = &a[7..a.len()]; | |
605 | let c = &a[0..a.len() - 6]; | |
606 | assert_eq!(a.subslice_offset(b), 7); | |
607 | assert_eq!(a.subslice_offset(c), 0); | |
608 | ||
609 | let string = "a\nb\nc"; | |
610 | let lines: Vec<&str> = string.lines().collect(); | |
611 | assert_eq!(string.subslice_offset(lines[0]), 0); | |
612 | assert_eq!(string.subslice_offset(lines[1]), 2); | |
613 | assert_eq!(string.subslice_offset(lines[2]), 4); | |
614 | } | |
615 | ||
616 | #[test] | |
617 | #[should_panic] | |
618 | fn test_subslice_offset_2() { | |
619 | let a = "alchemiter"; | |
620 | let b = "cruxtruder"; | |
621 | a.subslice_offset(b); | |
622 | } | |
623 | ||
624 | #[test] | |
625 | fn vec_str_conversions() { | |
62682a34 | 626 | let s1: String = String::from("All mimsy were the borogoves"); |
c34b1796 AL |
627 | |
628 | let v: Vec<u8> = s1.as_bytes().to_vec(); | |
62682a34 | 629 | let s2: String = String::from(from_utf8(&v).unwrap()); |
c34b1796 AL |
630 | let mut i = 0; |
631 | let n1 = s1.len(); | |
632 | let n2 = v.len(); | |
633 | assert_eq!(n1, n2); | |
634 | while i < n1 { | |
635 | let a: u8 = s1.as_bytes()[i]; | |
636 | let b: u8 = s2.as_bytes()[i]; | |
637 | debug!("{}", a); | |
638 | debug!("{}", b); | |
639 | assert_eq!(a, b); | |
640 | i += 1; | |
641 | } | |
642 | } | |
643 | ||
644 | #[test] | |
645 | fn test_contains() { | |
646 | assert!("abcde".contains("bcd")); | |
647 | assert!("abcde".contains("abcd")); | |
648 | assert!("abcde".contains("bcde")); | |
649 | assert!("abcde".contains("")); | |
650 | assert!("".contains("")); | |
651 | assert!(!"abcde".contains("def")); | |
652 | assert!(!"".contains("a")); | |
653 | ||
654 | let data = "ประเทศไทย中华Việt Nam"; | |
655 | assert!(data.contains("ประเ")); | |
656 | assert!(data.contains("ะเ")); | |
657 | assert!(data.contains("中华")); | |
658 | assert!(!data.contains("ไท华")); | |
659 | } | |
660 | ||
661 | #[test] | |
662 | fn test_contains_char() { | |
663 | assert!("abc".contains('b')); | |
664 | assert!("a".contains('a')); | |
665 | assert!(!"abc".contains('d')); | |
666 | assert!(!"".contains('a')); | |
667 | } | |
668 | ||
669 | #[test] | |
670 | fn test_char_at() { | |
671 | let s = "ศไทย中华Việt Nam"; | |
672 | let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m']; | |
673 | let mut pos = 0; | |
674 | for ch in &v { | |
675 | assert!(s.char_at(pos) == *ch); | |
676 | pos += ch.to_string().len(); | |
677 | } | |
678 | } | |
679 | ||
680 | #[test] | |
681 | fn test_char_at_reverse() { | |
682 | let s = "ศไทย中华Việt Nam"; | |
683 | let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m']; | |
684 | let mut pos = s.len(); | |
685 | for ch in v.iter().rev() { | |
686 | assert!(s.char_at_reverse(pos) == *ch); | |
687 | pos -= ch.to_string().len(); | |
688 | } | |
689 | } | |
690 | ||
62682a34 SL |
691 | #[test] |
692 | fn test_split_at() { | |
693 | let s = "ศไทย中华Việt Nam"; | |
694 | for (index, _) in s.char_indices() { | |
695 | let (a, b) = s.split_at(index); | |
696 | assert_eq!(&s[..a.len()], a); | |
697 | assert_eq!(&s[a.len()..], b); | |
698 | } | |
699 | let (a, b) = s.split_at(s.len()); | |
700 | assert_eq!(a, s); | |
701 | assert_eq!(b, ""); | |
702 | } | |
703 | ||
704 | #[test] | |
705 | #[should_panic] | |
706 | fn test_split_at_boundscheck() { | |
707 | let s = "ศไทย中华Việt Nam"; | |
708 | let (a, b) = s.split_at(1); | |
709 | } | |
710 | ||
c34b1796 AL |
711 | #[test] |
712 | fn test_escape_unicode() { | |
713 | assert_eq!("abc".escape_unicode(), | |
62682a34 | 714 | String::from("\\u{61}\\u{62}\\u{63}")); |
c34b1796 | 715 | assert_eq!("a c".escape_unicode(), |
62682a34 | 716 | String::from("\\u{61}\\u{20}\\u{63}")); |
c34b1796 | 717 | assert_eq!("\r\n\t".escape_unicode(), |
62682a34 | 718 | String::from("\\u{d}\\u{a}\\u{9}")); |
c34b1796 | 719 | assert_eq!("'\"\\".escape_unicode(), |
62682a34 | 720 | String::from("\\u{27}\\u{22}\\u{5c}")); |
c34b1796 | 721 | assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(), |
62682a34 | 722 | String::from("\\u{0}\\u{1}\\u{fe}\\u{ff}")); |
c34b1796 | 723 | assert_eq!("\u{100}\u{ffff}".escape_unicode(), |
62682a34 | 724 | String::from("\\u{100}\\u{ffff}")); |
c34b1796 | 725 | assert_eq!("\u{10000}\u{10ffff}".escape_unicode(), |
62682a34 | 726 | String::from("\\u{10000}\\u{10ffff}")); |
c34b1796 | 727 | assert_eq!("ab\u{fb00}".escape_unicode(), |
62682a34 | 728 | String::from("\\u{61}\\u{62}\\u{fb00}")); |
c34b1796 | 729 | assert_eq!("\u{1d4ea}\r".escape_unicode(), |
62682a34 | 730 | String::from("\\u{1d4ea}\\u{d}")); |
c34b1796 AL |
731 | } |
732 | ||
733 | #[test] | |
734 | fn test_escape_default() { | |
62682a34 SL |
735 | assert_eq!("abc".escape_default(), String::from("abc")); |
736 | assert_eq!("a c".escape_default(), String::from("a c")); | |
737 | assert_eq!("\r\n\t".escape_default(), String::from("\\r\\n\\t")); | |
738 | assert_eq!("'\"\\".escape_default(), String::from("\\'\\\"\\\\")); | |
c34b1796 | 739 | assert_eq!("\u{100}\u{ffff}".escape_default(), |
62682a34 | 740 | String::from("\\u{100}\\u{ffff}")); |
c34b1796 | 741 | assert_eq!("\u{10000}\u{10ffff}".escape_default(), |
62682a34 | 742 | String::from("\\u{10000}\\u{10ffff}")); |
c34b1796 | 743 | assert_eq!("ab\u{fb00}".escape_default(), |
62682a34 | 744 | String::from("ab\\u{fb00}")); |
c34b1796 | 745 | assert_eq!("\u{1d4ea}\r".escape_default(), |
62682a34 | 746 | String::from("\\u{1d4ea}\\r")); |
c34b1796 AL |
747 | } |
748 | ||
749 | #[test] | |
750 | fn test_total_ord() { | |
751 | "1234".cmp("123") == Greater; | |
752 | "123".cmp("1234") == Less; | |
753 | "1234".cmp("1234") == Equal; | |
754 | "12345555".cmp("123456") == Less; | |
755 | "22".cmp("1234") == Greater; | |
756 | } | |
757 | ||
758 | #[test] | |
759 | fn test_char_range_at() { | |
760 | let data = "b¢€𤭢𤭢€¢b"; | |
761 | assert_eq!('b', data.char_range_at(0).ch); | |
762 | assert_eq!('¢', data.char_range_at(1).ch); | |
763 | assert_eq!('€', data.char_range_at(3).ch); | |
764 | assert_eq!('𤭢', data.char_range_at(6).ch); | |
765 | assert_eq!('𤭢', data.char_range_at(10).ch); | |
766 | assert_eq!('€', data.char_range_at(14).ch); | |
767 | assert_eq!('¢', data.char_range_at(17).ch); | |
768 | assert_eq!('b', data.char_range_at(19).ch); | |
769 | } | |
770 | ||
771 | #[test] | |
772 | fn test_char_range_at_reverse_underflow() { | |
773 | assert_eq!("abc".char_range_at_reverse(0).next, 0); | |
774 | } | |
775 | ||
776 | #[test] | |
777 | fn test_iterator() { | |
778 | let s = "ศไทย中华Việt Nam"; | |
779 | let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m']; | |
780 | ||
781 | let mut pos = 0; | |
782 | let it = s.chars(); | |
783 | ||
784 | for c in it { | |
785 | assert_eq!(c, v[pos]); | |
786 | pos += 1; | |
787 | } | |
788 | assert_eq!(pos, v.len()); | |
789 | } | |
790 | ||
791 | #[test] | |
792 | fn test_rev_iterator() { | |
793 | let s = "ศไทย中华Việt Nam"; | |
794 | let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ']; | |
795 | ||
796 | let mut pos = 0; | |
797 | let it = s.chars().rev(); | |
798 | ||
799 | for c in it { | |
800 | assert_eq!(c, v[pos]); | |
801 | pos += 1; | |
802 | } | |
803 | assert_eq!(pos, v.len()); | |
804 | } | |
805 | ||
806 | #[test] | |
807 | fn test_chars_decoding() { | |
808 | let mut bytes = [0; 4]; | |
809 | for c in (0..0x110000).filter_map(::std::char::from_u32) { | |
810 | let len = c.encode_utf8(&mut bytes).unwrap_or(0); | |
811 | let s = ::std::str::from_utf8(&bytes[..len]).unwrap(); | |
812 | if Some(c) != s.chars().next() { | |
813 | panic!("character {:x}={} does not decode correctly", c as u32, c); | |
814 | } | |
815 | } | |
816 | } | |
817 | ||
818 | #[test] | |
819 | fn test_chars_rev_decoding() { | |
820 | let mut bytes = [0; 4]; | |
821 | for c in (0..0x110000).filter_map(::std::char::from_u32) { | |
822 | let len = c.encode_utf8(&mut bytes).unwrap_or(0); | |
823 | let s = ::std::str::from_utf8(&bytes[..len]).unwrap(); | |
824 | if Some(c) != s.chars().rev().next() { | |
825 | panic!("character {:x}={} does not decode correctly", c as u32, c); | |
826 | } | |
827 | } | |
828 | } | |
829 | ||
830 | #[test] | |
831 | fn test_iterator_clone() { | |
832 | let s = "ศไทย中华Việt Nam"; | |
833 | let mut it = s.chars(); | |
834 | it.next(); | |
835 | assert!(it.clone().zip(it).all(|(x,y)| x == y)); | |
836 | } | |
837 | ||
838 | #[test] | |
839 | fn test_bytesator() { | |
840 | let s = "ศไทย中华Việt Nam"; | |
841 | let v = [ | |
842 | 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228, | |
843 | 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97, | |
844 | 109 | |
845 | ]; | |
846 | let mut pos = 0; | |
847 | ||
848 | for b in s.bytes() { | |
849 | assert_eq!(b, v[pos]); | |
850 | pos += 1; | |
851 | } | |
852 | } | |
853 | ||
854 | #[test] | |
855 | fn test_bytes_revator() { | |
856 | let s = "ศไทย中华Việt Nam"; | |
857 | let v = [ | |
858 | 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228, | |
859 | 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97, | |
860 | 109 | |
861 | ]; | |
862 | let mut pos = v.len(); | |
863 | ||
864 | for b in s.bytes().rev() { | |
865 | pos -= 1; | |
866 | assert_eq!(b, v[pos]); | |
867 | } | |
868 | } | |
869 | ||
870 | #[test] | |
871 | fn test_char_indicesator() { | |
872 | let s = "ศไทย中华Việt Nam"; | |
873 | let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27]; | |
874 | let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m']; | |
875 | ||
876 | let mut pos = 0; | |
877 | let it = s.char_indices(); | |
878 | ||
879 | for c in it { | |
880 | assert_eq!(c, (p[pos], v[pos])); | |
881 | pos += 1; | |
882 | } | |
883 | assert_eq!(pos, v.len()); | |
884 | assert_eq!(pos, p.len()); | |
885 | } | |
886 | ||
887 | #[test] | |
888 | fn test_char_indices_revator() { | |
889 | let s = "ศไทย中华Việt Nam"; | |
890 | let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0]; | |
891 | let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ']; | |
892 | ||
893 | let mut pos = 0; | |
894 | let it = s.char_indices().rev(); | |
895 | ||
896 | for c in it { | |
897 | assert_eq!(c, (p[pos], v[pos])); | |
898 | pos += 1; | |
899 | } | |
900 | assert_eq!(pos, v.len()); | |
901 | assert_eq!(pos, p.len()); | |
902 | } | |
903 | ||
#[test]
fn test_splitn_char_iterator() {
    // `splitn(4, pat)` yields at most 4 fields; the last one keeps the
    // remaining, unsplit tail of the string.
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // ASCII space as the pattern, both as a char and as a closure.
    assert_eq!(data.splitn(4, ' ').collect::<Vec<&str>>(),
               ["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
    assert_eq!(data.splitn(4, |c: char| c == ' ').collect::<Vec<&str>>(),
               ["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);

    // A non-ASCII char pattern behaves identically.
    assert_eq!(data.splitn(4, 'ä').collect::<Vec<&str>>(),
               ["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
    assert_eq!(data.splitn(4, |c: char| c == 'ä').collect::<Vec<&str>>(),
               ["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
}
921 | ||
#[test]
fn test_split_char_iterator_no_trailing() {
    // `split` keeps the empty field after a trailing separator, while
    // `split_terminator` suppresses it; both keep the empty leading field.
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing = data.split('\n').collect::<Vec<&str>>();
    assert_eq!(with_trailing, ["", "Märy häd ä little lämb", "Little lämb", ""]);

    let without_trailing = data.split_terminator('\n').collect::<Vec<&str>>();
    assert_eq!(without_trailing, ["", "Märy häd ä little lämb", "Little lämb"]);
}
932 | ||
#[test]
fn test_rsplit() {
    // `rsplit` yields fields right-to-left; the pattern may be a char,
    // a substring, or a closure.
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    assert_eq!(data.rsplit(' ').collect::<Vec<&str>>(),
               ["lämb\n", "lämb\nLittle", "little", "ä", "häd", "\nMäry"]);
    assert_eq!(data.rsplit("lämb").collect::<Vec<&str>>(),
               ["\n", "\nLittle ", "\nMäry häd ä little "]);
    assert_eq!(data.rsplit(|c: char| c == 'ä').collect::<Vec<&str>>(),
               ["mb\n", "mb\nLittle l", " little l", "d ", "ry h", "\nM"]);
}
946 | ||
#[test]
fn test_rsplitn() {
    // `rsplitn(2, pat)` splits off only the last field; everything before
    // the final match is returned unsplit as the second item.
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    assert_eq!(data.rsplitn(2, ' ').collect::<Vec<&str>>(),
               ["lämb\n", "\nMäry häd ä little lämb\nLittle"]);
    assert_eq!(data.rsplitn(2, "lämb").collect::<Vec<&str>>(),
               ["\n", "\nMäry häd ä little lämb\nLittle "]);
    assert_eq!(data.rsplitn(2, |c: char| c == 'ä').collect::<Vec<&str>>(),
               ["mb\n", "\nMäry häd ä little lämb\nLittle l"]);
}
960 | ||
#[test]
fn test_split_whitespace() {
    // Runs of mixed whitespace (spaces, tabs, newlines) act as a single
    // separator, and no empty fields appear at either edge.
    let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
    let words = data.split_whitespace().collect::<Vec<&str>>();
    assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"]);
}
967 | ||
d9579d0f | 968 | #[allow(deprecated)] |
c34b1796 AL |
969 | #[test] |
970 | fn test_nfd_chars() { | |
971 | macro_rules! t { | |
972 | ($input: expr, $expected: expr) => { | |
973 | assert_eq!($input.nfd_chars().collect::<String>(), $expected); | |
974 | } | |
975 | } | |
976 | t!("abc", "abc"); | |
977 | t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}"); | |
978 | t!("\u{2026}", "\u{2026}"); | |
979 | t!("\u{2126}", "\u{3a9}"); | |
980 | t!("\u{1e0b}\u{323}", "d\u{323}\u{307}"); | |
981 | t!("\u{1e0d}\u{307}", "d\u{323}\u{307}"); | |
982 | t!("a\u{301}", "a\u{301}"); | |
983 | t!("\u{301}a", "\u{301}a"); | |
984 | t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}"); | |
985 | t!("\u{ac1c}", "\u{1100}\u{1162}"); | |
986 | } | |
987 | ||
d9579d0f | 988 | #[allow(deprecated)] |
c34b1796 AL |
989 | #[test] |
990 | fn test_nfkd_chars() { | |
991 | macro_rules! t { | |
992 | ($input: expr, $expected: expr) => { | |
993 | assert_eq!($input.nfkd_chars().collect::<String>(), $expected); | |
994 | } | |
995 | } | |
996 | t!("abc", "abc"); | |
997 | t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}"); | |
998 | t!("\u{2026}", "..."); | |
999 | t!("\u{2126}", "\u{3a9}"); | |
1000 | t!("\u{1e0b}\u{323}", "d\u{323}\u{307}"); | |
1001 | t!("\u{1e0d}\u{307}", "d\u{323}\u{307}"); | |
1002 | t!("a\u{301}", "a\u{301}"); | |
1003 | t!("\u{301}a", "\u{301}a"); | |
1004 | t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}"); | |
1005 | t!("\u{ac1c}", "\u{1100}\u{1162}"); | |
1006 | } | |
1007 | ||
d9579d0f | 1008 | #[allow(deprecated)] |
c34b1796 AL |
1009 | #[test] |
1010 | fn test_nfc_chars() { | |
1011 | macro_rules! t { | |
1012 | ($input: expr, $expected: expr) => { | |
1013 | assert_eq!($input.nfc_chars().collect::<String>(), $expected); | |
1014 | } | |
1015 | } | |
1016 | t!("abc", "abc"); | |
1017 | t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}"); | |
1018 | t!("\u{2026}", "\u{2026}"); | |
1019 | t!("\u{2126}", "\u{3a9}"); | |
1020 | t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}"); | |
1021 | t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}"); | |
1022 | t!("a\u{301}", "\u{e1}"); | |
1023 | t!("\u{301}a", "\u{301}a"); | |
1024 | t!("\u{d4db}", "\u{d4db}"); | |
1025 | t!("\u{ac1c}", "\u{ac1c}"); | |
1026 | t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b"); | |
1027 | } | |
1028 | ||
d9579d0f | 1029 | #[allow(deprecated)] |
c34b1796 AL |
1030 | #[test] |
1031 | fn test_nfkc_chars() { | |
1032 | macro_rules! t { | |
1033 | ($input: expr, $expected: expr) => { | |
1034 | assert_eq!($input.nfkc_chars().collect::<String>(), $expected); | |
1035 | } | |
1036 | } | |
1037 | t!("abc", "abc"); | |
1038 | t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}"); | |
1039 | t!("\u{2026}", "..."); | |
1040 | t!("\u{2126}", "\u{3a9}"); | |
1041 | t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}"); | |
1042 | t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}"); | |
1043 | t!("a\u{301}", "\u{e1}"); | |
1044 | t!("\u{301}a", "\u{301}a"); | |
1045 | t!("\u{d4db}", "\u{d4db}"); | |
1046 | t!("\u{ac1c}", "\u{ac1c}"); | |
1047 | t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b"); | |
1048 | } | |
1049 | ||
#[test]
fn test_lines() {
    // `lines()` splits on '\n', keeps interior empty lines, and treats a
    // single trailing newline and no trailing newline identically.
    let with_trailing = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let without_trailing = "\nMäry häd ä little lämb\n\nLittle lämb";
    let expected = ["", "Märy häd ä little lämb", "", "Little lämb"];

    for data in &[with_trailing, without_trailing] {
        let lines: Vec<&str> = data.lines().collect();
        assert_eq!(lines, expected);
    }
}
1060 | ||
d9579d0f | 1061 | #[allow(deprecated)] |
c34b1796 AL |
1062 | #[test] |
1063 | fn test_graphemes() { | |
1064 | use std::iter::order; | |
1065 | ||
1066 | // official Unicode test data | |
1067 | // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt | |
1068 | let test_same: [(_, &[_]); 325] = [ | |
1069 | ("\u{20}\u{20}", &["\u{20}", "\u{20}"]), | |
1070 | ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]), | |
1071 | ("\u{20}\u{D}", &["\u{20}", "\u{D}"]), | |
1072 | ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]), | |
1073 | ("\u{20}\u{A}", &["\u{20}", "\u{A}"]), | |
1074 | ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]), | |
1075 | ("\u{20}\u{1}", &["\u{20}", "\u{1}"]), | |
1076 | ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]), | |
1077 | ("\u{20}\u{300}", &["\u{20}\u{300}"]), | |
1078 | ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]), | |
1079 | ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]), | |
1080 | ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]), | |
1081 | ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]), | |
1082 | ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]), | |
1083 | ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]), | |
1084 | ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]), | |
1085 | ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]), | |
1086 | ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]), | |
1087 | ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]), | |
1088 | ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]), | |
1089 | ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]), | |
1090 | ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]), | |
1091 | ("\u{20}\u{378}", &["\u{20}", "\u{378}"]), | |
1092 | ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]), | |
1093 | ("\u{D}\u{20}", &["\u{D}", "\u{20}"]), | |
1094 | ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]), | |
1095 | ("\u{D}\u{D}", &["\u{D}", "\u{D}"]), | |
1096 | ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]), | |
1097 | ("\u{D}\u{A}", &["\u{D}\u{A}"]), | |
1098 | ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]), | |
1099 | ("\u{D}\u{1}", &["\u{D}", "\u{1}"]), | |
1100 | ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]), | |
1101 | ("\u{D}\u{300}", &["\u{D}", "\u{300}"]), | |
1102 | ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]), | |
1103 | ("\u{D}\u{903}", &["\u{D}", "\u{903}"]), | |
1104 | ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]), | |
1105 | ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]), | |
1106 | ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]), | |
1107 | ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]), | |
1108 | ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]), | |
1109 | ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]), | |
1110 | ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]), | |
1111 | ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]), | |
1112 | ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]), | |
1113 | ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]), | |
1114 | ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]), | |
1115 | ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]), | |
1116 | ("\u{D}\u{378}", &["\u{D}", "\u{378}"]), | |
1117 | ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]), | |
1118 | ("\u{A}\u{20}", &["\u{A}", "\u{20}"]), | |
1119 | ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]), | |
1120 | ("\u{A}\u{D}", &["\u{A}", "\u{D}"]), | |
1121 | ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]), | |
1122 | ("\u{A}\u{A}", &["\u{A}", "\u{A}"]), | |
1123 | ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]), | |
1124 | ("\u{A}\u{1}", &["\u{A}", "\u{1}"]), | |
1125 | ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]), | |
1126 | ("\u{A}\u{300}", &["\u{A}", "\u{300}"]), | |
1127 | ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]), | |
1128 | ("\u{A}\u{903}", &["\u{A}", "\u{903}"]), | |
1129 | ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]), | |
1130 | ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]), | |
1131 | ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]), | |
1132 | ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]), | |
1133 | ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]), | |
1134 | ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]), | |
1135 | ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]), | |
1136 | ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]), | |
1137 | ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]), | |
1138 | ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]), | |
1139 | ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]), | |
1140 | ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]), | |
1141 | ("\u{A}\u{378}", &["\u{A}", "\u{378}"]), | |
1142 | ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]), | |
1143 | ("\u{1}\u{20}", &["\u{1}", "\u{20}"]), | |
1144 | ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]), | |
1145 | ("\u{1}\u{D}", &["\u{1}", "\u{D}"]), | |
1146 | ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]), | |
1147 | ("\u{1}\u{A}", &["\u{1}", "\u{A}"]), | |
1148 | ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]), | |
1149 | ("\u{1}\u{1}", &["\u{1}", "\u{1}"]), | |
1150 | ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]), | |
1151 | ("\u{1}\u{300}", &["\u{1}", "\u{300}"]), | |
1152 | ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]), | |
1153 | ("\u{1}\u{903}", &["\u{1}", "\u{903}"]), | |
1154 | ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]), | |
1155 | ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]), | |
1156 | ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]), | |
1157 | ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]), | |
1158 | ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]), | |
1159 | ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]), | |
1160 | ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]), | |
1161 | ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]), | |
1162 | ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]), | |
1163 | ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]), | |
1164 | ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]), | |
1165 | ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]), | |
1166 | ("\u{1}\u{378}", &["\u{1}", "\u{378}"]), | |
1167 | ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]), | |
1168 | ("\u{300}\u{20}", &["\u{300}", "\u{20}"]), | |
1169 | ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]), | |
1170 | ("\u{300}\u{D}", &["\u{300}", "\u{D}"]), | |
1171 | ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]), | |
1172 | ("\u{300}\u{A}", &["\u{300}", "\u{A}"]), | |
1173 | ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]), | |
1174 | ("\u{300}\u{1}", &["\u{300}", "\u{1}"]), | |
1175 | ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]), | |
1176 | ("\u{300}\u{300}", &["\u{300}\u{300}"]), | |
1177 | ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]), | |
1178 | ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]), | |
1179 | ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]), | |
1180 | ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]), | |
1181 | ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]), | |
1182 | ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]), | |
1183 | ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]), | |
1184 | ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]), | |
1185 | ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]), | |
1186 | ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]), | |
1187 | ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]), | |
1188 | ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]), | |
1189 | ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]), | |
1190 | ("\u{300}\u{378}", &["\u{300}", "\u{378}"]), | |
1191 | ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]), | |
1192 | ("\u{903}\u{20}", &["\u{903}", "\u{20}"]), | |
1193 | ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]), | |
1194 | ("\u{903}\u{D}", &["\u{903}", "\u{D}"]), | |
1195 | ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]), | |
1196 | ("\u{903}\u{A}", &["\u{903}", "\u{A}"]), | |
1197 | ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]), | |
1198 | ("\u{903}\u{1}", &["\u{903}", "\u{1}"]), | |
1199 | ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]), | |
1200 | ("\u{903}\u{300}", &["\u{903}\u{300}"]), | |
1201 | ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]), | |
1202 | ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]), | |
1203 | ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]), | |
1204 | ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]), | |
1205 | ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]), | |
1206 | ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]), | |
1207 | ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]), | |
1208 | ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]), | |
1209 | ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]), | |
1210 | ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]), | |
1211 | ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]), | |
1212 | ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]), | |
1213 | ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]), | |
1214 | ("\u{903}\u{378}", &["\u{903}", "\u{378}"]), | |
1215 | ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]), | |
1216 | ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]), | |
1217 | ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]), | |
1218 | ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]), | |
1219 | ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]), | |
1220 | ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]), | |
1221 | ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]), | |
1222 | ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]), | |
1223 | ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]), | |
1224 | ("\u{1100}\u{300}", &["\u{1100}\u{300}"]), | |
1225 | ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]), | |
1226 | ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]), | |
1227 | ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]), | |
1228 | ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]), | |
1229 | ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]), | |
1230 | ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]), | |
1231 | ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]), | |
1232 | ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]), | |
1233 | ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]), | |
1234 | ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]), | |
1235 | ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]), | |
1236 | ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]), | |
1237 | ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]), | |
1238 | ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]), | |
1239 | ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]), | |
1240 | ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]), | |
1241 | ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]), | |
1242 | ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]), | |
1243 | ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]), | |
1244 | ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]), | |
1245 | ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]), | |
1246 | ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]), | |
1247 | ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]), | |
1248 | ("\u{1160}\u{300}", &["\u{1160}\u{300}"]), | |
1249 | ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]), | |
1250 | ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]), | |
1251 | ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]), | |
1252 | ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]), | |
1253 | ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]), | |
1254 | ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]), | |
1255 | ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]), | |
1256 | ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]), | |
1257 | ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]), | |
1258 | ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]), | |
1259 | ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]), | |
1260 | ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]), | |
1261 | ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]), | |
1262 | ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]), | |
1263 | ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]), | |
1264 | ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]), | |
1265 | ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]), | |
1266 | ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]), | |
1267 | ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]), | |
1268 | ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]), | |
1269 | ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]), | |
1270 | ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]), | |
1271 | ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]), | |
1272 | ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]), | |
1273 | ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]), | |
1274 | ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]), | |
1275 | ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]), | |
1276 | ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]), | |
1277 | ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]), | |
1278 | ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]), | |
1279 | ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]), | |
1280 | ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]), | |
1281 | ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]), | |
1282 | ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]), | |
1283 | ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]), | |
1284 | ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]), | |
1285 | ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]), | |
1286 | ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]), | |
1287 | ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]), | |
1288 | ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]), | |
1289 | ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]), | |
1290 | ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]), | |
1291 | ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]), | |
1292 | ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]), | |
1293 | ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]), | |
1294 | ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]), | |
1295 | ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]), | |
1296 | ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]), | |
1297 | ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]), | |
1298 | ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]), | |
1299 | ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]), | |
1300 | ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]), | |
1301 | ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]), | |
1302 | ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]), | |
1303 | ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]), | |
1304 | ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]), | |
1305 | ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]), | |
1306 | ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]), | |
1307 | ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]), | |
1308 | ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]), | |
1309 | ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]), | |
1310 | ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]), | |
1311 | ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]), | |
1312 | ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]), | |
1313 | ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]), | |
1314 | ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]), | |
1315 | ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]), | |
1316 | ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]), | |
1317 | ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]), | |
1318 | ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]), | |
1319 | ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]), | |
1320 | ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]), | |
1321 | ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]), | |
1322 | ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]), | |
1323 | ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]), | |
1324 | ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]), | |
1325 | ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]), | |
1326 | ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]), | |
1327 | ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]), | |
1328 | ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]), | |
1329 | ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]), | |
1330 | ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]), | |
1331 | ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]), | |
1332 | ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]), | |
1333 | ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]), | |
1334 | ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]), | |
1335 | ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]), | |
1336 | ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]), | |
1337 | ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]), | |
1338 | ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]), | |
1339 | ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]), | |
1340 | ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]), | |
1341 | ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]), | |
1342 | ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]), | |
1343 | ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]), | |
1344 | ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]), | |
1345 | ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]), | |
1346 | ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]), | |
1347 | ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]), | |
1348 | ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]), | |
1349 | ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]), | |
1350 | ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]), | |
1351 | ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]), | |
1352 | ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]), | |
1353 | ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]), | |
1354 | ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]), | |
1355 | ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]), | |
1356 | ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]), | |
1357 | ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]), | |
1358 | ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]), | |
1359 | ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]), | |
1360 | ("\u{378}\u{20}", &["\u{378}", "\u{20}"]), | |
1361 | ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]), | |
1362 | ("\u{378}\u{D}", &["\u{378}", "\u{D}"]), | |
1363 | ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]), | |
1364 | ("\u{378}\u{A}", &["\u{378}", "\u{A}"]), | |
1365 | ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]), | |
1366 | ("\u{378}\u{1}", &["\u{378}", "\u{1}"]), | |
1367 | ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]), | |
1368 | ("\u{378}\u{300}", &["\u{378}\u{300}"]), | |
1369 | ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]), | |
1370 | ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]), | |
1371 | ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]), | |
1372 | ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]), | |
1373 | ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]), | |
1374 | ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]), | |
1375 | ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]), | |
1376 | ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]), | |
1377 | ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]), | |
1378 | ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]), | |
1379 | ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]), | |
1380 | ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]), | |
1381 | ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]), | |
1382 | ("\u{378}\u{378}", &["\u{378}", "\u{378}"]), | |
1383 | ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]), | |
1384 | ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]), | |
1385 | ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]), | |
1386 | ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]), | |
1387 | ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}", | |
1388 | &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]), | |
1389 | ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}", | |
1390 | &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]), | |
1391 | ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]), | |
1392 | ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}", | |
1393 | "\u{1F1E7}\u{1F1E8}"]), | |
1394 | ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}", | |
1395 | &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]), | |
1396 | ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]), | |
1397 | ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]), | |
1398 | ]; | |
1399 | ||
1400 | let test_diff: [(_, &[_], &[_]); 23] = [ | |
1401 | ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}", | |
1402 | &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}", | |
1403 | &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}", | |
1404 | &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}", | |
1405 | &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}", | |
1406 | &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}", | |
1407 | &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}", | |
1408 | &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}", | |
1409 | &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}", | |
1410 | &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}", | |
1411 | &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}", | |
1412 | &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}", | |
1413 | &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}", | |
1414 | &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}", | |
1415 | &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}", | |
1416 | &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}", | |
1417 | &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}", | |
1418 | &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}", | |
1419 | &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}", | |
1420 | &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}", | |
1421 | &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}", | |
1422 | &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}", | |
1423 | &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]), | |
1424 | ]; | |
1425 | ||
1426 | for &(s, g) in &test_same[..] { | |
1427 | // test forward iterator | |
1428 | assert!(order::equals(s.graphemes(true), g.iter().cloned())); | |
1429 | assert!(order::equals(s.graphemes(false), g.iter().cloned())); | |
1430 | ||
1431 | // test reverse iterator | |
1432 | assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().cloned())); | |
1433 | assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().cloned())); | |
1434 | } | |
1435 | ||
1436 | for &(s, gt, gf) in &test_diff { | |
1437 | // test forward iterator | |
1438 | assert!(order::equals(s.graphemes(true), gt.iter().cloned())); | |
1439 | assert!(order::equals(s.graphemes(false), gf.iter().cloned())); | |
1440 | ||
1441 | // test reverse iterator | |
1442 | assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().cloned())); | |
1443 | assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().cloned())); | |
1444 | } | |
1445 | ||
1446 | // test the indices iterators | |
1447 | let s = "a̐éö̲\r\n"; | |
1448 | let gr_inds = s.grapheme_indices(true).collect::<Vec<(usize, &str)>>(); | |
1449 | let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]; | |
1450 | assert_eq!(gr_inds, b); | |
1451 | let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(usize, &str)>>(); | |
1452 | let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0, "a̐")]; | |
1453 | assert_eq!(gr_inds, b); | |
1454 | let mut gr_inds_iter = s.grapheme_indices(true); | |
1455 | { | |
1456 | let gr_inds = gr_inds_iter.by_ref(); | |
1457 | let e1 = gr_inds.size_hint(); | |
1458 | assert_eq!(e1, (1, Some(13))); | |
1459 | let c = gr_inds.count(); | |
1460 | assert_eq!(c, 4); | |
1461 | } | |
1462 | let e2 = gr_inds_iter.size_hint(); | |
1463 | assert_eq!(e2, (0, Some(0))); | |
1464 | ||
1465 | // make sure the reverse iterator does the right thing with "\n" at beginning of string | |
1466 | let s = "\n\r\n\r"; | |
1467 | let gr = s.graphemes(true).rev().collect::<Vec<&str>>(); | |
1468 | let b: &[_] = &["\r", "\r\n", "\n"]; | |
1469 | assert_eq!(gr, b); | |
1470 | } | |
1471 | ||
#[test]
fn test_splitator() {
    // Substring patterns: matches are non-overlapping, scanned left to
    // right, and adjacent/edge matches yield empty fields.
    fn check(haystack: &str, sep: &str, expected: &[&str]) {
        let pieces: Vec<&str> = haystack.split(sep).collect();
        assert_eq!(pieces, expected);
    }
    check("--1233345--", "12345", &["--1233345--"]);
    check("abc::hello::there", "::", &["abc", "hello", "there"]);
    check("::hello::there", "::", &["", "hello", "there"]);
    check("hello::there::", "::", &["hello", "there", ""]);
    check("::hello::there::", "::", &["", "hello", "there", ""]);
    check("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
    check("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
    check("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
    check(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
    check("", ".", &[""]);
    check("zz", "zz", &["", ""]);
    check("ok", "z", &["ok"]);
    check("zzz", "zz", &["", "z"]);
    check("zzzzz", "zz", &["", "", "z"]);
}
1493 | ||
#[test]
fn test_str_default() {
    use std::default::Default;

    // The `Default` impls for both `&str` and `String` must produce the
    // empty string.
    fn check<T: Default + AsRef<str>>() {
        let value: T = T::default();
        assert_eq!(value.as_ref(), "");
    }

    check::<&str>();
    check::<String>();
}
1506 | ||
#[test]
fn test_str_container() {
    // Total number of bytes across a slice of string slices.
    fn sum_len(v: &[&str]) -> usize {
        v.iter().fold(0, |acc, piece| acc + piece.len())
    }

    let s = String::from("01234");
    // Plain literals, including an empty piece.
    assert_eq!(5, sum_len(&["012", "", "34"]));
    // `&String` elements deref-coerce to `&str` in the slice literal.
    assert_eq!(5, sum_len(&[&String::from("01"),
                            &String::from("2"),
                            &String::from("34"),
                            &String::from("")]));
    assert_eq!(5, sum_len(&[&s]));
}
1521 | ||
#[test]
fn test_str_from_utf8() {
    // Valid ASCII decodes unchanged.
    assert_eq!(from_utf8(b"hello"), Ok("hello"));

    // Valid multibyte UTF-8 round-trips through its own bytes.
    let mixed = "ศไทย中华Việt Nam";
    assert_eq!(from_utf8(mixed.as_bytes()), Ok(mixed));

    // A 0xFF byte can never appear in well-formed UTF-8.
    assert!(from_utf8(b"hello\xFF").is_err());
}
1533 | ||
#[test]
fn test_pattern_deref_forward() {
    // `contains` accepts the needle by value, by reference, and as a
    // `&String`; all three must find the same substring.
    let haystack = "aabcdaa";
    assert!(haystack.contains("bcd"));
    assert!(haystack.contains(&"bcd"));
    assert!(haystack.contains(&"bcd".to_string()));
}
1541 | ||
1542 | #[test] | |
1543 | fn test_empty_match_indices() { | |
1544 | let data = "aä中!"; | |
1545 | let vec: Vec<_> = data.match_indices("").collect(); | |
1546 | assert_eq!(vec, [(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); | |
1547 | } | |
1548 | ||
#[test]
fn test_bool_from_str() {
    // `FromStr` for `bool` accepts exactly "true" and "false";
    // anything else fails to parse.
    assert_eq!("true".parse::<bool>().ok(), Some(true));
    assert_eq!("false".parse::<bool>().ok(), Some(false));
    assert_eq!("not even a boolean".parse::<bool>().ok(), None);
}
1555 | ||
// Asserts that `s` contains every one of its own substrings, the empty
// string included. Note: slices on raw byte indices, so callers pass
// ASCII-only haystacks (a non-boundary slice would panic).
fn check_contains_all_substrings(s: &str) {
    assert!(s.contains(""));
    for start in 0..s.len() {
        for end in start + 1..s.len() + 1 {
            assert!(s.contains(&s[start..end]));
        }
    }
}
1564 | ||
#[test]
fn strslice_issue_16589() {
    // Regression test for rust-lang/rust#16589: before the fix,
    // `contains` could miss needles with internal repetition.
    assert!("bananas".contains("nana"));

    // Exhaustively check every substring of a haystack shaped like the
    // one that triggered the original bug.
    check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd");
}
1573 | ||
#[test]
fn strslice_issue_16878() {
    // Regression test for rust-lang/rust#16878: these needles are absent,
    // and partial matches in the haystack must not yield false positives.
    let cases = [("1234567ah012345678901ah", "hah"),
                 ("00abc01234567890123456789abc", "bcabc")];
    for &(haystack, needle) in &cases {
        assert!(!haystack.contains(needle));
    }
}
1579 | ||
1580 | ||
#[test]
fn test_strslice_contains() {
    // An arbitrary ASCII sentence: every substring of it must be found.
    let wodehouse = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'";
    check_contains_all_substrings(wodehouse);
}
1586 | ||
#[test]
fn test_rsplitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Collects are gathered back-to-front by `rsplitn`; restore
    // front-to-back order before comparing with the expected pieces.
    fn check_rev(mut pieces: Vec<&str>, expected: &[&str]) {
        pieces.reverse();
        assert_eq!(pieces, expected);
    }

    // ASCII separator, as a char and as an equivalent closure.
    check_rev(data.rsplitn(4, ' ').collect(),
              &["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
    check_rev(data.rsplitn(4, |c: char| c == ' ').collect(),
              &["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);

    // Multibyte separator 'ä', again in both pattern forms.
    check_rev(data.rsplitn(4, 'ä').collect(),
              &["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
    check_rev(data.rsplitn(4, |c: char| c == 'ä').collect(),
              &["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
}
1608 | ||
#[test]
fn test_split_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // A reverse-collected split, once un-reversed, must match the
    // forward expectation exactly.
    fn check_rev(mut pieces: Vec<&str>, expected: &[&str]) {
        pieces.reverse();
        assert_eq!(pieces, expected);
    }

    // ASCII separator, as a char and as an equivalent closure.
    let by_space = ["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"];
    assert_eq!(data.split(' ').collect::<Vec<&str>>(), by_space);
    check_rev(data.split(' ').rev().collect(), &by_space);
    assert_eq!(data.split(|c: char| c == ' ').collect::<Vec<&str>>(), by_space);
    check_rev(data.split(|c: char| c == ' ').rev().collect(), &by_space);

    // Multibyte separator 'ä' exercises non-ASCII boundaries.
    let by_umlaut = ["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"];
    assert_eq!(data.split('ä').collect::<Vec<&str>>(), by_umlaut);
    check_rev(data.split('ä').rev().collect(), &by_umlaut);
    assert_eq!(data.split(|c: char| c == 'ä').collect::<Vec<&str>>(), by_umlaut);
    check_rev(data.split(|c: char| c == 'ä').rev().collect(), &by_umlaut);
}
1642 | ||
#[test]
fn test_rev_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // Reversed `split` keeps the empty piece produced by the trailing
    // separator (it comes out first when iterating backwards)...
    let rev_pieces: Vec<&str> = data.split('\n').rev().collect();
    assert_eq!(rev_pieces, ["", "Little lämb", "Märy häd ä little lämb", ""]);

    // ...while `split_terminator` drops that final empty piece.
    let rev_pieces: Vec<&str> = data.split_terminator('\n').rev().collect();
    assert_eq!(rev_pieces, ["Little lämb", "Märy häd ä little lämb", ""]);
}
1655 | ||
#[test]
fn test_utf16_code_units() {
    // `Utf16Encoder` (unstable, from the internal `rustc_unicode` crate)
    // adapts a `char` iterator into an iterator of UTF-16 code units.
    use rustc_unicode::str::Utf16Encoder;
    // 'é' (U+00E9) fits in one code unit; U+1F4A9 is outside the BMP and
    // encodes as the surrogate pair 0xD83D 0xDCA9.
    assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
               [0xE9, 0xD83D, 0xDCA9])
}
1662 | ||
#[test]
fn starts_with_in_unicode() {
    // A haystack beginning with a multibyte char ('├' is 3 bytes) must
    // not spuriously match an ASCII prefix.
    let line = "├── Cargo.toml";
    assert_eq!(line.starts_with("# "), false);
}
1667 | ||
#[test]
fn starts_short_long() {
    // (haystack, needle, expected): needles longer than the haystack can
    // never match; genuine prefixes always do, including across the
    // multibyte char 'ä'.
    let cases = [("", "##", false),
                 ("##", "####", false),
                 ("####", "##", true),
                 ("##ä", "####", false),
                 ("####ä", "##", true),
                 ("##", "####ä", false),
                 ("##ä##", "##ä", true)];
    for &(haystack, needle, expected) in &cases {
        assert_eq!(haystack.starts_with(needle), expected);
    }

    // The empty string is a prefix of every string, wherever 'ä' sits.
    for haystack in &["", "ä", "#ä", "##ä", "ä###", "#ä##", "##ä#"] {
        assert!(haystack.starts_with(""));
    }
}
1686 | ||
#[test]
fn contains_weird_cases() {
    // Whitespace haystack: present ASCII char, absent ASCII char, and an
    // absent char outside the BMP.
    let haystack = "* \t";
    assert_eq!(haystack.contains(' '), true);
    assert_eq!(haystack.contains('?'), false);
    assert_eq!(haystack.contains('\u{1F4A9}'), false);
}
1693 | ||
#[test]
fn trim_ws() {
    // Whitespace predicate handed to the whole trim family.
    fn ws(c: char) -> bool { c.is_whitespace() }

    // Mixed content: only the requested side(s) are trimmed.
    let padded = " \t a \t ";
    assert_eq!(padded.trim_left_matches(ws), "a \t ");
    assert_eq!(padded.trim_right_matches(ws), " \t a");
    assert_eq!(padded.trim_matches(ws), "a");

    // Pure whitespace trims to the empty string from either side.
    let blank = " \t \t ";
    assert_eq!(blank.trim_left_matches(ws), "");
    assert_eq!(blank.trim_right_matches(ws), "");
    assert_eq!(blank.trim_matches(ws), "");
}
1709 | ||
62682a34 SL |
#[test]
fn to_lowercase() {
    assert_eq!("".to_lowercase(), "");
    assert_eq!("AÉDžaé ".to_lowercase(), "aédžaé ");

    // Greek final-sigma rule (https://github.com/rust-lang/rust/issues/26035):
    // 'Σ' lowercases to 'ς' at the end of a word and to 'σ' elsewhere;
    // apostrophes must not break a word.
    let sigma_cases = [
        ("ΑΣ", "ας"),
        ("Α'Σ", "α'ς"),
        ("Α''Σ", "α''ς"),
        ("ΑΣ Α", "ας α"),
        ("Α'Σ Α", "α'ς α"),
        ("Α''Σ Α", "α''ς α"),
        ("ΑΣ' Α", "ας' α"),
        ("ΑΣ'' Α", "ας'' α"),
        ("Α'Σ' Α", "α'ς' α"),
        ("Α''Σ'' Α", "α''ς'' α"),
        ("Α Σ", "α σ"),
        ("Α 'Σ", "α 'σ"),
        ("Α ''Σ", "α ''σ"),
        ("Σ", "σ"),
        ("'Σ", "'σ"),
        ("''Σ", "''σ"),
        ("ΑΣΑ", "ασα"),
        ("ΑΣ'Α", "ασ'α"),
        ("ΑΣ''Α", "ασ''α"),
    ];
    for &(upper, lower) in &sigma_cases {
        assert_eq!(upper.to_lowercase(), lower);
    }
}
1742 | ||
#[test]
fn to_uppercase() {
    // Uppercasing may grow the string: 'ß' → "SS", 'ﬁ' → "FI",
    // 'ᾀ' → "ἈΙ".
    let cases = [("", ""), ("aéDžßﬁᾀ", "AÉDŽSSFIἈΙ")];
    for &(input, expected) in &cases {
        assert_eq!(input.to_uppercase(), expected);
    }
}
1748 | ||
9346a6ac AL |
// Tests for the unstable `std::str::pattern` API: drive a pattern's
// `Searcher` by hand and compare the emitted `SearchStep` sequence
// against a hand-written expectation.
mod pattern {
    use std::str::pattern::Pattern;
    use std::str::pattern::{Searcher, ReverseSearcher};
    use std::str::pattern::SearchStep::{self, Match, Reject, Done};

    // Expands to a module holding `fwd`/`bwd` tests that run the same
    // pattern/haystack pair through the forward and reverse searcher.
    macro_rules! make_test {
        ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
            #[allow(unused_imports)]
            mod $name {
                use std::str::pattern::SearchStep::{Match, Reject};
                use super::{cmp_search_to_vec};
                #[test]
                fn fwd() {
                    cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
                }
                #[test]
                fn bwd() {
                    cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
                }
            }
        }
    }

    // Runs `pat`'s searcher over `haystack` — backwards when `rev` is
    // true — collecting every `Match`/`Reject` step until `Done`, then
    // checks that:
    //   * `right` tiles the haystack with no gaps (each step must start
    //     where the previous one ended, and end at `haystack.len()`), and
    //   * the collected steps equal `right` (a reverse run is re-reversed
    //     first so both directions compare in forward order).
    fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
                                             right: Vec<SearchStep>)
    where P::Searcher: ReverseSearcher<'a>
    {
        let mut searcher = pat.into_searcher(haystack);
        let mut v = vec![];
        loop {
            match if !rev {searcher.next()} else {searcher.next_back()} {
                Match(a, b) => v.push(Match(a, b)),
                Reject(a, b) => v.push(Reject(a, b)),
                Done => break,
            }
        }
        if rev {
            v.reverse();
        }

        // Walk the expected steps, tracking where the next one must begin.
        let mut first_index = 0;
        let mut err = None;

        for (i, e) in right.iter().enumerate() {
            match *e {
                Match(a, b) | Reject(a, b)
                if a <= b && a == first_index => {
                    first_index = b;
                }
                _ => {
                    err = Some(i);
                    break;
                }
            }
        }

        if let Some(err) = err {
            panic!("Input skipped range at {}", err);
        }

        if first_index != haystack.len() {
            panic!("Did not cover whole input");
        }

        assert_eq!(v, right);
    }

    // &str needle: matches span the needle's byte range, rejects cover
    // everything in between.
    make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
        Reject(0, 1),
        Match (1, 3),
        Reject(3, 4),
        Match (4, 6),
        Reject(6, 7),
    ]);
    // An empty needle matches (zero-width) at every character boundary,
    // including both ends.
    make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
        Match (0, 0),
        Reject(0, 1),
        Match (1, 1),
        Reject(1, 2),
        Match (2, 2),
        Reject(2, 3),
        Match (3, 3),
        Reject(3, 4),
        Match (4, 4),
        Reject(4, 5),
        Match (5, 5),
        Reject(5, 6),
        Match (6, 6),
        Reject(6, 7),
        Match (7, 7),
    ]);
    // Multibyte haystack: each box-drawing char is 3 bytes, so rejects
    // advance three bytes at a time.
    make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
        Reject(0, 3),
        Reject(3, 6),
        Reject(6, 9),
    ]);
    make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
        Match (0, 0),
        Reject(0, 3),
        Match (3, 3),
        Reject(3, 6),
        Match (6, 6),
        Reject(6, 9),
        Match (9, 9),
    ]);
    make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
        Match(0, 0),
    ]);
    // A non-empty needle in an empty haystack emits no steps at all.
    make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
    ]);
    // char needle: adjacent single-byte matches, no zero-width steps.
    make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
        Reject(0, 1),
        Match (1, 2),
        Match (2, 3),
        Reject(3, 4),
        Match (4, 5),
        Match (5, 6),
        Reject(6, 7),
    ]);
    make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
        Reject(0, 3),
        Reject(3, 6),
        Reject(6, 9),
    ]);
    make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
        Reject(0, 1),
        Reject(1, 2),
        Reject(2, 3),
    ]);

}
1880 | ||
// Generates a `#[test]` that feeds each `(args) -> [expected];` case to
// the given iterator constructor(s) and compares the collected output.
//
// Two forms:
//   * `with $fwd, $bwd;` — `$fwd` must yield `expected` in order, and
//     `$bwd` must yield it back-to-front (checked by reversing its
//     collected output); for double-ended pairs like `split`/`rsplit`.
//   * `with $fwd;` — forward-only check, for iterators without a
//     double-ended counterpart (`splitn`, `rsplitn`).
macro_rules! generate_iterator_test {
    {
        $name:ident {
            $(
                ($($arg:expr),*) -> [$($t:tt)*];
            )*
        }
        with $fwd:expr, $bwd:expr;
    } => {
        #[test]
        fn $name() {
            $(
                {
                    let res = vec![$($t)*];

                    let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect();
                    assert_eq!(fwd_vec, res);

                    let mut bwd_vec: Vec<_> = ($bwd)($($arg),*).collect();
                    bwd_vec.reverse();
                    assert_eq!(bwd_vec, res);
                }
            )*
        }
    };
    {
        $name:ident {
            $(
                ($($arg:expr),*) -> [$($t:tt)*];
            )*
        }
        with $fwd:expr;
    } => {
        #[test]
        fn $name() {
            $(
                {
                    let res = vec![$($t)*];

                    let fwd_vec: Vec<_> = ($fwd)($($arg),*).collect();
                    assert_eq!(fwd_vec, res);
                }
            )*
        }
    }
}
1927 | ||
// `split`/`rsplit` agree from both ends, for char and &str separators.
generate_iterator_test! {
    double_ended_split {
        ("foo.bar.baz", '.') -> ["foo", "bar", "baz"];
        ("foo::bar::baz", "::") -> ["foo", "bar", "baz"];
    }
    with str::split, str::rsplit;
}

// `split_terminator` drops the empty piece after a trailing separator.
generate_iterator_test! {
    double_ended_split_terminator {
        ("foo;bar;baz;", ';') -> ["foo", "bar", "baz"];
    }
    with str::split_terminator, str::rsplit_terminator;
}

// `matches`/`rmatches` yield the matched slices themselves.
generate_iterator_test! {
    double_ended_matches {
        ("a1b2c3", char::is_numeric) -> ["1", "2", "3"];
    }
    with str::matches, str::rmatches;
}

// `match_indices`/`rmatch_indices` yield (start, end) byte ranges.
generate_iterator_test! {
    double_ended_match_indices {
        ("a1b2c3", char::is_numeric) -> [(1, 2), (3, 4), (5, 6)];
    }
    with str::match_indices, str::rmatch_indices;
}

// `splitn` caps the piece count, leaving the remainder unsplit at the end.
generate_iterator_test! {
    not_double_ended_splitn {
        ("foo::bar::baz", 2, "::") -> ["foo", "bar::baz"];
    }
    with str::splitn;
}

// `rsplitn` does the same starting from the back of the string.
generate_iterator_test! {
    not_double_ended_rsplitn {
        ("foo::bar::baz", 2, "::") -> ["baz", "foo::bar"];
    }
    with str::rsplitn;
}
1970 | ||
// Micro-benchmarks for `&str` iteration, splitting, and searching.
// Uses the unstable `test` crate's `Bencher`/`black_box`.
mod bench {
    use test::{Bencher, black_box};

    // Counting chars over a mixed ASCII/multibyte haystack.
    #[bench]
    fn char_iterator(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().count());
    }

    #[bench]
    fn char_iterator_for(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| {
            for ch in s.chars() { black_box(ch); }
        });
    }

    // Pure-ASCII haystack for comparison with the mixed one above.
    #[bench]
    fn char_iterator_ascii(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb
Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().count());
    }

    #[bench]
    fn char_iterator_rev(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().rev().count());
    }

    #[bench]
    fn char_iterator_rev_for(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| {
            for ch in s.chars().rev() { black_box(ch); }
        });
    }

    // `char_indices`, forward and reverse; the assert keeps the loop
    // from being optimized away.
    #[bench]
    fn char_indicesator(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let len = s.chars().count();

        b.iter(|| assert_eq!(s.char_indices().count(), len));
    }

    #[bench]
    fn char_indicesator_rev(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let len = s.chars().count();

        b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
    }

    // Splitting variants: same separator expressed as different pattern
    // types, to compare their searcher costs.
    #[bench]
    fn split_unicode_ascii(b: &mut Bencher) {
        let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

        b.iter(|| assert_eq!(s.split('V').count(), 3));
    }

    #[bench]
    fn split_ascii(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(' ').count(), len));
    }

    #[bench]
    fn split_extern_fn(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();
        fn pred(c: char) -> bool { c == ' ' }

        b.iter(|| assert_eq!(s.split(pred).count(), len));
    }

    #[bench]
    fn split_closure(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
    }

    #[bench]
    fn split_slice(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        let c: &[char] = &[' '];
        b.iter(|| assert_eq!(s.split(c).count(), len));
    }

    // Joining ten copies of `s` with a multibyte separator.
    #[bench]
    fn bench_connect(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let sep = "→";
        let v = vec![s, s, s, s, s, s, s, s, s, s];
        b.iter(|| {
            assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
        })
    }

    // Substring search over short/long haystacks, hit and miss cases.
    #[bench]
    fn bench_contains_short_short(b: &mut Bencher) {
        let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
        let needle = "sit";

        b.iter(|| {
            assert!(haystack.contains(needle));
        })
    }

    #[bench]
    fn bench_contains_short_long(b: &mut Bencher) {
        let haystack = "\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.

In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.

Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.

Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.

Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum.";
        let needle = "english";

        b.iter(|| {
            assert!(!haystack.contains(needle));
        })
    }

    // Worst case for a naive search: a long run of 'a' with a needle
    // that differs only in its final byte.
    #[bench]
    fn bench_contains_bad_naive(b: &mut Bencher) {
        let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let needle = "aaaaaaaab";

        b.iter(|| {
            assert!(!haystack.contains(needle));
        })
    }

    // Needle equal to the whole haystack.
    #[bench]
    fn bench_contains_equal(b: &mut Bencher) {
        let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
        let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

        b.iter(|| {
            assert!(haystack.contains(needle));
        })
    }

    // Expands to one `#[bench]` that runs `$code` against a haystack
    // bound to `$s` (black_box'd so it isn't constant-folded).
    macro_rules! make_test_inner {
        ($s:ident, $code:expr, $name:ident, $str:expr) => {
            #[bench]
            fn $name(bencher: &mut Bencher) {
                let mut $s = $str;
                black_box(&mut $s);
                bencher.iter(|| $code);
            }
        }
    }

    // Expands to a module running the same `$code` over four haystacks:
    // short ASCII, short mixed, short non-BMP, and a long ASCII text.
    macro_rules! make_test {
        ($name:ident, $s:ident, $code:expr) => {
            mod $name {
                use test::Bencher;
                use test::black_box;

                // Short strings: 65 bytes each
                make_test_inner!($s, $code, short_ascii,
                    "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!");
                make_test_inner!($s, $code, short_mixed,
                    "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!");
                make_test_inner!($s, $code, short_pile_of_poo,
                    "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!");
                make_test_inner!($s, $code, long_lorem_ipsum,"\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.

In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.

Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.

Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.

Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum!");
            }
        }
    }

    make_test!(chars_count, s, s.chars().count());

    make_test!(contains_bang_str, s, s.contains("!"));
    make_test!(contains_bang_char, s, s.contains('!'));

    make_test!(match_indices_a_str, s, s.match_indices("a").count());

    make_test!(split_a_str, s, s.split("a").count());

    make_test!(trim_ascii_char, s, {
        use std::ascii::AsciiExt;
        s.trim_matches(|c: char| c.is_ascii())
    });
    make_test!(trim_left_ascii_char, s, {
        use std::ascii::AsciiExt;
        s.trim_left_matches(|c: char| c.is_ascii())
    });
    make_test!(trim_right_ascii_char, s, {
        use std::ascii::AsciiExt;
        s.trim_right_matches(|c: char| c.is_ascii())
    });

    make_test!(find_underscore_char, s, s.find('_'));
    make_test!(rfind_underscore_char, s, s.rfind('_'));
    make_test!(find_underscore_str, s, s.find("_"));

    make_test!(find_zzz_char, s, s.find('\u{1F4A4}'));
    make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}'));
    make_test!(find_zzz_str, s, s.find("\u{1F4A4}"));

    make_test!(split_space_char, s, s.split(' ').count());
    make_test!(split_terminator_space_char, s, s.split_terminator(' ').count());

    make_test!(splitn_space_char, s, s.splitn(10, ' ').count());
    make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());

    make_test!(split_space_str, s, s.split(" ").count());
    make_test!(split_ad_str, s, s.split("ad").count());
}