]> git.proxmox.com Git - rustc.git/blob - src/vendor/strsim/src/lib.rs
New upstream version 1.17.0+dfsg1
[rustc.git] / src / vendor / strsim / src / lib.rs
1 //! This library implements string similarity metrics.
2
3 use std::char;
4 use std::cmp::{max, min};
5 use std::collections::HashMap;
6
7 #[derive(Debug, PartialEq)]
8 pub enum StrSimError {
9 DifferentLengthArgs
10 }
11
12 pub type HammingResult = Result<usize, StrSimError>;
13
14 /// Calculates the number of positions in the two strings where the characters
15 /// differ. Returns an error if the strings have different lengths.
16 ///
17 /// ```
18 /// use strsim::hamming;
19 ///
20 /// match hamming("hamming", "hammers") {
21 /// Ok(distance) => assert_eq!(3, distance),
22 /// Err(why) => panic!("{:?}", why)
23 /// }
24 /// ```
25 pub fn hamming(a: &str, b: &str) -> HammingResult {
26 let (mut ita, mut itb, mut count) = (a.chars(), b.chars(), 0);
27 loop {
28 match (ita.next(), itb.next()){
29 (Some(x), Some(y)) => if x != y { count += 1 },
30 (None, None) => return Ok(count),
31 _ => return Err(StrSimError::DifferentLengthArgs),
32 }
33 }
34 }
35
36 /// Calculates the Jaro similarity between two strings. The returned value
37 /// is between 0.0 and 1.0 (higher value means more similar).
38 ///
39 /// ```
40 /// use strsim::jaro;
41 ///
42 /// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
43 /// 0.001);
44 /// ```
45 pub fn jaro(a: &str, b: &str) -> f64 {
46 if a == b { return 1.0; }
47
48 let a_len = a.chars().count();
49 let b_len = b.chars().count();
50
51 // The check for lengths of one here is to prevent integer overflow when
52 // calculating the search range.
53 if a_len == 0 || b_len == 0 || (a_len == 1 && b_len == 1) {
54 return 0.0;
55 }
56
57 let search_range = (max(a_len, b_len) / 2) - 1;
58
59 let mut b_consumed = Vec::with_capacity(b_len);
60 for _ in 0..b_len {
61 b_consumed.push(false);
62 }
63 let mut matches = 0.0;
64
65 let mut transpositions = 0.0;
66 let mut b_match_index = 0;
67
68 for (i, a_char) in a.chars().enumerate() {
69 let min_bound =
70 // prevent integer wrapping
71 if i > search_range {
72 max(0, i - search_range)
73 } else {
74 0
75 };
76
77 let max_bound = min(b_len - 1, i + search_range);
78
79 if min_bound > max_bound {
80 continue;
81 }
82
83 for (j, b_char) in b.chars().enumerate() {
84 if min_bound <= j && j <= max_bound && a_char == b_char &&
85 !b_consumed[j] {
86 b_consumed[j] = true;
87 matches += 1.0;
88
89 if j < b_match_index {
90 transpositions += 1.0;
91 }
92 b_match_index = j;
93
94 break;
95 }
96 }
97 }
98
99 if matches == 0.0 {
100 0.0
101 } else {
102 (1.0 / 3.0) * ((matches / a_len as f64) +
103 (matches / b_len as f64) +
104 ((matches - transpositions) / matches))
105 }
106 }
107
108 /// Calculates the Jaro distance between a string and each string in a vector.
109 /// Returns a vector of corresponding values between 0.0 and 1.0 (higher value
110 /// means more similar).
111 ///
112 /// ```
113 /// use strsim::jaro_against_vec;
114 ///
115 /// let v = vec!["test", "test1", "test12", "test123", "", "tset"];
116 /// let result = jaro_against_vec("test", &v);
117 /// let expected = vec![1.0, 0.933333, 0.888889, 0.857143, 0.0, 0.916667];
118 /// let delta: f64 = result.iter()
119 /// .zip(expected.iter())
120 /// .map(|(x, y)| (x - y).abs() as f64)
121 /// .fold(0.0, |x, y| x + y as f64);
122 /// assert!(delta.abs() < 0.0001);
123 /// ```
124 pub fn jaro_against_vec(a: &str, v: &[&str]) -> Vec<f64> {
125 v.iter().map(|b| jaro(a, b)).collect()
126 }
127
128 /// Like Jaro but gives a boost to strings that have a common prefix.
129 ///
130 /// ```
131 /// use strsim::jaro_winkler;
132 ///
133 /// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
134 /// 0.001);
135 /// ```
136 pub fn jaro_winkler(a: &str, b: &str) -> f64 {
137 let jaro_distance = jaro(a, b);
138
139 // Don't limit the length of the common prefix
140 let prefix_length = a.chars()
141 .zip(b.chars())
142 .take_while(|&(a_char, b_char)| a_char == b_char)
143 .count();
144
145 let jaro_winkler_distance =
146 jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));
147
148 if jaro_winkler_distance <= 1.0 {
149 jaro_winkler_distance
150 } else {
151 1.0
152 }
153 }
154
155 /// Calculates the Jaro-Winkler distances between a string and each string
156 /// in a vector. Returns a vector of corresponding values.
157 ///
158 /// ```
159 /// use strsim::jaro_winkler_against_vec;
160 ///
161 /// let v = vec!["test", "test1", "test12", "test123", "", "tset"];
162 /// let result = jaro_winkler_against_vec("test", &v);
163 /// let expected = vec![1.0, 0.96, 0.933333, 0.914286, 0.0, 0.925];
164 /// let delta: f64 = result.iter()
165 /// .zip(expected.iter())
166 /// .map(|(x, y)| (x - y).abs() as f64)
167 /// .fold(0.0, |x, y| x + y as f64);
168 /// assert!(delta.abs() < 0.0001);
169 /// ```
170 pub fn jaro_winkler_against_vec(a: &str, v: &[&str]) -> Vec<f64> {
171 v.iter().map(|b| jaro_winkler(a, b)).collect()
172 }
173
174 /// Calculates the minimum number of insertions, deletions, and substitutions
175 /// required to change one string into the other.
176 ///
177 /// ```
178 /// use strsim::levenshtein;
179 ///
180 /// assert_eq!(3, levenshtein("kitten", "sitting"));
181 /// ```
182 pub fn levenshtein(a: &str, b: &str) -> usize {
183 let a_len = a.chars().count();
184 let b_len = b.chars().count();
185 if a == b { return 0; }
186 else if a_len == 0 { return b_len; }
187 else if b_len == 0 { return a_len; }
188
189 let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
190 let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
191
192 for i in 0..(b_len + 1) {
193 prev_distances.push(i);
194 curr_distances.push(0);
195 }
196
197 for (i, a_char) in a.chars().enumerate() {
198 curr_distances[0] = i + 1;
199
200 for (j, b_char) in b.chars().enumerate() {
201 let cost = if a_char == b_char { 0 } else { 1 };
202 curr_distances[j + 1] = min(curr_distances[j] + 1,
203 min(prev_distances[j + 1] + 1,
204 prev_distances[j] + cost));
205 }
206
207 prev_distances.clone_from(&curr_distances);
208 }
209
210 curr_distances[b_len]
211 }
212
213 /// Calculates the Levenshtein distance between a string and each string in a
214 /// vector. Returns a vector of corresponding values.
215 ///
216 /// ```
217 /// use strsim::levenshtein_against_vec;
218 ///
219 /// let v = vec!["test", "test1", "test12", "test123", "", "tset"];
220 /// let result = levenshtein_against_vec("test", &v);
221 /// let expected = vec![0, 1, 2, 3, 4, 2];
222 /// assert_eq!(expected, result);
223 /// ```
224 pub fn levenshtein_against_vec(a: &str, v: &[&str]) -> Vec<usize> {
225 v.iter().map(|b| levenshtein(a, b)).collect()
226 }
227
228 /// Like Levenshtein but allows for adjacent transpositions. Each substring can
229 /// only be edited once.
230 ///
231 /// ```
232 /// use strsim::osa_distance;
233 ///
234 /// assert_eq!(3, osa_distance("ab", "bca"));
235 /// ```
236 pub fn osa_distance(a: &str, b: &str) -> usize {
237 let a_len = a.chars().count();
238 let b_len = b.chars().count();
239 if a == b { return 0; }
240 else if a_len == 0 { return b_len; }
241 else if b_len == 0 { return a_len; }
242
243 let mut prev_two_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
244 let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
245 let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
246
247 let mut prev_a_char = char::MAX;
248 let mut prev_b_char = char::MAX;
249
250 for i in 0..(b_len + 1) {
251 prev_two_distances.push(i);
252 prev_distances.push(i);
253 curr_distances.push(0);
254 }
255
256 for (i, a_char) in a.chars().enumerate() {
257 curr_distances[0] = i + 1;
258
259 for (j, b_char) in b.chars().enumerate() {
260 let cost = if a_char == b_char { 0 } else { 1 };
261 curr_distances[j + 1] = min(curr_distances[j] + 1,
262 min(prev_distances[j + 1] + 1,
263 prev_distances[j] + cost));
264 if i > 0 && j > 0 && a_char != b_char &&
265 a_char == prev_b_char && b_char == prev_a_char {
266 curr_distances[j + 1] = min(curr_distances[j + 1],
267 prev_two_distances[j - 1] + 1);
268 }
269
270 prev_b_char = b_char;
271 }
272
273 prev_two_distances.clone_from(&prev_distances);
274 prev_distances.clone_from(&curr_distances);
275 prev_a_char = a_char;
276 }
277
278 curr_distances[b_len]
279
280 }
281
282 /// Calculates the optimal string alignment distance between a string and each
283 /// string in a vector. Returns a vector of corresponding values.
284 ///
285 /// ```
286 /// use strsim::osa_distance_against_vec;
287 ///
288 /// let v = vec!["test", "test1", "test12", "test123", "", "tset"];
289 /// let result = osa_distance_against_vec("test", &v);
290 /// let expected = vec![0, 1, 2, 3, 4, 1];
291 /// assert_eq!(expected, result);
292 /// ```
293 pub fn osa_distance_against_vec(a: &str, v: &[&str]) -> Vec<usize> {
294 v.iter().map(|b| osa_distance(a, b)).collect()
295 }
296
297 /// Like optimal string alignment, but substrings can be edited an unlimited
298 /// number of times, and the triangle inequality holds.
299 ///
300 /// ```
301 /// use strsim::damerau_levenshtein;
302 ///
303 /// assert_eq!(2, damerau_levenshtein("ab", "bca"));
304 /// ```
305 pub fn damerau_levenshtein(a: &str, b: &str) -> usize {
306 if a == b { return 0; }
307
308 let a_chars: Vec<char> = a.chars().collect();
309 let b_chars: Vec<char> = b.chars().collect();
310 let a_len = a_chars.len();
311 let b_len = b_chars.len();
312
313 if a_len == 0 { return b_len; }
314 if b_len == 0 { return a_len; }
315
316 let mut distances = vec![vec![0; b_len + 2]; a_len + 2];
317 let max_distance = a_len + b_len;
318 distances[0][0] = max_distance;
319
320 for i in 0..(a_len + 1) {
321 distances[i + 1][0] = max_distance;
322 distances[i + 1][1] = i;
323 }
324
325 for j in 0..(b_len + 1) {
326 distances[0][j + 1] = max_distance;
327 distances[1][j + 1] = j;
328 }
329
330 let mut chars: HashMap<char, usize> = HashMap::new();
331
332 for i in 1..(a_len + 1) {
333 let mut db = 0;
334
335 for j in 1..(b_len + 1) {
336 let k = match chars.get(&b_chars[j - 1]) {
337 Some(value) => value.clone(),
338 None => 0
339 };
340
341 let l = db;
342
343 let mut cost = 1;
344 if a_chars[i - 1] == b_chars[j - 1] {
345 cost = 0;
346 db = j;
347 }
348
349 let substitution_cost = distances[i][j] + cost;
350 let insertion_cost = distances[i][j + 1] + 1;
351 let deletion_cost = distances[i + 1][j] + 1;
352 let transposition_cost = distances[k][l] + (i - k - 1) + 1 +
353 (j - l - 1);
354
355 distances[i + 1][j + 1] = min(substitution_cost,
356 min(insertion_cost,
357 min(deletion_cost,
358 transposition_cost)));
359 }
360
361 chars.insert(a_chars[i - 1], i);
362 }
363
364 distances[a_len + 1][b_len + 1]
365 }
366
367 /// Calculates the Damerau-Levenshtein distance between a string and each string
368 /// in a vector. Returns a vector of corresponding values.
369 ///
370 /// ```
371 /// use strsim::damerau_levenshtein_against_vec;
372 ///
373 /// let v = vec!["test", "test1", "test12", "test123", "", "tset"];
374 /// let result = damerau_levenshtein_against_vec("test", &v);
375 /// let expected = vec![0, 1, 2, 3, 4, 1];
376 /// assert_eq!(expected, result);
377 /// ```
378 pub fn damerau_levenshtein_against_vec(a: &str, v: &[&str]) -> Vec<usize> {
379 v.iter().map(|b| damerau_levenshtein(a, b)).collect()
380 }
381
382 #[cfg(test)]
383 mod tests {
384 use super::*;
385
386 #[test]
387 fn hamming_empty() {
388 match hamming("", "") {
389 Ok(distance) => { assert_eq!(0, distance); },
390 Err(why) => { panic!("{:?}", why); }
391 }
392 }
393
394 #[test]
395 fn hamming_same() {
396 match hamming("hamming", "hamming") {
397 Ok(distance) => { assert_eq!(0, distance); },
398 Err(why) => { panic!("{:?}", why); }
399 }
400 }
401
402 #[test]
403 fn hamming_diff() {
404 match hamming("hamming", "hammers") {
405 Ok(distance) => { assert_eq!(3, distance); },
406 Err(why) => { panic!("{:?}", why); }
407 }
408 }
409
410 #[test]
411 fn hamming_diff_multibyte() {
412 match hamming("hamming", "h香mmüng") {
413 Ok(distance) => { assert_eq!(2, distance); },
414 Err(why) => { panic!("{:?}", why); }
415 }
416 }
417
418 #[test]
419 fn hamming_unequal_length() {
420 match hamming("ham", "hamming") {
421 Ok(_) => { panic!(); },
422 Err(why) => { assert_eq!(why, StrSimError::DifferentLengthArgs); }
423 }
424 }
425
426 #[test]
427 fn hamming_names() {
428 match hamming("Friedrich Nietzs", "Jean-Paul Sartre") {
429 Ok(distance) => { assert_eq!(14, distance); },
430 Err(why) => { panic!("{:?}", why); }
431 }
432 }
433
434 #[test]
435 fn jaro_both_empty() {
436 assert_eq!(1.0, jaro("", ""));
437 }
438
439 #[test]
440 fn jaro_first_empty() {
441 assert_eq!(0.0, jaro("", "jaro"));
442 }
443
444 #[test]
445 fn jaro_second_empty() {
446 assert_eq!(0.0, jaro("distance", ""));
447 }
448
449 #[test]
450 fn jaro_same() {
451 assert_eq!(1.0, jaro("jaro", "jaro"));
452 }
453
454 #[test]
455 fn jaro_multibyte() {
456 assert!((0.818 - jaro("testabctest", "testöঙ香test")) < 0.001);
457 assert!((0.818 - jaro("testöঙ香test", "testabctest")) < 0.001);
458 }
459
460 #[test]
461 fn jaro_diff_short() {
462 assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001);
463 }
464
465 #[test]
466 fn jaro_diff_one_character() {
467 assert_eq!(0.0, jaro("a", "b"));
468 }
469
470 #[test]
471 fn jaro_diff_one_and_two() {
472 assert!((0.83 - jaro("a", "ab")).abs() < 0.01);
473 }
474
475 #[test]
476 fn jaro_diff_two_and_one() {
477 assert!((0.83 - jaro("ab", "a")).abs() < 0.01);
478 }
479
480 #[test]
481 fn jaro_diff_no_transposition() {
482 assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001);
483 }
484
485 #[test]
486 fn jaro_diff_with_transposition() {
487 assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001);
488 }
489
490 #[test]
491 fn jaro_names() {
492 assert!((0.392 - jaro("Friedrich Nietzsche",
493 "Jean-Paul Sartre")).abs() < 0.001);
494 }
495
496 #[test]
497 fn jaro_winkler_both_empty() {
498 assert_eq!(1.0, jaro_winkler("", ""));
499 }
500
501 #[test]
502 fn jaro_winkler_first_empty() {
503 assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
504 }
505
506 #[test]
507 fn jaro_winkler_second_empty() {
508 assert_eq!(0.0, jaro_winkler("distance", ""));
509 }
510
511 #[test]
512 fn jaro_winkler_same() {
513 assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
514 }
515
516 #[test]
517 fn jaro_winkler_multibyte() {
518 assert!((0.89 - jaro_winkler("testabctest", "testöঙ香test")).abs() <
519 0.001);
520 assert!((0.89 - jaro_winkler("testöঙ香test", "testabctest")).abs() <
521 0.001);
522 }
523
524 #[test]
525 fn jaro_winkler_diff_short() {
526 assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001);
527 assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001);
528 }
529
530 #[test]
531 fn jaro_winkler_diff_one_character() {
532 assert_eq!(0.0, jaro_winkler("a", "b"));
533 }
534
535 #[test]
536 fn jaro_winkler_diff_no_transposition() {
537 assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001);
538 }
539
540 #[test]
541 fn jaro_winkler_diff_with_transposition() {
542 assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001);
543 }
544
545 #[test]
546 fn jaro_winkler_names() {
547 assert!((0.562 - jaro_winkler("Friedrich Nietzsche",
548 "Fran-Paul Sartre")).abs() < 0.001);
549 }
550
551 #[test]
552 fn jaro_winkler_long_prefix() {
553 assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
554 0.001);
555 }
556
557 #[test]
558 fn jaro_winkler_more_names() {
559 assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001);
560 }
561
562 #[test]
563 fn jaro_winkler_length_of_one() {
564 assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001);
565 }
566
567 #[test]
568 fn jaro_winkler_very_long_prefix() {
569 assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
570 "thequickbrownfoxjumpedovery")).abs() <
571 0.001);
572 }
573
574 #[test]
575 fn levenshtein_empty() {
576 assert_eq!(0, levenshtein("", ""));
577 }
578
579 #[test]
580 fn levenshtein_same() {
581 assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
582 }
583
584 #[test]
585 fn levenshtein_diff_short() {
586 assert_eq!(3, levenshtein("kitten", "sitting"));
587 }
588
589 #[test]
590 fn levenshtein_diff_with_space() {
591 assert_eq!(5, levenshtein("hello, world", "bye, world"));
592 }
593
594 #[test]
595 fn levenshtein_diff_multibyte() {
596 assert_eq!(3, levenshtein("öঙ香", "abc"));
597 assert_eq!(3, levenshtein("abc", "öঙ香"));
598 }
599
600 #[test]
601 fn levenshtein_diff_longer() {
602 let a = "The quick brown fox jumped over the angry dog.";
603 let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
604 assert_eq!(37, levenshtein(a, b));
605 }
606
607 #[test]
608 fn levenshtein_first_empty() {
609 assert_eq!(7, levenshtein("", "sitting"));
610 }
611
612 #[test]
613 fn levenshtein_second_empty() {
614 assert_eq!(6, levenshtein("kitten", ""));
615 }
616
617 #[test]
618 fn osa_distance_empty() {
619 assert_eq!(0, osa_distance("", ""));
620 }
621
622 #[test]
623 fn osa_distance_same() {
624 assert_eq!(0, osa_distance("damerau", "damerau"));
625 }
626
627 #[test]
628 fn osa_distance_first_empty() {
629 assert_eq!(7, osa_distance("", "damerau"));
630 }
631
632 #[test]
633 fn osa_distance_second_empty() {
634 assert_eq!(7, osa_distance("damerau", ""));
635 }
636
637 #[test]
638 fn osa_distance_diff() {
639 assert_eq!(3, osa_distance("ca", "abc"));
640 }
641
642 #[test]
643 fn osa_distance_diff_short() {
644 assert_eq!(3, osa_distance("damerau", "aderua"));
645 }
646
647 #[test]
648 fn osa_distance_diff_reversed() {
649 assert_eq!(3, osa_distance("aderua", "damerau"));
650 }
651
652 #[test]
653 fn osa_distance_diff_multibyte() {
654 assert_eq!(3, osa_distance("öঙ香", "abc"));
655 assert_eq!(3, osa_distance("abc", "öঙ香"));
656 }
657
658 #[test]
659 fn osa_distance_diff_unequal_length() {
660 assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
661 }
662
663 #[test]
664 fn osa_distance_diff_unequal_length_reversed() {
665 assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
666 }
667
668 #[test]
669 fn osa_distance_diff_comedians() {
670 assert_eq!(5, osa_distance("Stewart", "Colbert"));
671 }
672
673 #[test]
674 fn osa_distance_many_transpositions() {
675 assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
676 }
677
678 #[test]
679 fn osa_distance_diff_longer() {
680 let a = "The quick brown fox jumped over the angry dog.";
681 let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
682 assert_eq!(36, osa_distance(a, b));
683 }
684
685 #[test]
686 fn osa_distance_beginning_transposition() {
687 assert_eq!(1, osa_distance("foobar", "ofobar"));
688 }
689
690 #[test]
691 fn osa_distance_end_transposition() {
692 assert_eq!(1, osa_distance("specter", "spectre"));
693 }
694
695 #[test]
696 fn osa_distance_restricted_edit() {
697 assert_eq!(4, osa_distance("a cat", "an abct"));
698 }
699
700 #[test]
701 fn damerau_levenshtein_empty() {
702 assert_eq!(0, damerau_levenshtein("", ""));
703 }
704
705 #[test]
706 fn damerau_levenshtein_same() {
707 assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
708 }
709
710 #[test]
711 fn damerau_levenshtein_first_empty() {
712 assert_eq!(7, damerau_levenshtein("", "damerau"));
713 }
714
715 #[test]
716 fn damerau_levenshtein_second_empty() {
717 assert_eq!(7, damerau_levenshtein("damerau", ""));
718 }
719
720 #[test]
721 fn damerau_levenshtein_diff() {
722 assert_eq!(2, damerau_levenshtein("ca", "abc"));
723 }
724
725 #[test]
726 fn damerau_levenshtein_diff_short() {
727 assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
728 }
729
730 #[test]
731 fn damerau_levenshtein_diff_reversed() {
732 assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
733 }
734
735 #[test]
736 fn damerau_levenshtein_diff_multibyte() {
737 assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
738 assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
739 }
740
741 #[test]
742 fn damerau_levenshtein_diff_unequal_length() {
743 assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
744 }
745
746 #[test]
747 fn damerau_levenshtein_diff_unequal_length_reversed() {
748 assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
749 }
750
751 #[test]
752 fn damerau_levenshtein_diff_comedians() {
753 assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
754 }
755
756 #[test]
757 fn damerau_levenshtein_many_transpositions() {
758 assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
759 }
760
761 #[test]
762 fn damerau_levenshtein_diff_longer() {
763 let a = "The quick brown fox jumped over the angry dog.";
764 let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
765 assert_eq!(36, damerau_levenshtein(a, b));
766 }
767
768 #[test]
769 fn damerau_levenshtein_beginning_transposition() {
770 assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
771 }
772
773 #[test]
774 fn damerau_levenshtein_end_transposition() {
775 assert_eq!(1, damerau_levenshtein("specter", "spectre"));
776 }
777
778 #[test]
779 fn damerau_levenshtein_unrestricted_edit() {
780 assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
781 }
782
783 #[test]
784 fn levenshtein_against_vec_empty() {
785 let v = Vec::new();
786 let result = levenshtein_against_vec("test", &v);
787 let expected: Vec<usize> = Vec::new();
788 assert_eq!(expected, result);
789 }
790
791 #[test]
792 fn levenshtein_against_vec_one() {
793 let v = vec!["testy"];
794 let result = levenshtein_against_vec("test", &v);
795 let expected = vec![1];
796 assert_eq!(expected, result);
797 }
798
799 #[test]
800 fn levenshtein_against_vec_many() {
801 let v = vec!["test", "test1", "test12", "test123", "", "tset"];
802 let result = levenshtein_against_vec("test", &v);
803 let expected = vec![0, 1, 2, 3, 4, 2];
804 assert_eq!(expected, result);
805 }
806
807 #[test]
808 fn osa_distance_against_vec_empty() {
809 let v = Vec::new();
810 let result = osa_distance_against_vec("test", &v);
811 let expected: Vec<usize> = Vec::new();
812 assert_eq!(expected, result);
813 }
814
815 #[test]
816 fn osa_distance_against_vec_one() {
817 let v = vec!["etst"];
818 let result = osa_distance_against_vec("test", &v);
819 let expected = vec![1];
820 assert_eq!(expected, result);
821 }
822
823 #[test]
824 fn osa_distance_against_vec_many() {
825 let v = vec!["test", "test1", "test12", "test123", "", "tsvet"];
826 let result = osa_distance_against_vec("test", &v);
827 let expected = vec![0, 1, 2, 3, 4, 3];
828 assert_eq!(expected, result);
829 }
830
831 #[test]
832 fn damerau_levenshtein_against_vec_empty() {
833 let v = Vec::new();
834 let result = damerau_levenshtein_against_vec("test", &v);
835 let expected: Vec<usize> = Vec::new();
836 assert_eq!(expected, result);
837 }
838
839 #[test]
840 fn damerau_levenshtein_against_vec_one() {
841 let v = vec!["etst"];
842 let result = damerau_levenshtein_against_vec("test", &v);
843 let expected = vec![1];
844 assert_eq!(expected, result);
845 }
846
847 #[test]
848 fn damerau_levenshtein_against_vec_many() {
849 let v = vec!["test", "test1", "test12", "test123", "", "tsvet"];
850 let result = damerau_levenshtein_against_vec("test", &v);
851 let expected = vec![0, 1, 2, 3, 4, 2];
852 assert_eq!(expected, result);
853 }
854
855 fn equal_float_vecs(a: Vec<f64>, b: Vec<f64>) -> bool {
856 let delta: f64 = a.iter()
857 .zip(b.iter())
858 .map(|(x, y)| (x - y).abs() as f64)
859 .fold(0.0, |x, y| x + y as f64);
860 delta < 0.0001
861 }
862
863 #[test]
864 fn jaro_against_vec_empty() {
865 let v = Vec::new();
866 let result = jaro_against_vec("test", &v);
867 let expected: Vec<f64> = Vec::new();
868 assert_eq!(expected, result);
869 }
870
871 #[test]
872 fn jaro_against_vec_one() {
873 let v = vec!["test1"];
874 let result = jaro_against_vec("test", &v);
875 let expected = vec![0.93333];
876 assert!(equal_float_vecs(result, expected));
877 }
878
879 #[test]
880 fn jaro_against_vec_many() {
881 let v = vec!["test", "test1", "test12", "test123", "", "tset"];
882 let result = jaro_against_vec("test", &v);
883 let expected = vec![1.0, 0.933333, 0.888889, 0.857143, 0.0, 0.916667];
884 assert!(equal_float_vecs(result, expected));
885 }
886
887 #[test]
888 fn jaro_winkler_against_vec_empty() {
889 let v = Vec::new();
890 let result = jaro_winkler_against_vec("test", &v);
891 let expected: Vec<f64> = Vec::new();
892 assert_eq!(expected, result);
893 }
894
895 #[test]
896 fn jaro_winkler_against_vec_one() {
897 let v = vec!["test123"];
898 let result = jaro_winkler_against_vec("test", &v);
899 let expected = vec![0.914286];
900 assert!(equal_float_vecs(result, expected));
901 }
902
903 #[test]
904 fn jaro_winkler_against_vec_many() {
905 let v = vec!["test", "test1", "test12", "test123", "", "tset"];
906 let result = jaro_winkler_against_vec("test", &v);
907 let expected = vec![1.0, 0.96, 0.933333, 0.914286, 0.0, 0.925];
908 assert!(equal_float_vecs(result, expected));
909 }
910 }
911