]>
Commit | Line | Data |
---|---|---|
dfeec247 XL |
1 | // FIXME(Centril): Move to rustc_span? |
2 | ||
3 | use rustc_span::symbol::Symbol; | |
1a4d82fc JJ |
4 | use std::cmp; |
5 | ||
416331ca XL |
6 | #[cfg(test)] |
7 | mod tests; | |
8 | ||
9fa01778 | 9 | /// Finds the Levenshtein distance between two strings |
9cc50fc6 SL |
10 | pub fn lev_distance(a: &str, b: &str) -> usize { |
11 | // cases which don't require further computation | |
12 | if a.is_empty() { | |
13 | return b.chars().count(); | |
14 | } else if b.is_empty() { | |
15 | return a.chars().count(); | |
16 | } | |
1a4d82fc | 17 | |
0731742a | 18 | let mut dcol: Vec<_> = (0..=b.len()).collect(); |
1a4d82fc JJ |
19 | let mut t_last = 0; |
20 | ||
9cc50fc6 | 21 | for (i, sc) in a.chars().enumerate() { |
1a4d82fc JJ |
22 | let mut current = i; |
23 | dcol[0] = current + 1; | |
24 | ||
9cc50fc6 | 25 | for (j, tc) in b.chars().enumerate() { |
1a4d82fc | 26 | let next = dcol[j + 1]; |
1a4d82fc JJ |
27 | if sc == tc { |
28 | dcol[j + 1] = current; | |
29 | } else { | |
30 | dcol[j + 1] = cmp::min(current, next); | |
31 | dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1; | |
32 | } | |
1a4d82fc JJ |
33 | current = next; |
34 | t_last = j; | |
35 | } | |
0731742a XL |
36 | } |
37 | dcol[t_last + 1] | |
1a4d82fc JJ |
38 | } |
39 | ||
9fa01778 | 40 | /// Finds the best match for a given word in the given iterator |
b7449926 | 41 | /// |
9cc50fc6 SL |
42 | /// As a loose rule to avoid the obviously incorrect suggestions, it takes |
43 | /// an optional limit for the maximum allowable edit distance, which defaults | |
ff7c6d11 | 44 | /// to one-third of the given word. |
b7449926 | 45 | /// |
ff7c6d11 XL |
46 | /// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with |
47 | /// a lower(upper)case letters mismatch. | |
dfeec247 XL |
48 | pub fn find_best_match_for_name<'a, T>( |
49 | iter_names: T, | |
3dfed10e | 50 | lookup: Symbol, |
dfeec247 XL |
51 | dist: Option<usize>, |
52 | ) -> Option<Symbol> | |
53 | where | |
54 | T: Iterator<Item = &'a Symbol>, | |
55 | { | |
3dfed10e | 56 | let lookup = &lookup.as_str(); |
29967ef6 | 57 | let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3); |
dfeec247 XL |
58 | let name_vec: Vec<&Symbol> = iter_names.collect(); |
59 | ||
60 | let (case_insensitive_match, levenshtein_match) = name_vec | |
61 | .iter() | |
62 | .filter_map(|&name| { | |
63 | let dist = lev_distance(lookup, &name.as_str()); | |
64 | if dist <= max_dist { Some((name, dist)) } else { None } | |
65 | }) | |
66 | // Here we are collecting the next structure: | |
67 | // (case_insensitive_match, (levenshtein_match, levenshtein_distance)) | |
68 | .fold((None, None), |result, (candidate, dist)| { | |
69 | ( | |
70 | if candidate.as_str().to_uppercase() == lookup.to_uppercase() { | |
71 | Some(candidate) | |
72 | } else { | |
73 | result.0 | |
74 | }, | |
75 | match result.1 { | |
76 | None => Some((candidate, dist)), | |
77 | Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }), | |
78 | }, | |
79 | ) | |
80 | }); | |
81 | // Priority of matches: | |
82 | // 1. Exact case insensitive match | |
83 | // 2. Levenshtein distance match | |
84 | // 3. Sorted word match | |
85 | if let Some(candidate) = case_insensitive_match { | |
86 | Some(*candidate) | |
87 | } else if levenshtein_match.is_some() { | |
88 | levenshtein_match.map(|(candidate, _)| *candidate) | |
89 | } else { | |
90 | find_match_by_sorted_words(name_vec, lookup) | |
91 | } | |
92 | } | |
ff7c6d11 | 93 | |
dfeec247 XL |
94 | fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> { |
95 | iter_names.iter().fold(None, |result, candidate| { | |
96 | if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) { | |
97 | Some(**candidate) | |
7cac9316 | 98 | } else { |
dfeec247 | 99 | result |
9cc50fc6 SL |
100 | } |
101 | }) | |
dfeec247 | 102 | } |
ff7c6d11 | 103 | |
dfeec247 XL |
104 | fn sort_by_words(name: &str) -> String { |
105 | let mut split_words: Vec<&str> = name.split('_').collect(); | |
1b1a35ee XL |
106 | // We are sorting primitive &strs and can use unstable sort here |
107 | split_words.sort_unstable(); | |
dfeec247 | 108 | split_words.join("_") |
92a42be0 | 109 | } |