]> git.proxmox.com Git - rustc.git/blob - compiler/rustc_ast/src/util/lev_distance.rs
New upstream version 1.49.0+dfsg1
[rustc.git] / compiler / rustc_ast / src / util / lev_distance.rs
1 // FIXME(Centril): Move to rustc_span?
2
3 use rustc_span::symbol::Symbol;
4 use std::cmp;
5
6 #[cfg(test)]
7 mod tests;
8
/// Computes the Levenshtein (edit) distance between `a` and `b`.
///
/// The distance is measured in `char`s (Unicode scalar values), not bytes:
/// it is the minimum number of single-`char` insertions, deletions and
/// substitutions needed to turn one string into the other.
pub fn lev_distance(a: &str, b: &str) -> usize {
    // When one side is empty, the distance is simply the length of the other.
    if a.is_empty() {
        return b.chars().count();
    }
    if b.is_empty() {
        return a.chars().count();
    }

    // Only one row of the dynamic-programming table is kept; `row[k]` is the
    // distance between the processed prefix of `a` and the first `k` chars of `b`.
    let b_len = b.chars().count();
    let mut row: Vec<usize> = (0..=b_len).collect();

    for (i, a_ch) in a.chars().enumerate() {
        // `diag` holds the value diagonally up-left of the cell being filled.
        let mut diag = i;
        row[0] = i + 1;

        for (j, b_ch) in b.chars().enumerate() {
            // Value directly above the current cell, before it is overwritten.
            let above = row[j + 1];
            row[j + 1] = if a_ch == b_ch {
                // Matching chars cost nothing: carry the diagonal value over.
                diag
            } else {
                // Cheapest of substitution, deletion and insertion, plus one edit.
                cmp::min(cmp::min(diag, above), row[j]) + 1
            };
            diag = above;
        }
    }

    row[b_len]
}
39
40 /// Finds the best match for a given word in the given iterator
41 ///
42 /// As a loose rule to avoid the obviously incorrect suggestions, it takes
43 /// an optional limit for the maximum allowable edit distance, which defaults
44 /// to one-third of the given word.
45 ///
46 /// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
47 /// a lower(upper)case letters mismatch.
48 pub fn find_best_match_for_name<'a, T>(
49 iter_names: T,
50 lookup: Symbol,
51 dist: Option<usize>,
52 ) -> Option<Symbol>
53 where
54 T: Iterator<Item = &'a Symbol>,
55 {
56 let lookup = &lookup.as_str();
57 let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
58 let name_vec: Vec<&Symbol> = iter_names.collect();
59
60 let (case_insensitive_match, levenshtein_match) = name_vec
61 .iter()
62 .filter_map(|&name| {
63 let dist = lev_distance(lookup, &name.as_str());
64 if dist <= max_dist { Some((name, dist)) } else { None }
65 })
66 // Here we are collecting the next structure:
67 // (case_insensitive_match, (levenshtein_match, levenshtein_distance))
68 .fold((None, None), |result, (candidate, dist)| {
69 (
70 if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
71 Some(candidate)
72 } else {
73 result.0
74 },
75 match result.1 {
76 None => Some((candidate, dist)),
77 Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }),
78 },
79 )
80 });
81 // Priority of matches:
82 // 1. Exact case insensitive match
83 // 2. Levenshtein distance match
84 // 3. Sorted word match
85 if let Some(candidate) = case_insensitive_match {
86 Some(*candidate)
87 } else if levenshtein_match.is_some() {
88 levenshtein_match.map(|(candidate, _)| *candidate)
89 } else {
90 find_match_by_sorted_words(name_vec, lookup)
91 }
92 }
93
94 fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
95 iter_names.iter().fold(None, |result, candidate| {
96 if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
97 Some(**candidate)
98 } else {
99 result
100 }
101 })
102 }
103
/// Splits `name` on `_`, sorts the resulting pieces, and joins them back
/// together with `_`, so that names made of the same words in a different
/// order produce the same string.
fn sort_by_words(name: &str) -> String {
    let mut words = name.split('_').collect::<Vec<&str>>();
    // Equal `&str` pieces are indistinguishable, so stability is irrelevant here.
    words.sort_unstable();
    words.join("_")
}