]> git.proxmox.com Git - rustc.git/blame - compiler/rustc_ast/src/util/lev_distance.rs
New upstream version 1.49.0+dfsg1
[rustc.git] / compiler / rustc_ast / src / util / lev_distance.rs
CommitLineData
dfeec247
XL
1// FIXME(Centril): Move to rustc_span?
2
3use rustc_span::symbol::Symbol;
1a4d82fc
JJ
4use std::cmp;
5
416331ca
XL
6#[cfg(test)]
7mod tests;
8
9fa01778 9/// Finds the Levenshtein distance between two strings
9cc50fc6
SL
10pub fn lev_distance(a: &str, b: &str) -> usize {
11 // cases which don't require further computation
12 if a.is_empty() {
13 return b.chars().count();
14 } else if b.is_empty() {
15 return a.chars().count();
16 }
1a4d82fc 17
0731742a 18 let mut dcol: Vec<_> = (0..=b.len()).collect();
1a4d82fc
JJ
19 let mut t_last = 0;
20
9cc50fc6 21 for (i, sc) in a.chars().enumerate() {
1a4d82fc
JJ
22 let mut current = i;
23 dcol[0] = current + 1;
24
9cc50fc6 25 for (j, tc) in b.chars().enumerate() {
1a4d82fc 26 let next = dcol[j + 1];
1a4d82fc
JJ
27 if sc == tc {
28 dcol[j + 1] = current;
29 } else {
30 dcol[j + 1] = cmp::min(current, next);
31 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
32 }
1a4d82fc
JJ
33 current = next;
34 t_last = j;
35 }
0731742a
XL
36 }
37 dcol[t_last + 1]
1a4d82fc
JJ
38}
39
9fa01778 40/// Finds the best match for a given word in the given iterator
b7449926 41///
9cc50fc6
SL
42/// As a loose rule to avoid the obviously incorrect suggestions, it takes
43/// an optional limit for the maximum allowable edit distance, which defaults
ff7c6d11 44/// to one-third of the given word.
b7449926 45///
ff7c6d11
XL
46/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
47/// a lower(upper)case letters mismatch.
dfeec247
XL
48pub fn find_best_match_for_name<'a, T>(
49 iter_names: T,
3dfed10e 50 lookup: Symbol,
dfeec247
XL
51 dist: Option<usize>,
52) -> Option<Symbol>
53where
54 T: Iterator<Item = &'a Symbol>,
55{
3dfed10e 56 let lookup = &lookup.as_str();
29967ef6 57 let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
dfeec247
XL
58 let name_vec: Vec<&Symbol> = iter_names.collect();
59
60 let (case_insensitive_match, levenshtein_match) = name_vec
61 .iter()
62 .filter_map(|&name| {
63 let dist = lev_distance(lookup, &name.as_str());
64 if dist <= max_dist { Some((name, dist)) } else { None }
65 })
66 // Here we are collecting the next structure:
67 // (case_insensitive_match, (levenshtein_match, levenshtein_distance))
68 .fold((None, None), |result, (candidate, dist)| {
69 (
70 if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
71 Some(candidate)
72 } else {
73 result.0
74 },
75 match result.1 {
76 None => Some((candidate, dist)),
77 Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }),
78 },
79 )
80 });
81 // Priority of matches:
82 // 1. Exact case insensitive match
83 // 2. Levenshtein distance match
84 // 3. Sorted word match
85 if let Some(candidate) = case_insensitive_match {
86 Some(*candidate)
87 } else if levenshtein_match.is_some() {
88 levenshtein_match.map(|(candidate, _)| *candidate)
89 } else {
90 find_match_by_sorted_words(name_vec, lookup)
91 }
92}
ff7c6d11 93
dfeec247
XL
94fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
95 iter_names.iter().fold(None, |result, candidate| {
96 if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
97 Some(**candidate)
7cac9316 98 } else {
dfeec247 99 result
9cc50fc6
SL
100 }
101 })
dfeec247 102}
ff7c6d11 103
dfeec247
XL
104fn sort_by_words(name: &str) -> String {
105 let mut split_words: Vec<&str> = name.split('_').collect();
1b1a35ee
XL
106 // We are sorting primitive &strs and can use unstable sort here
107 split_words.sort_unstable();
dfeec247 108 split_words.join("_")
92a42be0 109}