compiler/rustc_ast/src/util/lev_distance.rs

   1 // FIXME(Centril): Move to rustc_span?
   2
   3 use rustc_span::symbol::Symbol;
   4 use std::cmp;
   5
   6 #[cfg(test)]
   7 mod tests;
   8
   9 /// Finds the Levenshtein distance between two strings
  10 pub fn lev_distance(a: &str, b: &str) -> usize {
  11     // cases which don't require further computation
  12     if a.is_empty() {
  13         return b.chars().count();
  14     } else if b.is_empty() {
  15         return a.chars().count();
  16     }
  17
  18     let mut dcol: Vec<_> = (0..=b.len()).collect();
  19     let mut t_last = 0;
  20
  21     for (i, sc) in a.chars().enumerate() {
  22         let mut current = i;
  23         dcol[0] = current + 1;
  24
  25         for (j, tc) in b.chars().enumerate() {
  26             let next = dcol[j + 1];
  27             if sc == tc {
  28                 dcol[j + 1] = current;
  29             } else {
  30                 dcol[j + 1] = cmp::min(current, next);
  31                 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
  32             }
  33             current = next;
  34             t_last = j;
  35         }
  36     }
  37     dcol[t_last + 1]
  38 }
  39
  40 /// Finds the best match for a given word in the given iterator
  41 ///
  42 /// As a loose rule to avoid the obviously incorrect suggestions, it takes
  43 /// an optional limit for the maximum allowable edit distance, which defaults
  44 /// to one-third of the given word.
  45 ///
  46 /// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
  47 /// a lower(upper)case letters mismatch.
  48 pub fn find_best_match_for_name<'a, T>(
  49     iter_names: T,
  50     lookup: Symbol,
  51     dist: Option<usize>,
  52 ) -> Option<Symbol>
  53 where
  54     T: Iterator<Item = &'a Symbol>,
  55 {
  56     let lookup = &lookup.as_str();
  57     let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
  58     let name_vec: Vec<&Symbol> = iter_names.collect();
  59
  60     let (case_insensitive_match, levenshtein_match) = name_vec
  61         .iter()
  62         .filter_map(|&name| {
  63             let dist = lev_distance(lookup, &name.as_str());
  64             if dist <= max_dist { Some((name, dist)) } else { None }
  65         })
  66         // Here we are collecting the next structure:
  67         // (case_insensitive_match, (levenshtein_match, levenshtein_distance))
  68         .fold((None, None), |result, (candidate, dist)| {
  69             (
  70                 if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
  71                     Some(candidate)
  72                 } else {
  73                     result.0
  74                 },
  75                 match result.1 {
  76                     None => Some((candidate, dist)),
  77                     Some((c, d)) => Some(if dist < d { (candidate, dist) } else { (c, d) }),
  78                 },
  79             )
  80         });
  81     // Priority of matches:
  82     // 1. Exact case insensitive match
  83     // 2. Levenshtein distance match
  84     // 3. Sorted word match
  85     if let Some(candidate) = case_insensitive_match {
  86         Some(*candidate)
  87     } else if levenshtein_match.is_some() {
  88         levenshtein_match.map(|(candidate, _)| *candidate)
  89     } else {
  90         find_match_by_sorted_words(name_vec, lookup)
  91     }
  92 }
  93
  94 fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
  95     iter_names.iter().fold(None, |result, candidate| {
  96         if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
  97             Some(**candidate)
  98         } else {
  99             result
 100         }
 101     })
 102 }
 103
 104 fn sort_by_words(name: &str) -> String {
 105     let mut split_words: Vec<&str> = name.split('_').collect();
 106     // We are sorting primitive &strs and can use unstable sort here
 107     split_words.sort_unstable();
 108     split_words.join("_")
 109 }