]> git.proxmox.com Git - rustc.git/blob - vendor/similar/src/text/utils.rs
New upstream version 1.70.0+dfsg2
[rustc.git] / vendor / similar / src / text / utils.rs
1 use std::collections::HashMap;
2 use std::hash::Hash;
3
4 use super::DiffableStrRef;
5
6 // quick and dirty way to get an upper sequence ratio.
7 pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
8 let n = seq1.len() + seq2.len();
9 if n == 0 {
10 1.0
11 } else {
12 2.0 * seq1.len().min(seq2.len()) as f32 / n as f32
13 }
14 }
15
16 /// Internal utility to calculate an upper bound for a ratio for
17 /// [`get_close_matches`]. This is based on Python's difflib approach
18 /// of considering the two sets to be multisets.
19 ///
20 /// It counts the number of matches without regard to order, which is an
21 /// obvious upper bound.
22 pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
23
24 impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
25 pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
26 let mut counts = HashMap::new();
27 for &word in seq {
28 *counts.entry(word).or_insert(0) += 1;
29 }
30 QuickSeqRatio(counts)
31 }
32
33 pub fn calc(&self, seq: &[&T]) -> f32 {
34 let n = self.0.len() + seq.len();
35 if n == 0 {
36 return 1.0;
37 }
38
39 let mut available = HashMap::new();
40 let mut matches = 0;
41 for &word in seq {
42 let x = if let Some(count) = available.get(&word) {
43 *count
44 } else {
45 self.0.get(&word).copied().unwrap_or(0)
46 };
47 available.insert(word, x - 1);
48 if x > 0 {
49 matches += 1;
50 }
51 }
52
53 2.0 * matches as f32 / n as f32
54 }
55 }