]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | use std::cmp::max; |
2 | ||
3 | // strsplit is like `s.split(split)`, except that if `split` is "", it | |
4 | // trims the leading and trailing empty elements, since the `lcs` | |
5 | // logic won't handle those properly. | |
6 | fn strsplit<'a>(s: &'a str, split: &str) -> Vec<&'a str> { | |
7 | let mut si = s.split(split); | |
8 | if split == "" { | |
9 | si.next(); | |
10 | } | |
11 | let mut v: Vec<&str> = si.collect(); | |
12 | if split == "" { | |
13 | v.pop(); | |
14 | } | |
15 | v | |
16 | } | |
17 | ||
18 | // finds the longest common subsequences | |
19 | // outputs the edit distance and a string containing | |
20 | // all chars both inputs have in common | |
21 | // | |
22 | // This algorithm is based on | |
23 | // https://en.wikipedia.org/wiki/Longest_common_subsequence_problem#Code_for_the_dynamic_programming_solution | |
24 | #[allow(non_snake_case)] | |
25 | #[cfg_attr(feature = "cargo-clippy", allow(many_single_char_names))] | |
26 | pub fn lcs(orig: &str, edit: &str, split: &str) -> (i32, String) { | |
27 | // make list by custom splits | |
28 | let a = strsplit(orig, split); | |
29 | let b = strsplit(edit, split); | |
30 | ||
31 | let N = a.len(); | |
32 | let M = b.len(); | |
33 | ||
34 | let mut idx: Vec<usize> = Vec::with_capacity(N * M); | |
35 | idx.resize(N * M, 0); | |
36 | ||
37 | for i in 0..N { | |
38 | for j in 0..M { | |
39 | if b[j] == a[i] { | |
40 | if i == 0 || j == 0 { | |
41 | idx[i * M + j] = 1; | |
42 | } else { | |
43 | idx[i * M + j] = idx[(i - 1) * M + j - 1] + 1; | |
44 | } | |
45 | } else if i == 0 { | |
46 | if j == 0 { | |
47 | idx[i * M + j] = 0; | |
48 | } else { | |
49 | idx[i * M + j] = idx[i * M + j - 1]; | |
50 | } | |
51 | } else if j == 0 { | |
52 | idx[i * M + j] = idx[(i - 1) * M + j]; | |
53 | } else { | |
54 | idx[i * M + j] = max(idx[i * M + j - 1], idx[(i - 1) * M + j]); | |
55 | } | |
56 | } | |
57 | } | |
58 | ||
59 | let mut i = (N as isize) - 1; | |
60 | let mut j = (M as isize) - 1; | |
61 | let mut lcs = Vec::new(); | |
62 | while i >= 0 && j >= 0 { | |
63 | let ui = i as usize; | |
64 | let uj = j as usize; | |
65 | if a[ui] == b[uj] { | |
66 | lcs.push(a[ui]); | |
67 | i -= 1; | |
68 | j -= 1; | |
69 | } else if j == 0 && i == 0 { | |
70 | break; | |
71 | } else if i == 0 || idx[ui * M + uj - 1] > idx[(ui - 1) * M + uj] { | |
72 | j -= 1; | |
73 | } else { | |
74 | i -= 1; | |
75 | } | |
76 | } | |
77 | ||
78 | lcs.reverse(); | |
79 | ((N + M - 2 * lcs.len()) as i32, lcs.join(split)) | |
80 | } | |
81 | ||
82 | #[test] | |
83 | fn test_lcs() { | |
84 | assert_eq!(lcs("test", "tost", ""), (2, "tst".to_string())); | |
85 | assert_eq!(lcs("test", "test", ""), (0, "test".to_string())); | |
86 | ||
87 | assert_eq!(lcs("test", "test", " "), (0, "test".to_string())); | |
88 | ||
89 | assert_eq!( | |
90 | lcs( | |
91 | "The quick brown fox jumps over the lazy dog", | |
92 | "The quick brown dog leaps over the lazy cat", | |
93 | "", | |
94 | ), | |
95 | (16, "The quick brown o ps over the lazy ".to_string()) | |
96 | ); | |
97 | assert_eq!( | |
98 | lcs( | |
99 | "The quick brown fox jumps over the lazy dog", | |
100 | "The quick brown dog leaps over the lazy cat", | |
101 | " ", | |
102 | ), | |
103 | (6, "The quick brown over the lazy".to_string()) | |
104 | ); | |
105 | ||
106 | assert_eq!( | |
107 | lcs( | |
108 | "The quick brown fox jumps over the lazy dog", | |
109 | "The quick brown dog leaps over the lazy cat", | |
110 | "\n", | |
111 | ), | |
112 | (2, "".to_string()) | |
113 | ); | |
114 | assert_eq!( | |
115 | lcs( | |
116 | "The quick brown fox jumps over the lazy dog", | |
117 | "The quick brown fox jumps over the lazy dog", | |
118 | "\n", | |
119 | ), | |
120 | (0, "The quick brown fox jumps over the lazy dog".to_string()) | |
121 | ); | |
122 | ||
123 | assert_eq!( | |
124 | lcs("a b : c", "b a : b : c", " "), | |
125 | (2, "a b : c".to_string()) | |
126 | ); | |
127 | ||
128 | assert_eq!(lcs("", "a b c", ""), (5, "".to_string())); | |
129 | ||
130 | assert_eq!(lcs("", " a", " "), (1, "".to_string())); | |
131 | } |