]>
Commit | Line | Data |
---|---|---|
0a29b90c FG |
1 | //! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. |
2 | //! | |
3 | //! * time: `O((NM)D log (M)D)` | |
4 | //! * space `O(MN)` | |
5 | use std::collections::BTreeMap; | |
6 | use std::ops::{Index, Range}; | |
7 | use std::time::Instant; | |
8 | ||
9 | use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; | |
10 | use crate::algorithms::DiffHook; | |
11 | ||
12 | /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. | |
13 | /// | |
14 | /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. | |
15 | /// | |
16 | /// This diff is done with an optional deadline that defines the maximal | |
17 | /// execution time permitted before it bails and falls back to an very bad | |
18 | /// approximation. Deadlines with LCS do not make a lot of sense and should | |
19 | /// not be used. | |
20 | pub fn diff<Old, New, D>( | |
21 | d: &mut D, | |
22 | old: &Old, | |
23 | old_range: Range<usize>, | |
24 | new: &New, | |
25 | new_range: Range<usize>, | |
26 | ) -> Result<(), D::Error> | |
27 | where | |
28 | Old: Index<usize> + ?Sized, | |
29 | New: Index<usize> + ?Sized, | |
30 | D: DiffHook, | |
31 | New::Output: PartialEq<Old::Output>, | |
32 | { | |
33 | diff_deadline(d, old, old_range, new, new_range, None) | |
34 | } | |
35 | ||
36 | /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. | |
37 | /// | |
38 | /// Diff `old`, between indices `old_range` and `new` between indices `new_range`. | |
39 | /// | |
40 | /// This diff is done with an optional deadline that defines the maximal | |
41 | /// execution time permitted before it bails and falls back to an approximation. | |
42 | pub fn diff_deadline<Old, New, D>( | |
43 | d: &mut D, | |
44 | old: &Old, | |
45 | old_range: Range<usize>, | |
46 | new: &New, | |
47 | new_range: Range<usize>, | |
48 | deadline: Option<Instant>, | |
49 | ) -> Result<(), D::Error> | |
50 | where | |
51 | Old: Index<usize> + ?Sized, | |
52 | New: Index<usize> + ?Sized, | |
53 | D: DiffHook, | |
54 | New::Output: PartialEq<Old::Output>, | |
55 | { | |
56 | if is_empty_range(&new_range) { | |
57 | d.delete(old_range.start, old_range.len(), new_range.start)?; | |
58 | return Ok(()); | |
59 | } else if is_empty_range(&old_range) { | |
60 | d.insert(old_range.start, new_range.start, new_range.len())?; | |
61 | return Ok(()); | |
62 | } | |
63 | ||
64 | let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone()); | |
65 | let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone()); | |
66 | ||
49aad941 FG |
67 | // If the sequences are not different then we're done |
68 | if common_prefix_len == old_range.len() && (old_range.len() == new_range.len()) { | |
69 | d.equal(0, 0, old_range.len())?; | |
70 | return Ok(()); | |
71 | } | |
72 | ||
0a29b90c FG |
73 | let maybe_table = make_table( |
74 | old, | |
75 | common_prefix_len..(old_range.len() - common_suffix_len), | |
76 | new, | |
77 | common_prefix_len..(new_range.len() - common_suffix_len), | |
78 | deadline, | |
79 | ); | |
80 | let mut old_idx = 0; | |
81 | let mut new_idx = 0; | |
82 | let new_len = new_range.len() - common_prefix_len - common_suffix_len; | |
83 | let old_len = old_range.len() - common_prefix_len - common_suffix_len; | |
84 | ||
85 | if common_prefix_len > 0 { | |
86 | d.equal(old_range.start, new_range.start, common_prefix_len)?; | |
87 | } | |
88 | ||
89 | if let Some(table) = maybe_table { | |
90 | while new_idx < new_len && old_idx < old_len { | |
91 | let old_orig_idx = old_range.start + common_prefix_len + old_idx; | |
92 | let new_orig_idx = new_range.start + common_prefix_len + new_idx; | |
93 | ||
94 | if new[new_orig_idx] == old[old_orig_idx] { | |
95 | d.equal(old_orig_idx, new_orig_idx, 1)?; | |
96 | old_idx += 1; | |
97 | new_idx += 1; | |
98 | } else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x) | |
99 | >= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x) | |
100 | { | |
101 | d.delete(old_orig_idx, 1, new_orig_idx)?; | |
102 | old_idx += 1; | |
103 | } else { | |
104 | d.insert(old_orig_idx, new_orig_idx, 1)?; | |
105 | new_idx += 1; | |
106 | } | |
107 | } | |
108 | } else { | |
109 | let old_orig_idx = old_range.start + common_prefix_len + old_idx; | |
110 | let new_orig_idx = new_range.start + common_prefix_len + new_idx; | |
111 | d.delete(old_orig_idx, old_len, new_orig_idx)?; | |
112 | d.insert(old_orig_idx, new_orig_idx, new_len)?; | |
113 | } | |
114 | ||
115 | if old_idx < old_len { | |
116 | d.delete( | |
117 | old_range.start + common_prefix_len + old_idx, | |
118 | old_len - old_idx, | |
119 | new_range.start + common_prefix_len + new_idx, | |
120 | )?; | |
121 | old_idx += old_len - old_idx; | |
122 | } | |
123 | ||
124 | if new_idx < new_len { | |
125 | d.insert( | |
126 | old_range.start + common_prefix_len + old_idx, | |
127 | new_range.start + common_prefix_len + new_idx, | |
128 | new_len - new_idx, | |
129 | )?; | |
130 | } | |
131 | ||
132 | if common_suffix_len > 0 { | |
133 | d.equal( | |
134 | old_range.start + old_len + common_prefix_len, | |
135 | new_range.start + new_len + common_prefix_len, | |
136 | common_suffix_len, | |
137 | )?; | |
138 | } | |
139 | ||
140 | d.finish() | |
141 | } | |
142 | ||
143 | fn make_table<Old, New>( | |
144 | old: &Old, | |
145 | old_range: Range<usize>, | |
146 | new: &New, | |
147 | new_range: Range<usize>, | |
148 | deadline: Option<Instant>, | |
149 | ) -> Option<BTreeMap<(usize, usize), u32>> | |
150 | where | |
151 | Old: Index<usize> + ?Sized, | |
152 | New: Index<usize> + ?Sized, | |
153 | New::Output: PartialEq<Old::Output>, | |
154 | { | |
155 | let old_len = old_range.len(); | |
156 | let new_len = new_range.len(); | |
157 | let mut table = BTreeMap::new(); | |
158 | ||
159 | for i in (0..new_len).rev() { | |
160 | // are we running for too long? give up on the table | |
161 | if let Some(deadline) = deadline { | |
162 | if Instant::now() > deadline { | |
163 | return None; | |
164 | } | |
165 | } | |
166 | ||
167 | for j in (0..old_len).rev() { | |
168 | let val = if new[i] == old[j] { | |
169 | table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1 | |
170 | } else { | |
171 | table | |
172 | .get(&(i + 1, j)) | |
173 | .map_or(0, |&x| x) | |
174 | .max(table.get(&(i, j + 1)).map_or(0, |&x| x)) | |
175 | }; | |
176 | if val > 0 { | |
177 | table.insert((i, j), val); | |
178 | } | |
179 | } | |
180 | } | |
181 | ||
182 | Some(table) | |
183 | } | |
184 | ||
185 | #[test] | |
186 | fn test_table() { | |
187 | let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap(); | |
188 | let expected = { | |
189 | let mut m = BTreeMap::new(); | |
190 | m.insert((1, 0), 1); | |
191 | m.insert((0, 0), 1); | |
192 | m.insert((2, 0), 1); | |
193 | m | |
194 | }; | |
195 | assert_eq!(table, expected); | |
196 | } | |
197 | ||
198 | #[test] | |
199 | fn test_diff() { | |
200 | let a: &[usize] = &[0, 1, 2, 3, 4]; | |
201 | let b: &[usize] = &[0, 1, 2, 9, 4]; | |
202 | ||
203 | let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); | |
204 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); | |
205 | insta::assert_debug_snapshot!(d.into_inner().ops()); | |
206 | } | |
207 | ||
208 | #[test] | |
209 | fn test_contiguous() { | |
210 | let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; | |
211 | let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; | |
212 | ||
213 | let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); | |
214 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); | |
215 | insta::assert_debug_snapshot!(d.into_inner().ops()); | |
216 | } | |
217 | ||
218 | #[test] | |
219 | fn test_pat() { | |
220 | let a: &[usize] = &[0, 1, 3, 4, 5]; | |
221 | let b: &[usize] = &[0, 1, 4, 5, 8, 9]; | |
222 | ||
223 | let mut d = crate::algorithms::Capture::new(); | |
224 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); | |
225 | insta::assert_debug_snapshot!(d.ops()); | |
226 | } | |
49aad941 FG |
227 | |
228 | #[test] | |
229 | fn test_same() { | |
230 | let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; | |
231 | let b: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; | |
232 | ||
233 | let mut d = crate::algorithms::Capture::new(); | |
234 | diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap(); | |
235 | insta::assert_debug_snapshot!(d.ops()); | |
236 | } |