]> git.proxmox.com Git - rustc.git/blob - vendor/imara-diff/src/myers.rs
New upstream version 1.70.0+dfsg2
[rustc.git] / vendor / imara-diff / src / myers.rs
1 use std::ptr::NonNull;
2
3 use crate::intern::Token;
4 use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult};
5 use crate::myers::preprocess::PreprocessedFile;
6 use crate::myers::slice::FileSlice;
7 use crate::util::sqrt;
8 use crate::Sink;
9
10 mod middle_snake;
11 mod preprocess;
12 mod slice;
13
14 pub struct Myers {
15 kvec: NonNull<[i32]>,
16 kforward: NonNull<i32>,
17 kbackward: NonNull<i32>,
18 max_cost: u32,
19 }
20
21 pub fn diff<S: Sink>(
22 before: &[Token],
23 after: &[Token],
24 _num_tokens: u32,
25 mut sink: S,
26 minimal: bool,
27 ) -> S::Out {
28 // preprocess the files by removing parts of the file that are not contained in the other file at all
29 // this process remaps the token indices and therefore requires us to track changed files in a char array
30 // PERF use a bitset?
31 let (mut before, mut after) = preprocess::preprocess(before, after);
32
33 // Perform the actual diff
34 Myers::new(before.tokens.len(), after.tokens.len()).run(
35 FileSlice::new(&mut before),
36 FileSlice::new(&mut after),
37 minimal,
38 );
39
40 process_changes_with_sink(&before, &after, &mut sink);
41 sink.finish()
42 }
43
44 const HEUR_MIN_COST: u32 = 256;
45 const MAX_COST_MIN: u32 = 256;
46
47 impl Drop for Myers {
48 fn drop(&mut self) {
49 unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
50 }
51 }
52
53 impl Myers {
54 fn new(len1: usize, len2: usize) -> Self {
55 let ndiags = len1 + len2 as usize + 3;
56 let kvec = Box::leak(vec![0; 2 * ndiags + 2].into_boxed_slice());
57 let kforward = NonNull::from(&mut kvec[len2 + 1]);
58 let kbackward = NonNull::from(&mut kvec[ndiags + len2 + 1]);
59 Self { kvec: kvec.into(), kforward, kbackward, max_cost: sqrt(ndiags).max(MAX_COST_MIN) }
60 }
61
62 fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) {
63 loop {
64 file1.strip_common(&mut file2);
65
66 if file1.is_empty() {
67 file2.mark_changed();
68 return;
69 } else if file2.is_empty() {
70 file1.mark_changed();
71 return;
72 }
73
74 let split = self.split(&file1, &file2, need_min);
75 self.run(
76 file1.borrow().slice(..split.token_idx1 as u32),
77 file2.borrow().slice(..split.token_idx2 as u32),
78 split.minimized_lo,
79 );
80
81 file1 = file1.slice(split.token_idx1 as u32..);
82 file2 = file2.slice(split.token_idx2 as u32..);
83 need_min = split.minimized_hi
84 }
85 }
86
87 /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
88 /// Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
89 /// the forward diagonal starting from (off1, off2) and the backward diagonal
90 /// starting from (lim1, lim2). If the K values on the same diagonal crosses
91 /// returns the furthest point of reach. We might encounter expensive edge cases
92 /// using this algorithm, so a little bit of heuristic is needed to cut the
93 /// search and to return a suboptimal point.
94 fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split {
95 let mut forward_search =
96 unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) };
97 let mut backwards_search =
98 unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) };
99 let is_odd = (file2.len() - file2.len()) & 1 != 0;
100
101 let mut ec = 0;
102
103 while ec <= self.max_cost {
104 let mut found_snake = false;
105 forward_search.next_d();
106 if is_odd {
107 if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| {
108 backwards_search.contains(k)
109 && backwards_search.x_pos_at_diagonal(k) <= token_idx1
110 }) {
111 match res {
112 SearchResult::Snake => found_snake = true,
113 SearchResult::Found { token_idx1, token_idx2 } => {
114 return Split {
115 token_idx1,
116 token_idx2,
117 minimized_lo: true,
118 minimized_hi: true,
119 };
120 }
121 }
122 }
123 } else {
124 found_snake |= forward_search.run(file1, file2, |_, _| false).is_some()
125 };
126
127 backwards_search.next_d();
128 if !is_odd {
129 if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| {
130 forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k)
131 }) {
132 match res {
133 SearchResult::Snake => found_snake = true,
134 SearchResult::Found { token_idx1, token_idx2 } => {
135 return Split {
136 token_idx1,
137 token_idx2,
138 minimized_lo: true,
139 minimized_hi: true,
140 };
141 }
142 }
143 }
144 } else {
145 found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some()
146 };
147
148 if need_min {
149 continue;
150 }
151
152 // If the edit cost is above the heuristic trigger and if
153 // we got a good snake, we sample current diagonals to see
154 // if some of them have reached an "interesting" path. Our
155 // measure is a function of the distance from the diagonal
156 // corner (i1 + i2) penalized with the distance from the
157 // mid diagonal itself. If this value is above the current
158 // edit cost times a magic factor (XDL_K_HEUR) we consider
159 // it interesting.
160 if found_snake && ec > HEUR_MIN_COST {
161 if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2)
162 {
163 return Split {
164 token_idx1,
165 token_idx2,
166 minimized_lo: true,
167 minimized_hi: false,
168 };
169 }
170
171 if let Some((token_idx1, token_idx2)) =
172 backwards_search.found_snake(ec, file1, file2)
173 {
174 return Split {
175 token_idx1,
176 token_idx2,
177 minimized_lo: false,
178 minimized_hi: true,
179 };
180 }
181 }
182
183 ec += 1;
184 }
185
186 let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2);
187 let (distance_backwards, token_idx1_backwards) =
188 backwards_search.best_position(file1, file2);
189 if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards {
190 Split {
191 token_idx1: token_idx1_forward,
192 token_idx2: (distance_forward - token_idx1_forward as isize) as i32,
193 minimized_lo: true,
194 minimized_hi: false,
195 }
196 } else {
197 Split {
198 token_idx1: token_idx1_backwards,
199 token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32,
200 minimized_lo: false,
201 minimized_hi: true,
202 }
203 }
204 }
205 }
206
207 #[derive(Debug)]
208 struct Split {
209 token_idx1: i32,
210 token_idx2: i32,
211 minimized_lo: bool,
212 minimized_hi: bool,
213 }
214
215 /// the mapping performed during preprocessing makes it impossible to directly call
216 /// the `sink` during the diff itself. Instead `file.changed` is set to true for all
217 /// tokens that are changed
218 /// below these arrays are used to call the sink function
219 fn process_changes_with_sink(
220 before: &PreprocessedFile,
221 after: &PreprocessedFile,
222 sink: &mut impl Sink,
223 ) {
224 let before_end = before.is_changed.len() as u32 + before.offset;
225 let after_end = after.is_changed.len() as u32 + after.offset;
226
227 let mut before = before
228 .is_changed
229 .iter()
230 .enumerate()
231 .map(|(i, removed)| (i as u32 + before.offset, *removed));
232
233 let mut after = after
234 .is_changed
235 .iter()
236 .enumerate()
237 .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
238
239 let mut next1 = before.next();
240 let mut next2 = after.next();
241
242 while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
243 if !(removed | inserted) {
244 next1 = before.next();
245 next2 = after.next();
246 continue;
247 }
248
249 let mut hunk_before = before_pos..before_pos;
250 let mut hunk_after = after_pos..after_pos;
251 if removed {
252 let end = before.find(|(_, changed)| !changed);
253 next1 = end.map(|(end, _)| (end, false));
254 hunk_before.end = end.map_or(before_end, |(end, _)| end);
255 };
256
257 if inserted {
258 let end = after.find(|(_, changed)| !changed);
259 next2 = end.map(|(end, _)| (end, false));
260 hunk_after.end = end.map_or(after_end, |(end, _)| end);
261 }
262
263 sink.process_change(hunk_before, hunk_after);
264 }
265
266 if let Some((before_pos, _)) = next1 {
267 sink.process_change(before_pos..before_end, after_end..after_end);
268 } else if let Some((after_pos, _)) = next2 {
269 sink.process_change(before_end..before_end, after_pos..after_end);
270 }
271 }