3 use crate::intern
::Token
;
4 use crate::myers
::middle_snake
::{MiddleSnakeSearch, SearchResult}
;
5 use crate::myers
::preprocess
::PreprocessedFile
;
6 use crate::myers
::slice
::FileSlice
;
16 kforward
: NonNull
<i32>,
17 kbackward
: NonNull
<i32>,
28 // preprocess the files by removing parts of the file that are not contained in the other file at all
29 // this process remaps the token indices and therefore requires us to track changed files in a char array
31 let (mut before
, mut after
) = preprocess
::preprocess(before
, after
);
33 // Perform the actual diff
34 Myers
::new(before
.tokens
.len(), after
.tokens
.len()).run(
35 FileSlice
::new(&mut before
),
36 FileSlice
::new(&mut after
),
40 process_changes_with_sink(&before
, &after
, &mut sink
);
44 const HEUR_MIN_COST
: u32 = 256;
45 const MAX_COST_MIN
: u32 = 256;
49 unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
54 fn new(len1
: usize, len2
: usize) -> Self {
55 let ndiags
= len1
+ len2
as usize + 3;
56 let kvec
= Box
::leak(vec
![0; 2 * ndiags
+ 2].into_boxed_slice());
57 let kforward
= NonNull
::from(&mut kvec
[len2
+ 1]);
58 let kbackward
= NonNull
::from(&mut kvec
[ndiags
+ len2
+ 1]);
59 Self { kvec: kvec.into(), kforward, kbackward, max_cost: sqrt(ndiags).max(MAX_COST_MIN) }
62 fn run
<'f
>(&mut self, mut file1
: FileSlice
<'f
>, mut file2
: FileSlice
<'f
>, mut need_min
: bool
) {
64 file1
.strip_common(&mut file2
);
69 } else if file2
.is_empty() {
74 let split
= self.split(&file1
, &file2
, need_min
);
76 file1
.borrow().slice(..split
.token_idx1
as u32),
77 file2
.borrow().slice(..split
.token_idx2
as u32),
81 file1
= file1
.slice(split
.token_idx1
as u32..);
82 file2
= file2
.slice(split
.token_idx2
as u32..);
83 need_min
= split
.minimized_hi
87 /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
88 /// Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
89 /// the forward diagonal starting from (off1, off2) and the backward diagonal
90 /// starting from (lim1, lim2). If the K values on the same diagonal crosses
91 /// returns the furthest point of reach. We might encounter expensive edge cases
92 /// using this algorithm, so a little bit of heuristic is needed to cut the
93 /// search and to return a suboptimal point.
94 fn split(&mut self, file1
: &FileSlice
, file2
: &FileSlice
, need_min
: bool
) -> Split
{
95 let mut forward_search
=
96 unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) }
;
97 let mut backwards_search
=
98 unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) }
;
99 let is_odd
= (file2
.len() - file2
.len()) & 1 != 0;
103 while ec
<= self.max_cost
{
104 let mut found_snake
= false;
105 forward_search
.next_d();
107 if let Some(res
) = forward_search
.run(file1
, file2
, |k
, token_idx1
| {
108 backwards_search
.contains(k
)
109 && backwards_search
.x_pos_at_diagonal(k
) <= token_idx1
112 SearchResult
::Snake
=> found_snake
= true,
113 SearchResult
::Found { token_idx1, token_idx2 }
=> {
124 found_snake
|= forward_search
.run(file1
, file2
, |_
, _
| false).is_some()
127 backwards_search
.next_d();
129 if let Some(res
) = backwards_search
.run(file1
, file2
, |k
, token_idx1
| {
130 forward_search
.contains(k
) && token_idx1
<= forward_search
.x_pos_at_diagonal(k
)
133 SearchResult
::Snake
=> found_snake
= true,
134 SearchResult
::Found { token_idx1, token_idx2 }
=> {
145 found_snake
|= backwards_search
.run(file1
, file2
, |_
, _
| false).is_some()
152 // If the edit cost is above the heuristic trigger and if
153 // we got a good snake, we sample current diagonals to see
154 // if some of them have reached an "interesting" path. Our
155 // measure is a function of the distance from the diagonal
156 // corner (i1 + i2) penalized with the distance from the
157 // mid diagonal itself. If this value is above the current
158 // edit cost times a magic factor (XDL_K_HEUR) we consider
160 if found_snake
&& ec
> HEUR_MIN_COST
{
161 if let Some((token_idx1
, token_idx2
)) = forward_search
.found_snake(ec
, file1
, file2
)
171 if let Some((token_idx1
, token_idx2
)) =
172 backwards_search
.found_snake(ec
, file1
, file2
)
186 let (distance_forward
, token_idx1_forward
) = forward_search
.best_position(file1
, file2
);
187 let (distance_backwards
, token_idx1_backwards
) =
188 backwards_search
.best_position(file1
, file2
);
189 if distance_forward
> file1
.len() as isize + file2
.len() as isize - distance_backwards
{
191 token_idx1
: token_idx1_forward
,
192 token_idx2
: (distance_forward
- token_idx1_forward
as isize) as i32,
198 token_idx1
: token_idx1_backwards
,
199 token_idx2
: (distance_backwards
- token_idx1_backwards
as isize) as i32,
215 /// the mapping performed during preprocessing makes it impossible to directly call
216 /// the `sink` during the diff itself. Instead `file.changed` is set to true for all
217 /// tokens that are changed
218 /// below these arrays are used to call the sink function
219 fn process_changes_with_sink(
220 before
: &PreprocessedFile
,
221 after
: &PreprocessedFile
,
222 sink
: &mut impl Sink
,
224 let before_end
= before
.is_changed
.len() as u32 + before
.offset
;
225 let after_end
= after
.is_changed
.len() as u32 + after
.offset
;
227 let mut before
= before
231 .map(|(i
, removed
)| (i
as u32 + before
.offset
, *removed
));
233 let mut after
= after
237 .map(|(i
, inserted
)| (i
as u32 + after
.offset
, *inserted
));
239 let mut next1
= before
.next();
240 let mut next2
= after
.next();
242 while let (Some((before_pos
, removed
)), Some((after_pos
, inserted
))) = (next1
, next2
) {
243 if !(removed
| inserted
) {
244 next1
= before
.next();
245 next2
= after
.next();
249 let mut hunk_before
= before_pos
..before_pos
;
250 let mut hunk_after
= after_pos
..after_pos
;
252 let end
= before
.find(|(_
, changed
)| !changed
);
253 next1
= end
.map(|(end
, _
)| (end
, false));
254 hunk_before
.end
= end
.map_or(before_end
, |(end
, _
)| end
);
258 let end
= after
.find(|(_
, changed
)| !changed
);
259 next2
= end
.map(|(end
, _
)| (end
, false));
260 hunk_after
.end
= end
.map_or(after_end
, |(end
, _
)| end
);
263 sink
.process_change(hunk_before
, hunk_after
);
266 if let Some((before_pos
, _
)) = next1
{
267 sink
.process_change(before_pos
..before_end
, after_end
..after_end
);
268 } else if let Some((after_pos
, _
)) = next2
{
269 sink
.process_change(before_end
..before_end
, after_pos
..after_end
);