1 use crate::intern
::Token
;
2 use crate::myers
::sqrt
;
3 use crate::util
::{strip_common_postfix, strip_common_prefix}
;
8 ) -> (PreprocessedFile
, PreprocessedFile
) {
9 let common_prefix
= strip_common_prefix(&mut file1
, &mut file2
);
10 strip_common_postfix(&mut file1
, &mut file2
);
11 let (hdiff1
, hdiff2
) = token_occurrences(file1
, file2
);
12 let file1
= PreprocessedFile
::new(common_prefix
, &hdiff1
, file1
);
13 let file2
= PreprocessedFile
::new(common_prefix
, &hdiff2
, file2
);
18 fn token_occurrences(file1
: &[Token
], file2
: &[Token
]) -> (Vec
<Occurances
>, Vec
<Occurances
>) {
19 const MAX_EQLIMIT
: u32 = 1024;
21 // compute the limit after which tokens are treated as `Occurances::COMMON`
22 let eqlimit1
= sqrt(file1
.len()).min(MAX_EQLIMIT
);
23 let eqlimit2
= sqrt(file2
.len()).min(MAX_EQLIMIT
);
25 // first collect how often each token occurs in a file
26 let mut occurances1
= Vec
::new();
28 let bucket
= token
.0 as usize;
29 if bucket
>= occurances1
.len() {
30 occurances1
.resize(bucket
+ 1, 0u32);
32 occurances1
[bucket
] += 1;
35 // do the same thing for
36 let mut occurances2
= Vec
::new();
37 let token_occurances2
: Vec
<_
> = file2
40 let bucket
= token
.0 as usize;
41 if bucket
>= occurances2
.len() {
42 occurances2
.resize(bucket
+ 1, 0);
44 occurances2
[bucket
] += 1;
45 let occurances1
= *occurances1
.get(bucket
).unwrap_or(&0);
46 Occurances
::from_occurances(occurances1
, eqlimit2
)
50 let token_occurances1
: Vec
<_
> = file1
53 let bucket
= token
.0 as usize;
54 let occurances2
= *occurances2
.get(bucket
).unwrap_or(&0);
55 Occurances
::from_occurances(occurances2
, eqlimit1
)
59 (token_occurances1
, token_occurances2
)
/// How often a token occurs within one file, bucketed into the three classes
/// the diff preprocessing distinguishes.
#[derive(Clone, Copy, Debug)]
enum Occurances {
    /// Token does not occur in this file
    None,
    /// Token occurs at least once
    Some,
    /// Token occurs very frequently (exact number depends on file size).
    /// Such tokens are usually empty lines or braces and are often not meaningful to a diff
    Common,
}

impl Occurances {
    /// Maps a raw occurrence count to its class. `eqlimit` is the file-size
    /// derived threshold at or above which a token counts as `Common`.
    pub fn from_occurances(occurances: u32, eqlimit: u32) -> Occurances {
        if occurances == 0 {
            Occurances::None
        } else if occurances >= eqlimit {
            Occurances::Common
        } else {
            Occurances::Some
        }
    }
}
86 pub struct PreprocessedFile
{
88 pub is_changed
: Vec
<bool
>,
89 pub indices
: Vec
<u32>,
90 pub tokens
: Vec
<Token
>,
93 impl PreprocessedFile
{
94 fn new(offset
: u32, token_diff
: &[Occurances
], tokens
: &[Token
]) -> PreprocessedFile
{
95 let mut changed
= vec
![false; tokens
.len()];
96 let (tokens
, indices
) = prune_unmatched_tokens(tokens
, token_diff
, &mut changed
);
97 PreprocessedFile { offset, is_changed: changed, indices, tokens }
101 fn prune_unmatched_tokens(
103 token_status
: &[Occurances
],
104 changed
: &mut [bool
],
105 ) -> (Vec
<Token
>, Vec
<u32>) {
106 assert_eq
!(token_status
.len(), file
.len());
110 .filter_map(|(i
, (&token
, &status
))| {
111 let prune
= match status
{
112 Occurances
::None
=> true,
113 Occurances
::Some
=> false,
114 Occurances
::Common
=> should_prune_common_line(token_status
, i
),
120 Some((token
, i
as u32))
126 // TODO do not unnecessarily rescan lines
127 fn should_prune_common_line(token_status
: &[Occurances
], pos
: usize) -> bool
{
128 const WINDOW_SIZE
: usize = 100;
130 let mut unmatched_before
= 0;
131 let mut common_before
= 0;
133 let start
= if pos
> WINDOW_SIZE { WINDOW_SIZE }
else { 0 }
;
134 for status
in token_status
[start
..pos
].iter().rev() {
136 Occurances
::None
=> {
137 unmatched_before
+= 1;
139 Occurances
::Common
=> {
142 Occurances
::Some
=> break,
146 if unmatched_before
== 0 {
150 let end
= token_status
.len().min(pos
+ WINDOW_SIZE
);
151 let mut unmatched_after
= 0;
152 let mut common_after
= 0;
153 for status
in token_status
[pos
..end
].iter() {
155 Occurances
::None
=> {
156 unmatched_after
+= 1;
158 Occurances
::Common
=> {
161 Occurances
::Some
=> break,
165 if unmatched_after
== 0 {
169 let common
= common_before
+ common_after
;
170 let unmatched
= unmatched_before
+ unmatched_after
;
172 unmatched
> 3 * common