]>
Commit | Line | Data |
---|---|---|
0a29b90c FG |
1 | use std::convert::Infallible; |
2 | use std::path::PathBuf; | |
3 | ||
4 | use criterion::measurement::Measurement; | |
5 | use criterion::{ | |
6 | black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, | |
7 | }; | |
8 | use git_repository::object::tree::diff::{Action, Change}; | |
9 | use git_repository::Id; | |
10 | use imara_diff::intern::InternedInput; | |
11 | use imara_diff::sink::Counter; | |
12 | use imara_diff::Algorithm; | |
13 | ||
14 | fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> { | |
15 | use git_repository::object::tree::diff::change::Event::Modification; | |
16 | ||
17 | let (previous_id, id) = match change.event { | |
18 | Modification { previous_entry_mode, previous_id, entry_mode, id } | |
19 | if previous_entry_mode.is_blob() && entry_mode.is_blob() => | |
20 | { | |
21 | (previous_id, id) | |
22 | } | |
23 | _ => return None, | |
24 | }; | |
25 | ||
26 | let old = previous_id.object().ok()?.detach().data; | |
27 | let new = id.object().ok()?.detach().data; | |
28 | ||
29 | Some((new, old)) | |
30 | } | |
31 | ||
32 | fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) { | |
33 | let from_tree = from.object().unwrap().peel_to_tree().unwrap(); | |
34 | let to_tree = to.object().unwrap().peel_to_tree().unwrap(); | |
35 | from_tree | |
36 | .changes() | |
37 | .track_filename() | |
38 | .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> { | |
39 | if let Some((old, new)) = extract_diff(&change) { | |
40 | let input = InternedInput::new(&*old, &*new); | |
41 | let changes = | |
42 | imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total(); | |
43 | let complexity = changes * (old.len() + new.len()); | |
44 | diffs.push((old, new, complexity)); | |
45 | } | |
46 | Ok(Action::Continue) | |
47 | }) | |
48 | .unwrap(); | |
49 | } | |
50 | ||
51 | pub fn project_root() -> PathBuf { | |
52 | let dir = env!("CARGO_MANIFEST_DIR"); | |
53 | let mut res = PathBuf::from(dir); | |
54 | while !res.join("README.md").exists() { | |
55 | res = res.parent().expect("reached fs root without finding project root").to_owned() | |
56 | } | |
57 | res | |
58 | } | |
59 | ||
60 | fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) { | |
61 | let path = project_root().join("bench_data").join("repos").join(name); | |
62 | let repo = git_repository::open(path).unwrap(); | |
63 | let tag1 = repo.find_reference(tag1).unwrap().into_fully_peeled_id().unwrap(); | |
64 | let tag2 = repo.find_reference(tag2).unwrap().into_fully_peeled_id().unwrap(); | |
65 | let mut diffs = Vec::new(); | |
66 | git_tree_diff(tag1, tag2, &mut diffs); | |
67 | let mut last_commit = tag2; | |
68 | tag2.object() | |
69 | .unwrap() | |
70 | .into_commit() | |
71 | .ancestors() | |
72 | .all() | |
73 | .unwrap() | |
74 | .take(num_commits as usize) | |
75 | .for_each(|parent| { | |
76 | let parent = parent.unwrap(); | |
77 | git_tree_diff(last_commit, parent, &mut diffs); | |
78 | last_commit = parent; | |
79 | }); | |
80 | diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity); | |
81 | ||
82 | if std::env::var("PLOT").is_ok() { | |
83 | let mut group = c.benchmark_group(format!("{name}_plot")); | |
84 | group.sample_size(15); | |
85 | bench_file_diffs(group, &diffs, 12, true); | |
86 | } else { | |
87 | bench_file_diffs(c.benchmark_group(name), &diffs, 2, false); | |
88 | } | |
89 | } | |
90 | ||
91 | fn bench_file_diffs<M: Measurement>( | |
92 | mut group: BenchmarkGroup<M>, | |
93 | files: &[(Vec<u8>, Vec<u8>, usize)], | |
94 | num_chunks: usize, | |
95 | compare_to_similar: bool, | |
96 | ) { | |
97 | let mut run = |name, f: fn(&[u8], &[u8]) -> usize| { | |
98 | let mut i = 0; | |
99 | for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) { | |
100 | let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum(); | |
101 | average_complexity /= chunk.len(); | |
102 | println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len()); | |
103 | i += chunk.len(); | |
104 | group.bench_function( | |
105 | BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())), | |
106 | |b| { | |
107 | b.iter(|| { | |
108 | for (old, new, _) in chunk { | |
109 | // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths | |
110 | // we use that as an x axis to find a meaningful way to plot a | |
111 | black_box(f(old, new)); | |
112 | } | |
113 | }); | |
114 | }, | |
115 | ); | |
116 | } | |
117 | }; | |
118 | ||
119 | run("imara_diff-histogram", |file1, file2| { | |
120 | let input = InternedInput::new(file1, file2); | |
121 | imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total() | |
122 | }); | |
123 | ||
124 | run("imara_diff-myers", |file1, file2| { | |
125 | let input = InternedInput::new(file1, file2); | |
126 | imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total() | |
127 | }); | |
128 | ||
129 | if compare_to_similar { | |
130 | run("similar", |file1, file2| { | |
131 | let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2); | |
132 | diff.len() | |
133 | }); | |
134 | } | |
135 | ||
136 | group.finish(); | |
137 | } | |
138 | ||
139 | fn rust(c: &mut Criterion) { | |
140 | bench_repo(c, "rust", "1.64.0", "1.50.0", 30); | |
141 | } | |
142 | ||
143 | fn vscode(c: &mut Criterion) { | |
144 | bench_repo(c, "vscode", "1.72.2", "1.41.0", 30); | |
145 | } | |
146 | ||
147 | fn linux(c: &mut Criterion) { | |
148 | bench_repo(c, "linux", "v6.0", "v5.7", 30); | |
149 | } | |
150 | ||
151 | fn helix(c: &mut Criterion) { | |
152 | bench_repo(c, "helix", "22.08.1", "v0.5.0", 30); | |
153 | } | |
154 | ||
155 | criterion_group!(realworld_repos, helix, rust, vscode, linux); | |
156 | criterion_main!(realworld_repos); |