[rustc.git] / vendor / imara-diff / benches / git_repo.rs

use std::convert::Infallible;
use std::path::PathBuf;

use criterion::measurement::Measurement;
use criterion::{
    black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion,
};
use git_repository::object::tree::diff::{Action, Change};
use git_repository::Id;
use imara_diff::intern::InternedInput;
use imara_diff::sink::Counter;
use imara_diff::Algorithm;

fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> {
    use git_repository::object::tree::diff::change::Event::Modification;

    let (previous_id, id) = match change.event {
        Modification { previous_entry_mode, previous_id, entry_mode, id }
            if previous_entry_mode.is_blob() && entry_mode.is_blob() =>
        {
            (previous_id, id)
        }
        _ => return None,
    };

    let old = previous_id.object().ok()?.detach().data;
    let new = id.object().ok()?.detach().data;

    Some((new, old))
}

fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) {
    let from_tree = from.object().unwrap().peel_to_tree().unwrap();
    let to_tree = to.object().unwrap().peel_to_tree().unwrap();
    from_tree
        .changes()
        .track_filename()
        .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> {
            if let Some((old, new)) = extract_diff(&change) {
                let input = InternedInput::new(&*old, &*new);
                let changes =
                    imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total();
                let complexity = changes * (old.len() + new.len());
                diffs.push((old, new, complexity));
            }
            Ok(Action::Continue)
        })
        .unwrap();
}

pub fn project_root() -> PathBuf {
    let dir = env!("CARGO_MANIFEST_DIR");
    let mut res = PathBuf::from(dir);
    while !res.join("README.md").exists() {
        res = res.parent().expect("reached fs root without finding project root").to_owned()
    }
    res
}

fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) {
    let path = project_root().join("bench_data").join("repos").join(name);
    let repo = git_repository::open(path).unwrap();
    let tag1 = repo.find_reference(tag1).unwrap().into_fully_peeled_id().unwrap();
    let tag2 = repo.find_reference(tag2).unwrap().into_fully_peeled_id().unwrap();
    let mut diffs = Vec::new();
    git_tree_diff(tag1, tag2, &mut diffs);
    let mut last_commit = tag2;
    tag2.object()
        .unwrap()
        .into_commit()
        .ancestors()
        .all()
        .unwrap()
        .take(num_commits as usize)
        .for_each(|parent| {
            let parent = parent.unwrap();
            git_tree_diff(last_commit, parent, &mut diffs);
            last_commit = parent;
        });
    diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity);

    if std::env::var("PLOT").is_ok() {
        let mut group = c.benchmark_group(format!("{name}_plot"));
        group.sample_size(15);
        bench_file_diffs(group, &diffs, 12, true);
    } else {
        bench_file_diffs(c.benchmark_group(name), &diffs, 2, false);
    }
}

fn bench_file_diffs<M: Measurement>(
    mut group: BenchmarkGroup<M>,
    files: &[(Vec<u8>, Vec<u8>, usize)],
    num_chunks: usize,
    compare_to_similar: bool,
) {
    let mut run = |name, f: fn(&[u8], &[u8]) -> usize| {
        let mut i = 0;
        for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) {
            let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum();
            average_complexity /= chunk.len();
            println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len());
            i += chunk.len();
            group.bench_function(
                BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())),
                |b| {
                    b.iter(|| {
                        for (old, new, _) in chunk {
                            // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths
                            // we use that as an x axis to find a meaningful way to plot a
                            black_box(f(old, new));
                        }
                    });
                },
            );
        }
    };

    run("imara_diff-histogram", |file1, file2| {
        let input = InternedInput::new(file1, file2);
        imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total()
    });

    run("imara_diff-myers", |file1, file2| {
        let input = InternedInput::new(file1, file2);
        imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total()
    });

    if compare_to_similar {
        run("similar", |file1, file2| {
            let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2);
            diff.len()
        });
    }

    group.finish();
}

fn rust(c: &mut Criterion) {
    bench_repo(c, "rust", "1.64.0", "1.50.0", 30);
}

fn vscode(c: &mut Criterion) {
    bench_repo(c, "vscode", "1.72.2", "1.41.0", 30);
}

fn linux(c: &mut Criterion) {
    bench_repo(c, "linux", "v6.0", "v5.7", 30);
}

fn helix(c: &mut Criterion) {
    bench_repo(c, "helix", "22.08.1", "v0.5.0", 30);
}

criterion_group!(realworld_repos, helix, rust, vscode, linux);
criterion_main!(realworld_repos);
Commit	Line	Data
0a29b90c FG	1	use std::convert::Infallible;
	2	use std::path::PathBuf;
	3
	4	use criterion::measurement::Measurement;
	5	use criterion::{
	6	black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion,
	7	};
	8	use git_repository::object::tree::diff::{Action, Change};
	9	use git_repository::Id;
	10	use imara_diff::intern::InternedInput;
	11	use imara_diff::sink::Counter;
	12	use imara_diff::Algorithm;
	13
	14	fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> {
	15	use git_repository::object::tree::diff::change::Event::Modification;
	16
	17	let (previous_id, id) = match change.event {
	18	Modification { previous_entry_mode, previous_id, entry_mode, id }
	19	if previous_entry_mode.is_blob() && entry_mode.is_blob() =>
	20	{
	21	(previous_id, id)
	22	}
	23	_ => return None,
	24	};
	25
	26	let old = previous_id.object().ok()?.detach().data;
	27	let new = id.object().ok()?.detach().data;
	28
	29	Some((new, old))
	30	}
	31
	32	fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) {
	33	let from_tree = from.object().unwrap().peel_to_tree().unwrap();
	34	let to_tree = to.object().unwrap().peel_to_tree().unwrap();
	35	from_tree
	36	.changes()
	37	.track_filename()
	38	.for_each_to_obtain_tree(&to_tree, \|change\| -> Result<_, Infallible> {
	39	if let Some((old, new)) = extract_diff(&change) {
	40	let input = InternedInput::new(&old, &new);
	41	let changes =
	42	imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total();
	43	let complexity = changes * (old.len() + new.len());
	44	diffs.push((old, new, complexity));
	45	}
	46	Ok(Action::Continue)
	47	})
	48	.unwrap();
	49	}
	50
	51	pub fn project_root() -> PathBuf {
	52	let dir = env!("CARGO_MANIFEST_DIR");
	53	let mut res = PathBuf::from(dir);
	54	while !res.join("README.md").exists() {
	55	res = res.parent().expect("reached fs root without finding project root").to_owned()
	56	}
	57	res
	58	}
	59
	60	fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) {
	61	let path = project_root().join("bench_data").join("repos").join(name);
	62	let repo = git_repository::open(path).unwrap();
	63	let tag1 = repo.find_reference(tag1).unwrap().into_fully_peeled_id().unwrap();
	64	let tag2 = repo.find_reference(tag2).unwrap().into_fully_peeled_id().unwrap();
65	let mut diffs = Vec::new();
66	git_tree_diff(tag1, tag2, &mut diffs);
67	let mut last_commit = tag2;
68	tag2.object()
69	.unwrap()
70	.into_commit()
71	.ancestors()
72	.all()
73	.unwrap()
74	.take(num_commits as usize)
75	.for_each(\|parent\| {
76	let parent = parent.unwrap();
77	git_tree_diff(last_commit, parent, &mut diffs);
78	last_commit = parent;
79	});
80	diffs.sort_unstable_by_key(\|(_, _, complexity)\| *complexity);
81
82	if std::env::var("PLOT").is_ok() {
83	let mut group = c.benchmark_group(format!("{name}_plot"));
84	group.sample_size(15);
85	bench_file_diffs(group, &diffs, 12, true);
86	} else {
87	bench_file_diffs(c.benchmark_group(name), &diffs, 2, false);
88	}
89	}
90
91	fn bench_file_diffs<M: Measurement>(
92	mut group: BenchmarkGroup<M>,
93	files: &[(Vec<u8>, Vec<u8>, usize)],
94	num_chunks: usize,
95	compare_to_similar: bool,
96	) {
97	let mut run = \|name, f: fn(&[u8], &[u8]) -> usize\| {
98	let mut i = 0;
99	for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) {
100	let mut average_complexity: usize = chunk.iter().map(\|(_, _, it)\| *it).sum();
101	average_complexity /= chunk.len();
102	println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len());
103	i += chunk.len();
104	group.bench_function(
105	BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())),
106	\|b\| {
107	b.iter(\|\| {
108	for (old, new, _) in chunk {
109	// myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths
110	// we use that as an x axis to find a meaningful way to plot a
111	black_box(f(old, new));
112	}
113	});
114	},
115	);
116	}
117	};
118
119	run("imara_diff-histogram", \|file1, file2\| {
120	let input = InternedInput::new(file1, file2);
121	imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total()
122	});
123
124	run("imara_diff-myers", \|file1, file2\| {
125	let input = InternedInput::new(file1, file2);
126	imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total()
127	});
128
129	if compare_to_similar {
130	run("similar", \|file1, file2\| {
131	let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2);
132	diff.len()
133	});
134	}
135
136	group.finish();
137	}
138
139	fn rust(c: &mut Criterion) {
140	bench_repo(c, "rust", "1.64.0", "1.50.0", 30);
141	}
142
143	fn vscode(c: &mut Criterion) {
144	bench_repo(c, "vscode", "1.72.2", "1.41.0", 30);
145	}
146
147	fn linux(c: &mut Criterion) {
148	bench_repo(c, "linux", "v6.0", "v5.7", 30);
149	}
150
151	fn helix(c: &mut Criterion) {
152	bench_repo(c, "helix", "22.08.1", "v0.5.0", 30);
153	}
154
155	criterion_group!(realworld_repos, helix, rust, vscode, linux);
156	criterion_main!(realworld_repos);