]>
Commit | Line | Data |
---|---|---|
f20569fa XL |
1 | //! Benchmarking module. |
2 | ||
3 | use super::{ | |
4 | event::CompletedTest, options::BenchMode, test_result::TestResult, types::TestDesc, Sender, | |
5 | }; | |
6 | ||
7 | use crate::stats; | |
8 | use std::cmp; | |
9 | #[cfg(feature = "capture")] | |
10 | use std::io; | |
11 | use std::panic::{catch_unwind, AssertUnwindSafe}; | |
12 | use std::sync::{Arc, Mutex}; | |
13 | use std::time::{Duration, Instant}; | |
14 | ||
15 | #[cfg(feature = "asm_black_box")] | |
16 | pub use std::hint::black_box; | |
17 | ||
/// Fallback `black_box`, compiled only when the `asm_black_box` feature is
/// disabled (otherwise `std::hint::black_box` is re-exported instead).
///
/// Returns its argument unchanged. The function is marked
/// `#[inline(never)]`, presumably so the optimizer cannot see through the
/// call and discard the value produced by benchmarked code.
#[cfg(not(feature = "asm_black_box"))]
#[inline(never)]
pub fn black_box<T>(dummy: T) -> T {
    dummy
}
23 | ||
24 | /// Manager of the benchmarking runs. | |
25 | /// | |
26 | /// This is fed into functions marked with `#[bench]` to allow for | |
27 | /// set-up & tear-down before running a piece of code repeatedly via a | |
28 | /// call to `iter`. | |
29 | #[derive(Clone)] | |
30 | pub struct Bencher { | |
31 | mode: BenchMode, | |
32 | summary: Option<stats::Summary>, | |
33 | pub bytes: u64, | |
34 | } | |
35 | ||
36 | impl Bencher { | |
37 | /// Callback for benchmark functions to run in their body. | |
38 | pub fn iter<T, F>(&mut self, mut inner: F) | |
39 | where | |
40 | F: FnMut() -> T, | |
41 | { | |
42 | if self.mode == BenchMode::Single { | |
43 | ns_iter_inner(&mut inner, 1); | |
44 | return; | |
45 | } | |
46 | ||
47 | self.summary = Some(iter(&mut inner)); | |
48 | } | |
49 | ||
50 | pub fn bench<F>(&mut self, mut f: F) -> Option<stats::Summary> | |
51 | where | |
52 | F: FnMut(&mut Bencher), | |
53 | { | |
54 | f(self); | |
55 | self.summary | |
56 | } | |
57 | } | |
58 | ||
59 | #[derive(Debug, Clone, PartialEq)] | |
60 | pub struct BenchSamples { | |
61 | pub ns_iter_summ: stats::Summary, | |
62 | pub mb_s: usize, | |
63 | } | |
64 | ||
65 | pub fn fmt_bench_samples(bs: &BenchSamples) -> String { | |
66 | use std::fmt::Write; | |
67 | let mut output = String::new(); | |
68 | ||
69 | let median = bs.ns_iter_summ.median as usize; | |
70 | let deviation = (bs.ns_iter_summ.max - bs.ns_iter_summ.min) as usize; | |
71 | ||
72 | write!( | |
73 | output, | |
74 | "{:>11} ns/iter (+/- {})", | |
75 | fmt_thousands_sep(median, ','), | |
76 | fmt_thousands_sep(deviation, ',') | |
77 | ) | |
78 | .unwrap(); | |
79 | if bs.mb_s != 0 { | |
80 | write!(output, " = {} MB/s", bs.mb_s).unwrap(); | |
81 | } | |
82 | output | |
83 | } | |
84 | ||
/// Formats `n` in decimal with `sep` inserted between groups of three digits.
///
/// Grouping is derived from the decimal digits themselves, so it is correct
/// for every `usize` value. (A fixed table of powers up to 10^9 would leave
/// values of 10^12 and above with an oversized leading group such as
/// `1234,567,890,123`.)
///
/// Examples: `0` -> `"0"`, `1000` -> `"1,000"`,
/// `1234567890123` -> `"1,234,567,890,123"`.
fn fmt_thousands_sep(n: usize, sep: char) -> String {
    let digits = n.to_string();
    let len = digits.len();
    // One separator per complete group of three digits is an upper bound.
    let mut output = String::with_capacity(len + len / 3);

    for (idx, digit) in digits.chars().enumerate() {
        // A separator goes before a digit when the number of digits still to
        // be written is a multiple of three (but never before the first one).
        if idx != 0 && (len - idx) % 3 == 0 {
            output.push(sep);
        }
        output.push(digit);
    }

    output
}
108 | ||
/// Calls `inner` exactly `k` times and returns the elapsed wall-clock time
/// of the whole batch in nanoseconds.
///
/// Each result is passed through `black_box`. The nanosecond count is
/// narrowed from `u128` to `u64` with an `as` cast.
fn ns_iter_inner<T, F>(inner: &mut F, k: u64) -> u64
where
    F: FnMut() -> T,
{
    let timer = Instant::now();
    let mut remaining = k;
    while remaining > 0 {
        black_box(inner());
        remaining -= 1;
    }
    timer.elapsed().as_nanos() as u64
}
119 | ||
120 | pub fn iter<T, F>(inner: &mut F) -> stats::Summary | |
121 | where | |
122 | F: FnMut() -> T, | |
123 | { | |
124 | // Initial bench run to get ballpark figure. | |
125 | let ns_single = ns_iter_inner(inner, 1); | |
126 | ||
127 | // Try to estimate iter count for 1ms falling back to 1m | |
128 | // iterations if first run took < 1ns. | |
129 | let ns_target_total = 1_000_000; // 1ms | |
130 | let mut n = ns_target_total / cmp::max(1, ns_single); | |
131 | ||
132 | // if the first run took more than 1ms we don't want to just | |
133 | // be left doing 0 iterations on every loop. The unfortunate | |
134 | // side effect of not being able to do as many runs is | |
135 | // automatically handled by the statistical analysis below | |
136 | // (i.e., larger error bars). | |
137 | n = cmp::max(1, n); | |
138 | ||
139 | let mut total_run = Duration::new(0, 0); | |
140 | let samples: &mut [f64] = &mut [0.0_f64; 50]; | |
141 | loop { | |
142 | let loop_start = Instant::now(); | |
143 | ||
144 | for p in &mut *samples { | |
145 | *p = ns_iter_inner(inner, n) as f64 / n as f64; | |
146 | } | |
147 | ||
148 | stats::winsorize(samples, 5.0); | |
149 | let summ = stats::Summary::new(samples); | |
150 | ||
151 | for p in &mut *samples { | |
152 | let ns = ns_iter_inner(inner, 5 * n); | |
153 | *p = ns as f64 / (5 * n) as f64; | |
154 | } | |
155 | ||
156 | stats::winsorize(samples, 5.0); | |
157 | let summ5 = stats::Summary::new(samples); | |
158 | ||
159 | let loop_run = loop_start.elapsed(); | |
160 | ||
161 | // If we've run for 100ms and seem to have converged to a | |
162 | // stable median. | |
163 | if loop_run > Duration::from_millis(100) | |
164 | && summ.median_abs_dev_pct < 1.0 | |
165 | && summ.median - summ5.median < summ5.median_abs_dev | |
166 | { | |
167 | return summ5; | |
168 | } | |
169 | ||
170 | total_run += loop_run; | |
171 | // Longest we ever run for is 3s. | |
172 | if total_run > Duration::from_secs(3) { | |
173 | return summ5; | |
174 | } | |
175 | ||
176 | // If we overflow here just return the results so far. We check a | |
177 | // multiplier of 10 because we're about to multiply by 2 and the | |
178 | // next iteration of the loop will also multiply by 5 (to calculate | |
179 | // the summ5 result) | |
180 | n = match n.checked_mul(10) { | |
181 | Some(_) => n * 2, | |
182 | None => { | |
183 | return summ5; | |
184 | } | |
185 | }; | |
186 | } | |
187 | } | |
188 | ||
189 | pub fn benchmark<F>(desc: TestDesc, monitor_ch: Sender<CompletedTest>, nocapture: bool, f: F) | |
190 | where | |
191 | F: FnMut(&mut Bencher), | |
192 | { | |
193 | let mut bs = Bencher { mode: BenchMode::Auto, summary: None, bytes: 0 }; | |
194 | ||
195 | let data = Arc::new(Mutex::new(Vec::new())); | |
196 | ||
197 | if !nocapture { | |
198 | #[cfg(feature = "capture")] | |
199 | io::set_output_capture(Some(data.clone())); | |
200 | } | |
201 | ||
202 | let result = catch_unwind(AssertUnwindSafe(|| bs.bench(f))); | |
203 | ||
204 | #[cfg(feature = "capture")] | |
205 | io::set_output_capture(None); | |
206 | ||
207 | let test_result = match result { | |
208 | //bs.bench(f) { | |
209 | Ok(Some(ns_iter_summ)) => { | |
210 | let ns_iter = cmp::max(ns_iter_summ.median as u64, 1); | |
211 | let mb_s = bs.bytes * 1000 / ns_iter; | |
212 | ||
213 | let bs = BenchSamples { ns_iter_summ, mb_s: mb_s as usize }; | |
214 | TestResult::TrBench(bs) | |
215 | } | |
216 | Ok(None) => { | |
217 | // iter not called, so no data. | |
218 | // FIXME: error in this case? | |
219 | let samples: &mut [f64] = &mut [0.0_f64; 1]; | |
220 | let bs = BenchSamples { ns_iter_summ: stats::Summary::new(samples), mb_s: 0 }; | |
221 | TestResult::TrBench(bs) | |
222 | } | |
223 | Err(_) => TestResult::TrFailed, | |
224 | }; | |
225 | ||
226 | let stdout = data.lock().unwrap().to_vec(); | |
227 | let message = CompletedTest::new(desc, test_result, None, stdout); | |
228 | monitor_ch.send(message).unwrap(); | |
229 | } | |
230 | ||
231 | pub fn run_once<F>(f: F) | |
232 | where | |
233 | F: FnMut(&mut Bencher), | |
234 | { | |
235 | let mut bs = Bencher { mode: BenchMode::Single, summary: None, bytes: 0 }; | |
236 | bs.bench(f); | |
237 | } |