]> git.proxmox.com Git - rustc.git/blob - vendor/html5ever/benches/html5ever.rs
New upstream version 1.37.0+dfsg1
[rustc.git] / vendor / html5ever / benches / html5ever.rs
1 #[macro_use]
2 extern crate criterion;
3 extern crate html5ever;
4
5 use std::fs;
6 use std::path::PathBuf;
7
8 use criterion::{black_box, Criterion};
9
10 use html5ever::tendril::*;
11 use html5ever::tokenizer::{
12 BufferQueue, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
13 };
14
15 struct Sink;
16
17 impl TokenSink for Sink {
18 type Handle = ();
19
20 fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
21 // Don't use the token, but make sure we don't get
22 // optimized out entirely.
23 black_box(token);
24 TokenSinkResult::Continue
25 }
26 }
27
28 fn run_bench(c: &mut Criterion, name: &str) {
29 let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
30 path.push("data/bench/");
31 path.push(name);
32 let mut file = fs::File::open(&path).ok().expect("can't open file");
33
34 // Read the file and treat it as an infinitely repeating sequence of characters.
35 let mut file_input = ByteTendril::new();
36 file.read_to_tendril(&mut file_input)
37 .ok()
38 .expect("can't read file");
39 let file_input: StrTendril = file_input.try_reinterpret().unwrap();
40 let size = file_input.len();
41 let mut stream = file_input.chars().cycle();
42
43 // Break the input into chunks of 1024 chars (= a few kB).
44 // This simulates reading from the network.
45 let mut input = vec![];
46 let mut total = 0usize;
47 while total < size {
48 // The by_ref() call is important, otherwise we get wrong results!
49 // See rust-lang/rust#18045.
50 let sz = std::cmp::min(1024, size - total);
51 input.push(stream.by_ref().take(sz).collect::<String>().to_tendril());
52 total += sz;
53 }
54
55 let test_name = format!("html tokenizing {}", name);
56
57 c.bench_function(&test_name, move |b| {
58 b.iter(|| {
59 let mut tok = Tokenizer::new(Sink, Default::default());
60 let mut buffer = BufferQueue::new();
61 // We are doing clone inside the bench function, this is not ideal, but possibly
62 // necessary since our iterator consumes the underlying buffer.
63 for buf in input.clone().into_iter() {
64 buffer.push_back(buf);
65 let _ = tok.feed(&mut buffer);
66 }
67 let _ = tok.feed(&mut buffer);
68 tok.end();
69 })
70 });
71 }
72
73 fn html5ever_benchmark(c: &mut Criterion) {
74 run_bench(c, "lipsum.html");
75 run_bench(c, "lipsum-zh.html");
76 run_bench(c, "medium-fragment.html");
77 run_bench(c, "small-fragment.html");
78 run_bench(c, "tiny-fragment.html");
79 run_bench(c, "strong.html");
80 }
81
82 criterion_group!(benches, html5ever_benchmark);
83 criterion_main!(benches);