]>
Commit | Line | Data |
---|---|---|
94b46f34 | 1 | // The Computer Language Benchmarks Game |
5869c6ff | 2 | // https://benchmarksgame-team.pages.debian.net/benchmarksgame/ |
94b46f34 XL |
3 | // |
4 | // contributed by the Rust Project Developers | |
5 | // contributed by TeXitoi | |
6 | // contributed by BurntSushi | |
7 | ||
8 | // This technically solves the problem posed in the `regex-dna` benchmark, but | |
9 | // it cheats by combining all of the replacements into a single regex and | |
10 | // replacing them with a single linear scan. i.e., it re-implements | |
11 | // `replace_all`. As a result, this is around 25% faster. ---AG | |
12 | ||
13 | extern crate regex; | |
14 | ||
15 | use std::io::{self, Read}; | |
16 | use std::sync::Arc; | |
17 | use std::thread; | |
18 | ||
f9f354fc XL |
/// Compiles a regular expression from the given pattern expression.
///
/// Expands to a call to `::regex::Regex::new` followed by `unwrap()`, so an
/// invalid pattern panics at the point of use instead of returning an error.
macro_rules! regex {
    ($pattern:expr) => {
        ::regex::Regex::new($pattern).unwrap()
    };
}
94b46f34 XL |
24 | |
25 | fn main() { | |
26 | let mut seq = String::with_capacity(50 * (1 << 20)); | |
27 | io::stdin().read_to_string(&mut seq).unwrap(); | |
28 | let ilen = seq.len(); | |
29 | ||
30 | seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned(); | |
31 | let clen = seq.len(); | |
32 | let seq_arc = Arc::new(seq.clone()); | |
33 | ||
34 | let variants = vec![ | |
35 | regex!("agggtaaa|tttaccct"), | |
36 | regex!("[cgt]gggtaaa|tttaccc[acg]"), | |
37 | regex!("a[act]ggtaaa|tttacc[agt]t"), | |
38 | regex!("ag[act]gtaaa|tttac[agt]ct"), | |
39 | regex!("agg[act]taaa|ttta[agt]cct"), | |
40 | regex!("aggg[acg]aaa|ttt[cgt]ccct"), | |
41 | regex!("agggt[cgt]aa|tt[acg]accct"), | |
42 | regex!("agggta[cgt]a|t[acg]taccct"), | |
43 | regex!("agggtaa[cgt]|[acg]ttaccct"), | |
44 | ]; | |
45 | let mut counts = vec![]; | |
46 | for variant in variants { | |
47 | let seq = seq_arc.clone(); | |
48 | let restr = variant.to_string(); | |
49 | let future = thread::spawn(move || variant.find_iter(&seq).count()); | |
50 | counts.push((restr, future)); | |
51 | } | |
52 | ||
53 | let substs = vec![ | |
54 | (b'B', "(c|g|t)"), | |
55 | (b'D', "(a|g|t)"), | |
56 | (b'H', "(a|c|t)"), | |
57 | (b'K', "(g|t)"), | |
58 | (b'M', "(a|c)"), | |
59 | (b'N', "(a|c|g|t)"), | |
60 | (b'R', "(a|g)"), | |
61 | (b'S', "(c|g)"), | |
62 | (b'V', "(a|c|g)"), | |
63 | (b'W', "(a|t)"), | |
64 | (b'Y', "(c|t)"), | |
65 | ]; // combined into one regex in `replace_all` | |
66 | let seq = replace_all(&seq, substs); | |
67 | ||
68 | for (variant, count) in counts { | |
69 | println!("{} {}", variant, count.join().unwrap()); | |
70 | } | |
71 | println!("\n{}\n{}\n{}", ilen, clen, seq.len()); | |
72 | } | |
73 | ||
74 | fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String { | |
75 | let mut replacements = vec![""; 256]; | |
76 | let mut alternates = vec![]; | |
77 | for (re, replacement) in substs { | |
78 | replacements[re as usize] = replacement; | |
79 | alternates.push((re as char).to_string()); | |
80 | } | |
81 | ||
82 | let re = regex!(&alternates.join("|")); | |
83 | let mut new = String::with_capacity(text.len()); | |
84 | let mut last_match = 0; | |
85 | for m in re.find_iter(text) { | |
86 | new.push_str(&text[last_match..m.start()]); | |
87 | new.push_str(replacements[text.as_bytes()[m.start()] as usize]); | |
88 | last_match = m.end(); | |
89 | } | |
90 | new.push_str(&text[last_match..]); | |
91 | new | |
92 | } |