You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
93 lines
2.8 KiB
93 lines
2.8 KiB
// The Computer Language Benchmarks Game
|
|
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/
|
|
//
|
|
// contributed by the Rust Project Developers
|
|
// contributed by TeXitoi
|
|
// contributed by BurntSushi
|
|
|
|
// This technically solves the problem posed in the `regex-dna` benchmark, but
|
|
// it cheats by combining all of the replacements into a single regex and
|
|
// replacing them with a single linear scan. i.e., it re-implements
|
|
// `replace_all`. As a result, this is around 25% faster. ---AG
|
|
|
|
extern crate regex;
|
|
|
|
use std::io::{self, Read};
|
|
use std::sync::Arc;
|
|
use std::thread;
|
|
|
|
/// Compiles a pattern into a `regex::Regex`.
///
/// The argument is any expression accepted by `Regex::new`. The result of
/// compilation is unwrapped, so an invalid pattern panics at the call site.
macro_rules! regex {
    ($pattern:expr) => {
        ::regex::Regex::new($pattern).unwrap()
    };
}
|
|
|
|
fn main() {
|
|
let mut seq = String::with_capacity(50 * (1 << 20));
|
|
io::stdin().read_to_string(&mut seq).unwrap();
|
|
let ilen = seq.len();
|
|
|
|
seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
|
|
let clen = seq.len();
|
|
let seq_arc = Arc::new(seq.clone());
|
|
|
|
let variants = vec![
|
|
regex!("agggtaaa|tttaccct"),
|
|
regex!("[cgt]gggtaaa|tttaccc[acg]"),
|
|
regex!("a[act]ggtaaa|tttacc[agt]t"),
|
|
regex!("ag[act]gtaaa|tttac[agt]ct"),
|
|
regex!("agg[act]taaa|ttta[agt]cct"),
|
|
regex!("aggg[acg]aaa|ttt[cgt]ccct"),
|
|
regex!("agggt[cgt]aa|tt[acg]accct"),
|
|
regex!("agggta[cgt]a|t[acg]taccct"),
|
|
regex!("agggtaa[cgt]|[acg]ttaccct"),
|
|
];
|
|
let mut counts = vec![];
|
|
for variant in variants {
|
|
let seq = seq_arc.clone();
|
|
let restr = variant.to_string();
|
|
let future = thread::spawn(move || variant.find_iter(&seq).count());
|
|
counts.push((restr, future));
|
|
}
|
|
|
|
let substs = vec![
|
|
(b'B', "(c|g|t)"),
|
|
(b'D', "(a|g|t)"),
|
|
(b'H', "(a|c|t)"),
|
|
(b'K', "(g|t)"),
|
|
(b'M', "(a|c)"),
|
|
(b'N', "(a|c|g|t)"),
|
|
(b'R', "(a|g)"),
|
|
(b'S', "(c|g)"),
|
|
(b'V', "(a|c|g)"),
|
|
(b'W', "(a|t)"),
|
|
(b'Y', "(c|t)"),
|
|
]; // combined into one regex in `replace_all`
|
|
let seq = replace_all(&seq, substs);
|
|
|
|
for (variant, count) in counts {
|
|
println!("{} {}", variant, count.join().unwrap());
|
|
}
|
|
println!("\n{}\n{}\n{}", ilen, clen, seq.len());
|
|
}
|
|
|
|
fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
|
|
let mut replacements = vec![""; 256];
|
|
let mut alternates = vec![];
|
|
for (re, replacement) in substs {
|
|
replacements[re as usize] = replacement;
|
|
alternates.push((re as char).to_string());
|
|
}
|
|
|
|
let re = regex!(&alternates.join("|"));
|
|
let mut new = String::with_capacity(text.len());
|
|
let mut last_match = 0;
|
|
for m in re.find_iter(text) {
|
|
new.push_str(&text[last_match..m.start()]);
|
|
new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
|
|
last_match = m.end();
|
|
}
|
|
new.push_str(&text[last_match..]);
|
|
new
|
|
}
|