simplify shootout-reverse-complement.rs #18357
@@ -38,185 +38,93 @@
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

// ignore-android doesn't terminate?
// ignore-android see #10393 #13206

#![feature(slicing_syntax, asm, if_let, tuple_indexing)]
#![feature(slicing_syntax, unboxed_closures, overloaded_calls)]

extern crate libc;

use std::io::stdio::{stdin_raw, stdout_raw};
use std::sync::{Future};
use std::num::{div_rem};
use std::ptr::{copy_memory};
use std::io::{IoResult, EndOfFile};
use std::slice::raw::{mut_buf_as_slice};

use shared_memory::{SharedMemory};

mod tables {
use std::sync::{Once, ONCE_INIT};

/// Lookup tables.
static mut CPL16: [u16, ..1 << 16] = [0, ..1 << 16];
static mut CPL8: [u8, ..1 << 8] = [0, ..1 << 8];

/// Generates the tables.
pub fn get() -> Tables {
/// To make sure we initialize the tables only once.
static INIT: Once = ONCE_INIT;
INIT.doit(|| {
unsafe {
for i in range(0, 1 << 8) {
CPL8[i] = match i as u8 {
b'A' | b'a' => b'T',
b'C' | b'c' => b'G',
b'G' | b'g' => b'C',
b'T' | b't' => b'A',
b'U' | b'u' => b'A',
b'M' | b'm' => b'K',
b'R' | b'r' => b'Y',
b'W' | b'w' => b'W',
b'S' | b's' => b'S',
b'Y' | b'y' => b'R',
b'K' | b'k' => b'M',
b'V' | b'v' => b'B',
b'H' | b'h' => b'D',
b'D' | b'd' => b'H',
b'B' | b'b' => b'V',
b'N' | b'n' => b'N',
i => i,
};
}

for (i, v) in CPL16.iter_mut().enumerate() {
*v = *CPL8.unsafe_get(i & 255) as u16 << 8 |
*CPL8.unsafe_get(i >> 8) as u16;
}
}
});
Tables { _dummy: () }
}

/// Accessor for the static arrays.
///
/// To make sure that the tables can't be accessed without having been initialized.
pub struct Tables {
_dummy: ()
}

impl Tables {
/// Retreives the complement for `i`.
pub fn cpl8(self, i: u8) -> u8 {
// Not really unsafe.
unsafe { CPL8[i as uint] }
}

/// Retreives the complement for `i`.
pub fn cpl16(self, i: u16) -> u16 {
unsafe { CPL16[i as uint] }
}
}
struct Tables {
table8: [u8, ..1 << 8],
table16: [u16, ..1 << 16]
}

mod shared_memory {
use std::sync::{Arc};
use std::mem::{transmute};
use std::raw::{Slice};

/// Structure for sharing disjoint parts of a vector mutably across tasks.
pub struct SharedMemory {
ptr: Arc<Vec<u8>>,
start: uint,
len: uint,
}

impl SharedMemory {
pub fn new(ptr: Vec<u8>) -> SharedMemory {
let len = ptr.len();
SharedMemory {
ptr: Arc::new(ptr),
start: 0,
len: len,
}
impl Tables {
fn new() -> Tables {
let mut table8 = [0, ..1 << 8];
for (i, v) in table8.iter_mut().enumerate() {
*v = Tables::computed_cpl8(i as u8);
}

pub fn as_mut_slice(&mut self) -> &mut [u8] {
unsafe {
transmute(Slice {
data: self.ptr.as_ptr().offset(self.start as int) as *const u8,
len: self.len,
})
}
let mut table16 = [0, ..1 << 16];
for (i, v) in table16.iter_mut().enumerate() {
*v = table8[i & 255] as u16 << 8 |
table8[i >> 8] as u16;
}
Tables { table8: table8, table16: table16 }
}

pub fn len(&self) -> uint {
self.len
fn computed_cpl8(c: u8) -> u8 {
match c {
b'A' | b'a' => b'T',
b'C' | b'c' => b'G',
b'G' | b'g' => b'C',
b'T' | b't' => b'A',
b'U' | b'u' => b'A',
b'M' | b'm' => b'K',
b'R' | b'r' => b'Y',
b'W' | b'w' => b'W',
b'S' | b's' => b'S',
b'Y' | b'y' => b'R',
b'K' | b'k' => b'M',
b'V' | b'v' => b'B',
b'H' | b'h' => b'D',
b'D' | b'd' => b'H',
b'B' | b'b' => b'V',
b'N' | b'n' => b'N',
i => i,
}
}

pub fn split_at(self, mid: uint) -> (SharedMemory, SharedMemory) {
assert!(mid <= self.len);
let left = SharedMemory {
ptr: self.ptr.clone(),
start: self.start,
len: mid,
};
let right = SharedMemory {
ptr: self.ptr,
start: self.start + mid,
len: self.len - mid,
};
(left, right)
}
/// Retreives the complement for `i`.
fn cpl8(&self, i: u8) -> u8 {
self.table8[i as uint]
}

/// Resets the object so that it covers the whole range of the contained vector.
///
/// You must not call this method if `self` is not the only reference to the
/// shared memory.
///
/// FIXME: If `Arc` had a method to check if the reference is unique, then we
/// wouldn't need the `unsafe` here.
///
/// FIXME: If `Arc` had a method to unwrap the contained value, then we could
/// simply unwrap here.
pub unsafe fn reset(self) -> SharedMemory {
let len = self.ptr.len();
SharedMemory {
ptr: self.ptr,
start: 0,
len: len,
}
}
/// Retreives the complement for `i`.
fn cpl16(&self, i: u16) -> u16 {
self.table16[i as uint]
}
}

/// Reads all remaining bytes from the stream.
fn read_to_end<R: Reader>(r: &mut R) -> IoResult<Vec<u8>> {
// As reading the input stream in memory is a bottleneck, we tune
// Reader::read_to_end() with a fast growing policy to limit
// recopies. If MREMAP_RETAIN is implemented in the linux kernel
// and jemalloc use it, this trick will become useless.
const CHUNK: uint = 64 * 1024;

let mut vec = Vec::with_capacity(1024 * 1024);
let mut vec = Vec::with_capacity(CHUNK);
loop {
// workaround: very fast growing
if vec.capacity() - vec.len() < CHUNK {
let cap = vec.capacity();
let mult = if cap < 256 * 1024 * 1024 {
// FIXME (mahkoh): Temporary workaround for jemalloc on linux. Replace
// this by 2x once the jemalloc preformance issue has been fixed.
16
} else {
2
};
vec.reserve_exact(mult * cap);
}
unsafe {
let ptr = vec.as_mut_ptr().offset(vec.len() as int);
match mut_buf_as_slice(ptr, CHUNK, |s| r.read(s)) {
Ok(n) => {
let len = vec.len();
vec.set_len(len + n);
},
Err(ref e) if e.kind == EndOfFile => break,
Err(e) => return Err(e),
}
match r.push_at_least(1, CHUNK, &mut vec) {
Review discussion on this line:
- push_at_least is slow
- just call push if you must
- I can change that. Do you have an explanation for why push_at_least is slow? Because then we should also change the implementation of
- Because it's a large function that calls another large function that calls read. Don't ever call any _at_least function with the first argument 1. It will just do the same thing push does.
- In the Reader docs, read can return Ok(0). Theoretically, using push may eat CPU without limit. In practice, as read should avoid returning Ok(0), it should work as expected. So I don't know whether we should be correct using push_at_least or potentially faster using push. As I can't see any performance difference, I personally prefer correctness.

(A sketch of such a read loop in current Rust follows the end of this hunk.)
Ok(_) => {}
Err(ref e) if e.kind == EndOfFile => break,
Err(e) => return Err(e)
}
}
Ok(vec)
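The thread above hinges on whether `read` may legally return `Ok(0)`: a bare `push` loop could then spin without making progress, which is the correctness concern raised in the last comment. Purely as an illustration, here is a minimal sketch of a comparable chunked read loop in current Rust (the 2014-era `Reader`/`push_at_least` API no longer exists, and `read_to_end_fast` is a hypothetical name). It treats a zero-length read as end of input, so it terminates even on readers that signal EOF that way:

```rust
use std::io::{self, Read};

/// Hypothetical stand-in for the benchmark's read_to_end: read everything
/// from `r`, growing the buffer aggressively to limit reallocation copies.
fn read_to_end_fast<R: Read>(r: &mut R) -> io::Result<Vec<u8>> {
    const CHUNK: usize = 64 * 1024;
    let mut buf = Vec::with_capacity(CHUNK);
    loop {
        // Fast-growing policy: keep at least CHUNK bytes of free capacity.
        if buf.capacity() - buf.len() < CHUNK {
            buf.reserve(buf.capacity());
        }
        let len = buf.len();
        buf.resize(len + CHUNK, 0);
        let n = r.read(&mut buf[len..])?;
        buf.truncate(len + n);
        // A zero-length read is treated as end of input, so the loop cannot
        // spin forever the way a bare "push one more chunk" loop could.
        if n == 0 {
            return Ok(buf);
        }
    }
}

fn main() -> io::Result<()> {
    let mut stdin = io::stdin().lock();
    let data = read_to_end_fast(&mut stdin)?;
    eprintln!("read {} bytes", data.len());
    Ok(())
}
```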
@@ -225,11 +133,8 @@ fn read_to_end<R: Reader>(r: &mut R) -> IoResult<Vec<u8>> {
/// Finds the first position at which `b` occurs in `s`.
fn memchr(h: &[u8], n: u8) -> Option<uint> {
use libc::{c_void, c_int, size_t};
extern {
fn memchr(h: *const c_void, n: c_int, s: size_t) -> *mut c_void;
}
let res = unsafe {
memchr(h.as_ptr() as *const c_void, n as c_int, h.len() as size_t)
libc::memchr(h.as_ptr() as *const c_void, n as c_int, h.len() as size_t)
};
if res.is_null() {
None
@@ -238,13 +143,36 @@ fn memchr(h: &[u8], n: u8) -> Option<uint> {
}
}

/// A mutable iterator over DNA sequences
struct MutDnaSeqs<'a> { s: &'a mut [u8] }
fn mut_dna_seqs<'a>(s: &'a mut [u8]) -> MutDnaSeqs<'a> {
MutDnaSeqs { s: s }
}
impl<'a> Iterator<&'a mut [u8]> for MutDnaSeqs<'a> {
fn next(&mut self) -> Option<&'a mut [u8]> {
let tmp = std::mem::replace(&mut self.s, &mut []);
let tmp = match memchr(tmp, b'\n') {
Some(i) => tmp.slice_from_mut(i + 1),
None => return None,
};
let (seq, tmp) = match memchr(tmp, b'>') {
Some(i) => tmp.split_at_mut(i),
None => {
let len = tmp.len();
tmp.split_at_mut(len)
}
};
self.s = tmp;
Some(seq)
}
}

/// Length of a normal line without the terminating \n.
const LINE_LEN: uint = 60;

/// Compute the reverse complement.
fn reverse_complement(mut view: SharedMemory, tables: tables::Tables) {
// Drop the last newline
let seq = view.as_mut_slice().init_mut();
fn reverse_complement(seq: &mut [u8], tables: &Tables) {
let seq = seq.init_mut();// Drop the last newline
let len = seq.len();
let off = LINE_LEN - len % (LINE_LEN + 1);
let mut i = LINE_LEN;
@@ -290,34 +218,36 @@ fn reverse_complement(mut view: SharedMemory, tables: tables::Tables) {
}
}

fn main() {
let mut data = SharedMemory::new(read_to_end(&mut stdin_raw()).unwrap());
let tables = tables::get();

let mut futures = vec!();
loop {
let (_, mut tmp_data) = match memchr(data.as_mut_slice(), b'\n') {
Some(i) => data.split_at(i + 1),
_ => break,
};
let (view, tmp_data) = match memchr(tmp_data.as_mut_slice(), b'>') {
Some(i) => tmp_data.split_at(i),
None => {
let len = tmp_data.len();
tmp_data.split_at(len)
},
};
futures.push(Future::spawn(proc() reverse_complement(view, tables)));
data = tmp_data;
}

for f in futures.iter_mut() {
f.get();
/// Executes a closure in parallel over the given iterator over mutable slice.
/// The closure `f` is run in parallel with an element of `iter`.
fn parallel<'a, I, T, F>(mut iter: I, f: F)
Review discussion on this function:
- Is this function safe wrt all possible closures?
- AFAIK, yes. This is an adaptation of @alexcrichton's parallel function on

(A safe scoped-threads sketch of the same pattern follows the end of the diff below.)
where T: Send + Sync,
I: Iterator<&'a mut [T]>,
F: Fn(&'a mut [T]) + Sync {
use std::mem;
use std::raw::Repr;

let (tx, rx) = channel();
for chunk in iter {
let tx = tx.clone();

// Need to convert `f` and `chunk` to something that can cross the task
// boundary.
let f = &f as *const F as *const uint;
let raw = chunk.repr();
spawn(proc() {
let f = f as *const F;
unsafe { (*f)(mem::transmute(raw)) }
Review discussion on this line:
- Not a fan of this transmute. f could just be a function.
- Without a closure, it cannot take the tables from its context (by reference).

(A short sketch of the closure-vs-function trade-off follows the end of this function below.)
drop(tx)
});
}
drop(tx);
for () in rx.iter() {}
}
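On the transmute thread above: the reason `f` is a closure rather than a plain `fn` is that only a closure can carry the shared lookup tables in its environment; a free function would need them as an explicit argument. A small illustration of that trade-off in current Rust, where the `Tables` stand-in is hypothetical and much smaller than the PR's:

```rust
/// Hypothetical stand-in for the PR's lookup tables.
struct Tables {
    table8: [u8; 256],
}

/// A free function has no environment: the tables must be an explicit argument.
fn complement_with(tables: &Tables, seq: &mut [u8]) {
    for b in seq.iter_mut() {
        *b = tables.table8[*b as usize];
    }
}

fn main() {
    // Identity table, just for illustration.
    let mut table8 = [0u8; 256];
    for (i, v) in table8.iter_mut().enumerate() {
        *v = i as u8;
    }
    let tables = Tables { table8 };

    // A closure can capture `&tables` from its surrounding scope, so whatever
    // runs it only has to supply the sequence, mirroring how the PR's
    // `parallel` helper calls `reverse_complement(seq, tables)`.
    let complement = |seq: &mut [u8]| complement_with(&tables, seq);

    let mut seq = *b"ACGT";
    complement(&mut seq);
    assert_eq!(&seq, b"ACGT"); // the identity table leaves the bytes unchanged
}
```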

// Not actually unsafe. If Arc had a way to check uniqueness then we could do that in
// `reset` and it would tell us that, yes, it is unique at this point.
data = unsafe { data.reset() };

fn main() {
let mut data = read_to_end(&mut stdin_raw()).unwrap();
let tables = &Tables::new();
parallel(mut_dna_seqs(data[mut]), |&: seq| reverse_complement(seq, tables));
stdout_raw().write(data.as_mut_slice()).unwrap();
}
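On the earlier safety question about `parallel`: the helper is sound only because `mut_dna_seqs` yields disjoint `&mut` slices and every spawned task is joined before the function returns. In current Rust the same pattern can be written without raw pointers or transmutes using `std::thread::scope`; the following is a rough sketch under that assumption, a modern re-imagining rather than the code in this PR:

```rust
use std::thread;

/// Run `f` over each of the given disjoint mutable chunks in parallel.
/// Scoped threads may borrow `f` and the chunks directly, so nothing has
/// to be smuggled across the thread boundary as raw pointers.
fn parallel<'a, T, F>(chunks: impl Iterator<Item = &'a mut [T]>, f: F)
where
    T: Send,
    F: Fn(&mut [T]) + Sync,
{
    // Take a shared reference so every spawned thread can call `f`.
    let f = &f;
    thread::scope(|s| {
        for chunk in chunks {
            // Each chunk is a disjoint `&mut [T]`; the scope joins all
            // threads before `parallel` returns.
            s.spawn(move || f(chunk));
        }
    });
}

fn main() {
    let mut data = vec![1u8, 2, 3, 4, 5, 6, 7, 8];
    // Double every element, processing the two halves in parallel.
    parallel(data.chunks_mut(4), |chunk: &mut [u8]| {
        for x in chunk.iter_mut() {
            *x *= 2;
        }
    });
    assert_eq!(data, [2, 4, 6, 8, 10, 12, 14, 16]);
}
```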
Review discussion on the buffer's initial capacity:
- This has to be 1MB for the code below to work.
- I've rewritten this function to be as close as possible to Reader::read_to_end(). This value will just add exactly one more allocation, or is there something I didn't understand?
- One more allocation can be very significant with jemalloc. But in the case 64k -> 1M it looks like it's insignificant.
- 64k -> 1M isn't a huge reallocation, so the mremap issue isn't relevant.
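For a rough sense of what this thread is weighing: under the patch's growth policy (reserve roughly 16x the current capacity while below 256 MiB), starting the buffer at 64 KiB instead of 1 MiB adds exactly one extra growth step. A small sketch of that arithmetic, illustrative only and not a measurement:

```rust
fn main() {
    // Capacity progression under the patch's growth policy: grow ~16x per
    // step while below 256 MiB. Starting at 64 KiB instead of 1 MiB only
    // adds one step at the front (64 KiB -> 1 MiB).
    for start in [64 * 1024usize, 1024 * 1024] {
        let mut cap = start;
        let mut steps = 0;
        while cap < 256 * 1024 * 1024 {
            cap *= 16;
            steps += 1;
        }
        println!("start at {:>9} bytes: {} growth steps to reach 256 MiB", start, steps);
    }
}
```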