-
Notifications
You must be signed in to change notification settings - Fork 214
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
179 additions
and
101 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,96 +1,79 @@ | ||
use pyo3::prelude::*; | ||
use rayon::prelude::*; | ||
use rayon::ThreadPoolBuilder; | ||
use crate::filehandler::bam_stats; | ||
use crate::filehandler::{bam_ispaired, write_file}; | ||
use crate::covcalc::{bam_pileup, parse_regions, collapse_bgvecs}; | ||
use crate::normalization::scale_factor_bamcompare; | ||
use crate::covcalc::{bam_pileup, parse_regions}; | ||
use crate::calc::median; | ||
|
||
#[pyfunction] | ||
pub fn r_bamcompare( | ||
bam_ifile: &str, | ||
bam_ifile1: &str, | ||
bam_ifile2: &str, | ||
_ofile: &str, | ||
_ofiletype: &str, | ||
_norm: &str, | ||
ofile: &str, | ||
ofiletype: &str, | ||
norm: &str, | ||
scalefactorsmethod: &str, | ||
effective_genome_size: u64, | ||
operation: &str, | ||
pseudocount: f64, | ||
nproc: usize, | ||
binsize: u32, | ||
regions: Vec<(String, u64, u64)>, | ||
verbose: bool | ||
) -> PyResult<()> { | ||
// Put statistics into scope; this should probably be rewritten (can't we always assume at least 2 threads?). | ||
// will need to be revisited for multiBamsummaries / computeMatrix. | ||
let mut total_reads1: u64 = 0; | ||
let mut total_reads2: u64 = 0; | ||
let mut mapped_reads1: u64 = 0; | ||
let mut mapped_reads2: u64 = 0; | ||
let mut unmapped_reads1: u64 = 0; | ||
let mut unmapped_reads2: u64 = 0; | ||
let mut readlen1: f32 = 0.0; | ||
let mut readlen2: f32 = 0.0; | ||
let mut fraglen1: f32 = 0.0; | ||
let mut fraglen2: f32 = 0.0; | ||
// Get statistics of bam file. | ||
if nproc > 1 { | ||
let pool2 = ThreadPoolBuilder::new().num_threads(2).build().unwrap(); | ||
let bamstatvec: Vec<_> = pool2.install(|| { | ||
vec![ | ||
(bam_ifile, &verbose), | ||
(bam_ifile2, &verbose) | ||
] | ||
.par_iter() | ||
.map(|(bam_ifile, verbose)| bam_stats(bam_ifile, verbose)) | ||
.collect() | ||
}); | ||
let (_total_reads1, _mapped_reads1, _unmapped_reads1, _readlen1, _fraglen1) = bamstatvec[0]; | ||
let (_total_reads2, _mapped_reads2, _unmapped_reads2, _readlen2, _fraglen2) = bamstatvec[1]; | ||
total_reads1 = _total_reads1; | ||
total_reads2 = _total_reads2; | ||
mapped_reads1 = _mapped_reads1; | ||
mapped_reads2 = _mapped_reads2; | ||
unmapped_reads1 = _unmapped_reads1; | ||
unmapped_reads2 = _unmapped_reads2; | ||
readlen1 = _readlen1; | ||
readlen2 = _readlen2; | ||
fraglen1 = _fraglen1; | ||
fraglen2 = _fraglen2; | ||
let ispe1 = bam_ispaired(bam_ifile1); | ||
let ispe2 = bam_ispaired(bam_ifile2); | ||
|
||
} else { | ||
let (_total_reads1, _mapped_reads1, _unmapped_reads1, _readlen1, _fraglen1) = bam_stats(bam_ifile, &verbose); | ||
let (_total_reads2, _mapped_reads2, _unmapped_reads2, _readlen2, _fraglen2) = bam_stats(bam_ifile2, &verbose); | ||
total_reads1 = _total_reads1; | ||
total_reads2 = _total_reads2; | ||
mapped_reads1 = _mapped_reads1; | ||
mapped_reads2 = _mapped_reads2; | ||
unmapped_reads1 = _unmapped_reads1; | ||
unmapped_reads2 = _unmapped_reads2; | ||
readlen1 = _readlen1; | ||
readlen2 = _readlen2; | ||
fraglen1 = _fraglen1; | ||
fraglen2 = _fraglen2; | ||
if verbose { | ||
println!("Sample1: {} is-paired: {}", bam_ifile1, ispe1); | ||
println!("Sample2: {} is-paired: {}", bam_ifile2, ispe2); | ||
} | ||
|
||
// Calculate scale factors | ||
let (scale_factor1, scale_factor2) = scale_factor_bamcompare( | ||
scalefactorsmethod, | ||
mapped_reads1, | ||
mapped_reads2, | ||
binsize as u64, | ||
effective_genome_size | ||
); | ||
println!("scalefactors = {} and {}", scale_factor1, scale_factor2); | ||
let (regions, chromsizes) = parse_regions(&regions, bam_ifile); | ||
// Parse regions & calculate coverage | ||
let (regions, chromsizes) = parse_regions(&regions, bam_ifile1); | ||
let pool = ThreadPoolBuilder::new().num_threads(nproc).build().unwrap(); | ||
let _bg1: Vec<(String, u64, u64, f64)> = pool.install(|| { | ||
|
||
// Parse first bamfile | ||
let (bg1, mapped1, _unmapped1, readlen1, fraglen1) = pool.install(|| { | ||
regions.par_iter() | ||
.flat_map(|i| bam_pileup(bam_ifile, &i, &binsize, scale_factor1)) | ||
.collect() | ||
.map(|i| bam_pileup(bam_ifile1, &i, &binsize, &ispe1)) | ||
.reduce( | ||
|| (vec![], 0, 0, vec![], vec![]), | ||
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| { | ||
_bg.extend(bg); | ||
_readlen.extend(readlen); | ||
_fraglen.extend(fraglen); | ||
_mapped += mapped; | ||
_unmapped += unmapped; | ||
(_bg, _mapped, _unmapped, _readlen, _fraglen) | ||
} | ||
) | ||
}); | ||
let _bg2: Vec<(String, u64, u64, f64)> = pool.install(|| { | ||
let _readlen1 = median(readlen1); | ||
let _fraglen1 = median(fraglen1); | ||
|
||
// Parse second bamfile | ||
let (bg2, mapped2, _unmapped2, readlen2, fraglen2) = pool.install(|| { | ||
regions.par_iter() | ||
.flat_map(|i| bam_pileup(bam_ifile2, &i, &binsize, scale_factor2)) | ||
.collect() | ||
.map(|i| bam_pileup(bam_ifile2, &i, &binsize, &ispe2)) | ||
.reduce( | ||
|| (vec![], 0, 0, vec![], vec![]), | ||
|(mut _bg, mut _mapped, mut _unmapped, mut _readlen, mut _fraglen), (bg, mapped, unmapped, readlen, fraglen)| { | ||
_bg.extend(bg); | ||
_readlen.extend(readlen); | ||
_fraglen.extend(fraglen); | ||
_mapped += mapped; | ||
_unmapped += unmapped; | ||
(_bg, _mapped, _unmapped, _readlen, _fraglen) | ||
} | ||
) | ||
}); | ||
let _readlen2 = median(readlen2); | ||
let _fraglen2 = median(fraglen2); | ||
let (sf1, sf2) = scale_factor_bamcompare(scalefactorsmethod, mapped1, mapped2, binsize, effective_genome_size, norm); | ||
println!("scale factor1 = {}, scale factor2 = {}", sf1, sf2); | ||
let bge = collapse_bgvecs(bg1, bg2, sf1, sf2, pseudocount, operation); | ||
write_file(ofile, ofiletype, bge, chromsizes); | ||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,4 +12,4 @@ pub fn median(mut nvec: Vec<u32>) -> f32 { | |
return (nvec[len / 2] + nvec[len / 2 - 1]) as f32 / 2.0; | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters