Skip to content

Commit

Permalink
feat: added method to recover an unaligned dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
nishaq503 committed Nov 9, 2024
1 parent 25af9a0 commit cc8fa32
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 1 deletion.
26 changes: 26 additions & 0 deletions crates/abd-clam/src/msa/quality/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,32 @@ use rayon::prelude::*;

use crate::{utils, Dataset, FlatVec};

impl<U: Number, M> FlatVec<String, U, M> {
    /// Remove all gaps from all sequences in the MSA.
    ///
    /// Every `'-'` character is stripped from every instance, in place, so
    /// each sequence keeps its existing allocation instead of being rebuilt
    /// character by character into a fresh `String`. The relative order of
    /// the remaining characters and of the sequences is unchanged.
    ///
    /// NOTE(review): only `instances` is modified here; any other state
    /// carried by `self` (e.g. cached length/dimensionality information) is
    /// left as-is -- confirm that this is what callers expect.
    ///
    /// # Returns
    ///
    /// The same dataset, with gap characters removed from every sequence.
    #[must_use]
    pub fn remove_gaps(mut self) -> Self {
        for seq in &mut self.instances {
            // `retain` filters in place: no per-sequence reallocation.
            seq.retain(|c| c != '-');
        }
        self
    }
}

impl<U: Number, M: Send + Sync> FlatVec<String, U, M> {
    /// Parallel version of `remove_gaps`.
    ///
    /// Every `'-'` character is stripped from every instance. The sequences
    /// are processed concurrently via `rayon`, and each one is filtered in
    /// place so that no new `String` (and no new outer collection) has to be
    /// allocated. The order of the sequences is unchanged.
    ///
    /// NOTE(review): only `instances` is modified here; any other state
    /// carried by `self` (e.g. cached length/dimensionality information) is
    /// left as-is -- confirm that this is what callers expect.
    ///
    /// # Returns
    ///
    /// The same dataset, with gap characters removed from every sequence.
    #[must_use]
    pub fn par_remove_gaps(mut self) -> Self {
        self.instances
            .par_iter_mut()
            // Each sequence is independent, so in-place filtering is safe to
            // run in parallel without any shared state.
            .for_each(|seq| seq.retain(|c| c != '-'));
        self
    }
}

// TODO: Consider adding a new trait for MSA datasets. Then move these methods
// to that trait.

Expand Down
7 changes: 6 additions & 1 deletion crates/results/msa/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ struct Args {
#[arg(short('i'), long)]
inp_path: PathBuf,

/// Whether the original fasta file was pre-aligned by the provider.
#[arg(short('p'), long)]
pre_aligned: bool,

/// The number of samples to use for the dataset.
#[arg(short('n'), long)]
num_samples: Option<usize>,
Expand Down Expand Up @@ -103,7 +107,8 @@ fn main() -> Result<(), String> {
ftlog::info!("Input file: {:?}", fasta_file.raw_path());
ftlog::info!("Output directory: {:?}", fasta_file.out_dir());

let data = fasta_file.read(args.num_samples)?;
let data = fasta_file.read::<i32>(args.num_samples)?;
let data = if args.pre_aligned { data } else { data.par_remove_gaps() };
ftlog::info!(
"Finished reading original dataset: length range = {:?}",
data.dimensionality_hint()
Expand Down

0 comments on commit cc8fa32

Please sign in to comment.