Skip to content

Commit

Permalink
feat: append suffix to sequence if reverse complemented
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov committed Jun 24, 2022
1 parent bc9839c commit 879f780
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 6 deletions.
16 changes: 12 additions & 4 deletions packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::cli::nextalign_loop::NextalignRecord;
use eyre::{Report, WrapErr};
use log::warn;
use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX;
use nextclade::io::errors_csv::ErrorsCsvWriter;
use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
use nextclade::io::gene_map::GeneMap;
Expand Down Expand Up @@ -87,24 +88,31 @@ impl<'a> NextalignOrderedWriter<'a> {
translations,
warnings,
missing_genes,
is_reverse_complement,
} = output;

let seq_name = if *is_reverse_complement {
format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
} else {
seq_name.clone()
};

if let Some(fasta_writer) = &mut self.fasta_writer {
fasta_writer.write(seq_name, &from_nuc_seq(&stripped.qry_seq))?;
fasta_writer.write(&seq_name, &from_nuc_seq(&stripped.qry_seq))?;
}

if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer {
for translation in translations {
fasta_peptide_writer.write(seq_name, translation)?;
fasta_peptide_writer.write(&seq_name, translation)?;
}
}

if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
insertions_csv_writer.write(seq_name, &stripped.insertions, translations)?;
insertions_csv_writer.write(&seq_name, &stripped.insertions, translations)?;
}

if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
errors_csv_writer.write_aa_errors(seq_name, warnings, missing_genes)?;
errors_csv_writer.write_aa_errors(&seq_name, warnings, missing_genes)?;
}
}
Err(report) => {
Expand Down
8 changes: 8 additions & 0 deletions packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::cli::nextclade_loop::NextcladeRecord;
use eyre::{Report, WrapErr};
use itertools::Itertools;
use log::warn;
use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX;
use nextclade::io::errors_csv::ErrorsCsvWriter;
use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
use nextclade::io::gene_map::GeneMap;
Expand Down Expand Up @@ -118,9 +119,16 @@ impl<'a> NextcladeOrderedWriter<'a> {
warnings,
insertions,
missing_genes,
is_reverse_complement,
..
} = &nextclade_outputs;

let seq_name = if *is_reverse_complement {
format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
} else {
seq_name.clone()
};

if let Some(fasta_writer) = &mut self.fasta_writer {
fasta_writer.write(&seq_name, &from_nuc_seq(&qry_seq_stripped))?;
}
Expand Down
4 changes: 3 additions & 1 deletion packages_rs/nextclade/src/align/align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ pub fn align_nuc(
let mut qry_seq = qry_seq.to_owned();
reverse_complement_in_place(&mut qry_seq);
let stripes = seed_alignment(&qry_seq, ref_seq, params).map_err(|_| report)?;
Ok(align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes))
let mut result = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
result.is_reverse_complement = true;
Ok(result)
} else {
Err(report)
}
Expand Down
3 changes: 3 additions & 0 deletions packages_rs/nextclade/src/align/backtrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub struct AlignmentOutput<T> {
pub qry_seq: Vec<T>,
pub ref_seq: Vec<T>,
pub alignment_score: i32,
pub is_reverse_complement: bool,
}

pub fn backtrace<T: Letter<T>>(
Expand Down Expand Up @@ -89,6 +90,7 @@ pub fn backtrace<T: Letter<T>>(
qry_seq: aln_qry,
ref_seq: aln_ref,
alignment_score: scores[(num_rows - 1, num_cols - 1)],
is_reverse_complement: false,
}
}

Expand Down Expand Up @@ -156,6 +158,7 @@ mod tests {
qry_seq: to_nuc_seq("---CTCGCT")?,
ref_seq: to_nuc_seq("ACGCTCGCT")?,
alignment_score: 18,
is_reverse_complement: false
};

let output = backtrace(&qry_seq, &ref_seq, &scores, &paths);
Expand Down
1 change: 1 addition & 0 deletions packages_rs/nextclade/src/constants.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/// Suffix appended to a sequence name when the query sequence was aligned
/// as its reverse complement (i.e. when `is_reverse_complement` is set on
/// the alignment result).
///
/// The literal includes its own leading space and `|` separator, so callers
/// concatenate it directly onto the name without adding any delimiter.
pub const REVERSE_COMPLEMENT_SUFFIX: &str = " |(reverse complement)";
1 change: 1 addition & 0 deletions packages_rs/nextclade/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod align;
pub mod analyze;
pub mod constants;
pub mod gene;
pub mod io;
pub mod qc;
Expand Down
3 changes: 3 additions & 0 deletions packages_rs/nextclade/src/run/nextalign_run_one.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,15 @@ pub fn nextalign_run_one(
.cloned()
.collect_vec();

let is_reverse_complement = alignment.is_reverse_complement;

Ok(NextalignOutputs {
stripped,
alignment,
translations,
warnings,
missing_genes,
is_reverse_complement,
})
}
}
Expand Down
11 changes: 10 additions & 1 deletion packages_rs/nextclade/src/run/nextclade_run_one.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::analyze::nuc_changes::{find_nuc_changes, FindNucChangesOutput};
use crate::analyze::pcr_primer_changes::get_pcr_primer_changes;
use crate::analyze::pcr_primers::PcrPrimer;
use crate::analyze::virus_properties::VirusProperties;
use crate::constants::REVERSE_COMPLEMENT_SUFFIX;
use crate::io::aa::Aa;
use crate::io::gene_map::GeneMap;
use crate::io::letter::Letter;
Expand Down Expand Up @@ -46,6 +47,7 @@ pub fn nextclade_run_one(
translations,
warnings,
missing_genes,
is_reverse_complement,
} = nextalign_run_one(
qry_seq,
ref_seq,
Expand Down Expand Up @@ -154,11 +156,17 @@ pub fn nextclade_run_one(
qc_config,
);

let seq_name = if is_reverse_complement {
format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
} else {
seq_name.to_owned()
};

Ok((
stripped.qry_seq,
translations,
NextcladeOutputs {
seq_name: seq_name.to_owned(),
seq_name,
substitutions,
total_substitutions,
deletions,
Expand Down Expand Up @@ -195,6 +203,7 @@ pub fn nextclade_run_one(
qc,
custom_node_attributes: clade_node_attrs,
nearest_node_id,
is_reverse_complement,
},
))
}
2 changes: 2 additions & 0 deletions packages_rs/nextclade/src/types/outputs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub struct NextalignOutputs {
pub translations: Vec<Translation>,
pub warnings: Vec<PeptideWarning>,
pub missing_genes: Vec<String>,
pub is_reverse_complement: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
Expand Down Expand Up @@ -75,6 +76,7 @@ pub struct NextcladeOutputs {
pub qc: QcResult,
pub custom_node_attributes: BTreeMap<String, String>,
pub nearest_node_id: usize,
pub is_reverse_complement: bool,
}

impl NextcladeOutputs {
Expand Down

0 comments on commit 879f780

Please sign in to comment.