From 879f78051ef858c3edf691b86199d916bbfb22b0 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov <ivan.aksamentov@gmail.com> Date: Fri, 24 Jun 2022 21:08:18 +0200 Subject: [PATCH] feat: append suffix to sequence if reverse complemented --- .../src/cli/nextalign_ordered_writer.rs | 16 ++++++++++++---- .../src/cli/nextclade_ordered_writer.rs | 8 ++++++++ packages_rs/nextclade/src/align/align.rs | 4 +++- packages_rs/nextclade/src/align/backtrace.rs | 3 +++ packages_rs/nextclade/src/constants.rs | 1 + packages_rs/nextclade/src/lib.rs | 1 + .../nextclade/src/run/nextalign_run_one.rs | 3 +++ .../nextclade/src/run/nextclade_run_one.rs | 11 ++++++++++- packages_rs/nextclade/src/types/outputs.rs | 2 ++ 9 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 packages_rs/nextclade/src/constants.rs diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs index 49f4ff6e4..3052600eb 100644 --- a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs +++ b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs @@ -1,6 +1,7 @@ use crate::cli::nextalign_loop::NextalignRecord; use eyre::{Report, WrapErr}; use log::warn; +use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX; use nextclade::io::errors_csv::ErrorsCsvWriter; use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter}; use nextclade::io::gene_map::GeneMap; @@ -87,24 +88,31 @@ impl<'a> NextalignOrderedWriter<'a> { translations, warnings, missing_genes, + is_reverse_complement, } = output; + let seq_name = if *is_reverse_complement { + format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}") + } else { + seq_name.clone() + }; + if let Some(fasta_writer) = &mut self.fasta_writer { - fasta_writer.write(seq_name, &from_nuc_seq(&stripped.qry_seq))?; + fasta_writer.write(&seq_name, &from_nuc_seq(&stripped.qry_seq))?; } if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer { for translation in translations { - fasta_peptide_writer.write(seq_name, translation)?; + fasta_peptide_writer.write(&seq_name, translation)?; } } if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer { - insertions_csv_writer.write(seq_name, &stripped.insertions, translations)?; + insertions_csv_writer.write(&seq_name, &stripped.insertions, translations)?; } if let Some(errors_csv_writer) = &mut self.errors_csv_writer { - errors_csv_writer.write_aa_errors(seq_name, warnings, missing_genes)?; + errors_csv_writer.write_aa_errors(&seq_name, warnings, missing_genes)?; } } Err(report) => { diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs index 934294d49..3b5a064d4 100644 --- a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs +++ b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs @@ -2,6 +2,7 @@ use crate::cli::nextclade_loop::NextcladeRecord; use eyre::{Report, WrapErr}; use itertools::Itertools; use log::warn; +use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX; use nextclade::io::errors_csv::ErrorsCsvWriter; use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter}; use nextclade::io::gene_map::GeneMap; @@ -118,9 +119,16 @@ impl<'a> NextcladeOrderedWriter<'a> { warnings, insertions, missing_genes, + is_reverse_complement, .. } = &nextclade_outputs; + let seq_name = if *is_reverse_complement { + format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}") + } else { + seq_name.clone() + }; + if let Some(fasta_writer) = &mut self.fasta_writer { fasta_writer.write(&seq_name, &from_nuc_seq(&qry_seq_stripped))?; } diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs index 3d17fc6e1..34cae2c93 100644 --- a/packages_rs/nextclade/src/align/align.rs +++ b/packages_rs/nextclade/src/align/align.rs @@ -52,7 +52,9 @@ pub fn align_nuc( let mut qry_seq = qry_seq.to_owned(); reverse_complement_in_place(&mut qry_seq); let stripes = seed_alignment(&qry_seq, ref_seq, params).map_err(|_| report)?; - Ok(align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes)) + let mut result = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes); + result.is_reverse_complement = true; + Ok(result) } else { Err(report) } diff --git a/packages_rs/nextclade/src/align/backtrace.rs b/packages_rs/nextclade/src/align/backtrace.rs index ae60a0962..fb53c9ef2 100644 --- a/packages_rs/nextclade/src/align/backtrace.rs +++ b/packages_rs/nextclade/src/align/backtrace.rs @@ -14,6 +14,7 @@ pub struct AlignmentOutput<T> { pub qry_seq: Vec<T>, pub ref_seq: Vec<T>, pub alignment_score: i32, + pub is_reverse_complement: bool, } pub fn backtrace<T: Letter<T>>( @@ -89,6 +90,7 @@ pub fn backtrace<T: Letter<T>>( qry_seq: aln_qry, ref_seq: aln_ref, alignment_score: scores[(num_rows - 1, num_cols - 1)], + is_reverse_complement: false, } } @@ -156,6 +158,7 @@ mod tests { qry_seq: to_nuc_seq("---CTCGCT")?, ref_seq: to_nuc_seq("ACGCTCGCT")?, alignment_score: 18, + is_reverse_complement: false }; let output = backtrace(&qry_seq, &ref_seq, &scores, &paths); diff --git a/packages_rs/nextclade/src/constants.rs b/packages_rs/nextclade/src/constants.rs new file mode 100644 index 000000000..b102b539f --- /dev/null +++ b/packages_rs/nextclade/src/constants.rs @@ -0,0 +1 @@ +pub const REVERSE_COMPLEMENT_SUFFIX: &str = " |(reverse complement)"; diff --git a/packages_rs/nextclade/src/lib.rs b/packages_rs/nextclade/src/lib.rs index 058de692f..744d3c773 100644 --- a/packages_rs/nextclade/src/lib.rs +++ b/packages_rs/nextclade/src/lib.rs @@ -1,5 +1,6 @@ pub mod align; pub mod analyze; +pub mod constants; pub mod gene; pub mod io; pub mod qc; diff --git a/packages_rs/nextclade/src/run/nextalign_run_one.rs b/packages_rs/nextclade/src/run/nextalign_run_one.rs index cecf66492..ebb2ff33f 100644 --- a/packages_rs/nextclade/src/run/nextalign_run_one.rs +++ b/packages_rs/nextclade/src/run/nextalign_run_one.rs @@ -51,12 +51,15 @@ pub fn nextalign_run_one( .cloned() .collect_vec(); + let is_reverse_complement = alignment.is_reverse_complement; + Ok(NextalignOutputs { stripped, alignment, translations, warnings, missing_genes, + is_reverse_complement, }) } } diff --git a/packages_rs/nextclade/src/run/nextclade_run_one.rs b/packages_rs/nextclade/src/run/nextclade_run_one.rs index 518b55441..6ed919cfd 100644 --- a/packages_rs/nextclade/src/run/nextclade_run_one.rs +++ b/packages_rs/nextclade/src/run/nextclade_run_one.rs @@ -12,6 +12,7 @@ use crate::analyze::nuc_changes::{find_nuc_changes, FindNucChangesOutput}; use crate::analyze::pcr_primer_changes::get_pcr_primer_changes; use crate::analyze::pcr_primers::PcrPrimer; use crate::analyze::virus_properties::VirusProperties; +use crate::constants::REVERSE_COMPLEMENT_SUFFIX; use crate::io::aa::Aa; use crate::io::gene_map::GeneMap; use crate::io::letter::Letter; @@ -46,6 +47,7 @@ pub fn nextclade_run_one( translations, warnings, missing_genes, + is_reverse_complement, } = nextalign_run_one( qry_seq, ref_seq, @@ -154,11 +156,17 @@ pub fn nextclade_run_one( qc_config, ); + let seq_name = if is_reverse_complement { + format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}") + } else { + seq_name.to_owned() + }; + Ok(( stripped.qry_seq, translations, NextcladeOutputs { - seq_name: seq_name.to_owned(), + seq_name, substitutions, total_substitutions, deletions, @@ -195,6 +203,7 @@ pub fn nextclade_run_one( qc, custom_node_attributes: clade_node_attrs, nearest_node_id, + is_reverse_complement, }, )) } diff --git a/packages_rs/nextclade/src/types/outputs.rs b/packages_rs/nextclade/src/types/outputs.rs index 8b34a6067..2953698c2 100644 --- a/packages_rs/nextclade/src/types/outputs.rs +++ b/packages_rs/nextclade/src/types/outputs.rs @@ -31,6 +31,7 @@ pub struct NextalignOutputs { pub translations: Vec<Translation>, pub warnings: Vec<PeptideWarning>, pub missing_genes: Vec<String>, + pub is_reverse_complement: bool, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -75,6 +76,7 @@ pub struct NextcladeOutputs { pub qc: QcResult, pub custom_node_attributes: BTreeMap<String, String>, pub nearest_node_id: usize, + pub is_reverse_complement: bool, } impl NextcladeOutputs {