From 879f78051ef858c3edf691b86199d916bbfb22b0 Mon Sep 17 00:00:00 2001
From: ivan-aksamentov <ivan.aksamentov@gmail.com>
Date: Fri, 24 Jun 2022 21:08:18 +0200
Subject: [PATCH] feat: append suffix to sequence name if reverse complemented

---
 .../src/cli/nextalign_ordered_writer.rs          | 16 ++++++++++++----
 .../src/cli/nextclade_ordered_writer.rs          |  8 ++++++++
 packages_rs/nextclade/src/align/align.rs         |  4 +++-
 packages_rs/nextclade/src/align/backtrace.rs     |  3 +++
 packages_rs/nextclade/src/constants.rs           |  1 +
 packages_rs/nextclade/src/lib.rs                 |  1 +
 .../nextclade/src/run/nextalign_run_one.rs       |  3 +++
 .../nextclade/src/run/nextclade_run_one.rs       | 11 ++++++++++-
 packages_rs/nextclade/src/types/outputs.rs       |  2 ++
 9 files changed, 43 insertions(+), 6 deletions(-)
 create mode 100644 packages_rs/nextclade/src/constants.rs

diff --git a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
index 49f4ff6e4..3052600eb 100644
--- a/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextalign_ordered_writer.rs
@@ -1,6 +1,7 @@
 use crate::cli::nextalign_loop::NextalignRecord;
 use eyre::{Report, WrapErr};
 use log::warn;
+use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX;
 use nextclade::io::errors_csv::ErrorsCsvWriter;
 use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
 use nextclade::io::gene_map::GeneMap;
@@ -87,24 +88,31 @@ impl<'a> NextalignOrderedWriter<'a> {
           translations,
           warnings,
           missing_genes,
+          is_reverse_complement,
         } = output;
 
+        let seq_name = if *is_reverse_complement {
+          format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
+        } else {
+          seq_name.clone()
+        };
+
         if let Some(fasta_writer) = &mut self.fasta_writer {
-          fasta_writer.write(seq_name, &from_nuc_seq(&stripped.qry_seq))?;
+          fasta_writer.write(&seq_name, &from_nuc_seq(&stripped.qry_seq))?;
         }
 
         if let Some(fasta_peptide_writer) = &mut self.fasta_peptide_writer {
           for translation in translations {
-            fasta_peptide_writer.write(seq_name, translation)?;
+            fasta_peptide_writer.write(&seq_name, translation)?;
           }
         }
 
         if let Some(insertions_csv_writer) = &mut self.insertions_csv_writer {
-          insertions_csv_writer.write(seq_name, &stripped.insertions, translations)?;
+          insertions_csv_writer.write(&seq_name, &stripped.insertions, translations)?;
         }
 
         if let Some(errors_csv_writer) = &mut self.errors_csv_writer {
-          errors_csv_writer.write_aa_errors(seq_name, warnings, missing_genes)?;
+          errors_csv_writer.write_aa_errors(&seq_name, warnings, missing_genes)?;
         }
       }
       Err(report) => {
diff --git a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
index 934294d49..3b5a064d4 100644
--- a/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
+++ b/packages_rs/nextclade-cli/src/cli/nextclade_ordered_writer.rs
@@ -2,6 +2,7 @@ use crate::cli::nextclade_loop::NextcladeRecord;
 use eyre::{Report, WrapErr};
 use itertools::Itertools;
 use log::warn;
+use nextclade::constants::REVERSE_COMPLEMENT_SUFFIX;
 use nextclade::io::errors_csv::ErrorsCsvWriter;
 use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
 use nextclade::io::gene_map::GeneMap;
@@ -118,9 +119,16 @@ impl<'a> NextcladeOrderedWriter<'a> {
           warnings,
           insertions,
           missing_genes,
+          is_reverse_complement,
           ..
         } = &nextclade_outputs;
 
+        let seq_name = if *is_reverse_complement {
+          format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
+        } else {
+          seq_name.clone()
+        };
+
         if let Some(fasta_writer) = &mut self.fasta_writer {
           fasta_writer.write(&seq_name, &from_nuc_seq(&qry_seq_stripped))?;
         }
diff --git a/packages_rs/nextclade/src/align/align.rs b/packages_rs/nextclade/src/align/align.rs
index 3d17fc6e1..34cae2c93 100644
--- a/packages_rs/nextclade/src/align/align.rs
+++ b/packages_rs/nextclade/src/align/align.rs
@@ -52,7 +52,9 @@ pub fn align_nuc(
         let mut qry_seq = qry_seq.to_owned();
         reverse_complement_in_place(&mut qry_seq);
         let stripes = seed_alignment(&qry_seq, ref_seq, params).map_err(|_| report)?;
-        Ok(align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes))
+        let mut result = align_pairwise(&qry_seq, ref_seq, gap_open_close, params, &stripes);
+        result.is_reverse_complement = true;
+        Ok(result)
       } else {
         Err(report)
       }
diff --git a/packages_rs/nextclade/src/align/backtrace.rs b/packages_rs/nextclade/src/align/backtrace.rs
index ae60a0962..fb53c9ef2 100644
--- a/packages_rs/nextclade/src/align/backtrace.rs
+++ b/packages_rs/nextclade/src/align/backtrace.rs
@@ -14,6 +14,7 @@ pub struct AlignmentOutput<T> {
   pub qry_seq: Vec<T>,
   pub ref_seq: Vec<T>,
   pub alignment_score: i32,
+  pub is_reverse_complement: bool,
 }
 
 pub fn backtrace<T: Letter<T>>(
@@ -89,6 +90,7 @@ pub fn backtrace<T: Letter<T>>(
     qry_seq: aln_qry,
     ref_seq: aln_ref,
     alignment_score: scores[(num_rows - 1, num_cols - 1)],
+    is_reverse_complement: false,
   }
 }
 
@@ -156,6 +158,7 @@ mod tests {
       qry_seq: to_nuc_seq("---CTCGCT")?,
       ref_seq: to_nuc_seq("ACGCTCGCT")?,
       alignment_score: 18,
+      is_reverse_complement: false
     };
 
     let output = backtrace(&qry_seq, &ref_seq, &scores, &paths);
diff --git a/packages_rs/nextclade/src/constants.rs b/packages_rs/nextclade/src/constants.rs
new file mode 100644
index 000000000..b102b539f
--- /dev/null
+++ b/packages_rs/nextclade/src/constants.rs
@@ -0,0 +1 @@
+pub const REVERSE_COMPLEMENT_SUFFIX: &str = " |(reverse complement)";
diff --git a/packages_rs/nextclade/src/lib.rs b/packages_rs/nextclade/src/lib.rs
index 058de692f..744d3c773 100644
--- a/packages_rs/nextclade/src/lib.rs
+++ b/packages_rs/nextclade/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod align;
 pub mod analyze;
+pub mod constants;
 pub mod gene;
 pub mod io;
 pub mod qc;
diff --git a/packages_rs/nextclade/src/run/nextalign_run_one.rs b/packages_rs/nextclade/src/run/nextalign_run_one.rs
index cecf66492..ebb2ff33f 100644
--- a/packages_rs/nextclade/src/run/nextalign_run_one.rs
+++ b/packages_rs/nextclade/src/run/nextalign_run_one.rs
@@ -51,12 +51,15 @@ pub fn nextalign_run_one(
         .cloned()
         .collect_vec();
 
+      let is_reverse_complement = alignment.is_reverse_complement;
+
       Ok(NextalignOutputs {
         stripped,
         alignment,
         translations,
         warnings,
         missing_genes,
+        is_reverse_complement,
       })
     }
   }
diff --git a/packages_rs/nextclade/src/run/nextclade_run_one.rs b/packages_rs/nextclade/src/run/nextclade_run_one.rs
index 518b55441..6ed919cfd 100644
--- a/packages_rs/nextclade/src/run/nextclade_run_one.rs
+++ b/packages_rs/nextclade/src/run/nextclade_run_one.rs
@@ -12,6 +12,7 @@ use crate::analyze::nuc_changes::{find_nuc_changes, FindNucChangesOutput};
 use crate::analyze::pcr_primer_changes::get_pcr_primer_changes;
 use crate::analyze::pcr_primers::PcrPrimer;
 use crate::analyze::virus_properties::VirusProperties;
+use crate::constants::REVERSE_COMPLEMENT_SUFFIX;
 use crate::io::aa::Aa;
 use crate::io::gene_map::GeneMap;
 use crate::io::letter::Letter;
@@ -46,6 +47,7 @@ pub fn nextclade_run_one(
     translations,
     warnings,
     missing_genes,
+    is_reverse_complement,
   } = nextalign_run_one(
     qry_seq,
     ref_seq,
@@ -154,11 +156,17 @@ pub fn nextclade_run_one(
     qc_config,
   );
 
+  let seq_name = if is_reverse_complement {
+    format!("{seq_name}{REVERSE_COMPLEMENT_SUFFIX}")
+  } else {
+    seq_name.to_owned()
+  };
+
   Ok((
     stripped.qry_seq,
     translations,
     NextcladeOutputs {
-      seq_name: seq_name.to_owned(),
+      seq_name,
       substitutions,
       total_substitutions,
       deletions,
@@ -195,6 +203,7 @@ pub fn nextclade_run_one(
       qc,
       custom_node_attributes: clade_node_attrs,
       nearest_node_id,
+      is_reverse_complement,
     },
   ))
 }
diff --git a/packages_rs/nextclade/src/types/outputs.rs b/packages_rs/nextclade/src/types/outputs.rs
index 8b34a6067..2953698c2 100644
--- a/packages_rs/nextclade/src/types/outputs.rs
+++ b/packages_rs/nextclade/src/types/outputs.rs
@@ -31,6 +31,7 @@ pub struct NextalignOutputs {
   pub translations: Vec<Translation>,
   pub warnings: Vec<PeptideWarning>,
   pub missing_genes: Vec<String>,
+  pub is_reverse_complement: bool,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -75,6 +76,7 @@ pub struct NextcladeOutputs {
   pub qc: QcResult,
   pub custom_node_attributes: BTreeMap<String, String>,
   pub nearest_node_id: usize,
+  pub is_reverse_complement: bool,
 }
 
 impl NextcladeOutputs {