Skip to content

Commit

Permalink
feat: implement alignment param presets
Browse files Browse the repository at this point in the history
Define some hard-coded presets of alignment parameters in addition to the default one.

The `--alignment-preset` arg allows users to toggle between presets.

Example:

```bash
nextclade run --dataset-name=sars-cov-2 -O tmp/sars-cov-2 sequences.fasta --alignment-preset=high-diversity --verbosity=info
```
  • Loading branch information
ivan-aksamentov committed Feb 21, 2025
1 parent c9c28ad commit f501968
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 2 deletions.
31 changes: 31 additions & 0 deletions packages/nextclade/src/align/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ impl Default for GapAlignmentSide {
#[derive(Parser, Debug, Clone, Eq, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct AlignPairwiseParams {
/// Alignment param preset
#[clap(long, default_value = ALIGNMENT_PRESET_DEFAULT)]
pub alignment_preset: String,

/// Minimum length of nucleotide sequence to consider for alignment.
///
/// If a sequence is shorter than that, alignment will not be attempted and a warning will be emitted. When adjusting this parameter, note that alignment of short sequences can be unreliable.
Expand Down Expand Up @@ -150,9 +154,13 @@ pub struct AlignPairwiseParams {
pub seed_spacing: Option<AnyType>,
}

pub const ALIGNMENT_PRESET_DEFAULT: &str = "default";

impl Default for AlignPairwiseParams {
fn default() -> Self {
Self {
alignment_preset: ALIGNMENT_PRESET_DEFAULT.to_owned(),

min_length: 100,
penalty_gap_extend: 0,
penalty_gap_open: 6,
Expand Down Expand Up @@ -188,6 +196,29 @@ impl Default for AlignPairwiseParams {
}

impl AlignPairwiseParams {
/// Builds a full set of alignment parameters from a named preset.
///
/// Every non-default preset starts from [`AlignPairwiseParams::default()`] and overrides only the
/// fields listed in its match arm. The `alignment_preset` field of the result holds the name of
/// the preset that was requested.
///
/// Recognized preset names:
/// - [`ALIGNMENT_PRESET_DEFAULT`] (`"default"`)
/// - `"high-diversity"`
/// - `"short-sequences"`
///
/// # Errors
///
/// Returns an error if `preset_name` is not one of the recognized names.
pub fn from_preset(preset_name: impl AsRef<str>) -> Result<AlignPairwiseParams, Report> {
  let preset_name = preset_name.as_ref();
  match preset_name {
    // Matched against the shared constant (a constant pattern, not a binding), so that the CLI
    // default, `Default::default()` and this lookup cannot drift apart.
    ALIGNMENT_PRESET_DEFAULT => Ok(AlignPairwiseParams::default()),
    "high-diversity" => Ok(AlignPairwiseParams {
      // Reuse the matched name instead of repeating the literal.
      alignment_preset: preset_name.to_owned(),
      penalty_gap_extend: 0,
      penalty_gap_open: 999999, // FIXME: dummy values
      penalty_gap_open_in_frame: 88888, // FIXME: dummy values
      penalty_gap_open_out_of_frame: 77777, // FIXME: dummy values
      penalty_mismatch: 6666, // FIXME: dummy values
      score_match: 33333, // FIXME: dummy values
      ..AlignPairwiseParams::default()
    }),
    "short-sequences" => Ok(AlignPairwiseParams {
      // Reuse the matched name instead of repeating the literal.
      alignment_preset: preset_name.to_owned(),
      min_length: 0, // FIXME: dummy values
      ..AlignPairwiseParams::default()
    }),
    _ => make_error!("Alignment params preset not found: {preset_name}"),
  }
}

pub fn validate(&self) -> Result<(), Report> {
#[rustfmt::skip]
let deprecated = BTreeMap::from([
Expand Down
11 changes: 9 additions & 2 deletions packages/nextclade/src/run/params.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::align::params::{AlignPairwiseParams, AlignPairwiseParamsOptional};
use crate::align::params::{AlignPairwiseParams, AlignPairwiseParamsOptional, ALIGNMENT_PRESET_DEFAULT};
use crate::analyze::aa_changes_find_for_cds::{AaChangesParams, AaChangesParamsOptional};
use crate::analyze::virus_properties::VirusProperties;
use crate::run::params_general::{NextcladeGeneralParams, NextcladeGeneralParamsOptional};
Expand Down Expand Up @@ -52,9 +52,16 @@ impl NextcladeInputParams {
general_params
};

let preset_name: String = params
.alignment
.as_ref()
.and_then(|a| a.alignment_preset.as_ref())
.cloned()
.unwrap_or_else(|| ALIGNMENT_PRESET_DEFAULT.to_owned());

let alignment = {
// Start with defaults
let mut alignment_params = AlignPairwiseParams::default();
let mut alignment_params = AlignPairwiseParams::from_preset(preset_name)?;
// Merge params coming from virus_properties
if let Some(alignment_params_from_file) = &virus_properties.alignment_params {
alignment_params.merge_opt(alignment_params_from_file.clone());
Expand Down

0 comments on commit f501968

Please sign in to comment.