Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow contig clustering #45

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/cluster_argument_parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ impl ClusterDistanceFinder for Clusterer {
Clusterer::Skani(s) => s.calculate_ani(fasta1, fasta2),
}
}

/// Forwards the `calculate_ani_contigs` call to whichever backend this
/// `Clusterer` variant wraps and returns that backend's result unchanged.
fn calculate_ani_contigs(&self, fasta1: &str) -> Option<f32> {
    match self {
        Clusterer::Fastani(fastani) => fastani.calculate_ani_contigs(fasta1),
        Clusterer::Skani(skani) => skani.calculate_ani_contigs(fasta1),
    }
}
}

pub struct GalahClusterer<'a> {
Expand All @@ -95,6 +102,7 @@ pub struct GalahClustererCommandDefinition {
pub dereplication_cluster_method_argument: String,
pub dereplication_aligned_fraction_argument: String,
pub dereplication_fraglen_argument: String,
pub dereplication_cluster_contigs_argument: String,
// pub dereplication_ani_method_argument: String,
pub dereplication_output_cluster_definition_file: String,
pub dereplication_output_representative_fasta_directory: String,
Expand All @@ -112,6 +120,7 @@ lazy_static! {
dereplication_cluster_method_argument: "cluster-method".to_string(),
dereplication_aligned_fraction_argument: "min-aligned-fraction".to_string(),
dereplication_fraglen_argument: "fragment-length".to_string(),
dereplication_cluster_contigs_argument: "cluster-contigs".to_string(),
// dereplication_ani_method_argument: "ani-method".to_string(),
dereplication_output_cluster_definition_file: "output-cluster-definition".to_string(),
dereplication_output_representative_fasta_directory:
Expand Down Expand Up @@ -313,6 +322,14 @@ pub fn add_dereplication_clustering_parameters_to_section(
default_roff(crate::DEFAULT_CLUSTER_METHOD)
)),
)
.flag(
Flag::new()
.long(&format!(
"--{}",
definition.dereplication_cluster_contigs_argument
))
.help("Cluster contigs instead of genomes."),
)
}

pub fn add_dereplication_output_parameters_to_section(
Expand Down Expand Up @@ -1152,6 +1169,9 @@ pub fn generate_galah_clusterer<'a>(
}),
_ => panic!("Programming error"),
},
cluster_contigs = clap_matches
.get_one::<bool>(&argument_definition.dereplication_cluster_contigs_argument)
.unwrap()
})
}
}
Expand Down Expand Up @@ -1187,6 +1207,7 @@ impl GalahClusterer<'_> {
&self.genome_fasta_paths,
&self.preclusterer,
&self.clusterer,
&self.cluster_contigs,
)
}
}
Expand Down Expand Up @@ -1324,6 +1345,9 @@ pub fn add_cluster_subcommand(app: clap::Command) -> clap::Command {
.help("method of calculating ANI. 'fastani' for FastANI, 'skani' for Skani")
.value_parser(crate::CLUSTER_METHODS)
.default_value(crate::DEFAULT_CLUSTER_METHOD))
.arg(Arg::new(&*GALAH_COMMAND_DEFINITION.dereplication_cluster_contigs_argument)
.long("cluster-contigs")
.help("Cluster contigs instead of genomes"))
.arg(Arg::new("threads")
.short('t')
.long("threads")
Expand Down
1 change: 1 addition & 0 deletions src/clusterer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub fn cluster<P: PreclusterDistanceFinder, C: ClusterDistanceFinder + std::mark
genomes: &[&str],
preclusterer: &P,
clusterer: &C,
cluster_contigs: bool,
) -> Vec<Vec<usize>> {
clusterer.initialise();

Expand Down
5 changes: 5 additions & 0 deletions src/fastani.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ impl ClusterDistanceFinder for FastaniClusterer {
/// Delegates to `calculate_fastani`, passing the two `fasta` arguments together
/// with this clusterer's `min_aligned_threshold` and `fraglen` settings.
fn calculate_ani(&self, fasta1: &str, fasta2: &str) -> Option<f32> {
    let min_aligned = self.min_aligned_threshold;
    let fragment_length = self.fraglen;
    calculate_fastani(fasta1, fasta2, min_aligned, fragment_length)
}

/// Always returns `None`: this backend does not provide a contig-level ANI.
///
/// The argument is accepted only to satisfy the trait signature and is never
/// read, hence the leading underscore (avoids the `unused_variables` warning).
fn calculate_ani_contigs(&self, _fasta1: &str) -> Option<f32> {
    // FastANI doesn't support self-self comparisons, so we can't use it for contig comparisons
    None
}
}

pub fn calculate_fastani(
Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub trait ClusterDistanceFinder {
fn get_ani_threshold(&self) -> f32;

fn calculate_ani(&self, fasta1: &str, fasta2: &str) -> Option<f32>;

fn calculate_ani_contigs(&self, fasta1: &str) -> Option<f32>;
}

pub const DEFAULT_ALIGNED_FRACTION: &str = "15";
Expand Down
24 changes: 20 additions & 4 deletions src/skani.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,13 @@ impl ClusterDistanceFinder for SkaniClusterer {
/// Runs `calculate_skani` on the two `fasta` arguments with this clusterer's
/// `min_aligned_threshold` and wraps the result in `Some`; never returns `None`.
fn calculate_ani(&self, fasta1: &str, fasta2: &str) -> Option<f32> {
    let ani = calculate_skani(fasta1, fasta2, self.min_aligned_threshold);
    Some(ani)
}

/// Runs `calculate_skani_contigs` on `fasta1` with this clusterer's
/// `min_aligned_threshold` and wraps the result in `Some`; never returns `None`.
fn calculate_ani_contigs(&self, fasta1: &str) -> Option<f32> {
    let ani = calculate_skani_contigs(fasta1, self.min_aligned_threshold);
    Some(ani)
}
}

fn default_params(mode: Mode, min_aligned_frac: f32) -> (CommandParams, SketchParams) {
fn default_params(mode: Mode, min_aligned_frac: f32, cluster_contigs: bool) -> (CommandParams, SketchParams) {
let cmd_params = CommandParams {
screen: true,
screen_val: 0.00,
Expand All @@ -143,8 +147,8 @@ fn default_params(mode: Mode, min_aligned_frac: f32) -> (CommandParams, SketchPa
sparse: false,
full_matrix: false,
max_results: 10000000, // for Triange usize::MAX,
individual_contig_q: false,
individual_contig_r: false,
individual_contig_q: cluster_contigs,
individual_contig_r: cluster_contigs,
min_aligned_frac: min_aligned_frac as f64,
keep_refs: false,
est_ci: false,
Expand All @@ -167,11 +171,23 @@ pub fn calculate_skani(fasta1: &str, fasta2: &str, min_aligned_frac: f32) -> f32
let refs = vec![fasta1.to_string()];
let queries = vec![fasta2.to_string()];

let (command_params, sketch_params) = default_params(Mode::Dist, min_aligned_frac);
let (command_params, sketch_params) = default_params(Mode::Dist, min_aligned_frac, false);
let ref_sketch = &file_io::fastx_to_sketches(&refs, &sketch_params, true)[0];
let query_sketch = &file_io::fastx_to_sketches(&queries, &sketch_params, true)[0];
let map_params = chain::map_params_from_sketch(ref_sketch, false, &command_params);
let ani_result = chain::chain_seeds(ref_sketch, query_sketch, map_params);

ani_result.ani * 100.0
}

/// Sketches `fasta1` with the contig-clustering parameter set and chains the
/// resulting sketch against itself, returning the reported ANI scaled by 100.
///
/// * `fasta1` - the single input handed to `file_io::fastx_to_sketches`.
/// * `min_aligned_frac` - forwarded to `default_params`.
///
/// # Panics
///
/// Panics if `file_io::fastx_to_sketches` returns no sketches, because the
/// first element is indexed directly.
pub fn calculate_skani_contigs(fasta1: &str, min_aligned_frac: f32) -> f32 {
    // The sketching API takes a vector of owned strings, so wrap the single input.
    let refs = vec![fasta1.to_string()];

    // The trailing `true` is the `cluster_contigs` switch of `default_params`.
    let (command_params, sketch_params) = default_params(Mode::Dist, min_aligned_frac, true);
    // NOTE(review): only the first returned sketch is used; whether the
    // per-contig settings yield one sketch per contig cannot be told from here — confirm.
    let ref_sketch = &file_io::fastx_to_sketches(&refs, &sketch_params, true)[0];
    let map_params = chain::map_params_from_sketch(ref_sketch, false, &command_params);
    // `chain_seeds` takes a reference sketch, a query sketch and the map
    // parameters (see its use in `calculate_skani`). There is only one input
    // here, so the same sketch serves as both reference and query.
    let ani_result = chain::chain_seeds(ref_sketch, ref_sketch, map_params);

    ani_result.ani * 100.0
}
Loading
Loading