From 04989d24c76d0882b13be773d4369e0e1b046ce3 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Sat, 2 Mar 2024 11:42:14 +1000 Subject: [PATCH 1/5] extract aviary assembly parameter logic to functions --- binchicken/workflow/coassemble.smk | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/binchicken/workflow/coassemble.smk b/binchicken/workflow/coassemble.smk index 9cfbf756..45213688 100644 --- a/binchicken/workflow/coassemble.smk +++ b/binchicken/workflow/coassemble.smk @@ -642,6 +642,25 @@ rule aviary_commands: ######################################### ### Run Aviary commands (alternative) ### ######################################### +def get_assemble_threads(wildcards, attempt): + # Attempt 1 with 32, 2 with 64, then 32 with Megahit + current_threads = 64 if attempt == 2 else 32 + threads = min(int(config["aviary_threads"]), current_threads) + + return threads + +def get_assemble_memory(wildcards, attempt, unit="GB"): + # Attempt 1 with 250GB, 2 with 500GB, then 250GB with Megahit + current_mem = 500 if attempt == 2 else 250 + mem = min(int(config["aviary_memory"]), current_mem) + mult = 1000 if unit == "MB" else 1 + + return mem * mult + +def get_assemble_assembler(wildcards, attempt): + # Attempt 1/2 with Metaspades, then Megahit + return "" if attempt < 3 else "--use-megahit" + rule aviary_assemble: input: output_dir + "/mapping/done" if config["assemble_unmapped"] else output_dir + "/qc/done" if config["run_qc"] else [], @@ -657,13 +676,12 @@ rule aviary_assemble: drytouch = "&& touch "+output_dir+"/coassemble/{coassembly}/assemble/assembly/final_contigs.fasta" if config["aviary_dryrun"] else "", conda_prefix = config["conda_prefix"] if config["conda_prefix"] else ".", tmpdir = config["tmpdir"], - threads: - threads = config["aviary_threads"] + threads: lambda wildcards, attempt: get_assemble_threads(wildcards, attempt) resources: - mem_mb = int(config["aviary_memory"])*1000, - mem_gb = int(config["aviary_memory"]), + mem_mb = lambda wildcards, attempt: get_assemble_memory(wildcards, attempt, unit="MB"), + mem_gb = get_assemble_memory, runtime = "96h", - assembler = lambda wildcards, attempt: "" if attempt == 1 else "--use-megahit", + assembler = get_assemble_assembler, log: logs_dir + "/aviary/{coassembly}_assemble.log" conda: From 5019da11fbcb58aeb815d02ac7a790745fc7f458 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Sat, 2 Mar 2024 11:59:42 +1000 Subject: [PATCH 2/5] increase runtime with retries --- binchicken/workflow/coassemble.smk | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/binchicken/workflow/coassemble.smk b/binchicken/workflow/coassemble.smk index 45213688..8fae1b78 100644 --- a/binchicken/workflow/coassemble.smk +++ b/binchicken/workflow/coassemble.smk @@ -119,7 +119,7 @@ rule singlem_pipe_reads: threads: 1 resources: mem_mb=get_mem_mb, - runtime = "24h", + runtime = lambda wildcards, attempt: 24*60*attempt, conda: "env/singlem.yml" shell: @@ -147,7 +147,7 @@ rule genome_transcripts: threads: 1 resources: mem_mb=get_mem_mb, - runtime = "1h", + runtime = lambda wildcards, attempt: 1*60*attempt, group: "singlem_bins" conda: "env/prodigal.yml" @@ -172,7 +172,7 @@ rule singlem_pipe_genomes: threads: 1 resources: mem_mb=get_mem_mb, - runtime = "1h", + runtime = lambda wildcards, attempt: 1*60*attempt, group: "singlem_bins" conda: "env/singlem.yml" @@ -225,7 +225,7 @@ rule singlem_appraise: threads: 1 resources: mem_mb=get_mem_mb, - runtime = "24h", + runtime = lambda wildcards, attempt: 24*60*attempt, conda: "env/singlem.yml" shell: @@ -276,7 +276,7 @@ rule update_appraise: threads: 1 resources: mem_mb=get_mem_mb, - runtime = "24h", + runtime = lambda wildcards, attempt: 24*60*attempt, conda: "env/singlem.yml" shell: @@ -313,7 +313,7 @@ rule query_processing: threads: 1 resources: mem_mb=get_mem_mb, - runtime = "24h", + runtime = lambda wildcards, attempt: 24*60*attempt, script: "scripts/query_processing.py" @@ -347,7 +347,7 @@ rule count_bp_reads: threads: 8 resources: mem_mb=get_mem_mb, - runtime = "24h", + runtime = lambda wildcards, attempt: 24*60*attempt, shell: "parallel -k -j {threads} " "echo -n {{1}}, '&&' " @@ -369,7 +369,7 @@ rule target_elusive: threads: 32 resources: mem_mb=get_mem_mb, - runtime = "24h", + runtime = lambda wildcards, attempt: 24*60*attempt, log: logs_dir + "/target/target_elusive.log" benchmark: @@ -393,7 +393,7 @@ checkpoint cluster_graph: threads: 64 resources: mem_mb=get_mem_mb, - runtime = "168h", + runtime = lambda wildcards, attempt: 48*60*attempt, log: logs_dir + "/target/cluster_graph.log" benchmark: @@ -413,7 +413,7 @@ rule download_read: threads: 4 resources: mem_mb=get_mem_mb, - runtime = "4h", + runtime = lambda wildcards, attempt: 4*60*attempt, downloading = 1, conda: "env/kingfisher.yml" @@ -476,7 +476,7 @@ rule qc_reads: threads: 16 resources: mem_mb=get_mem_mb, - runtime = "4h", + runtime = lambda wildcards, attempt: 4*60*attempt, log: logs_dir + "/mapping/{read}_qc.log" benchmark: @@ -522,7 +522,7 @@ rule map_reads: threads: 16 resources: mem_mb=get_mem_mb, - runtime = "12h", + runtime = lambda wildcards, attempt: 12*60*attempt, log: logs_dir + "/mapping/{read}_coverm.log", benchmark: @@ -552,7 +552,7 @@ rule filter_bam_files: threads: 16 resources: mem_mb=get_mem_mb, - runtime = "4h", + runtime = lambda wildcards, attempt: 4*60*attempt, log: logs_dir + "/mapping/{read}_filter.log", benchmark: @@ -579,7 +579,7 @@ rule bam_to_fastq: threads: 16 resources: mem_mb=get_mem_mb, - runtime = "4h", + runtime = lambda wildcards, attempt: 4*60*attempt, log: logs_dir + "/mapping/{read}_fastq.log", conda: @@ -680,7 +680,7 @@ rule aviary_assemble: resources: mem_mb = lambda wildcards, attempt: get_assemble_memory(wildcards, attempt, unit="MB"), mem_gb = get_assemble_memory, - runtime = "96h", + runtime = lambda wildcards, attempt: 96*60*attempt, assembler = get_assemble_assembler, log: logs_dir + "/aviary/{coassembly}_assemble.log" From 01d939a7694d7b012f351816d1534447b952007f Mon Sep 17 00:00:00 2001 From: AroneyS Date: Sat, 2 Mar 2024 12:01:58 +1000 Subject: [PATCH 3/5] add threads to collect_genomes --- binchicken/workflow/coassemble.smk | 1 + 1 file changed, 1 insertion(+) diff --git a/binchicken/workflow/coassemble.smk b/binchicken/workflow/coassemble.smk index 8fae1b78..2fabef30 100644 --- a/binchicken/workflow/coassemble.smk +++ b/binchicken/workflow/coassemble.smk @@ -503,6 +503,7 @@ rule collect_genomes: appraise_unbinned = output_dir + "/appraise/unbinned.otu_table.tsv", output: temp(output_dir + "/mapping/{read}_reference.fna"), + threads: 1 params: genomes = config["genomes"], sample = "{read}", From bee7e178dc0439a7184621b1e6e2d78cf9ef1744 Mon Sep 17 00:00:00 2001 From: AroneyS Date: Sat, 2 Mar 2024 12:03:06 +1000 Subject: [PATCH 4/5] clarify run-aviary interaction with coassemblies --- binchicken/binchicken.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binchicken/binchicken.py b/binchicken/binchicken.py index cf31700b..ef99fbaa 100755 --- a/binchicken/binchicken.py +++ b/binchicken/binchicken.py @@ -1070,7 +1070,7 @@ def add_aviary_options(argument_group): default_aviary_speed = FAST_AVIARY_MODE argument_group.add_argument("--aviary-speed", help="Run Aviary recover in 'fast' or 'comprehensive' mode. Fast mode skips slow binners and refinement steps. [default: %s]" % default_aviary_speed, default=default_aviary_speed, choices=[FAST_AVIARY_MODE, COMPREHENSIVE_AVIARY_MODE]) - argument_group.add_argument("--run-aviary", action="store_true", help="Run Aviary commands for all identified coassemblies (unless specified) [default: do not]") + argument_group.add_argument("--run-aviary", action="store_true", help="Run Aviary commands for all identified coassemblies (unless specific coassemblies are chosen with --coassemblies) [default: do not]") argument_group.add_argument("--aviary-gtdbtk-db", help="Path to GTDB-Tk database directory for Aviary. [default: use path from GTDBTK_DATA_PATH env variable]") argument_group.add_argument("--aviary-checkm2-db", help="Path to CheckM2 database directory for Aviary. [default: use path from CHECKM2DB env variable]") aviary_default_cores = 64 From 7fb74df1e4c213dbf6c014dfd95aa0aaa20cd1cb Mon Sep 17 00:00:00 2001 From: AroneyS Date: Sat, 2 Mar 2024 12:05:33 +1000 Subject: [PATCH 5/5] set cluster-retries default to 3 --- binchicken/binchicken.py | 2 +- docs/preludes/coassemble_prelude.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/binchicken/binchicken.py b/binchicken/binchicken.py index ef99fbaa..e3d186b4 100755 --- a/binchicken/binchicken.py +++ b/binchicken/binchicken.py @@ -1047,7 +1047,7 @@ def add_general_snakemake_options(argument_group, required_conda_prefix=False): help="Snakemake profile (see https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles).\n" "Can be used to submit rules as jobs to cluster engine (see https://snakemake.readthedocs.io/en/stable/executing/cluster.html).") argument_group.add_argument("--local-cores", type=int, help="Maximum number of cores to use on localrules when running in cluster mode", default=1) - argument_group.add_argument("--cluster-retries", help="Number of times to retry a failed job when using cluster submission (see `--snakemake-profile`).", default=0) + argument_group.add_argument("--cluster-retries", help="Number of times to retry a failed job when using cluster submission (see `--snakemake-profile`).", default=3) argument_group.add_argument("--snakemake-args", help="Additional commands to be supplied to snakemake in the form of a space-prefixed single string e.g. \" --quiet\"", default="") argument_group.add_argument("--tmp-dir", help="Path to temporary directory. [default: Use path from TMPDIR env variable]") diff --git a/docs/preludes/coassemble_prelude.md b/docs/preludes/coassemble_prelude.md index b858baea..f9ad8398 100644 --- a/docs/preludes/coassemble_prelude.md +++ b/docs/preludes/coassemble_prelude.md @@ -21,7 +21,7 @@ binchicken coassemble --forward reads_1.1.fq ... --reverse reads_1.2.fq ... --si # Create snakemake profile at ~/.config/snakemake/qsub with cluster, cluster-status, cluster-cancel, etc. # See https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles binchicken coassemble --forward reads_1.1.fq ... --reverse reads_1.2.fq ... --run-aviary \ - --snakemake-profile qsub --cluster-retries 3 --local-cores 64 --cores 64 + --snakemake-profile qsub --local-cores 64 --cores 64 ``` Important options: