Improve snakemake retry behaviour #105

Merged (5 commits, Mar 2, 2024)
4 changes: 2 additions & 2 deletions binchicken/binchicken.py
@@ -1047,7 +1047,7 @@ def add_general_snakemake_options(argument_group, required_conda_prefix=False):
help="Snakemake profile (see https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles).\n"
"Can be used to submit rules as jobs to cluster engine (see https://snakemake.readthedocs.io/en/stable/executing/cluster.html).")
argument_group.add_argument("--local-cores", type=int, help="Maximum number of cores to use on localrules when running in cluster mode", default=1)
argument_group.add_argument("--cluster-retries", help="Number of times to retry a failed job when using cluster submission (see `--snakemake-profile`).", default=0)
argument_group.add_argument("--cluster-retries", help="Number of times to retry a failed job when using cluster submission (see `--snakemake-profile`).", default=3)
argument_group.add_argument("--snakemake-args", help="Additional commands to be supplied to snakemake in the form of a space-prefixed single string e.g. \" --quiet\"", default="")
argument_group.add_argument("--tmp-dir", help="Path to temporary directory. [default: Use path from TMPDIR env variable]")

@@ -1070,7 +1070,7 @@ def add_aviary_options(argument_group):
default_aviary_speed = FAST_AVIARY_MODE
argument_group.add_argument("--aviary-speed", help="Run Aviary recover in 'fast' or 'comprehensive' mode. Fast mode skips slow binners and refinement steps. [default: %s]" % default_aviary_speed,
default=default_aviary_speed, choices=[FAST_AVIARY_MODE, COMPREHENSIVE_AVIARY_MODE])
argument_group.add_argument("--run-aviary", action="store_true", help="Run Aviary commands for all identified coassemblies (unless specified) [default: do not]")
argument_group.add_argument("--run-aviary", action="store_true", help="Run Aviary commands for all identified coassemblies (unless specific coassemblies are chosen with --coassemblies) [default: do not]")
argument_group.add_argument("--aviary-gtdbtk-db", help="Path to GTDB-Tk database directory for Aviary. [default: use path from GTDBTK_DATA_PATH env variable]")
argument_group.add_argument("--aviary-checkm2-db", help="Path to CheckM2 database directory for Aviary. [default: use path from CHECKM2DB env variable]")
aviary_default_cores = 64
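The new default of 3 means failed cluster jobs are now retried without the user having to pass `--cluster-retries` explicitly. As a rough illustration of how such an option is typically forwarded, the sketch below builds a Snakemake invocation that passes the retry count via Snakemake's `--retries` flag (`--restart-times` in older releases); `build_snakemake_command` is a hypothetical helper, not binchicken's actual command construction, which may differ.

```python
# Hypothetical sketch only: binchicken's real command assembly may differ.
def build_snakemake_command(args):
    cmd = [
        "snakemake",
        "--cores", str(args.cores),
        "--local-cores", str(args.local_cores),
    ]
    if args.snakemake_profile:
        cmd += ["--profile", args.snakemake_profile]
        # With the new default of 3, each failed cluster job is resubmitted up to
        # three times, which lets the attempt-scaled resources in coassemble.smk
        # (below) kick in on the retries.
        cmd += ["--retries", str(args.cluster_retries)]
    if args.snakemake_args:
        cmd += args.snakemake_args.split()
    return cmd
```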
59 changes: 39 additions & 20 deletions binchicken/workflow/coassemble.smk
@@ -119,7 +119,7 @@ rule singlem_pipe_reads:
threads: 1
resources:
mem_mb=get_mem_mb,
runtime = "24h",
runtime = lambda wildcards, attempt: 24*60*attempt,
conda:
"env/singlem.yml"
shell:
@@ -147,7 +147,7 @@ rule genome_transcripts:
threads: 1
resources:
mem_mb=get_mem_mb,
runtime = "1h",
runtime = lambda wildcards, attempt: 1*60*attempt,
group: "singlem_bins"
conda:
"env/prodigal.yml"
@@ -172,7 +172,7 @@ rule singlem_pipe_genomes:
threads: 1
resources:
mem_mb=get_mem_mb,
runtime = "1h",
runtime = lambda wildcards, attempt: 1*60*attempt,
group: "singlem_bins"
conda:
"env/singlem.yml"
@@ -225,7 +225,7 @@ rule singlem_appraise:
threads: 1
resources:
mem_mb=get_mem_mb,
runtime = "24h",
runtime = lambda wildcards, attempt: 24*60*attempt,
conda:
"env/singlem.yml"
shell:
@@ -276,7 +276,7 @@ rule update_appraise:
threads: 1
resources:
mem_mb=get_mem_mb,
runtime = "24h",
runtime = lambda wildcards, attempt: 24*60*attempt,
conda:
"env/singlem.yml"
shell:
@@ -313,7 +313,7 @@ rule query_processing:
threads: 1
resources:
mem_mb=get_mem_mb,
runtime = "24h",
runtime = lambda wildcards, attempt: 24*60*attempt,
script:
"scripts/query_processing.py"

@@ -347,7 +347,7 @@ rule count_bp_reads:
threads: 8
resources:
mem_mb=get_mem_mb,
runtime = "24h",
runtime = lambda wildcards, attempt: 24*60*attempt,
shell:
"parallel -k -j {threads} "
"echo -n {{1}}, '&&' "
@@ -369,7 +369,7 @@ rule target_elusive:
threads: 32
resources:
mem_mb=get_mem_mb,
runtime = "24h",
runtime = lambda wildcards, attempt: 24*60*attempt,
log:
logs_dir + "/target/target_elusive.log"
benchmark:
@@ -393,7 +393,7 @@ checkpoint cluster_graph:
threads: 64
resources:
mem_mb=get_mem_mb,
runtime = "168h",
runtime = lambda wildcards, attempt: 48*60*attempt,
log:
logs_dir + "/target/cluster_graph.log"
benchmark:
@@ -413,7 +413,7 @@ rule download_read:
threads: 4
resources:
mem_mb=get_mem_mb,
runtime = "4h",
runtime = lambda wildcards, attempt: 4*60*attempt,
downloading = 1,
conda:
"env/kingfisher.yml"
@@ -476,7 +476,7 @@ rule qc_reads:
threads: 16
resources:
mem_mb=get_mem_mb,
runtime = "4h",
runtime = lambda wildcards, attempt: 4*60*attempt,
log:
logs_dir + "/mapping/{read}_qc.log"
benchmark:
@@ -503,6 +503,7 @@ rule collect_genomes:
appraise_unbinned = output_dir + "/appraise/unbinned.otu_table.tsv",
output:
temp(output_dir + "/mapping/{read}_reference.fna"),
threads: 1
params:
genomes = config["genomes"],
sample = "{read}",
@@ -522,7 +523,7 @@ rule map_reads:
threads: 16
resources:
mem_mb=get_mem_mb,
runtime = "12h",
runtime = lambda wildcards, attempt: 12*60*attempt,
log:
logs_dir + "/mapping/{read}_coverm.log",
benchmark:
@@ -552,7 +553,7 @@ rule filter_bam_files:
threads: 16
resources:
mem_mb=get_mem_mb,
runtime = "4h",
runtime = lambda wildcards, attempt: 4*60*attempt,
log:
logs_dir + "/mapping/{read}_filter.log",
benchmark:
@@ -579,7 +580,7 @@ rule bam_to_fastq:
threads: 16
resources:
mem_mb=get_mem_mb,
runtime = "4h",
runtime = lambda wildcards, attempt: 4*60*attempt,
log:
logs_dir + "/mapping/{read}_fastq.log",
conda:
@@ -642,6 +643,25 @@ rule aviary_commands:
#########################################
### Run Aviary commands (alternative) ###
#########################################
def get_assemble_threads(wildcards, attempt):
# Attempt 1 with 32, 2 with 64, then 32 with Megahit
current_threads = 64 if attempt == 2 else 32
threads = min(int(config["aviary_threads"]), current_threads)

return threads

def get_assemble_memory(wildcards, attempt, unit="GB"):
# Attempt 1 with 250GB, 2 with 500GB, then 250GB with Megahit
current_mem = 500 if attempt == 2 else 250
mem = min(int(config["aviary_memory"]), current_mem)
mult = 1000 if unit == "MB" else 1

return mem * mult

def get_assemble_assembler(wildcards, attempt):
# Attempt 1/2 with Metaspades, then Megahit
return "" if attempt < 3 else "--use-megahit"

rule aviary_assemble:
input:
output_dir + "/mapping/done" if config["assemble_unmapped"] else output_dir + "/qc/done" if config["run_qc"] else [],
@@ -657,13 +677,12 @@ rule aviary_assemble:
drytouch = "&& touch "+output_dir+"/coassemble/{coassembly}/assemble/assembly/final_contigs.fasta" if config["aviary_dryrun"] else "",
conda_prefix = config["conda_prefix"] if config["conda_prefix"] else ".",
tmpdir = config["tmpdir"],
threads:
threads = config["aviary_threads"]
threads: lambda wildcards, attempt: get_assemble_threads(wildcards, attempt)
resources:
mem_mb = int(config["aviary_memory"])*1000,
mem_gb = int(config["aviary_memory"]),
runtime = "96h",
assembler = lambda wildcards, attempt: "" if attempt == 1 else "--use-megahit",
mem_mb = lambda wildcards, attempt: get_assemble_memory(wildcards, attempt, unit="MB"),
mem_gb = get_assemble_memory,
runtime = lambda wildcards, attempt: 96*60*attempt,
assembler = get_assemble_assembler,
log:
logs_dir + "/aviary/{coassembly}_assemble.log"
conda:
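All of the `runtime` changes above follow the same pattern: Snakemake passes the 1-based `attempt` number to resource callables, so each retry requests more time (and, for `aviary_assemble`, more threads and memory, with a Megahit fallback on the third attempt). A minimal standalone sketch of the pattern is below; the rule name and shell command are placeholders, not binchicken code.

```snakemake
# Illustrative Snakemake rule showing attempt-scaled retries; names and the
# shell command are placeholders, not taken from binchicken.
rule example_assemble:
    input:
        "reads/{sample}.fastq.gz"
    output:
        "assembly/{sample}/final_contigs.fasta"
    threads: lambda wildcards, attempt: 64 if attempt == 2 else 32
    resources:
        # Runtime in minutes scales linearly with the attempt number.
        runtime = lambda wildcards, attempt: 24 * 60 * attempt,
        mem_mb = lambda wildcards, attempt: (500 if attempt == 2 else 250) * 1000,
        # Switch to a lower-memory assembler on the final attempt.
        assembler = lambda wildcards, attempt: "" if attempt < 3 else "--use-megahit",
    shell:
        "assemble.sh {resources.assembler} -t {threads} -o {output[0]} {input[0]}"
```

Combined with the retry count set by `--cluster-retries` (now defaulting to 3), this lets the workflow escalate resources on resubmission instead of failing outright.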
2 changes: 1 addition & 1 deletion docs/preludes/coassemble_prelude.md
@@ -21,7 +21,7 @@ binchicken coassemble --forward reads_1.1.fq ... --reverse reads_1.2.fq ... --si
# Create snakemake profile at ~/.config/snakemake/qsub with cluster, cluster-status, cluster-cancel, etc.
# See https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles
binchicken coassemble --forward reads_1.1.fq ... --reverse reads_1.2.fq ... --run-aviary \
--snakemake-profile qsub --cluster-retries 3 --local-cores 64 --cores 64
--snakemake-profile qsub --local-cores 64 --cores 64
```

Important options: