Commit 3b158eb
Updating many things
DLBPointon committed Feb 7, 2025
1 parent 880bafc commit 3b158eb
Showing 7 changed files with 69 additions and 57 deletions.
4 changes: 3 additions & 1 deletion assets/test.yaml
@@ -1,4 +1,6 @@
reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/
reads_path:
- /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/Pyoeliiyoelii17XNL_pbtools_simulated_pacbio_reads.fa.gz
- /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/simulated_2.fa.gz
reads_type: "hifi"
pacbio_barcode_file: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa
pacbio_barcode_names: "bc2008,bc2009"
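
With reads_path now a YAML list rather than a single directory string, -params-file hands the workflow a Groovy List. A minimal sketch of what the workflow then sees (launch command and assertion are illustrative assumptions):

    // assumed launch: nextflow run . -profile test -params-file assets/test.yaml
    // params.reads_path arrives as a List<String>:
    assert params.reads_path instanceof List
    params.reads_path.each { p -> println "long-read file: ${p}" }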
48 changes: 22 additions & 26 deletions conf/base.config
@@ -22,23 +22,32 @@ process {

// PIPELINE NESTING
withName: 'SANGER_TOL_BTK|SANGER_TOL_BTK_CASCADE' {
time = { check_max (48.h * task.attempt, 'time') }
time = { check_max (100.h * task.attempt, 'time') }
}

// BLAST RELATED MODULES WHICH WILL NEED THE MOST RESOURCES
// TIME IS STOLEN FROM SANGER-TOL/BLOBTOOLKIT
withName: 'BLAST_BLASTN|BLAST_BLASTN_MOD|DIAMOND_BLASTX' {
cpus = { check_max( 16 * 1, 'cpus' ) }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( 100.h * task.attempt, 'time' ) }
}


withName: 'PACBIO_BARCODE_CHECK:BLAST_BLASTN' {
cpus = { check_max( 16 * 1, 'cpus' ) }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}


// FCS REQUIRES LOADING THE WHOLE FCS DB INTO MEMORY AT ONE TIME
withName: 'FCSGX_RUNGX' {
cpus = { check_max( 32 * 1, 'cpus' ) }
memory = { check_max( 500.GB * task.attempt, 'memory') }
time = { check_max( 50.h * task.attempt, 'time' ) }
}

// For testing on the minimal FCSGX
// withName: 'FCSGX_RUNGX' {
// cpus = { check_max( 16 * 1, 'cpus' ) }
// memory = { check_max( 50.GB * task.attempt, 'memory') }
// time = { check_max( 40.h * task.attempt, 'time' ) }
// }


withName: 'MINIMAP2_ALIGN_SE' {
cpus = { check_max( 16 * task.attempt, 'cpus' ) }
@@ -73,12 +82,6 @@ process {
memory = { check_max( 50.GB * task.attempt, 'memory') }
}

withName: DIAMOND_BLASTX {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 85.GB * task.attempt, 'memory' ) }
time = { check_max( 20.h * task.attempt, 'time' ) }
}

// PYTHON BASED MODULES WHICH SHOULDN'T NEED MORE THAN 1 CORE AND A MIDDLING AMOUNT OF MEMORY
// WILL BE REVIEWED
withName: 'VALIDATE_TAXID|TRAILINGNS|GC_CONTENT|GET_KMERS_PROFILE|PARSE_FCSGX_RESULT|ASCC_MERGE_TABLES|GET_LARGEST_SCAFF|KMER_COUNT_DIM_REDUCTION|KMER_COUNT_DIM_REDUCTION_COMBINE_CSV|REFORMAT_DIAMOND_OUTFMT6|CONVERT_TO_HITS_FILE|DIAMOND_BLAST_CHUNK_TO_FULL|GENERATE_SAMPLESHEET' {
@@ -87,21 +90,14 @@
time = { check_max( 5.h * task.attempt, 'time' ) }
}

// BLAST RELATED MODULES WHICH WILL NEED THE MOST RESOURCES
// TIME IS STOLEN FROM SANGER-TOL/BLOBTOOLKIT
withName: 'BLAST_BLASTN|BLAST_BLASTN_MOD|DIAMOND_BLASTX' {
cpus = { check_max( 16 * 1, 'cpus' ) }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( task.attempt == 1 ? 12.h : ( task.attempt == 2 ? 47.h : 167.h ), 'time' ) }
}

withLabel:KRAKEN2_KRAKEN2 {
withName:KRAKEN2_KRAKEN2 {
// Scaling based on the size of the input genome.
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
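
The withLabel-to-withName switch above is more than cosmetic: withLabel only matches labels declared inside a process body, while withName matches the process name itself. KRAKEN2_KRAKEN2 is a process name, not a declared label, so the old selector presumably never matched and these overrides were silently skipped. A sketch of the distinction (values illustrative):

    process {
        // matches every process that declares `label 'process_high'`:
        withLabel: 'process_high'    { cpus = 12 }
        // matches exactly one process, by its name:
        withName:  'KRAKEN2_KRAKEN2' { cpus = 12 }
    }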


// Process-specific resource requirements
// NOTE - Please try and re-use the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
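
All of these overrides rely on the usual nf-core retry pattern: task.attempt increments on each automatic retry, and check_max caps the escalated request at the pipeline-level ceilings (params.max_cpus, params.max_memory, params.max_time). For reference, a shortened sketch of the helper as it typically appears in nf-core-style configs (time branch only; the real helper also handles memory and cpus):

    // sketch of the standard nf-core check_max helper
    def check_max(obj, type) {
        if (type == 'time') {
            try {
                return obj.compareTo(params.max_time as nextflow.util.Duration) == 1
                    ? params.max_time as nextflow.util.Duration   // cap at the ceiling
                    : obj                                         // request already fits
            } catch (all) {
                println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
                return obj
            }
        }
        return obj
    }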
3 changes: 2 additions & 1 deletion main.nf
@@ -190,7 +190,7 @@ workflow {
include_workflow_steps.contains('ALL') && params.profile_name == 'test'
)
) {
ch_grabbed_reads_path = MAIN_WORKFLOW_GrabFiles( params.reads_path )
ch_grabbed_reads_path = Channel.of(params.reads_path).collect()
} else {
ch_grabbed_reads_path = []
}
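
The dropped MAIN_WORKFLOW_GrabFiles call globbed a directory for read files; with reads_path as a list, a channel factory is enough. A minimal sketch of the new behaviour (file names assumed):

    // Channel.of(list) emits the list as a single item; collect() (flat by
    // default) gathers everything into one emission, so downstream processes
    // receive all read files at once:
    Channel.of(['one.fa.gz', 'two.fa.gz'])
        .collect()
        .view()   // => [one.fa.gz, two.fa.gz]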
@@ -276,6 +276,7 @@ process MAIN_WORKFLOW_GrabFiles {
"true"
}


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
4 changes: 2 additions & 2 deletions modules/local/check_barcode.nf
@@ -8,7 +8,7 @@ process CHECK_BARCODE {
'biocontainers/python:3.9' }"

input:
tuple val(meta) , path(pacbio_dir)
tuple val(meta) , path(pacbio_dir, stageAs:"in/*")
path barcodes
val multiplex_csv

@@ -23,7 +23,7 @@
OUTPUT=\$(\\
pacbio_barcode_check.py \\
-b ${barcodes} \\
-p ${pacbio_dir} \\
-p in/ \\
-m ${multiplex_csv})
cat <<-END_VERSIONS > versions.yml
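
The stageAs change is what makes the hard-coded -p in/ argument safe: Nextflow stages each input file into the task work directory under in/, so the script no longer depends on the original file names or directory layout. A minimal sketch of the staging pattern (process name assumed):

    process STAGE_DEMO {
        input:
        // every staged file is linked into the work dir as in/<name>
        tuple val(meta), path(reads, stageAs: "in/*")

        script:
        """
        ls in/    # files land here regardless of their source paths
        """
    }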
2 changes: 1 addition & 1 deletion nextflow_schema.json
@@ -39,7 +39,7 @@
"fa_icon": "fas fa-hastag"
},
"reads_path": {
"type": "string",
"type": "array",
"description": "folder containing long read data files in FASTA.GZ format",
"fa_icon": "fas fa-file-lines"
},
63 changes: 37 additions & 26 deletions workflows/ascc_genomic.nf
@@ -48,7 +48,7 @@ workflow ASCC_GENOMIC {
validate_taxid_versions // Versions channel from main.nf
include_steps // params.include_steps
exclude_steps // params.exclude_steps
fcs_db // path(path)
fcs_db // [path(path)]
reads

main:
@@ -76,6 +76,8 @@ workflow ASCC_GENOMIC {
log.info "GENOMIC RUN -- EXCLUDE STEPS INC.: $exclude_workflow_steps"


//reads = CollectReads(reads_list)

//
// LOGIC: CREATE btk_busco_run_mode VALUE
//
@@ -180,17 +182,16 @@
)
ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions)

if (!EXTRACT_NT_BLAST.out.ch_blast_hits.ifEmpty(true)) {
ch_nt_blast = EXTRACT_NT_BLAST.out.ch_blast_hits.map{it[1]}
} else {
ch_nt_blast = []
}
//
// TODO: This needs testing
//
ch_nt_blast = EXTRACT_NT_BLAST.out.ch_blast_hits
.map { it -> it[1] }
.ifEmpty { [] }

if (!EXTRACT_NT_BLAST.out.ch_top_lineages.ifEmpty(true)) {
ch_blast_lineage = EXTRACT_NT_BLAST.out.ch_top_lineages.map{it[1]}
} else {
ch_blast_lineage = []
}
ch_blast_lineage = EXTRACT_NT_BLAST.out.ch_top_lineages
.map { it -> it[1] }
.ifEmpty { [] }

} else {
ch_nt_blast = []
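
The rewrite above also fixes a latent bug: ifEmpty returns a channel, not a boolean, so a guard like if (!channel.ifEmpty(true)) tests the truthiness of a channel object and presumably always takes the same branch. The chained form keeps everything as dataflow; a sketch of the pattern now used throughout (names assumed):

    ch_example = SOME_SUBWORKFLOW.out.hits
        .map { it -> it[1] }    // drop the meta map, keep the payload
        .ifEmpty { [] }         // emit a [] placeholder when nothing arrives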
@@ -403,24 +404,20 @@
params.nt_kraken_database_path,
params.ncbi_ranked_lineage_path
)
ch_versions = ch_versions.mix(RUN_NT_KRAKEN.out.versions)

if (!RUN_NT_KRAKEN.out.classified.ifEmpty(true)) {
ch_kraken1 = RUN_NT_KRAKEN.out.classified.map{it[1]}
} else {
ch_kraken1 = []
}
ch_kraken1 = RUN_NT_KRAKEN.out.classified
.map { it -> it[1] }
.ifEmpty { [] }

ch_kraken2 = RUN_NT_KRAKEN.out.report.map{it[1]}
ch_kraken2 = RUN_NT_KRAKEN.out.report
.map { it -> it[1] }
.ifEmpty { [] }

if (!RUN_NT_KRAKEN.out.lineage.ifEmpty(true)) {
ch_kraken3 = RUN_NT_KRAKEN.out.lineage
.map { it -> it[1] }
.ifEmpty { [] }

// TODO: Channel is not getting populated even though it is included.
ch_kraken3 = RUN_NT_KRAKEN.out.lineage
} else {
ch_kraken3 = []
}

ch_versions = ch_versions.mix(RUN_NT_KRAKEN.out.versions)
} else {
ch_kraken1 = []
ch_kraken2 = []
@@ -709,7 +706,7 @@ workflow ASCC_GENOMIC {
println "ASCC_MERGE_TABLES - TR: $ch_tiara" // FROM -- TIARA.classifications[0]
println "ASCC_MERGE_TABLES - K3: $ch_kraken3" // FROM -- RUN_NT_KRAKEN.lineage[0]
println "ASCC_MERGE_TABLES - BL: $ch_blast_lineage" // FROM -- E_NT_BLAST.ch_blast_hits[0]
println "ASCC_MERGE_TABLES - K3: $ch_kmers" // FROM -- G_KMERS_PROF.combined_csv[0]
println "ASCC_MERGE_TABLES - KM: $ch_kmers" // FROM -- G_KMERS_PROF.combined_csv[0]
println "ASCC_MERGE_TABLES - NR: $nr_hits" // FROM -- NR_DIAMOND.reformed[0]
println "ASCC_MERGE_TABLES - UN: $un_hits" // FROM -- UP_DIAMOND.reformed[0]

@@ -744,6 +741,20 @@
).set { ch_collated_versions }
}

// CollectReads {
// tag "${meta.id}"
// executor 'local'

// input:
// tuple val(meta), path("in")

// output:
// tuple val(meta), path("in/*.{fa,fasta}.{gz}")

// "true"
// }


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
2 changes: 2 additions & 0 deletions workflows/ascc_organellar.nf
@@ -381,6 +381,8 @@ workflow ASCC_ORGANELLAR {
//
// Function: this is to count the length of ONLY the fasta sequence
//
// @param input_file: path
// @return int
def CountFastaLength(input_file) {
int counter = 0;
def list_lines = new File(input_file.toString()).text.readLines()
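
The diff collapses the rest of CountFastaLength's body; for context, a self-contained sketch of such a counter, an assumption rather than the pipeline's exact code: skip '>' header lines and sum the remaining sequence characters.

    // hypothetical sketch -- the real body is collapsed in the diff above
    def CountFastaLengthSketch(input_file) {
        int counter = 0
        new File(input_file.toString()).eachLine { line ->
            if (!line.startsWith('>')) {
                counter += line.trim().length()
            }
        }
        return counter
    }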
