
Commit

Pinning version of nf-core and bug fixes
DLBPointon committed Nov 15, 2024
1 parent a4e87aa commit 5a092d7
Showing 10 changed files with 147 additions and 115 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/linting.yml
@@ -83,10 +83,16 @@ jobs:
           python-version: "3.8"
           architecture: "x64"

+      - name: read .nf-core.yml
+        uses: pietrobolcato/action-read-yaml@1.0.0
+        id: read_yml
+        with:
+          config: ${{ github.workspace }}/.nf-core.yaml
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install nf-core
+          pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }}
       - name: Run nf-core lint
         env:
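Note for reviewers: the read-yaml step only works if the key it reads actually exists in the repo-level nf-core config file (and the step is named after .nf-core.yml while its config: line points at .nf-core.yaml, so the filename must match whichever file the repo contains). A hypothetical minimal file with the key the workflow expects:

# Hypothetical .nf-core.yml / .nf-core.yaml contents, for illustration only.
# The key name must match steps.read_yml.outputs['nf_core_version'] above.
nf_core_version: "2.14.1"
repository_type: pipeline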
57 changes: 17 additions & 40 deletions bin/ascc_merge_tables.py
@@ -98,7 +98,7 @@ def load_and_merge_dataframes(paths_dict):
         bacterial_kraken_df = pd.read_csv(paths_dict["bacterial_kraken"], sep=",")
         if bacterial_kraken_df.shape[0] > 0:
             bacterial_kraken_df.rename(columns={bacterial_kraken_df.columns[0]: "scaff"}, inplace=True)
-            bacterial_kraken_df.rename(columns={"taxid": "kraken_taxid"}, inplace=True)
+            bacterial_kraken_df.rename(columns={"taxid": "nt_kraken_taxid"}, inplace=True)
         else:
             sys.stderr.write(
                 "No rows were found in bacterial Kraken output table ({})\n".format(paths_dict["bacterial_kraken"])
@@ -110,21 +110,14 @@ def load_and_merge_dataframes(paths_dict):
         nt_kraken_df = pd.read_csv(paths_dict["nt_kraken"], sep=",")
         if nt_kraken_df.shape[0] > 0:
             nt_kraken_df.rename(columns={nt_kraken_df.columns[0]: "scaff"}, inplace=True)
-            nt_kraken_df.rename(columns={"taxid": "kraken_taxid"}, inplace=True)
+            nt_kraken_df.rename(columns={"taxid": "nt_kraken_taxid"}, inplace=True)
         else:
             sys.stderr.write("No rows were found in nt Kraken output table ({})\n".format(paths_dict["nt_kraken"]))
             nt_kraken_df = None

     dim_reduction_df = None
     if paths_dict["dim_reduction_embeddings"] is not None:
-        dim_reduction_df = pd.read_csv(paths_dict["dim_reduction_embeddings"], sep=",")
-        if dim_reduction_df.shape[0] == 0:
-            sys.stderr.write(
-                "No rows were found in kmers dimensionality reduction output table ({})\n".format(
-                    paths_dict["dim_reduction_embeddings"]
-                )
-            )
-            dim_reduction_df = None
+        dim_reduction_df = parse_or_pass(paths_dict["dim_reduction_embeddings"], "DIMENSIONAL-REDUCTION-EMBEDDINGS")

     btk_df = None
     if paths_dict["blobtoolkit"] is not None:
@@ -194,51 +187,27 @@ def load_and_merge_dataframes(paths_dict):

     fcs_gx_df = None
     if paths_dict["fcs_gx"] is not None:
-        fcs_gx_df = pd.read_csv(paths_dict["fcs_gx"], sep=",")
-        if fcs_gx_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in FCS-GX output table ({})\n".format(paths_dict["fcs_gx"]))
-            fcs_gx_df = None
+        fcs_gx_df = parse_or_pass(paths_dict["fcs_gx"], "FCSGX")

     nt_blast_df = None
     if paths_dict["nt_blast"] is not None:
-        nt_blast_df = pd.read_csv(paths_dict["nt_blast"], sep=",")
-        if nt_blast_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in nt BLAST output table ({})\n".format(paths_dict["nt_blast"]))
-            nt_blast_df = None
+        nt_blast_df = parse_or_pass(paths_dict["nt_blast"], "NT_BLAST")

     nr_diamond_df = None
     if paths_dict["nr_diamond"] is not None:
-        nr_diamond_df = pd.read_csv(paths_dict["nr_diamond"], sep=",")
-        if nr_diamond_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in nr Diamond output table ({})\n".format(paths_dict["nr_diamond"]))
-            nr_diamond_df = None
+        nr_diamond_df = parse_or_pass(paths_dict["nr_diamond"], "NR_DIAMOND")

     uniprot_diamond_df = None
     if paths_dict["uniprot_diamond"] is not None:
-        uniprot_diamond_df = pd.read_csv(paths_dict["uniprot_diamond"], sep=",")
-        if uniprot_diamond_df.shape[0] == 0:
-            sys.stderr.write(
-                "No rows were found in Uniprot Diamond output table ({})\n".format(paths_dict["uniprot_diamond"])
-            )
-            uniprot_diamond_df = None
+        uniprot_diamond_df = parse_or_pass(paths_dict["uniprot_diamond"], "UNIPROT_DIAMOND")

     cobiontid_markerscan_df = None
     if paths_dict["cobiontid_markerscan"] is not None:
-        cobiontid_markerscan_df = pd.read_csv(paths_dict["cobiontid_markerscan"], sep=",")
-        if cobiontid_markerscan_df.shape[0] == 0:
-            sys.stderr.write(
-                "No rows were found in CobiontID MarkerScan output table ({})\n".format(
-                    paths_dict["cobiontid_markerscan"]
-                )
-            )
-            uniprot_diamond_df = None
+        cobiontid_markerscan_df = parse_or_pass(paths_dict["cobiontid_markerscan"], "COBIONT MARKERSCAN")

     contigviz_df = None
     if paths_dict["contigviz"] is not None:
-        contigviz_df = pd.read_csv(paths_dict["contigviz"], sep=",")
-        if contigviz_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in ContigViz output table ({})\n".format(paths_dict["contigviz"]))
-            contigviz_df = None
+        contigviz_df = parse_or_pass(paths_dict["contigviz"], "CONTIG VIZ")

     if coverage_df is not None:
         df = pd.merge(df, coverage_df, on="scaff", how="outer")
@@ -270,6 +239,14 @@ def load_and_merge_dataframes(paths_dict):
     return df


+def parse_or_pass(input_file: str, name: str):
+    try:
+        return pd.read_csv(input_file, sep=",")
+    except:
+        sys.stderr.write(f"Process:: {name} :: No rows in file: {input_file}")
+        return None


 def main(args):
     paths_dict = dict()
     paths_dict["gc_content"] = args.gc_cov
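Note for reviewers on the new parse_or_pass helper: pandas only raises (e.g. pandas.errors.EmptyDataError) when a file has no parseable content at all, while a CSV containing just a header row comes back as an empty DataFrame rather than None, so the helper behaves slightly differently from the inline shape[0] checks it replaces; the bare except also catches any error, not just empty files, and the message it writes lacks a trailing newline. A minimal sketch of the underlying pandas behaviour, assuming only pandas:

import io
import pandas as pd

# A header-only CSV parses fine: the helper would return an *empty* DataFrame, not None.
df = pd.read_csv(io.StringIO("scaff,taxid\n"), sep=",")
print(df.shape)  # (0, 2)

# A zero-byte file raises EmptyDataError, so the helper's except branch returns None.
try:
    pd.read_csv(io.StringIO(""), sep=",")
except pd.errors.EmptyDataError as err:
    print(f"caught: {err}")  # caught: No columns to parse from file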
82 changes: 41 additions & 41 deletions conf/modules.config
@@ -19,64 +19,71 @@ process {
         ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1"
         ext.version = "0.6.0"
         publishDir = [
-            path: { "${params.outdir}/sanger-tol-btk" },
+            path: { "${params.outdir}/${meta.id}/sanger-tol-btk" },
             mode: params.publish_dir_mode,
         ]
     }

withName: "AUTOFILTER_AND_CHECK_ASSEMBLY|CREATE_BTK_DATASET|ORGANELLE_CONTAMINATION_RECOMMENDATIONS|FILTER_BARCODE|SUMMARISE_VECSCREEN_OUTPUT|MERGE_BTK_DATASETS" {
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
publishDir = [
path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode
]
}
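Note for reviewers: the dynamic segment of that publishDir path derives the output folder from the process name. For a hypothetical fully qualified name such as NFCORE_ASCC:ASCC:CREATE_BTK_DATASET, the chain resolves like this (Groovy sketch, illustrative name only):

// Hypothetical fully qualified process name, for illustration only
def name  = "NFCORE_ASCC:ASCC:CREATE_BTK_DATASET"
def leaf  = name.tokenize(':')[-1]     // "CREATE_BTK_DATASET"
def first = leaf.tokenize('_')[0]      // "CREATE"
println(first.toLowerCase())           // "create" -> files land in ${outdir}/${meta.id}/create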

withName: "FCS_FCSADAPTOR_EUK|FCS_FCSADAPTOR_PROK" {
publishDir = [
path: { "${params.outdir}/${meta.id}/FCS-adaptor" },
mode: params.publish_dir_mode,
]
}

     withName: AUTOFILTER_AND_CHECK_ASSEMBLY {
-       publishDir = [
+        publishDir = [
             path: { "${params.outdir}/" },
             mode: params.publish_dir_mode,
             pattern: "autofiltering_done_indicator_file.txt"
         ]
     }

     withName: ASCC_MERGE_TABLES {
-       publishDir = [
-           path: { "${params.outdir}/ASCC-main-output" },
+        publishDir = [
+            path: { "${params.outdir}/${meta.id}/ASCC-main-output" },
             mode: params.publish_dir_mode
         ]
     }

     withName: FILTER_FASTA {
-       ext.args = "--low_pass --remove_original_fasta"
-       ext.cutoff = 1900000000
+        ext.args = "--low_pass --remove_original_fasta"
+        ext.cutoff = 1900000000
     }

     withName: SEQKIT_SLIDING {
-       ext.args = {"-s ${meta.sliding} -W ${meta.window} "}
+        ext.args = {"-s ${meta.sliding} -W ${meta.window} "}
     }

     withName: '.*:.*:EXTRACT_NT_BLAST:BLAST_CHUNK_TO_FULL' {
-       ext.args = "nucleotide"
+        ext.args = "nucleotide"
     }

     withName: '.*:.*:NR_DIAMOND:DIAMOND_BLAST_CHUNK_TO_FULL' {
-       ext.args = "diamond"
+        ext.args = "diamond"
     }

     withName: '.*:.*:NR_DIAMOND:CONVERT_TO_HITS_FILE' {
-       ext.args = "nr"
+        ext.args = "nr"
     }

     withName: '.*:.*:UP_DIAMOND:DIAMOND_BLAST_CHUNK_TO_FULL' {
-       ext.args = "diamond"
+        ext.args = "diamond"
     }

     withName: '.*:.*:UP_DIAMOND:CONVERT_TO_HITS_FILE' {
-       ext.args = "Uniprot"
+        ext.args = "Uniprot"
     }

     withName: BLAST_MAKEBLASTDB {
-       ext.args = { "-dbtype nucl" }
+        ext.args = { "-dbtype nucl" }
     }

     withName: '.*:.*:BLAST_BLASTN' {
@@ -86,7 +93,7 @@ process {
     }

     withName: DIAMOND_BLASTX {
-       ext.args = { "--sensitive --max-target-seqs 3 --evalue 1e-25 --no-unlink --tmpdir ./" }
+        ext.args = { "--sensitive --max-target-seqs 3 --evalue 1e-25 --no-unlink --tmpdir ./" }
     }

     withName: '.*:EXTRACT_NT_BLAST:BLAST_BLASTN_MOD' {
@@ -95,63 +102,56 @@ process {
     }

     withName: '.*:.*:(PLASTID_ORGANELLAR_BLAST|MITO_ORGANELLAR_BLAST):BLAST_BLASTN' {
-       ext.args = { "-task megablast -word_size 28 -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -dust yes -evalue 0.0001 -perc_identity 80 -soft_masking true -outfmt 7" }
+        ext.args = { "-task megablast -word_size 28 -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -dust yes -evalue 0.0001 -perc_identity 80 -soft_masking true -outfmt 7" }
     }

     withName: SAMTOOLS_DEPTH{
-       ext.args = { "-aa" }
+        ext.args = { "-aa" }
     }

     withName: SAMTOOLS_SORT{
-       ext.prefix = { "${meta.id}_sorted" }
+        ext.prefix = { "${meta.id}_sorted" }
     }

     withName: KRAKEN2_KRAKEN2 {
-       ext.args = { "--report-zero-counts --use-names --memory-mapping" }
+        ext.args = { "--report-zero-counts --use-names --memory-mapping" }
     }

     withName: FCS_FCSADAPTOR_PROK {
-       ext.args = "--prok"
-       ext.prefix = { "${meta.id}_prok" }
+        ext.args = "--prok"
+        ext.prefix = { "${meta.id}_prok" }
     }

     withName: FCS_FCSADAPTOR_EUK {
-       ext.args = "--euk"
-       ext.prefix = { "${meta.id}_euk" }
-    }
-
-    withName: "FCS_FCSADAPTOR_EUK|FCS_FCSADAPTOR_PROK" {
-        publishDir = [
-            path: { "${params.outdir}/FCS-adaptor" },
-            mode: params.publish_dir_mode,
-        ]
+        ext.args = "--euk"
+        ext.prefix = { "${meta.id}_euk" }
     }

     withName: SED_SED {
-       ext.prefix = { "${meta.id}_fixed" }
-       ext.args = " -e '/>/s/ //g' "
+        ext.prefix = { "${meta.id}_fixed" }
+        ext.args = " -e '/>/s/ //g' "
     }

     withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
-       ext.prefix = { "${meta.id}_sorted"}
-       ext.args = { '-k2,2 -nr' }
+        ext.prefix = { "${meta.id}_sorted"}
+        ext.args = { '-k2,2 -nr' }
     }

     withName: MINIMAP2_ALIGN_SE {
-       ext.args = {'-ax '+ (meta.readtype.equals("hifi") ? "map-hifi" : meta.readtype.equals("clr") ? "map-pb" : meta.readtype.equals("ont") ? "map-ont" : "") + ' --cs=short' + (reference.size() > 4e9 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
-       ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
+        ext.args = {'-ax '+ (meta.readtype.equals("hifi") ? "map-hifi" : meta.readtype.equals("clr") ? "map-pb" : meta.readtype.equals("ont") ? "map-ont" : "") + ' --cs=short' + (reference.size() > 4e9 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
+        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
     }

     withName: MINIMAP2_ALIGN_ILLUMINA {
-       ext.args = { '-ax sr --cs=short' + (reference.size() > 4294967296 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
-       ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
+        ext.args = { '-ax sr --cs=short' + (reference.size() > 4294967296 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
+        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
     }
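Note for reviewers on the -I arithmetic above: minimap2 loads at most -I bases of reference per index batch (default ~4G), and the expression rounds the reference file size up to whole GiB so a large assembly is indexed in a single batch. A worked sketch with a hypothetical 6 GB reference (Groovy, illustrative numbers only):

// Hypothetical reference size, for illustration only (6 GB FASTA)
def refBytes = 6000000000L

// Mirrors the ext.args expression: bytes -> GiB, rounded up
if (refBytes > 4294967296L) {
    println("-I" + Math.ceil(refBytes / 1073741824) + "G")   // prints: -I6.0G
}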

     withName: NCBITOOLS_VECSCREEN {
-       ext.args = { "-f3" }
+        ext.args = { "-f3" }
     }

     withName: FILTER_VECSCREEN_RESULTS {
-       ext.args = "--skip_reporting_suspect_hits --skip_reporting_weak_hits --skip_reporting_no_hits"
+        ext.args = "--skip_reporting_suspect_hits --skip_reporting_weak_hits --skip_reporting_no_hits"
     }
 }
6 changes: 3 additions & 3 deletions main.nf
@@ -119,9 +119,9 @@ workflow {
         }
         .set { branched_assemblies }

-    branched_assemblies.organellar_genome.view {"ORGANELLAR: $it"}
-    branched_assemblies.sample_genome.view {"GENOMIC: $it"}
-    branched_assemblies.error.view {"ERROR CHANNELS: $it"}
+    // branched_assemblies.organellar_genome.view {"ORGANELLAR: $it"}
+    // branched_assemblies.sample_genome.view {"GENOMIC: $it"}
+    // branched_assemblies.error.view {"ERROR CHANNELS: $it"}


//
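Note for reviewers: a channel's .view is a pass-through debug tap that prints each element flowing through it, so commenting these lines out silences the per-assembly logging without changing the dataflow. A minimal, generic sketch (hypothetical channel, not this pipeline's):

// Hypothetical channel, for illustration: .view prints each tuple as it passes
workflow {
    Channel.of(['sample_a', 'genomic'], ['sample_b', 'organellar'])
        .view { "BRANCHED: $it" }
}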
