
Commit

Pinning version of nf-core and bug fixes
DLBPointon committed Nov 15, 2024
1 parent a4e87aa commit 5a092d7
Showing 10 changed files with 147 additions and 115 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/linting.yml
@@ -83,10 +83,16 @@ jobs:
           python-version: "3.8"
           architecture: "x64"

+      - name: read .nf-core.yml
+        uses: pietrobolcato/action-read-yaml@1.0.0
+        id: read_yml
+        with:
+          config: ${{ github.workspace }}/.nf-core.yaml
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install nf-core
+          pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }}
       - name: Run nf-core lint
         env:
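Note for reviewers: the read-yaml step only works if the key it reads actually exists in the repo-level nf-core config file (and the step is named after .nf-core.yml while its config: line points at .nf-core.yaml, so the filename must match whichever file the repo contains). A hypothetical minimal file with the key the workflow expects:

# Hypothetical .nf-core.yml / .nf-core.yaml contents, for illustration only.
# The key name must match steps.read_yml.outputs['nf_core_version'] above.
nf_core_version: "2.14.1"
repository_type: pipeline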
57 changes: 17 additions & 40 deletions bin/ascc_merge_tables.py
@@ -98,7 +98,7 @@ def load_and_merge_dataframes(paths_dict):
         bacterial_kraken_df = pd.read_csv(paths_dict["bacterial_kraken"], sep=",")
         if bacterial_kraken_df.shape[0] > 0:
             bacterial_kraken_df.rename(columns={bacterial_kraken_df.columns[0]: "scaff"}, inplace=True)
-            bacterial_kraken_df.rename(columns={"taxid": "kraken_taxid"}, inplace=True)
+            bacterial_kraken_df.rename(columns={"taxid": "nt_kraken_taxid"}, inplace=True)
         else:
             sys.stderr.write(
                 "No rows were found in bacterial Kraken output table ({})\n".format(paths_dict["bacterial_kraken"])
@@ -110,21 +110,14 @@ def load_and_merge_dataframes(paths_dict):
         nt_kraken_df = pd.read_csv(paths_dict["nt_kraken"], sep=",")
         if nt_kraken_df.shape[0] > 0:
             nt_kraken_df.rename(columns={nt_kraken_df.columns[0]: "scaff"}, inplace=True)
-            nt_kraken_df.rename(columns={"taxid": "kraken_taxid"}, inplace=True)
+            nt_kraken_df.rename(columns={"taxid": "nt_kraken_taxid"}, inplace=True)
         else:
             sys.stderr.write("No rows were found in nt Kraken output table ({})\n".format(paths_dict["nt_kraken"]))
             nt_kraken_df = None

     dim_reduction_df = None
     if paths_dict["dim_reduction_embeddings"] is not None:
-        dim_reduction_df = pd.read_csv(paths_dict["dim_reduction_embeddings"], sep=",")
-        if dim_reduction_df.shape[0] == 0:
-            sys.stderr.write(
-                "No rows were found in kmers dimensionality reduction output table ({})\n".format(
-                    paths_dict["dim_reduction_embeddings"]
-                )
-            )
-            dim_reduction_df = None
+        dim_reduction_df = parse_or_pass(paths_dict["dim_reduction_embeddings"], "DIMENSIONAL-REDUCTION-EMBEDDINGS")

     btk_df = None
     if paths_dict["blobtoolkit"] is not None:
@@ -194,51 +187,27 @@ def load_and_merge_dataframes(paths_dict):

     fcs_gx_df = None
     if paths_dict["fcs_gx"] is not None:
-        fcs_gx_df = pd.read_csv(paths_dict["fcs_gx"], sep=",")
-        if fcs_gx_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in FCS-GX output table ({})\n".format(paths_dict["fcs_gx"]))
-            fcs_gx_df = None
+        fcs_gx_df = parse_or_pass(paths_dict["fcs_gx"], "FCSGX")

     nt_blast_df = None
     if paths_dict["nt_blast"] is not None:
-        nt_blast_df = pd.read_csv(paths_dict["nt_blast"], sep=",")
-        if nt_blast_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in nt BLAST output table ({})\n".format(paths_dict["nt_blast"]))
-            nt_blast_df = None
+        nt_blast_df = parse_or_pass(paths_dict["nt_blast"], "NT_BLAST")

     nr_diamond_df = None
     if paths_dict["nr_diamond"] is not None:
-        nr_diamond_df = pd.read_csv(paths_dict["nr_diamond"], sep=",")
-        if nr_diamond_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in nr Diamond output table ({})\n".format(paths_dict["nr_diamond"]))
-            nr_diamond_df = None
+        nr_diamond_df = parse_or_pass(paths_dict["nr_diamond"], "NR_DIAMOND")

     uniprot_diamond_df = None
     if paths_dict["uniprot_diamond"] is not None:
-        uniprot_diamond_df = pd.read_csv(paths_dict["uniprot_diamond"], sep=",")
-        if uniprot_diamond_df.shape[0] == 0:
-            sys.stderr.write(
-                "No rows were found in Uniprot Diamond output table ({})\n".format(paths_dict["uniprot_diamond"])
-            )
-            uniprot_diamond_df = None
+        uniprot_diamond_df = parse_or_pass(paths_dict["uniprot_diamond"], "UNIPROT_DIAMOND")

     cobiontid_markerscan_df = None
     if paths_dict["cobiontid_markerscan"] is not None:
-        cobiontid_markerscan_df = pd.read_csv(paths_dict["cobiontid_markerscan"], sep=",")
-        if cobiontid_markerscan_df.shape[0] == 0:
-            sys.stderr.write(
-                "No rows were found in CobiontID MarkerScan output table ({})\n".format(
-                    paths_dict["cobiontid_markerscan"]
-                )
-            )
-            uniprot_diamond_df = None
+        cobiontid_markerscan_df = parse_or_pass(paths_dict["cobiontid_markerscan"], "COBIONT MARKERSCAN")

     contigviz_df = None
     if paths_dict["contigviz"] is not None:
-        contigviz_df = pd.read_csv(paths_dict["contigviz"], sep=",")
-        if contigviz_df.shape[0] == 0:
-            sys.stderr.write("No rows were found in ContigViz output table ({})\n".format(paths_dict["contigviz"]))
-            contigviz_df = None
+        contigviz_df = parse_or_pass(paths_dict["contigviz"], "CONTIG VIZ")

     if coverage_df is not None:
         df = pd.merge(df, coverage_df, on="scaff", how="outer")
@@ -270,6 +239,14 @@ def load_and_merge_dataframes(paths_dict):
     return df


+def parse_or_pass(input_file: str, name: str):
+    try:
+        return pd.read_csv(input_file, sep=",")
+    except:
+        sys.stderr.write(f"Process:: {name} :: No rows in file: {input_file}")
+        return None


 def main(args):
     paths_dict = dict()
     paths_dict["gc_content"] = args.gc_cov
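Note for reviewers on the new parse_or_pass helper: pandas only raises (e.g. pandas.errors.EmptyDataError) when a file has no parseable content at all, while a CSV containing just a header row comes back as an empty DataFrame rather than None, so the helper behaves slightly differently from the inline shape[0] checks it replaces; the bare except also catches any error, not just empty files, and the message it writes lacks a trailing newline. A minimal sketch of the underlying pandas behaviour, assuming only pandas:

import io
import pandas as pd

# A header-only CSV parses fine: the helper would return an *empty* DataFrame, not None.
df = pd.read_csv(io.StringIO("scaff,taxid\n"), sep=",")
print(df.shape)  # (0, 2)

# A zero-byte file raises EmptyDataError, so the helper's except branch returns None.
try:
    pd.read_csv(io.StringIO(""), sep=",")
except pd.errors.EmptyDataError as err:
    print(f"caught: {err}")  # caught: No columns to parse from file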
82 changes: 41 additions & 41 deletions conf/modules.config
@@ -19,64 +19,71 @@ process {
         ext.get_versions = "lsid | head -n1 | cut -d ',' -f 1"
         ext.version = "0.6.0"
         publishDir = [
-            path: { "${params.outdir}/sanger-tol-btk" },
+            path: { "${params.outdir}/${meta.id}/sanger-tol-btk" },
             mode: params.publish_dir_mode,
         ]
     }

withName: "AUTOFILTER_AND_CHECK_ASSEMBLY|CREATE_BTK_DATASET|ORGANELLE_CONTAMINATION_RECOMMENDATIONS|FILTER_BARCODE|SUMMARISE_VECSCREEN_OUTPUT|MERGE_BTK_DATASETS" {
publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
publishDir = [
path: { "${params.outdir}/${meta.id}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode
]
}
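Note for reviewers: the dynamic segment of that publishDir path derives the output folder from the process name. For a hypothetical fully qualified name such as NFCORE_ASCC:ASCC:CREATE_BTK_DATASET, the chain resolves like this (Groovy sketch, illustrative name only):

// Hypothetical fully qualified process name, for illustration only
def name  = "NFCORE_ASCC:ASCC:CREATE_BTK_DATASET"
def leaf  = name.tokenize(':')[-1]     // "CREATE_BTK_DATASET"
def first = leaf.tokenize('_')[0]      // "CREATE"
println(first.toLowerCase())           // "create" -> files land in ${outdir}/${meta.id}/create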

withName: "FCS_FCSADAPTOR_EUK|FCS_FCSADAPTOR_PROK" {
publishDir = [
path: { "${params.outdir}/${meta.id}/FCS-adaptor" },
mode: params.publish_dir_mode,
]
}

     withName: AUTOFILTER_AND_CHECK_ASSEMBLY {
-       publishDir = [
+        publishDir = [
             path: { "${params.outdir}/" },
             mode: params.publish_dir_mode,
             pattern: "autofiltering_done_indicator_file.txt"
         ]
     }

     withName: ASCC_MERGE_TABLES {
-       publishDir = [
-           path: { "${params.outdir}/ASCC-main-output" },
+        publishDir = [
+            path: { "${params.outdir}/${meta.id}/ASCC-main-output" },
             mode: params.publish_dir_mode
         ]
     }

     withName: FILTER_FASTA {
-       ext.args = "--low_pass --remove_original_fasta"
-       ext.cutoff = 1900000000
+        ext.args = "--low_pass --remove_original_fasta"
+        ext.cutoff = 1900000000
     }

     withName: SEQKIT_SLIDING {
-       ext.args = {"-s ${meta.sliding} -W ${meta.window} "}
+        ext.args = {"-s ${meta.sliding} -W ${meta.window} "}
     }

     withName: '.*:.*:EXTRACT_NT_BLAST:BLAST_CHUNK_TO_FULL' {
-       ext.args = "nucleotide"
+        ext.args = "nucleotide"
     }

     withName: '.*:.*:NR_DIAMOND:DIAMOND_BLAST_CHUNK_TO_FULL' {
-       ext.args = "diamond"
+        ext.args = "diamond"
     }

     withName: '.*:.*:NR_DIAMOND:CONVERT_TO_HITS_FILE' {
-       ext.args = "nr"
+        ext.args = "nr"
     }

     withName: '.*:.*:UP_DIAMOND:DIAMOND_BLAST_CHUNK_TO_FULL' {
-       ext.args = "diamond"
+        ext.args = "diamond"
     }

     withName: '.*:.*:UP_DIAMOND:CONVERT_TO_HITS_FILE' {
-       ext.args = "Uniprot"
+        ext.args = "Uniprot"
     }

     withName: BLAST_MAKEBLASTDB {
-       ext.args = { "-dbtype nucl" }
+        ext.args = { "-dbtype nucl" }
     }

     withName: '.*:.*:BLAST_BLASTN' {
@@ -86,7 +93,7 @@ process {
     }

     withName: DIAMOND_BLASTX {
-       ext.args = { "--sensitive --max-target-seqs 3 --evalue 1e-25 --no-unlink --tmpdir ./" }
+        ext.args = { "--sensitive --max-target-seqs 3 --evalue 1e-25 --no-unlink --tmpdir ./" }
     }

     withName: '.*:EXTRACT_NT_BLAST:BLAST_BLASTN_MOD' {
@@ -95,63 +102,56 @@ process {
     }

     withName: '.*:.*:(PLASTID_ORGANELLAR_BLAST|MITO_ORGANELLAR_BLAST):BLAST_BLASTN' {
-       ext.args = { "-task megablast -word_size 28 -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -dust yes -evalue 0.0001 -perc_identity 80 -soft_masking true -outfmt 7" }
+        ext.args = { "-task megablast -word_size 28 -best_hit_overhang 0.1 -best_hit_score_edge 0.1 -dust yes -evalue 0.0001 -perc_identity 80 -soft_masking true -outfmt 7" }
     }

     withName: SAMTOOLS_DEPTH{
-       ext.args = { "-aa" }
+        ext.args = { "-aa" }
     }

     withName: SAMTOOLS_SORT{
-       ext.prefix = { "${meta.id}_sorted" }
+        ext.prefix = { "${meta.id}_sorted" }
     }

     withName: KRAKEN2_KRAKEN2 {
-       ext.args = { "--report-zero-counts --use-names --memory-mapping" }
+        ext.args = { "--report-zero-counts --use-names --memory-mapping" }
     }

     withName: FCS_FCSADAPTOR_PROK {
-       ext.args = "--prok"
-       ext.prefix = { "${meta.id}_prok" }
+        ext.args = "--prok"
+        ext.prefix = { "${meta.id}_prok" }
     }

     withName: FCS_FCSADAPTOR_EUK {
-       ext.args = "--euk"
-       ext.prefix = { "${meta.id}_euk" }
-    }
-
-    withName: "FCS_FCSADAPTOR_EUK|FCS_FCSADAPTOR_PROK" {
-        publishDir = [
-            path: { "${params.outdir}/FCS-adaptor" },
-            mode: params.publish_dir_mode,
-        ]
+        ext.args = "--euk"
+        ext.prefix = { "${meta.id}_euk" }
     }

     withName: SED_SED {
-       ext.prefix = { "${meta.id}_fixed" }
-       ext.args = " -e '/>/s/ //g' "
+        ext.prefix = { "${meta.id}_fixed" }
+        ext.args = " -e '/>/s/ //g' "
     }

     withName: '.*:.*:GENERATE_GENOME:GNU_SORT' {
-       ext.prefix = { "${meta.id}_sorted"}
-       ext.args = { '-k2,2 -nr' }
+        ext.prefix = { "${meta.id}_sorted"}
+        ext.args = { '-k2,2 -nr' }
     }

     withName: MINIMAP2_ALIGN_SE {
-       ext.args = {'-ax '+ (meta.readtype.equals("hifi") ? "map-hifi" : meta.readtype.equals("clr") ? "map-pb" : meta.readtype.equals("ont") ? "map-ont" : "") + ' --cs=short' + (reference.size() > 4e9 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
-       ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
+        ext.args = {'-ax '+ (meta.readtype.equals("hifi") ? "map-hifi" : meta.readtype.equals("clr") ? "map-pb" : meta.readtype.equals("ont") ? "map-ont" : "") + ' --cs=short' + (reference.size() > 4e9 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
+        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
     }

     withName: MINIMAP2_ALIGN_ILLUMINA {
-       ext.args = { '-ax sr --cs=short' + (reference.size() > 4294967296 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
-       ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
+        ext.args = { '-ax sr --cs=short' + (reference.size() > 4294967296 ? (" -I" + Math.ceil(reference.size()/1073741824)+"G") : "") }
+        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
     }
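Note for reviewers on the -I arithmetic above: minimap2 loads at most -I bases of reference per index batch (default ~4G), and the expression rounds the reference file size up to whole GiB so a large assembly is indexed in a single batch. A worked sketch with a hypothetical 6 GB reference (Groovy, illustrative numbers only):

// Hypothetical reference size, for illustration only (6 GB FASTA)
def refBytes = 6000000000L

// Mirrors the ext.args expression: bytes -> GiB, rounded up
if (refBytes > 4294967296L) {
    println("-I" + Math.ceil(refBytes / 1073741824) + "G")   // prints: -I6.0G
}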

     withName: NCBITOOLS_VECSCREEN {
-       ext.args = { "-f3" }
+        ext.args = { "-f3" }
     }

     withName: FILTER_VECSCREEN_RESULTS {
-       ext.args = "--skip_reporting_suspect_hits --skip_reporting_weak_hits --skip_reporting_no_hits"
+        ext.args = "--skip_reporting_suspect_hits --skip_reporting_weak_hits --skip_reporting_no_hits"
     }
 }
6 changes: 3 additions & 3 deletions main.nf
@@ -119,9 +119,9 @@ workflow {
         }
         .set { branched_assemblies }

-    branched_assemblies.organellar_genome.view {"ORGANELLAR: $it"}
-    branched_assemblies.sample_genome.view {"GENOMIC: $it"}
-    branched_assemblies.error.view {"ERROR CHANNELS: $it"}
+    // branched_assemblies.organellar_genome.view {"ORGANELLAR: $it"}
+    // branched_assemblies.sample_genome.view {"GENOMIC: $it"}
+    // branched_assemblies.error.view {"ERROR CHANNELS: $it"}


//
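Note for reviewers: a channel's .view is a pass-through debug tap that prints each element flowing through it, so commenting these lines out silences the per-assembly logging without changing the dataflow. A minimal, generic sketch (hypothetical channel, not this pipeline's):

// Hypothetical channel, for illustration: .view prints each tuple as it passes
workflow {
    Channel.of(['sample_a', 'genomic'], ['sample_b', 'organellar'])
        .view { "BRANCHED: $it" }
}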
