Commit 3b158eb
Updating many things
DLBPointon committed Feb 7, 2025
1 parent 880bafc commit 3b158eb
Showing 7 changed files with 69 additions and 57 deletions.
4 changes: 3 additions & 1 deletion assets/test.yaml
@@ -1,4 +1,6 @@
reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/
reads_path:
- /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/Pyoeliiyoelii17XNL_pbtools_simulated_pacbio_reads.fa.gz
- /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/simulated_2.fa.gz
reads_type: "hifi"
pacbio_barcode_file: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa
pacbio_barcode_names: "bc2008,bc2009"
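
With reads_path now a YAML list rather than a single directory string, -params-file hands the workflow a Groovy List. A minimal sketch of what the workflow then sees (launch command and assertion are illustrative assumptions):

    // assumed launch: nextflow run . -profile test -params-file assets/test.yaml
    // params.reads_path arrives as a List<String>:
    assert params.reads_path instanceof List
    params.reads_path.each { p -> println "long-read file: ${p}" }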
48 changes: 22 additions & 26 deletions conf/base.config
@@ -22,23 +22,32 @@ process {

// PIPELINE NESTING
withName: 'SANGER_TOL_BTK|SANGER_TOL_BTK_CASCADE' {
time = { check_max (48.h * task.attempt, 'time') }
time = { check_max (100.h * task.attempt, 'time') }
}

// BLAST RELATED MODULES WHICH WILL NEED THE MOST RESOURCES
// TIME IS STOLEN FROM SANGER-TOL/BLOBTOOLKIT
withName: 'BLAST_BLASTN|BLAST_BLASTN_MOD|DIAMOND_BLASTX' {
cpus = { check_max( 16 * 1, 'cpus' ) }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( 100.h * task.attempt, 'time' ) }
}


withName: 'PACBIO_BARCODE_CHECK:BLAST_BLASTN' {
cpus = { check_max( 16 * 1, 'cpus' ) }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}


// FCS REQUIRES LOADING THE WHOLE FCS DB INTO MEMORY AT ONE TIME
withName: 'FCSGX_RUNGX' {
cpus = { check_max( 32 * 1, 'cpus' ) }
memory = { check_max( 500.GB * task.attempt, 'memory') }
time = { check_max( 50.h * task.attempt, 'time' ) }
}

// For testing on the minimal FCSGX
// withName: 'FCSGX_RUNGX' {
// cpus = { check_max( 16 * 1, 'cpus' ) }
// memory = { check_max( 50.GB * task.attempt, 'memory') }
// time = { check_max( 40.h * task.attempt, 'time' ) }
// }


withName: 'MINIMAP2_ALIGN_SE' {
cpus = { check_max( 16 * task.attempt, 'cpus' ) }
@@ -73,12 +82,6 @@ process {
memory = { check_max( 50.GB * task.attempt, 'memory') }
}

withName: DIAMOND_BLASTX {
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 85.GB * task.attempt, 'memory' ) }
time = { check_max( 20.h * task.attempt, 'time' ) }
}

// PYTHON BASED MODULES WHICH SHOULDN'T NEED MORE THAN 1 CORE AND A MIDDLING AMOUNT OF MEMORY
// WILL BE REVIEWED
withName: 'VALIDATE_TAXID|TRAILINGNS|GC_CONTENT|GET_KMERS_PROFILE|PARSE_FCSGX_RESULT|ASCC_MERGE_TABLES|GET_LARGEST_SCAFF|KMER_COUNT_DIM_REDUCTION|KMER_COUNT_DIM_REDUCTION_COMBINE_CSV|REFORMAT_DIAMOND_OUTFMT6|CONVERT_TO_HITS_FILE|DIAMOND_BLAST_CHUNK_TO_FULL|GENERATE_SAMPLESHEET' {
@@ -87,21 +90,14 @@
time = { check_max( 5.h * task.attempt, 'time' ) }
}

// BLAST RELATED MODULES WHICH WILL NEED THE MOST RESOURCES
// TIME IS STOLEN FROM SANGER-TOL/BLOBTOOLKIT
withName: 'BLAST_BLASTN|BLAST_BLASTN_MOD|DIAMOND_BLASTX' {
cpus = { check_max( 16 * 1, 'cpus' ) }
memory = { check_max( 50.GB * task.attempt, 'memory' ) }
time = { check_max( task.attempt == 1 ? 12.h : ( task.attempt == 2 ? 47.h : 167.h ), 'time' ) }
}

withLabel:KRAKEN2_KRAKEN2 {
withName:KRAKEN2_KRAKEN2 {
// Scaling based on the size of the input genome.
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
memory = { check_max( 72.GB * task.attempt, 'memory' ) }
time = { check_max( 16.h * task.attempt, 'time' ) }
}
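
The withLabel-to-withName switch above is more than cosmetic: withLabel only matches labels declared inside a process body, while withName matches the process name itself. KRAKEN2_KRAKEN2 is a process name, not a declared label, so the old selector presumably never matched and these overrides were silently skipped. A sketch of the distinction (values illustrative):

    process {
        // matches every process that declares `label 'process_high'`:
        withLabel: 'process_high'    { cpus = 12 }
        // matches exactly one process, by its name:
        withName:  'KRAKEN2_KRAKEN2' { cpus = 12 }
    }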


// Process-specific resource requirements
// NOTE - Please try and re-use the labels below as much as possible.
// These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
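
All of these overrides rely on the usual nf-core retry pattern: task.attempt increments on each automatic retry, and check_max caps the escalated request at the pipeline-level ceilings (params.max_cpus, params.max_memory, params.max_time). For reference, a shortened sketch of the helper as it typically appears in nf-core-style configs (time branch only; the real helper also handles memory and cpus):

    // sketch of the standard nf-core check_max helper
    def check_max(obj, type) {
        if (type == 'time') {
            try {
                return obj.compareTo(params.max_time as nextflow.util.Duration) == 1
                    ? params.max_time as nextflow.util.Duration   // cap at the ceiling
                    : obj                                         // request already fits
            } catch (all) {
                println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
                return obj
            }
        }
        return obj
    }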
3 changes: 2 additions & 1 deletion main.nf
@@ -190,7 +190,7 @@ workflow {
include_workflow_steps.contains('ALL') && params.profile_name == 'test'
)
) {
ch_grabbed_reads_path = MAIN_WORKFLOW_GrabFiles( params.reads_path )
ch_grabbed_reads_path = Channel.of(params.reads_path).collect()
} else {
ch_grabbed_reads_path = []
}
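
The dropped MAIN_WORKFLOW_GrabFiles call globbed a directory for read files; with reads_path as a list, a channel factory is enough. A minimal sketch of the new behaviour (file names assumed):

    // Channel.of(list) emits the list as a single item; collect() (flat by
    // default) gathers everything into one emission, so downstream processes
    // receive all read files at once:
    Channel.of(['one.fa.gz', 'two.fa.gz'])
        .collect()
        .view()   // => [one.fa.gz, two.fa.gz]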
@@ -276,6 +276,7 @@ process MAIN_WORKFLOW_GrabFiles {
"true"
}


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
4 changes: 2 additions & 2 deletions modules/local/check_barcode.nf
@@ -8,7 +8,7 @@ process CHECK_BARCODE {
'biocontainers/python:3.9' }"

input:
tuple val(meta) , path(pacbio_dir)
tuple val(meta) , path(pacbio_dir, stageAs:"in/*")
path barcodes
val multiplex_csv

@@ -23,7 +23,7 @@
OUTPUT=\$(\\
pacbio_barcode_check.py \\
-b ${barcodes} \\
-p ${pacbio_dir} \\
-p in/ \\
-m ${multiplex_csv})
cat <<-END_VERSIONS > versions.yml
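
The stageAs change is what makes the hard-coded -p in/ argument safe: Nextflow stages each input file into the task work directory under in/, so the script no longer depends on the original file names or directory layout. A minimal sketch of the staging pattern (process name assumed):

    process STAGE_DEMO {
        input:
        // every staged file is linked into the work dir as in/<name>
        tuple val(meta), path(reads, stageAs: "in/*")

        script:
        """
        ls in/    # files land here regardless of their source paths
        """
    }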
2 changes: 1 addition & 1 deletion nextflow_schema.json
@@ -39,7 +39,7 @@
"fa_icon": "fas fa-hastag"
},
"reads_path": {
"type": "string",
"type": "array",
"description": "folder containing long read data files in FASTA.GZ format",
"fa_icon": "fas fa-file-lines"
},
63 changes: 37 additions & 26 deletions workflows/ascc_genomic.nf
@@ -48,7 +48,7 @@ workflow ASCC_GENOMIC {
validate_taxid_versions // Versions channel from main.nf
include_steps // params.include_steps
exclude_steps // params.exclude_steps
fcs_db // path(path)
fcs_db // [path(path)]
reads

main:
@@ -76,6 +76,8 @@ workflow ASCC_GENOMIC {
log.info "GENOMIC RUN -- EXCLUDE STEPS INC.: $exclude_workflow_steps"


//reads = CollectReads(reads_list)

//
// LOGIC: CREATE btk_busco_run_mode VALUE
//
@@ -180,17 +182,16 @@
)
ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions)

if (!EXTRACT_NT_BLAST.out.ch_blast_hits.ifEmpty(true)) {
ch_nt_blast = EXTRACT_NT_BLAST.out.ch_blast_hits.map{it[1]}
} else {
ch_nt_blast = []
}
//
// TODO: This needs testing
//
ch_nt_blast = EXTRACT_NT_BLAST.out.ch_blast_hits
.map { it -> it[1] }
.ifEmpty { [] }

if (!EXTRACT_NT_BLAST.out.ch_top_lineages.ifEmpty(true)) {
ch_blast_lineage = EXTRACT_NT_BLAST.out.ch_top_lineages.map{it[1]}
} else {
ch_blast_lineage = []
}
ch_blast_lineage = EXTRACT_NT_BLAST.out.ch_top_lineages
.map { it -> it[1] }
.ifEmpty { [] }

} else {
ch_nt_blast = []
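
The rewrite above also fixes a latent bug: ifEmpty returns a channel, not a boolean, so a guard like if (!channel.ifEmpty(true)) tests the truthiness of a channel object and presumably always takes the same branch. The chained form keeps everything as dataflow; a sketch of the pattern now used throughout (names assumed):

    ch_example = SOME_SUBWORKFLOW.out.hits
        .map { it -> it[1] }    // drop the meta map, keep the payload
        .ifEmpty { [] }         // emit a [] placeholder when nothing arrives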
@@ -403,24 +404,20 @@
params.nt_kraken_database_path,
params.ncbi_ranked_lineage_path
)
ch_versions = ch_versions.mix(RUN_NT_KRAKEN.out.versions)

if (!RUN_NT_KRAKEN.out.classified.ifEmpty(true)) {
ch_kraken1 = RUN_NT_KRAKEN.out.classified.map{it[1]}
} else {
ch_kraken1 = []
}
ch_kraken1 = RUN_NT_KRAKEN.out.classified
.map { it -> it[1] }
.ifEmpty { [] }

ch_kraken2 = RUN_NT_KRAKEN.out.report.map{it[1]}
ch_kraken2 = RUN_NT_KRAKEN.out.report
.map { it -> it[1] }
.ifEmpty { [] }

if (!RUN_NT_KRAKEN.out.lineage.ifEmpty(true)) {
ch_kraken3 = RUN_NT_KRAKEN.out.lineage
.map { it -> it[1] }
.ifEmpty { [] }

// TODO: Channel is not getting populated even though it is included.
ch_kraken3 = RUN_NT_KRAKEN.out.lineage
} else {
ch_kraken3 = []
}

ch_versions = ch_versions.mix(RUN_NT_KRAKEN.out.versions)
} else {
ch_kraken1 = []
ch_kraken2 = []
@@ -709,7 +706,7 @@ workflow ASCC_GENOMIC {
println "ASCC_MERGE_TABLES - TR: $ch_tiara" // FROM -- TIARA.classifications[0]
println "ASCC_MERGE_TABLES - K3: $ch_kraken3" // FROM -- RUN_NT_KRAKEN.lineage[0]
println "ASCC_MERGE_TABLES - BL: $ch_blast_lineage" // FROM -- E_NT_BLAST.ch_blast_hits[0]
println "ASCC_MERGE_TABLES - K3: $ch_kmers" // FROM -- G_KMERS_PROF.combined_csv[0]
println "ASCC_MERGE_TABLES - KM: $ch_kmers" // FROM -- G_KMERS_PROF.combined_csv[0]
println "ASCC_MERGE_TABLES - NR: $nr_hits" // FROM -- NR_DIAMOND.reformed[0]
println "ASCC_MERGE_TABLES - UN: $un_hits" // FROM -- UP_DIAMOND.reformed[0]

@@ -744,6 +741,20 @@
).set { ch_collated_versions }
}

// CollectReads {
// tag "${meta.id}"
// executor 'local'

// input:
// tuple val(meta), path("in")

// output:
// tuple val(meta), path("in/*.{fa,fasta}.{gz}")

// "true"
// }


/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
2 changes: 2 additions & 0 deletions workflows/ascc_organellar.nf
@@ -381,6 +381,8 @@ workflow ASCC_ORGANELLAR {
//
// Function: this is to count the length of ONLY the fasta sequence
//
// @param input_file: path
// @return int
def CountFastaLength(input_file) {
int counter = 0;
def list_lines = new File(input_file.toString()).text.readLines()
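
The diff collapses the rest of CountFastaLength's body; for context, a self-contained sketch of such a counter, an assumption rather than the pipeline's exact code: skip '>' header lines and sum the remaining sequence characters.

    // hypothetical sketch -- the real body is collapsed in the diff above
    def CountFastaLengthSketch(input_file) {
        int counter = 0
        new File(input_file.toString()).eachLine { line ->
            if (!line.startsWith('>')) {
                counter += line.trim().length()
            }
        }
        return counter
    }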
