From fe5074a06df31110545ce56912d3f27304817ddd Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Mon, 22 Jul 2024 18:10:23 +0700 Subject: [PATCH 1/2] add condition check to disable bwamem2 index if align_short will not be run --- subworkflows/local/prepare_genome.nf | 35 ++++++++++++++++------------ workflows/readmapping.nf | 13 +++++++++-- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 5264569..0fe5a8b 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -11,7 +11,7 @@ include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' workflow PREPARE_GENOME { take: fasta // channel: [ meta, /path/to/fasta ] - + short_reads // channel: [ meta, /path/to/reads ] main: ch_versions = Channel.empty() @@ -33,24 +33,29 @@ workflow PREPARE_GENOME { UNMASK ( ch_fasta ) ch_versions = ch_versions.mix ( UNMASK.out.versions ) + // def align_short = short_reads ? true : false // Generate BWA index - if ( params.bwamem2_index ) { - Channel.fromPath ( params.bwamem2_index ) - | combine ( ch_fasta ) - | map { bwa, meta, fa -> [ meta, bwa ] } - | set { ch_bwamem } - - if ( params.bwamem2_index.endsWith('.tar.gz') ) { - ch_bwamem2_index = UNTAR ( ch_bwamem ).untar - ch_versions = ch_versions.mix ( UNTAR.out.versions ) + ch_bwamem2_index = Channel.empty() + + if ( short_reads ) { + if ( params.bwamem2_index ) { + Channel.fromPath ( params.bwamem2_index ) + | combine ( ch_fasta ) + | map { bwa, meta, fa -> [ meta, bwa ] } + | set { ch_bwamem } + + if ( params.bwamem2_index.endsWith('.tar.gz') ) { + ch_bwamem2_index = UNTAR ( ch_bwamem ).untar + ch_versions = ch_versions.mix ( UNTAR.out.versions ) + } else { + ch_bwamem2_index = ch_bwamem + } + } else { - ch_bwamem2_index = ch_bwamem + ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index + ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions ) } - - } else { - ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index - ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions ) } diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 9d12b0b..043c745 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -75,7 +75,16 @@ workflow READMAPPING { | set { ch_reads } ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) - + // Determine datatype of short and long reads + INPUT_CHECK.out.reads + | branch { + meta, reads -> + short_reads: meta.datatype == "illumina" || meta.datatype == "hic" + return [meta.datatype, true] + long_reads: meta.datatype == "pacbio" || meta.datatype == "pacbio_clr" || meta.datatype == "ont" + return [meta.datatype, false] + } + | set { ch_filtered_reads } // // SUBWORKFLOW: Uncompress and prepare reference genome files @@ -84,7 +93,7 @@ workflow READMAPPING { | map { [ [ id: it.baseName ], it ] } | set { ch_genome } - PREPARE_GENOME ( ch_genome ) + PREPARE_GENOME ( ch_genome, ch_filtered_reads.short_reads ) ch_versions = ch_versions.mix ( PREPARE_GENOME.out.versions ) From 797099e9bc5cdbab0ebf643fd1109bde26d44246 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Thu, 25 Jul 2024 00:53:31 +0700 Subject: [PATCH 2/2] add function to check datatype --- subworkflows/local/prepare_genome.nf | 34 +++++++++++++++++++++------- workflows/readmapping.nf | 15 +----------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 0fe5a8b..9f51967 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -11,7 +11,6 @@ include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' workflow PREPARE_GENOME { take: fasta // channel: [ meta, /path/to/fasta ] - short_reads // channel: [ meta, /path/to/reads ] main: ch_versions = Channel.empty() @@ -33,12 +32,8 @@ workflow PREPARE_GENOME { UNMASK ( ch_fasta ) ch_versions = ch_versions.mix ( UNMASK.out.versions ) - // def align_short = short_reads ? true : false - // Generate BWA index - ch_bwamem2_index = Channel.empty() - - if ( short_reads ) { + if ( checkShortReads(params.input) ) { if ( params.bwamem2_index ) { Channel.fromPath ( params.bwamem2_index ) | combine ( ch_fasta ) @@ -47,15 +42,17 @@ workflow PREPARE_GENOME { if ( params.bwamem2_index.endsWith('.tar.gz') ) { ch_bwamem2_index = UNTAR ( ch_bwamem ).untar - ch_versions = ch_versions.mix ( UNTAR.out.versions ) + ch_versions = ch_versions.mix ( UNTAR.out.versions.first() ) } else { ch_bwamem2_index = ch_bwamem } } else { ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index - ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions ) + ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions.first() ) } + } else { + ch_bwamem2_index = Channel.empty() } @@ -64,3 +61,24 @@ workflow PREPARE_GENOME { bwaidx = ch_bwamem2_index.first() // channel: [ meta, /path/to/bwamem2/index_dir/ ] versions = ch_versions // channel: [ versions.yml ] } + +def checkShortReads(filePath, columnToCheck="datatype") { + // Define the target values to check + def valuesToCheck = ['illumina', 'hic'] + // Read the CSV file + def csvLines = new File(filePath).readLines() + // Extract the header and find the index of the column + def header = csvLines[0].split(',') + def columnIndex = header.findIndexOf { it == columnToCheck } + // Check if the column index was found + if (columnIndex == -1) { + println "Column '${columnToCheck}' not found in the CSV header." + return false + } + // Check for the values in the specified column and return true if found + def containsValues = csvLines[1..-1].any { line -> + def columns = line.split(',') + valuesToCheck.contains(columns[columnIndex].toLowerCase()) + } + return containsValues +} \ No newline at end of file diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 043c745..32c0805 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -75,25 +75,12 @@ workflow READMAPPING { | set { ch_reads } ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) - // Determine datatype of short and long reads - INPUT_CHECK.out.reads - | branch { - meta, reads -> - short_reads: meta.datatype == "illumina" || meta.datatype == "hic" - return [meta.datatype, true] - long_reads: meta.datatype == "pacbio" || meta.datatype == "pacbio_clr" || meta.datatype == "ont" - return [meta.datatype, false] - } - | set { ch_filtered_reads } - // - // SUBWORKFLOW: Uncompress and prepare reference genome files - // ch_fasta | map { [ [ id: it.baseName ], it ] } | set { ch_genome } - PREPARE_GENOME ( ch_genome, ch_filtered_reads.short_reads ) + PREPARE_GENOME ( ch_genome ) ch_versions = ch_versions.mix ( PREPARE_GENOME.out.versions )