From 3332ebbc1dc877a875acf69f481627b595268ef8 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 24 May 2024 15:32:43 +0200 Subject: [PATCH 01/41] WIP: restructure reference building --- conf/modules.config | 2 +- main.nf | 10 +- nextflow.config | 5 +- subworkflows/local/build_references.nf | 122 +++++++ workflows/build_references.nf | 89 ----- workflows/rnafusion.nf | 436 +++++++++++++------------ 6 files changed, 347 insertions(+), 317 deletions(-) create mode 100644 subworkflows/local/build_references.nf delete mode 100644 workflows/build_references.nf diff --git a/conf/modules.config b/conf/modules.config index a1fe1702..44dec339 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -59,7 +59,7 @@ process { withName: 'FASTQC' { ext.args = '--quiet' - ext.when = { !params.skip_qc } + ext.when = { !params.skip_qc } publishDir = [ path: { "${params.outdir}/fastqc" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index ac96ab66..4c945cf8 100644 --- a/main.nf +++ b/main.nf @@ -20,7 +20,6 @@ nextflow.enable.dsl = 2 include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_rnafusion_pipeline' -include { BUILD_REFERENCES } from './workflows/build_references' include { RNAFUSION } from './workflows/rnafusion' @@ -50,12 +49,9 @@ workflow NFCORE_RNAFUSION { // // WORKFLOW: Run pipeline // - if (params.build_references) { - BUILD_REFERENCES () - } else { - ch_samplesheet = Channel.value(file(params.input, checkIfExists: true)) - RNAFUSION(ch_samplesheet) - } + + ch_samplesheet = Channel.value(file(params.input, checkIfExists: true)) + RNAFUSION(ch_samplesheet) } /* diff --git a/nextflow.config b/nextflow.config index 14127c38..265c3c0b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,11 +33,10 
@@ params { genomes = [:] // Genomes options - fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa" - fai = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.all.fa.fai" + fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa" + fai = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa.fai" gtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf" chrgtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf" - transcript = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.cdna.all.fa.gz" refflat = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf.refflat" rrna_intervals = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list" diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf new file mode 100644 index 00000000..bb2acc4d --- /dev/null +++ b/subworkflows/local/build_references.nf @@ -0,0 +1,122 @@ +/* +======================================================================================== + IMPORT LOCAL MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +include { ARRIBA_DOWNLOAD } from '../../modules/local/arriba/download/main' +include { ENSEMBL_DOWNLOAD } from '../../modules/local/ensembl/main' +include { FUSIONCATCHER_DOWNLOAD } from '../../modules/local/fusioncatcher/download/main' +include { FUSIONREPORT_DOWNLOAD } from '../../modules/local/fusionreport/download/main' +include { HGNC_DOWNLOAD } from '../../modules/local/hgnc/main' +include { STARFUSION_BUILD } from '../../modules/local/starfusion/build/main' +include { 
STARFUSION_DOWNLOAD } from '../../modules/local/starfusion/download/main' +include { GTF_TO_REFFLAT } from '../../modules/local/uscs/custom_gtftogenepred/main' +include { RRNA_TRANSCRIPTS } from '../../modules/local/rrnatranscripts/main' +include { CONVERT2BED } from '../../modules/local/convert2bed/main' +/* +======================================================================================== + IMPORT NF-CORE MODULES/SUBWORKFLOWS +======================================================================================== +*/ + +include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' +include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate/main' +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/gatk4/createsequencedictionary/main' +include { GATK4_BEDTOINTERVALLIST } from '../../modules/nf-core/gatk4/bedtointervallist/main' +include { SALMON_INDEX } from '../../modules/nf-core/salmon/index/main' +include { GFFREAD } from '../../modules/nf-core/gffread/main' + +/* +======================================================================================== + RUN MAIN WORKFLOW +======================================================================================== +*/ + +workflow BUILD_REFERENCES { + + + main: + + ch_versions = Channel.empty() + + def fake_meta = [:] + fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" + + if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || + !file(params.chrgtf).exists() || file(params.chrgtf).isEmpty() || + !file(params.gtf).exists() || file(params.gtf).isEmpty()){ + ENSEMBL_DOWNLOAD(params.ensembl_version, params.genome, fake_meta)} + ch_fasta = {(!file(params.fasta).exists() || file(params.fasta).isEmpty()) ? 
ENSEMBL_DOWNLOAD.out.primary_assembly : Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] }.collect()} + + if (!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || + !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()){ + HGNC_DOWNLOAD( )} + + if (!file(params.fai).exists() || file(params.fai).isEmpty(){ + SAMTOOLS_FAIDX(ENSEMBL_DOWNLOAD.out.primary_assembly, [[],[]])} + + if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty(){ + GATK4_CREATESEQUENCEDICTIONARY(ENSEMBL_DOWNLOAD.out.primary_assembly) + RRNA_TRANSCRIPTS(ENSEMBL_DOWNLOAD.out.gtf) + CONVERT2BED(RRNA_TRANSCRIPTS.out.rrna_gtf) + GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict) + } + + if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty(){ // add condition for qc, check that dirs can also be checked with isEmpty() + GFFREAD(ENSEMBL_DOWNLOAD.out.gtf, ENSEMBL_DOWNLOAD.out.primary_assembly.map { meta, fasta -> [ fasta ] }) + SALMON_INDEX(ENSEMBL_DOWNLOAD.out.primary_assembly.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) + } + + + if ((params.starindex || params.all || params.starfusion || params.arriba) && + (!params.starindex_ref.exits() || params.starindex_ref.isEmpty()) + ) { + STAR_GENOMEGENERATE( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) + } + ch_starindex_ref = ... 
+ + // if (params.arriba || params.all) { + // ARRIBA_DOWNLOAD() + // } + + // if (params.fusioncatcher || params.all) { + // FUSIONCATCHER_DOWNLOAD() + // } + + // if (params.starfusion || params.all) { + // if (params.starfusion_build){ + // STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) + // } else { + // STARFUSION_DOWNLOAD() + // } + // } + + // if (params.starfusion_build){ + // GTF_TO_REFFLAT(ENSEMBL_DOWNLOAD.out.gtf) + // } else { + // GTF_TO_REFFLAT(STARFUSION_DOWNLOAD.out.gtf) + // } + + // if (params.fusionreport || params.all) { + // FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ) + // } + + emit: + ch_fasta + ch_chrgtf = {(!file(params.chrgtf).exists() || file(params.chrgtf).isEmpty()) ? ENSEMBL_DOWNLOAD.out.chrgtf : Channel.fromPath(params.chrgtf).map { that -> [[id:that.Name], that] }.collect()} + ch_gtf = {(!file(params.gtf).exists() || file(params.gtf).isEmpty()) ? ENSEMBL_DOWNLOAD.out.gtf : Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] }.collect()} + ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect() + ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect() + ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() + ch_rrna_interval = params.starfusion_build ? 
Channel.fromPath(params.rrna_intervals).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.interval_list").map { it -> [[id:it.Name], it] }.collect() + ch_salmon_index = Channel.fromPath(params.salmon_index).map { it -> [[id:it.Name], it] }.collect() + + +} + +/* +======================================================================================== + THE END +======================================================================================== +*/ diff --git a/workflows/build_references.nf b/workflows/build_references.nf deleted file mode 100644 index 5b523db3..00000000 --- a/workflows/build_references.nf +++ /dev/null @@ -1,89 +0,0 @@ -/* -======================================================================================== - IMPORT LOCAL MODULES/SUBWORKFLOWS -======================================================================================== -*/ - -include { ARRIBA_DOWNLOAD } from '../modules/local/arriba/download/main' -include { ENSEMBL_DOWNLOAD } from '../modules/local/ensembl/main' -include { FUSIONCATCHER_DOWNLOAD } from '../modules/local/fusioncatcher/download/main' -include { FUSIONREPORT_DOWNLOAD } from '../modules/local/fusionreport/download/main' -include { HGNC_DOWNLOAD } from '../modules/local/hgnc/main' -include { STARFUSION_BUILD } from '../modules/local/starfusion/build/main' -include { STARFUSION_DOWNLOAD } from '../modules/local/starfusion/download/main' -include { GTF_TO_REFFLAT } from '../modules/local/uscs/custom_gtftogenepred/main' -include { RRNA_TRANSCRIPTS } from '../modules/local/rrnatranscripts/main' -include { CONVERT2BED } from '../modules/local/convert2bed/main' -/* -======================================================================================== - IMPORT NF-CORE MODULES/SUBWORKFLOWS -======================================================================================== -*/ - -include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' -include { 
STAR_GENOMEGENERATE } from '../modules/nf-core/star/genomegenerate/main' -include { GATK4_CREATESEQUENCEDICTIONARY } from '../modules/nf-core/gatk4/createsequencedictionary/main' -include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/gatk4/bedtointervallist/main' -include { SALMON_INDEX } from '../modules/nf-core/salmon/index/main' -include { GFFREAD } from '../modules/nf-core/gffread/main' - -/* -======================================================================================== - RUN MAIN WORKFLOW -======================================================================================== -*/ - -workflow BUILD_REFERENCES { - - def fake_meta = [:] - fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" - ENSEMBL_DOWNLOAD( params.ensembl_version, params.genome, fake_meta ) - HGNC_DOWNLOAD( ) - SAMTOOLS_FAIDX(ENSEMBL_DOWNLOAD.out.primary_assembly, [[],[]]) - GATK4_CREATESEQUENCEDICTIONARY(ENSEMBL_DOWNLOAD.out.primary_assembly) - - RRNA_TRANSCRIPTS(ENSEMBL_DOWNLOAD.out.gtf) - CONVERT2BED(RRNA_TRANSCRIPTS.out.rrna_gtf) - - GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict) - - GFFREAD(ENSEMBL_DOWNLOAD.out.gtf, ENSEMBL_DOWNLOAD.out.primary_assembly.map { meta, fasta -> [ fasta ] }) - SALMON_INDEX(ENSEMBL_DOWNLOAD.out.primary_assembly.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) - - if (params.starindex || params.all || params.starfusion || params.arriba) { - STAR_GENOMEGENERATE( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) - } - - if (params.arriba || params.all) { - ARRIBA_DOWNLOAD() - } - - if (params.fusioncatcher || params.all) { - FUSIONCATCHER_DOWNLOAD() - } - - if (params.starfusion || params.all) { - if (params.starfusion_build){ - STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) - } else { - STARFUSION_DOWNLOAD() - } - } - - if (params.starfusion_build){ - 
GTF_TO_REFFLAT(ENSEMBL_DOWNLOAD.out.gtf) - } else { - GTF_TO_REFFLAT(STARFUSION_DOWNLOAD.out.gtf) - } - - if (params.fusionreport || params.all) { - FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ) - } - -} - -/* -======================================================================================== - THE END -======================================================================================== -*/ diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 45fd77eb..bf7fa1b8 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -12,25 +12,6 @@ include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pi include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' -ch_chrgtf = params.starfusion_build ? Channel.fromPath(params.chrgtf).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_annot.gtf").map { it -> [[id:it.Name], it] }.collect() -ch_starindex_ref = params.starfusion_build ? Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect() -ch_starindex_ensembl_ref = Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() -ch_refflat = params.starfusion_build ? Channel.fromPath(params.refflat).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.gtf.refflat").map { it -> [[id:it.Name], it] }.collect() -ch_rrna_interval = params.starfusion_build ? 
Channel.fromPath(params.rrna_intervals).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.interval_list").map { it -> [[id:it.Name], it] }.collect() -ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect() -ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect() -ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect() -ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect() -ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.Name], it] }.collect() -ch_gtf = Channel.fromPath(params.gtf).map { it -> [[id:it.Name], it] }.collect() -ch_salmon_index = Channel.fromPath(params.salmon_index).map { it -> [[id:it.Name], it] }.collect() -ch_transcript = Channel.fromPath(params.transcript).map { it -> [[id:it.Name], it] }.collect() -ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -40,7 +21,7 @@ ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // - +include { BUILD_REFERENCES } from '../subworkflows/local/build_references' include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' include { QC_WORKFLOW } from 
'../subworkflows/local/qc_workflow' @@ -75,212 +56,233 @@ workflow RNAFUSION { main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - - // - // Create channel from input file provided through params.input - // - Channel - .fromSamplesheet("input") - .map { - meta, fastq_1, fastq_2, strandedness -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { - validateInputSamplesheet(it) - } - .branch { - meta, fastqs -> - single : fastqs.size() == 1 - return [ meta, fastqs.flatten() ] - multiple: fastqs.size() > 1 - return [ meta, fastqs.flatten() ] - } - .set { ch_fastq } - - // - // MODULE: Concatenate FastQ files from same sample if required - // - CAT_FASTQ ( - ch_fastq.multiple - ) - .reads - .mix(ch_fastq.single) - .set { ch_cat_fastq } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) - - // - // MODULE: Run FastQC - // - FASTQC ( - ch_cat_fastq - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions) + // Reference channels + // ch_starindex_ref = params.starfusion_build ? Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect() + // ch_starindex_ensembl_ref = Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() + // ch_refflat = params.starfusion_build ? 
Channel.fromPath(params.refflat).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.gtf.refflat").map { it -> [[id:it.Name], it] }.collect() + // ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { it -> [[id:it.Name], it] }.collect() + // ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it -> [[id:it.Name], it] }.collect() + // ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect() + // ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect() + // ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect() - TRIM_WORKFLOW ( - ch_cat_fastq - ) - ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher - ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all - ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) - SALMON_QUANT( ch_reads_all, ch_salmon_index.map{ meta, index -> index }, ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // - // SUBWORKFLOW: Run STAR alignment and Arriba + // Create references if necessary // - ARRIBA_WORKFLOW ( - ch_reads_all, - ch_gtf, - ch_fasta, - ch_starindex_ensembl_ref, - ch_arriba_ref_blacklist, - ch_arriba_ref_known_fusions, - ch_arriba_ref_protein_domains - ) - ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) - - -//Run STAR fusion - STARFUSION_WORKFLOW ( - ch_reads_all, - ch_chrgtf, - ch_starindex_ref, - ch_fasta - ) - ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) - - -//Run fusioncatcher - FUSIONCATCHER_WORKFLOW ( - ch_reads_fusioncatcher - ) - ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) - - -//Run stringtie - STRINGTIE_WORKFLOW ( - STARFUSION_WORKFLOW.out.ch_bam_sorted, - ch_chrgtf - ) - ch_versions = 
ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) - - - //Run fusion-report - FUSIONREPORT_WORKFLOW ( - ch_reads_all, - ch_fusionreport_ref, - ARRIBA_WORKFLOW.out.fusions, - STARFUSION_WORKFLOW.out.fusions, - FUSIONCATCHER_WORKFLOW.out.fusions - ) - ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) - - - //Run fusionInpector - FUSIONINSPECTOR_WORKFLOW ( - ch_reads_all, - FUSIONREPORT_WORKFLOW.out.fusion_list, - FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, - FUSIONREPORT_WORKFLOW.out.report, - FUSIONREPORT_WORKFLOW.out.csv, - STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, - ch_chrgtf, - ch_arriba_ref_protein_domains, - ch_arriba_ref_cytobands, - ch_hgnc_ref, - ch_hgnc_date - ) - ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) - - - //QC - QC_WORKFLOW ( - ch_reads_all, - STARFUSION_WORKFLOW.out.ch_bam_sorted, - STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, - ch_chrgtf, - ch_refflat, - ch_fasta, - ch_fai, - ch_rrna_interval - ) - ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_pipeline_software_mqc_versions.yml', - sort: true, - newLine: true - ).set { ch_collated_versions } - - - // - // MODULE: MultiQC - // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? - Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
- file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( - ch_methods_description.collectFile( - name: 'methods_description_mqc.yaml', - sort: true - ) - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_html.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_json.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastqc_trimmed.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - - emit: - multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + BUILD_REFERENCES() + + +// // +// // Create channel from input file provided 
through params.input +// // +// Channel +// .fromSamplesheet("input") +// .map { +// meta, fastq_1, fastq_2, strandedness -> +// if (!fastq_2) { +// return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] +// } else { +// return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] +// } +// } +// .groupTuple() +// .map { +// validateInputSamplesheet(it) +// } +// .branch { +// meta, fastqs -> +// single : fastqs.size() == 1 +// return [ meta, fastqs.flatten() ] +// multiple: fastqs.size() > 1 +// return [ meta, fastqs.flatten() ] +// } +// .set { ch_fastq } + +// // +// // MODULE: Concatenate FastQ files from same sample if required +// // +// CAT_FASTQ ( +// ch_fastq.multiple +// ) +// .reads +// .mix(ch_fastq.single) +// .set { ch_cat_fastq } +// ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + + +// // +// // MODULE: Run FastQC +// // +// FASTQC ( +// ch_cat_fastq +// ) +// ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) +// ch_versions = ch_versions.mix(FASTQC.out.versions) + +// TRIM_WORKFLOW ( +// ch_cat_fastq +// ) +// ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher +// ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all +// ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + + +// SALMON_QUANT( ch_reads_all, ch_salmon_index.map{ meta, index -> index }, ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') + + +// // +// // SUBWORKFLOW: Run STAR alignment and Arriba +// // +// ARRIBA_WORKFLOW ( +// ch_reads_all, +// ch_gtf, +// ch_fasta, +// ch_starindex_ensembl_ref, +// ch_arriba_ref_blacklist, +// ch_arriba_ref_known_fusions, +// ch_arriba_ref_protein_domains +// ) +// ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) + + +// //Run STAR fusion +// STARFUSION_WORKFLOW ( +// ch_reads_all, +// ch_chrgtf, +// ch_starindex_ref, +// ch_fasta +// ) +// ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) + + +// //Run fusioncatcher +// FUSIONCATCHER_WORKFLOW ( +// 
ch_reads_fusioncatcher +// ) +// ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) + + +// //Run stringtie +// STRINGTIE_WORKFLOW ( +// STARFUSION_WORKFLOW.out.ch_bam_sorted, +// ch_chrgtf +// ) +// ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) + + +// //Run fusion-report +// FUSIONREPORT_WORKFLOW ( +// ch_reads_all, +// ch_fusionreport_ref, +// ARRIBA_WORKFLOW.out.fusions, +// STARFUSION_WORKFLOW.out.fusions, +// FUSIONCATCHER_WORKFLOW.out.fusions +// ) +// ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) + + +// //Run fusionInpector +// FUSIONINSPECTOR_WORKFLOW ( +// ch_reads_all, +// FUSIONREPORT_WORKFLOW.out.fusion_list, +// FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, +// FUSIONREPORT_WORKFLOW.out.report, +// FUSIONREPORT_WORKFLOW.out.csv, +// STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, +// ch_chrgtf, +// ch_arriba_ref_protein_domains, +// ch_arriba_ref_cytobands, +// ch_hgnc_ref, +// ch_hgnc_date +// ) +// ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) + + +// //QC +// QC_WORKFLOW ( +// ch_reads_all, +// STARFUSION_WORKFLOW.out.ch_bam_sorted, +// STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, +// ch_chrgtf, +// ch_refflat, +// ch_fasta, +// ch_fai, +// ch_rrna_interval +// ) +// ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) + +// // +// // Collate and save software versions +// // +// softwareVersionsToYAML(ch_versions) +// .collectFile( +// storeDir: "${params.outdir}/pipeline_info", +// name: 'nf_core_pipeline_software_mqc_versions.yml', +// sort: true, +// newLine: true +// ).set { ch_collated_versions } + + +// // +// // MODULE: MultiQC +// // +// ch_multiqc_config = Channel.fromPath( +// "$projectDir/assets/multiqc_config.yml", checkIfExists: true) +// ch_multiqc_custom_config = params.multiqc_config ? +// Channel.fromPath(params.multiqc_config, checkIfExists: true) : +// Channel.empty() +// ch_multiqc_logo = params.multiqc_logo ? 
+// Channel.fromPath(params.multiqc_logo, checkIfExists: true) : +// Channel.empty() + +// summary_params = paramsSummaryMap( +// workflow, parameters_schema: "nextflow_schema.json") +// ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + +// ch_multiqc_custom_methods_description = params.multiqc_methods_description ? +// file(params.multiqc_methods_description, checkIfExists: true) : +// file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +// ch_methods_description = Channel.value( +// methodsDescriptionText(ch_multiqc_custom_methods_description)) + +// ch_multiqc_files = ch_multiqc_files.mix( +// ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) +// ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) +// ch_multiqc_files = ch_multiqc_files.mix( +// ch_methods_description.collectFile( +// name: 'methods_description_mqc.yaml', +// sort: true +// ) +// ) +// ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_html.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_json.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastqc_trimmed.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([])) +// ch_multiqc_files = 
ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([])) + +// MULTIQC ( +// ch_multiqc_files.collect(), +// ch_multiqc_config.toList(), +// ch_multiqc_custom_config.toList(), +// ch_multiqc_logo.toList() +// ) + +// emit: +// multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html +// versions = ch_versions // channel: [ path(versions.yml) ] } From bf0c33e179290f060a661301e906cb3d24435951 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 28 May 2024 15:39:38 +0200 Subject: [PATCH 02/41] refactor downloading of references --- modules/local/fusioncatcher/build/main.nf | 42 +++++ modules/local/fusioncatcher/build/meta.yml | 25 +++ modules/local/fusioncatcher/download/main.nf | 30 ++-- .../local/uscs/custom_gtftogenepred/main.nf | 1 + nextflow.config | 30 ++-- nextflow_schema.json | 48 +++--- subworkflows/local/build_references.nf | 144 +++++++++++------- workflows/rnafusion.nf | 9 +- 8 files changed, 220 insertions(+), 109 deletions(-) create mode 100644 modules/local/fusioncatcher/build/main.nf create mode 100644 modules/local/fusioncatcher/build/meta.yml diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf new file mode 100644 index 00000000..afab999c --- /dev/null +++ b/modules/local/fusioncatcher/build/main.nf @@ -0,0 +1,42 @@ +process FUSIONCATCHER_BUILD { + tag "fusioncatcher_build" + label 'process_medium' + + conda "bioconda::fusioncatcher=1.33" + container "docker.io/clinicalgenomics/fusioncatcher:1.33" + + input: + val ensembl_version + + output: + path "*" , emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + + def args = task.ext.args ?: '' + """ + fusioncatcher-build \\ + -g homo_sapiens \\ + -o human_${human_version} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo 
\$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ + + stub: + """ + mkdir human_v${ensembl_version} + touch human_v${ensembl_version}/ensembl_fully_overlapping_genes.txt + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/local/fusioncatcher/build/meta.yml b/modules/local/fusioncatcher/build/meta.yml new file mode 100644 index 00000000..40421a4e --- /dev/null +++ b/modules/local/fusioncatcher/build/meta.yml @@ -0,0 +1,25 @@ +name: fusioncatcher_download +description: Build genome for fusioncatcher +keywords: + - sort +tools: + - fusioncatcher: + description: Build genome for fusioncatcher + homepage: https://github.com/ndaniel/fusioncatcher/ + documentation: https://github.com/ndaniel/fusioncatcher/blob/master/doc/manual.md + tool_dev_url: https://github.com/ndaniel/fusioncatcher/ + doi: "10.1101/011650" + licence: ["GPL v3"] + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reference: + type: directory + description: Path to fusioncatcher references + pattern: "*" + +authors: + - "@praveenraj2018, @rannick" diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index 156e70b6..7f59d920 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -5,6 +5,10 @@ process FUSIONCATCHER_DOWNLOAD { conda "bioconda::fusioncatcher=1.33" container "docker.io/clinicalgenomics/fusioncatcher:1.33" + + input: + val ensembl_version + output: path "*" , emit: reference path "versions.yml" , emit: versions @@ -16,22 +20,14 @@ process FUSIONCATCHER_DOWNLOAD { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def human_version = "v102" - def url = "http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.aa" + def url = """ - if wget --spider "$url" 
2>/dev/null; then - wget $args $url - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.ab - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.ac - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${human_version}.tar.gz.ad - cat human_${human_version}.tar.gz.* | tar xz - rm human_${human_version}.tar* - else - fusioncatcher-build \\ - -g homo_sapiens \\ - -o human_${human_version} \\ - $args2 - fi + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.aa + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ab + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ac + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ad + cat human_${ensembl_version}.tar.gz.* | tar xz + rm human_${ensembl_version}.tar* cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,9 +36,9 @@ process FUSIONCATCHER_DOWNLOAD { """ stub: - def human_version = "v102" """ - mkdir human_${human_version} + mkdir human_v${ensembl_version} + touch human_v${ensembl_version}/ensembl_fully_overlapping_genes.txt cat <<-END_VERSIONS > versions.yml "${task.process}": fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) diff --git a/modules/local/uscs/custom_gtftogenepred/main.nf b/modules/local/uscs/custom_gtftogenepred/main.nf index 78fcbd29..53a74e3d 100644 --- a/modules/local/uscs/custom_gtftogenepred/main.nf +++ b/modules/local/uscs/custom_gtftogenepred/main.nf @@ -1,4 +1,5 @@ process GTF_TO_REFFLAT { + tag "$meta.id" label 'process_low' conda "bioconda::ucsc-gtftogenepred=377" diff --git a/nextflow.config b/nextflow.config index 265c3c0b..fe2dd83d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,19 +27,11 @@ params { // Genome genome = 'GRCh38' genomes_base = 
"${params.outdir}/references" - ensembl_version = 102 + ensembl_version = 112 read_length = 100 starfusion_build = true genomes = [:] - // Genomes options - fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa" - fai = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa.fai" - gtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf" - chrgtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf" - refflat = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf.refflat" - rrna_intervals = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list" - // Filtering tools_cutoff = 1 @@ -72,14 +64,21 @@ params { skip_qc = false skip_vis = false + // Download references option + download_refs = true + // Path to references + fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa" + fai = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa.fai" + gtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf" + chrgtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf" + refflat = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf.refflat" + rrna_intervals = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list" ensembl_ref = "${params.genomes_base}/ensembl" - arriba_ref = "${params.genomes_base}/arriba" arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz" - arriba_ref_cytobands = "${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv" 
arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz" arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3" - fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v102" + fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v${params.ensembl_version}" hgnc_ref = "${params.genomes_base}/hgnc/hgnc_complete_set.txt" hgnc_date = "${params.genomes_base}/hgnc/HGNC-DB-timestamp.txt" salmon_index = "${params.genomes_base}/salmon/salmon" @@ -88,6 +87,13 @@ params { fusionreport_ref = "${params.genomes_base}/fusion_report_db" + // Internal file presence checks + salmon_index_stub_check = "${params.genomes_base}/salmon/salmon/complete_ref_lens.bin" + starindex_ref_stub_check = "${params.genomes_base}/star/star/Genome" + fusionreport_ref_stub_check = "${params.genomes_base}/fusion_report_db/mitelman.db" + fusioncatcher_ref_stub_check = "${params.genomes_base}/fusioncatcher/human_v${params.ensembl_version}/ensembl_fully_overlapping_genes.txt" + starfusion_ref_stub_check = "${params.genomes_base}/starfusion/Pfam-A.hmm" + // Path to fusion outputs arriba_fusions = null starfusion_fusions = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 69270631..0889caa2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -105,21 +105,11 @@ "fa_icon": "far fa-file-code", "description": "Build or run arriba references/analyses" }, - "arriba_ref": { - "type": "string", - "fa_icon": "far fa-file-code", - "description": "Path to arriba references" - }, "arriba_ref_blacklist": { "type": "string", "fa_icon": "far fa-file-code", "description": "Path to arriba reference blacklist" }, - "arriba_ref_cytobands": { - "type": "string", - "fa_icon": "far fa-file-code", - "description": "Path to arriba reference cytobands" - }, "arriba_ref_known_fusions": { "type": "string", "fa_icon": "far fa-file-code", @@ -135,6 +125,11 @@ "fa_icon": "far fa-file-code", 
"description": "Path to arriba output" }, + "download_refs": { + "type": "boolean", + "fa_icon": "far fa-file-code", + "description": "Download references instead of building them (for fusioncatcher and starfusion)" + }, "ensembl_ref": { "type": "string", "fa_icon": "far fa-file-code", @@ -160,6 +155,11 @@ "fa_icon": "far fa-file-code", "description": "Path to fusioncatcher references" }, + "fusioncatcher_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in fusioncatcher references" + }, "fusioninspector_limitSjdbInsertNsj": { "type": "integer", "fa_icon": "far fa-file-code", @@ -185,6 +185,11 @@ "fa_icon": "far fa-file-code", "description": "Path to fusionreport references" }, + "fusionreport_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in fusionreport references" + }, "hgnc_ref": { "type": "string", "fa_icon": "far fa-file-code", @@ -205,6 +210,11 @@ "fa_icon": "far fa-file-code", "description": "Path to salmon index" }, + "salmon_index_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in salmon index" + }, "starfusion": { "type": "boolean", "fa_icon": "far fa-file-code", @@ -220,6 +230,11 @@ "fa_icon": "far fa-file-code", "description": "Path to starfusion references" }, + "starfusion_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in starfusion references" + }, "starindex": { "type": "boolean", "fa_icon": "far fa-file-code", @@ -230,6 +245,11 @@ "fa_icon": "far fa-file-code", "description": "Path to starindex references" }, + "starindex_ref_stub_check": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to file in starindex references" + }, "stringtie": { "type": "boolean", "fa_icon": "far fa-file-code", @@ -326,14 +346,6 @@ "description": "Path to GTF genome file.", "fa_icon": "far fa-file-code" }, - "transcript": { - 
"type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", - "description": "Path to GTF genome file.", - "fa_icon": "far fa-file-code" - }, "refflat": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index bb2acc4d..0101c4d3 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -7,6 +7,7 @@ include { ARRIBA_DOWNLOAD } from '../../modules/local/arriba/download/main' include { ENSEMBL_DOWNLOAD } from '../../modules/local/ensembl/main' include { FUSIONCATCHER_DOWNLOAD } from '../../modules/local/fusioncatcher/download/main' +include { FUSIONCATCHER_BUILD } from '../../modules/local/fusioncatcher/build/main' include { FUSIONREPORT_DOWNLOAD } from '../../modules/local/fusionreport/download/main' include { HGNC_DOWNLOAD } from '../../modules/local/hgnc/main' include { STARFUSION_BUILD } from '../../modules/local/starfusion/build/main' @@ -37,82 +38,117 @@ workflow BUILD_REFERENCES { main: - ch_versions = Channel.empty() - def fake_meta = [:] - fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || - !file(params.chrgtf).exists() || file(params.chrgtf).isEmpty() || - !file(params.gtf).exists() || file(params.gtf).isEmpty()){ + !file(params.chrgtf).exists() || file(params.chrgtf).isEmpty() || + !file(params.gtf).exists() || file(params.gtf).isEmpty()){ + fake_meta = [:] + fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" ENSEMBL_DOWNLOAD(params.ensembl_version, params.genome, fake_meta)} - ch_fasta = {(!file(params.fasta).exists() || file(params.fasta).isEmpty()) ? 
ENSEMBL_DOWNLOAD.out.primary_assembly : Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] }.collect()} + ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] } + ch_chrgtf = Channel.fromPath(params.chrgtf).map { that -> [[id:that.Name], that] } + ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] } if (!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || - !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()){ + !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()){ HGNC_DOWNLOAD( )} + ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { that -> [[id:that.Name], that] } + ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] } + - if (!file(params.fai).exists() || file(params.fai).isEmpty(){ - SAMTOOLS_FAIDX(ENSEMBL_DOWNLOAD.out.primary_assembly, [[],[]])} + if (!file(params.fai).exists() || file(params.fai).isEmpty()){ + SAMTOOLS_FAIDX(ch_fasta, [[],[]])} + ch_fai = Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } - if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty(){ - GATK4_CREATESEQUENCEDICTIONARY(ENSEMBL_DOWNLOAD.out.primary_assembly) - RRNA_TRANSCRIPTS(ENSEMBL_DOWNLOAD.out.gtf) + + if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){ + GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) + RRNA_TRANSCRIPTS(ch_gtf) CONVERT2BED(RRNA_TRANSCRIPTS.out.rrna_gtf) GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict) } + ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } + - if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty(){ // add condition for qc, check that dirs can also be checked with isEmpty() - GFFREAD(ENSEMBL_DOWNLOAD.out.gtf, ENSEMBL_DOWNLOAD.out.primary_assembly.map { meta, fasta -> [ fasta ] }) - 
SALMON_INDEX(ENSEMBL_DOWNLOAD.out.primary_assembly.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) + if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){ + GTF_TO_REFFLAT(ch_gtf)} + ch_refflat = Channel.fromPath(params.refflat) + + + if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() || + !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc + GFFREAD(ch_gtf, ch_fasta.map{ meta, fasta -> [ fasta ] }) + SALMON_INDEX(ch_fasta.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) } + ch_salmon_index = Channel.fromPath(params.salmon_index) if ((params.starindex || params.all || params.starfusion || params.arriba) && - (!params.starindex_ref.exits() || params.starindex_ref.isEmpty()) - ) { - STAR_GENOMEGENERATE( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) + (!file(params.starindex_ref).exists() || file(params.starindex_ref).isEmpty() || + !file(params.starindex_ref_stub_check).exists() || file(params.starindex_ref_stub_check).isEmpty() )) { + STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + } + ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] } + + + if ((params.arriba || params.all) && + (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || + !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || + !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { + ARRIBA_DOWNLOAD()} + ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } + ch_arriba_ref_protein_domains = 
Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } + + + if ((params.fusioncatcher || params.all) && + (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || + !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { + if (params.download_refs) { + FUSIONCATCHER_DOWNLOAD(params.ensembl_version)} + else { + FUSIONCATCHER_BUILD(params.ensembl_version)} } - ch_starindex_ref = ... - - // if (params.arriba || params.all) { - // ARRIBA_DOWNLOAD() - // } - - // if (params.fusioncatcher || params.all) { - // FUSIONCATCHER_DOWNLOAD() - // } - - // if (params.starfusion || params.all) { - // if (params.starfusion_build){ - // STARFUSION_BUILD( ENSEMBL_DOWNLOAD.out.primary_assembly, ENSEMBL_DOWNLOAD.out.gtf ) - // } else { - // STARFUSION_DOWNLOAD() - // } - // } - - // if (params.starfusion_build){ - // GTF_TO_REFFLAT(ENSEMBL_DOWNLOAD.out.gtf) - // } else { - // GTF_TO_REFFLAT(STARFUSION_DOWNLOAD.out.gtf) - // } - - // if (params.fusionreport || params.all) { - // FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ) - // } + ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } - emit: - ch_fasta - ch_chrgtf = {(!file(params.chrgtf).exists() || file(params.chrgtf).isEmpty()) ? ENSEMBL_DOWNLOAD.out.chrgtf : Channel.fromPath(params.chrgtf).map { that -> [[id:that.Name], that] }.collect()} - ch_gtf = {(!file(params.gtf).exists() || file(params.gtf).isEmpty()) ? ENSEMBL_DOWNLOAD.out.gtf : Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] }.collect()} - ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect() - ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect() - ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() - ch_rrna_interval = params.starfusion_build ? 
Channel.fromPath(params.rrna_intervals).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.interval_list").map { it -> [[id:it.Name], it] }.collect() - ch_salmon_index = Channel.fromPath(params.salmon_index).map { it -> [[id:it.Name], it] }.collect() + + if ((params.starfusion || params.all) && + (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || + !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { + if (params.download_refs) { + STARFUSION_DOWNLOAD( ch_fasta, ch_gtf )} + else { + STARFUSION_BUILD( ch_fasta, ch_gtf )} + } + ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] } + if ((params.fusionreport || params.all) && + (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || + !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { + if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } + FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd )} + ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } + + emit: + ch_fasta + ch_chrgtf + ch_gtf + ch_hgnc_ref + ch_hgnc_date + ch_fai + ch_rrna_interval + ch_refflat + ch_salmon_index + ch_starindex_ref + ch_arriba_ref_blacklist + ch_arriba_ref_known_fusions + ch_arriba_ref_protein_domains + ch_fusioncatcher_ref + ch_starfusion_ref + ch_fusionreport_ref } /* diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index bf7fa1b8..64bd3f99 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -58,14 +58,7 @@ workflow RNAFUSION { // Reference channels - // ch_starindex_ref = params.starfusion_build ? 
Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect() - // ch_starindex_ensembl_ref = Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() - // ch_refflat = params.starfusion_build ? Channel.fromPath(params.refflat).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.gtf.refflat").map { it -> [[id:it.Name], it] }.collect() - // ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { it -> [[id:it.Name], it] }.collect() - // ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it -> [[id:it.Name], it] }.collect() - // ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect() - // ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect() - // ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect() + From a19abbde7f1268bf49b5e57a7485040fc592468d Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 28 May 2024 21:14:21 +0200 Subject: [PATCH 03/41] fix params --- modules/local/fusioncatcher/build/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index afab999c..c5eeeb45 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -21,7 +21,7 @@ process FUSIONCATCHER_BUILD { """ fusioncatcher-build \\ -g homo_sapiens \\ - -o human_${human_version} \\ + -o human_v${ensembl_version} \\ $args cat <<-END_VERSIONS > versions.yml From 93fbfb8bc795c3fe405d9e98c6199469e213e203 Mon Sep 17 00:00:00 2001 From: Annick Renevey 
<47788523+rannick@users.noreply.github.com> Date: Tue, 28 May 2024 21:25:30 +0200 Subject: [PATCH 04/41] fix --- modules/local/fusioncatcher/build/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index c5eeeb45..51353556 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -19,7 +19,7 @@ process FUSIONCATCHER_BUILD { def args = task.ext.args ?: '' """ - fusioncatcher-build \\ + fusioncatcher-build.py \\ -g homo_sapiens \\ -o human_v${ensembl_version} \\ $args From b7dd1375cd74fa4c9b37edc415c665976b708930 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 13 Jun 2024 11:41:44 +0200 Subject: [PATCH 05/41] update refs generation --- conf/modules.config | 12 ++- modules/local/fusioncatcher/build/main.nf | 4 +- modules/local/fusioncatcher/download/main.nf | 2 +- nextflow.config | 1 + nextflow_schema.json | 4 + subworkflows/local/build_references.nf | 101 +++++++++++------- workflows/rnafusion.nf | 106 +++++++++---------- 7 files changed, 131 insertions(+), 99 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 44dec339..e78f73db 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -45,6 +45,10 @@ process { ] } + withName: 'CAT_FASTQ' { + ext.when = { !params.build_references } + } + withName: 'ENSEMBL_DOWNLOAD' { publishDir = [ path: { "${params.genomes_base}/ensembl" }, @@ -59,7 +63,7 @@ process { withName: 'FASTQC' { ext.args = '--quiet' - ext.when = { !params.skip_qc } + ext.when = { !params.skip_qc and not !params.build_references } publishDir = [ path: { "${params.outdir}/fastqc" }, mode: params.publish_dir_mode, @@ -69,6 +73,7 @@ process { withName: 'FASTQC_FOR_FASTP' { ext.args = '--quiet' + ext.when = ( !params.skip_qc) ext.prefix = { "${meta.id}_trimmed" } publishDir = [ path: { "${params.outdir}/fastqc_for_fastp" }, @@ 
-362,6 +367,11 @@ process { ] } + withName: 'TRIM_WORKFLOW:*' { + ext.when = { !params.build_references } + + } + withName: 'VCF_COLLECT' { ext.when = {!params.fusioninspector_only} } diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index 51353556..15f7afed 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -9,8 +9,8 @@ process FUSIONCATCHER_BUILD { val ensembl_version output: - path "*" , emit: reference - path "versions.yml" , emit: versions + path "human_v${ensembl_version}" , emit: reference + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index 7f59d920..ff146c95 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -10,7 +10,7 @@ process FUSIONCATCHER_DOWNLOAD { val ensembl_version output: - path "*" , emit: reference + path "human_v${ensembl_version}" , emit: reference path "versions.yml" , emit: versions when: diff --git a/nextflow.config b/nextflow.config index fe2dd83d..b7d78f7d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -62,6 +62,7 @@ params { // Skip steps skip_qc = false + skip_vcf = false skip_vis = false // Download references option diff --git a/nextflow_schema.json b/nextflow_schema.json index 0889caa2..604e7b00 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,6 +15,10 @@ "type": "boolean", "description": "Skip QC steps" }, + "skip_vcf": { + "type": "boolean", + "description": "Skip vcf creation step" + } "skip_vis": { "type": "boolean", "description": "Skip visualisation steps" diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 0101c4d3..32602cc1 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -46,21 +46,30 @@ workflow 
BUILD_REFERENCES { !file(params.gtf).exists() || file(params.gtf).isEmpty()){ fake_meta = [:] fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" - ENSEMBL_DOWNLOAD(params.ensembl_version, params.genome, fake_meta)} - ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] } - ch_chrgtf = Channel.fromPath(params.chrgtf).map { that -> [[id:that.Name], that] } - ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] } + ENSEMBL_DOWNLOAD(params.ensembl_version, params.genome, fake_meta) + ch_fasta = ENSEMBL_DOWNLOAD.out.primary_assembly + ch_chrgtf = ENSEMBL_DOWNLOAD.out.chrgtf + ch_gtf = ENSEMBL_DOWNLOAD.out.gtf + } else { + ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] } + ch_chrgtf = Channel.fromPath(params.chrgtf).map { that -> [[id:that.Name], that] } + ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] } + } + - if (!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || - !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()){ - HGNC_DOWNLOAD( )} - ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { that -> [[id:that.Name], that] } - ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] } + if ((!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || + !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) and not params.skip_vcf){ + HGNC_DOWNLOAD( ) + ch_hgnc_ref = HGNC_DOWNLOAD.out.hgnc_ref + ch_hgnc_date =HGNC_DOWNLOAD.out.hgnc_date + } else { + ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { that -> [[id:that.Name], that] } + ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] } + } - if (!file(params.fai).exists() || file(params.fai).isEmpty()){ - SAMTOOLS_FAIDX(ch_fasta, [[],[]])} - ch_fai = Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } + + ch_fai = (!file(params.fai).exists() || 
file(params.fai).isEmpty()) ? SAMTOOLS_FAIDX(ch_fasta, [[],[]]).fai : Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){ @@ -68,69 +77,81 @@ workflow BUILD_REFERENCES { RRNA_TRANSCRIPTS(ch_gtf) CONVERT2BED(RRNA_TRANSCRIPTS.out.rrna_gtf) GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict) - } - ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } + ch_rrna_interval = GATK4_BEDTOINTERVALLIST.out.interval_list + } else { + ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } + } - if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){ - GTF_TO_REFFLAT(ch_gtf)} - ch_refflat = Channel.fromPath(params.refflat) + ch_refflat = (!file(params.refflat).exists() || file(params.refflat).isEmpty()) ? GTF_TO_REFFLAT.refflat : Channel.fromPath(params.refflat) if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() || !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc GFFREAD(ch_gtf, ch_fasta.map{ meta, fasta -> [ fasta ] }) SALMON_INDEX(ch_fasta.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) + ch_salmon_index = SALMON_INDEX.out.index + } else { + ch_salmon_index = Channel.fromPath(params.salmon_index) } - ch_salmon_index = Channel.fromPath(params.salmon_index) if ((params.starindex || params.all || params.starfusion || params.arriba) && (!file(params.starindex_ref).exists() || file(params.starindex_ref).isEmpty() || !file(params.starindex_ref_stub_check).exists() || file(params.starindex_ref_stub_check).isEmpty() )) { STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + ch_starindex_ref = STAR_GENOMEGENERATE.out.index + } else { + ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> 
[[id:that.Name], that] } } - ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] } - if ((params.arriba || params.all) && - (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || - !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || - !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { - ARRIBA_DOWNLOAD()} - ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } - ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } - ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } +// if ((params.arriba || params.all) && +// (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || +// !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || +// !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { +// ARRIBA_DOWNLOAD() +// ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } +// ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } +// ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } +// } else { +// // need to update the module to emit blacklist,knownfusions etc +// } if ((params.fusioncatcher || params.all) && (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { if (params.download_refs) { - 
FUSIONCATCHER_DOWNLOAD(params.ensembl_version)} + FUSIONCATCHER_DOWNLOAD(params.ensembl_version) + ch_fusioncatcher_ref = FUSIONCATCHER_DOWNLOAD.out.reference} else { - FUSIONCATCHER_BUILD(params.ensembl_version)} - } - ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } + FUSIONCATCHER_BUILD(params.ensembl_version) + ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference} + } else { + ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } + } if ((params.starfusion || params.all) && (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { if (params.download_refs) { - STARFUSION_DOWNLOAD( ch_fasta, ch_gtf )} + ch_starfusion_ref = STARFUSION_DOWNLOAD( ch_fasta, ch_gtf ).out.reference } else { - STARFUSION_BUILD( ch_fasta, ch_gtf )} - } - ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] } + ch_starfusion_ref = STARFUSION_BUILD( ch_fasta, ch_gtf ).out.reference } + } else { + ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} if ((params.fusionreport || params.all) && (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } - FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd )} - ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } + ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ).out.reference + } else { + ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> 
[[id:that.Name], that] } + } emit: ch_fasta @@ -143,9 +164,9 @@ workflow BUILD_REFERENCES { ch_refflat ch_salmon_index ch_starindex_ref - ch_arriba_ref_blacklist - ch_arriba_ref_known_fusions - ch_arriba_ref_protein_domains + // ch_arriba_ref_blacklist + // ch_arriba_ref_known_fusions + // ch_arriba_ref_protein_domains ch_fusioncatcher_ref ch_starfusion_ref ch_fusionreport_ref diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 64bd3f99..eb0e19f4 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -57,11 +57,6 @@ workflow RNAFUSION { main: - // Reference channels - - - - ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() @@ -73,59 +68,60 @@ workflow RNAFUSION { BUILD_REFERENCES() -// // -// // Create channel from input file provided through params.input -// // -// Channel -// .fromSamplesheet("input") -// .map { -// meta, fastq_1, fastq_2, strandedness -> -// if (!fastq_2) { -// return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] -// } else { -// return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] -// } -// } -// .groupTuple() -// .map { -// validateInputSamplesheet(it) -// } -// .branch { -// meta, fastqs -> -// single : fastqs.size() == 1 -// return [ meta, fastqs.flatten() ] -// multiple: fastqs.size() > 1 -// return [ meta, fastqs.flatten() ] -// } -// .set { ch_fastq } - -// // -// // MODULE: Concatenate FastQ files from same sample if required -// // -// CAT_FASTQ ( -// ch_fastq.multiple -// ) -// .reads -// .mix(ch_fastq.single) -// .set { ch_cat_fastq } -// ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + // + // Create channel from input file provided through params.input + // + Channel + .fromSamplesheet("input") + .map { + meta, fastq_1, fastq_2, strandedness -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + 
validateInputSamplesheet(it) + } + .branch { + meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + .set { ch_fastq } + // + // MODULE: Concatenate FastQ files from same sample if required + // + CAT_FASTQ ( + ch_fastq.multiple + ) + .reads + .mix(ch_fastq.single) + .set { ch_cat_fastq } + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) -// // -// // MODULE: Run FastQC -// // -// FASTQC ( -// ch_cat_fastq -// ) -// ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) -// ch_versions = ch_versions.mix(FASTQC.out.versions) -// TRIM_WORKFLOW ( -// ch_cat_fastq -// ) -// ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher -// ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all -// ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + // + // MODULE: Run FastQC + // + FASTQC ( + ch_cat_fastq + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQC.out.versions) + + + TRIM_WORKFLOW ( + ch_cat_fastq + ) + ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher + ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all + ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) // SALMON_QUANT( ch_reads_all, ch_salmon_index.map{ meta, index -> index }, ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') From 89be7c1ab16c8a25dc7dedc8cdc7a18a3c36d75f Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 13 Jun 2024 12:15:12 +0200 Subject: [PATCH 06/41] fix small issues --- modules/local/fusioncatcher/build/main.nf | 2 +- nextflow_schema.json | 2 +- subworkflows/local/build_references.nf | 5 ++--- workflows/rnafusion.nf | 10 +++++----- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index 15f7afed..0258a2cc 100644 --- 
a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -3,7 +3,7 @@ process FUSIONCATCHER_BUILD { label 'process_medium' conda "bioconda::fusioncatcher=1.33" - container "docker.io/clinicalgenomics/fusioncatcher:1.33" + container "docker.io/rannickscilifelab/fusioncatcher:1.33a" input: val ensembl_version diff --git a/nextflow_schema.json b/nextflow_schema.json index 604e7b00..95238d65 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -18,7 +18,7 @@ "skip_vcf": { "type": "boolean", "description": "Skip vcf creation step" - } + }, "skip_vis": { "type": "boolean", "description": "Skip visualisation steps" diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 32602cc1..49a18ce2 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -58,11 +58,10 @@ workflow BUILD_REFERENCES { if ((!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || - !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) and not params.skip_vcf){ + !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) && !params.skip_vcf){ HGNC_DOWNLOAD( ) ch_hgnc_ref = HGNC_DOWNLOAD.out.hgnc_ref - ch_hgnc_date =HGNC_DOWNLOAD.out.hgnc_date - + ch_hgnc_date = HGNC_DOWNLOAD.out.hgnc_date } else { ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { that -> [[id:that.Name], that] } ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index eb0e19f4..8067210a 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -109,11 +109,11 @@ workflow RNAFUSION { // // MODULE: Run FastQC // - FASTQC ( - ch_cat_fastq - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions) + // FASTQC ( + // ch_cat_fastq + // ) + // ch_multiqc_files = 
ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + // ch_versions = ch_versions.mix(FASTQC.out.versions) TRIM_WORKFLOW ( From 0c6fe8283c61f0dc4557970ce576d08fc97ee909 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:21:33 +0200 Subject: [PATCH 07/41] updates --- conf/base.config | 2 +- modules/local/fusioncatcher/build/main.nf | 2 +- modules/local/fusioncatcher/download/main.nf | 1 + modules/local/starfusion/download/main.nf | 1 + subworkflows/local/build_references.nf | 30 ++++++++++---------- subworkflows/local/trim_workflow.nf | 18 ++++-------- workflows/rnafusion.nf | 24 +++++++++------- 7 files changed, 38 insertions(+), 40 deletions(-) diff --git a/conf/base.config b/conf/base.config index 5b2e6f36..62d8dbf1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,7 +31,7 @@ process { } withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index 0258a2cc..15f7afed 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -3,7 +3,7 @@ process FUSIONCATCHER_BUILD { label 'process_medium' conda "bioconda::fusioncatcher=1.33" - container "docker.io/rannickscilifelab/fusioncatcher:1.33a" + container "docker.io/clinicalgenomics/fusioncatcher:1.33" input: val ensembl_version diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index ff146c95..aec16288 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -22,6 +22,7 @@ process FUSIONCATCHER_DOWNLOAD { def args2 = task.ext.args2 ?: '' def url = """ + TODO: move to my sourceforge 
wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.aa wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ab wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ac diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf index 9c9b1482..e3a4b1b8 100644 --- a/modules/local/starfusion/download/main.nf +++ b/modules/local/starfusion/download/main.nf @@ -9,6 +9,7 @@ process STARFUSION_DOWNLOAD { path "ctat_genome_lib_build_dir/ref_annot.gtf", emit: chrgtf + TODO: move to my sourceforge script: """ wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz --no-check-certificate diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 49a18ce2..a1a64050 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -92,7 +92,7 @@ workflow BUILD_REFERENCES { ch_salmon_index = SALMON_INDEX.out.index } else { ch_salmon_index = Channel.fromPath(params.salmon_index) - } + } if ((params.starindex || params.all || params.starfusion || params.arriba) && @@ -105,17 +105,17 @@ workflow BUILD_REFERENCES { } -// if ((params.arriba || params.all) && -// (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || -// !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || -// !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { -// ARRIBA_DOWNLOAD() -// ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } -// ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } -// 
ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } -// } else { -// // need to update the module to emit blacklist,knownfusions etc -// } + if ((params.arriba || params.all) && + (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || + !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || + !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { + ARRIBA_DOWNLOAD() + ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } + ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } + } else { +// TODO need to update the module to emit blacklist,knownfusions etc + } if ((params.fusioncatcher || params.all) && @@ -163,9 +163,9 @@ workflow BUILD_REFERENCES { ch_refflat ch_salmon_index ch_starindex_ref - // ch_arriba_ref_blacklist - // ch_arriba_ref_known_fusions - // ch_arriba_ref_protein_domains + ch_arriba_ref_blacklist + ch_arriba_ref_known_fusions + ch_arriba_ref_protein_domains ch_fusioncatcher_ref ch_starfusion_ref ch_fusionreport_ref diff --git a/subworkflows/local/trim_workflow.nf b/subworkflows/local/trim_workflow.nf index ea21134d..f963451b 100644 --- a/subworkflows/local/trim_workflow.nf +++ b/subworkflows/local/trim_workflow.nf @@ -8,9 +8,6 @@ workflow TRIM_WORKFLOW { main: ch_versions = Channel.empty() - ch_fastp_html = Channel.empty() - ch_fastp_json = Channel.empty() - ch_fastqc_trimmed = Channel.empty() if (params.fastp_trim) { FASTP(reads, params.adapter_fasta, false, false) @@ -19,24 +16,21 @@ workflow TRIM_WORKFLOW { FASTQC_FOR_FASTP(FASTP.out.reads) ch_versions = 
ch_versions.mix(FASTQC_FOR_FASTP.out.versions) - ch_reads_all = FASTP.out.reads - ch_reads_fusioncatcher = ch_reads_all + ch_reads = FASTP.out.reads ch_fastp_html = FASTP.out.html ch_fastp_json = FASTP.out.json ch_fastqc_trimmed = FASTQC_FOR_FASTP.out.zip } else { - ch_reads_all = reads - ch_reads_fusioncatcher = reads + ch_reads = reads } emit: - ch_reads_all - ch_reads_fusioncatcher - ch_fastp_html - ch_fastp_json - ch_fastqc_trimmed + trimmed_reads = ch_reads + fastp_html = ch_fastp_html.ifEmpty([]) + fastp_json = ch_fastp_json.ifEmpty([]) + fastqc_trimmed = ch_fastqc_trimmed.ifEmpty([]) versions = ch_versions } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 8067210a..302f2829 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -107,26 +107,28 @@ workflow RNAFUSION { // - // MODULE: Run FastQC + // QC from FASTQ files // - // FASTQC ( - // ch_cat_fastq - // ) - // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - // ch_versions = ch_versions.mix(FASTQC.out.versions) + FASTQC ( + ch_cat_fastq + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQC.out.versions) + SALMON_QUANT( ch_reads_all, ch_salmon_index.map{ meta, index -> index }, ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') + ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) + // + // Trimming + // TRIM_WORKFLOW ( ch_cat_fastq ) - ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher - ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all + ch_reads = TRIM_WORKFLOW.out.trimmed_reads ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) -// SALMON_QUANT( ch_reads_all, ch_salmon_index.map{ meta, index -> index }, ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') - - // // // // SUBWORKFLOW: Run STAR alignment and Arriba // // From eb6e354c180f6c533cbb862678403984e79275b2 Mon Sep 17 00:00:00 2001 
From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Mon, 23 Sep 2024 21:15:42 +0200 Subject: [PATCH 08/41] change strategy --- conf/modules.config | 3 ++- modules/local/ensembl/main.nf | 22 +++++++++-------- nextflow.config | 2 +- subworkflows/local/build_references.nf | 7 +++++- workflows/rnafusion.nf | 34 ++++++++++++++++++++++++-- 5 files changed, 53 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e78f73db..bbafa80d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,7 +49,8 @@ process { ext.when = { !params.build_references } } - withName: 'ENSEMBL_DOWNLOAD' { + withName: '.*BUILD_REFERENCES:ENSEMBL_DOWNLOAD' { + ext.when = { !params.fasta || ! params.gtf } publishDir = [ path: { "${params.genomes_base}/ensembl" }, mode: params.publish_dir_mode, diff --git a/modules/local/ensembl/main.nf b/modules/local/ensembl/main.nf index 8297e290..695ab312 100644 --- a/modules/local/ensembl/main.nf +++ b/modules/local/ensembl/main.nf @@ -14,20 +14,23 @@ process ENSEMBL_DOWNLOAD { output: tuple val(meta), path("Homo_sapiens.${genome}.${ensembl_version}.gtf") , emit: gtf - tuple val(meta), path("Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa") , emit: primary_assembly - tuple val(meta), path("Homo_sapiens.${genome}.${ensembl_version}.chr.gtf") , emit: chrgtf + tuple val(meta), path("Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa") , emit: fasta path "versions.yml" , emit: versions script: """ - wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz - wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${params.genome}.dna.primary_assembly.fa.gz -O Homo_sapiens.${params.genome}.${ensembl_version}.dna.primary_assembly.fa.gz - wget 
ftp://ftp.ensembl.org/pub/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz + if [ ${genome} == 'GRCh37' ]; then + wget ftp://ftp.ensembl.org/pub/grch37/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${genome}.87.gtf.gz -O Homo_sapiens.${genome}.${ensembl_version}.gtf.gz + wget ftp://ftp.ensembl.org/pub/grch37/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${genome}.dna.primary_assembly.fa.gz -O Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa.gz - gunzip Homo_sapiens.${params.genome}.${ensembl_version}.gtf.gz - gunzip Homo_sapiens.${params.genome}.${ensembl_version}.dna.primary_assembly.fa.gz - gunzip Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf.gz + + else: + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${genome}.${ensembl_version}.gtf.gz + wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${genome}.dna.primary_assembly.fa.gz -O Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa.gz + + gunzip Homo_sapiens.${genome}.${ensembl_version}.gtf.gz + gunzip Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,8 +43,7 @@ process ENSEMBL_DOWNLOAD { stub: """ touch "Homo_sapiens.${genome}.${ensembl_version}.gtf" - touch "Homo_sapiens.${params.genome}.${ensembl_version}.dna.primary_assembly.fa" - touch "Homo_sapiens.${params.genome}.${ensembl_version}.chr.gtf" + touch "Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index b7d78f7d..ccaf0348 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,7 +27,7 @@ params { // Genome genome = 'GRCh38' genomes_base = "${params.outdir}/references" - ensembl_version = 112 + ensembl_version = 111 read_length = 100 starfusion_build = 
true genomes = [:] diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index a1a64050..c7a62033 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -35,12 +35,17 @@ include { GFFREAD } from '../../modules/nf-core/gffread/ */ workflow BUILD_REFERENCES { - + take: + genome // channel: [mandatory] val(genome) + ensembl_version // channel: [mandatory] val(ensembl_version) main: ch_versions = Channel.empty() + + + if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || !file(params.chrgtf).exists() || file(params.chrgtf).isEmpty() || !file(params.gtf).exists() || file(params.gtf).isEmpty()){ diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 302f2829..36865840 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -65,8 +65,38 @@ workflow RNAFUSION { // Create references if necessary // - BUILD_REFERENCES() - + BUILD_REFERENCES(params.genome, params.ensembl_version) + ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) + + // Optional + ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map {it -> [[id:it[0].simpleName], it]}.collect() + : BUILD_REFERENCES.out.fasta.map {it -> [[id:it[0].simpleName], it]}.collect() + ch_gtf = params.gtf ? Channel.fromPath(params.gtf).map {it -> [[id:it[0].simpleName], it]}.collect() + : downloads.gtf.map {it -> [[id:it[0].simpleName], it]}.collect() + ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache) + : Channel.empty().mix(downloads.vep_cache) + ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files) + : Channel.empty().mix(downloads.vep_plugin) + ch_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_gene_panel_clinical_filter = params.gene_panel_clinical_filter ? 
Channel.fromPath(params.gene_panel_clinical_filter).collect() + : Channel.empty() + ch_ref_drop_annot_file = params.reference_drop_annot_file ? Channel.fromPath(params.reference_drop_annot_file).collect() + : Channel.empty() + ch_ref_drop_count_file = params.reference_drop_count_file ? Channel.fromPath(params.reference_drop_count_file).collect() + : Channel.empty() + ch_ref_drop_splice_folder = params.reference_drop_splice_folder ? Channel.fromPath(params.reference_drop_splice_folder).collect() + : Channel.empty() + ch_salmon_index = params.salmon_index ? Channel.fromPath(params.salmon_index) + : Channel.empty() + ch_star_index = params.star_index ? Channel.fromPath(params.star_index).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.empty() + ch_transcript_fasta = params.transcript_fasta ? Channel.fromPath(params.transcript_fasta) + : Channel.empty() + ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_subsample_bed = params.subsample_bed ? 
Channel.fromPath(params.subsample_bed).collect() + : Channel.empty() // // Create channel from input file provided through params.input From 40fbd83d3d45d009af01dda1eea44ef9af44fe24 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Mon, 7 Oct 2024 14:41:49 +0200 Subject: [PATCH 09/41] intermediate state --- modules/local/fusioncatcher/download/main.nf | 5 +- modules/local/starfusion/download/main.nf | 2 +- nextflow.config | 3 +- subworkflows/local/build_references.nf | 63 ++++--- workflows/rnafusion.nf | 170 +++++++++---------- 5 files changed, 121 insertions(+), 122 deletions(-) diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index aec16288..916b4673 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -20,9 +20,10 @@ process FUSIONCATCHER_DOWNLOAD { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def url = + // TODO: move to my sourceforge + + // def url = """ - TODO: move to my sourceforge wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.aa wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ab wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ac diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf index e3a4b1b8..fedadfa0 100644 --- a/modules/local/starfusion/download/main.nf +++ b/modules/local/starfusion/download/main.nf @@ -9,7 +9,7 @@ process STARFUSION_DOWNLOAD { path "ctat_genome_lib_build_dir/ref_annot.gtf", emit: chrgtf - TODO: move to my sourceforge + // TODO: move to my sourceforge script: """ wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz --no-check-certificate diff --git 
a/nextflow.config b/nextflow.config index ccaf0348..db5a3f63 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,7 +27,7 @@ params { // Genome genome = 'GRCh38' genomes_base = "${params.outdir}/references" - ensembl_version = 111 + ensembl_version = 112 read_length = 100 starfusion_build = true genomes = [:] @@ -72,7 +72,6 @@ params { fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa" fai = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa.fai" gtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf" - chrgtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.chr.gtf" refflat = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf.refflat" rrna_intervals = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list" ensembl_ref = "${params.genomes_base}/ensembl" diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index c7a62033..cfb4258b 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -42,25 +42,26 @@ workflow BUILD_REFERENCES { main: ch_versions = Channel.empty() - - - - if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || - !file(params.chrgtf).exists() || file(params.chrgtf).isEmpty() || !file(params.gtf).exists() || file(params.gtf).isEmpty()){ fake_meta = [:] fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" ENSEMBL_DOWNLOAD(params.ensembl_version, params.genome, fake_meta) + ch_versions = ENSEMBL_DOWNLOAD.out.versions ch_fasta = ENSEMBL_DOWNLOAD.out.primary_assembly - ch_chrgtf = ENSEMBL_DOWNLOAD.out.chrgtf ch_gtf = ENSEMBL_DOWNLOAD.out.gtf } else { ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] } - 
ch_chrgtf = Channel.fromPath(params.chrgtf).map { that -> [[id:that.Name], that] } ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] } } + if (!file(params.fai).exists() || file(params.fai).isEmpty()){ + SAMTOOLS_FAIDX(ch_fasta, [[],[]]).fai + ch_versions = SAMTOOLS_FAIDX.out.versions + ch_fai = SAMTOOLS_FAIDX.out.fai + } else { + ch_fai = Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } + } if ((!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) && !params.skip_vcf){ @@ -72,10 +73,6 @@ workflow BUILD_REFERENCES { ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { that -> [[id:that.Name], that] } } - - ch_fai = (!file(params.fai).exists() || file(params.fai).isEmpty()) ? SAMTOOLS_FAIDX(ch_fasta, [[],[]]).fai : Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } - - if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){ GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) RRNA_TRANSCRIPTS(ch_gtf) @@ -86,9 +83,12 @@ workflow BUILD_REFERENCES { ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } } - - ch_refflat = (!file(params.refflat).exists() || file(params.refflat).isEmpty()) ? 
GTF_TO_REFFLAT.refflat : Channel.fromPath(params.refflat) - + if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){ + GTF_TO_REFFLAT(ch_gtf) + ch_refflat = GTF_TO_REFFLAT.out.refflat + } else { + ch_refflat = Channel.fromPath(params.refflat).map { that -> [[id:that.Name], that] } + } if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() || !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc @@ -99,7 +99,6 @@ workflow BUILD_REFERENCES { ch_salmon_index = Channel.fromPath(params.salmon_index) } - if ((params.starindex || params.all || params.starfusion || params.arriba) && (!file(params.starindex_ref).exists() || file(params.starindex_ref).isEmpty() || !file(params.starindex_ref_stub_check).exists() || file(params.starindex_ref_stub_check).isEmpty() )) { @@ -109,18 +108,17 @@ workflow BUILD_REFERENCES { ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] } } - - if ((params.arriba || params.all) && - (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || - !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || - !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { - ARRIBA_DOWNLOAD() - ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } - ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } - ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } - } else { -// TODO need to update the module to emit blacklist,knownfusions etc - } +// if ((params.arriba || params.all) && +// (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || +// 
!file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || +// !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { +// ARRIBA_DOWNLOAD() +// ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } +// ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } +// ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } +// } else { +// // TODO need to update the module to emit blacklist,knownfusions etc +// } if ((params.fusioncatcher || params.all) && @@ -159,21 +157,22 @@ workflow BUILD_REFERENCES { emit: ch_fasta - ch_chrgtf ch_gtf + ch_fai + ch_hgnc_ref ch_hgnc_date - ch_fai ch_rrna_interval ch_refflat ch_salmon_index ch_starindex_ref - ch_arriba_ref_blacklist - ch_arriba_ref_known_fusions - ch_arriba_ref_protein_domains + // ch_arriba_ref_blacklist + // ch_arriba_ref_known_fusions + // ch_arriba_ref_protein_domains ch_fusioncatcher_ref ch_starfusion_ref ch_fusionreport_ref + versions = ch_versions } /* diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 36865840..67dedb4b 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -68,95 +68,95 @@ workflow RNAFUSION { BUILD_REFERENCES(params.genome, params.ensembl_version) ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) - // Optional - ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map {it -> [[id:it[0].simpleName], it]}.collect() - : BUILD_REFERENCES.out.fasta.map {it -> [[id:it[0].simpleName], it]}.collect() - ch_gtf = params.gtf ? Channel.fromPath(params.gtf).map {it -> [[id:it[0].simpleName], it]}.collect() - : downloads.gtf.map {it -> [[id:it[0].simpleName], it]}.collect() - ch_vep_cache_unprocessed = params.vep_cache ? 
Channel.fromPath(params.vep_cache) - : Channel.empty().mix(downloads.vep_cache) - ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files) - : Channel.empty().mix(downloads.vep_plugin) - ch_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_gene_panel_clinical_filter = params.gene_panel_clinical_filter ? Channel.fromPath(params.gene_panel_clinical_filter).collect() - : Channel.empty() - ch_ref_drop_annot_file = params.reference_drop_annot_file ? Channel.fromPath(params.reference_drop_annot_file).collect() - : Channel.empty() - ch_ref_drop_count_file = params.reference_drop_count_file ? Channel.fromPath(params.reference_drop_count_file).collect() - : Channel.empty() - ch_ref_drop_splice_folder = params.reference_drop_splice_folder ? Channel.fromPath(params.reference_drop_splice_folder).collect() - : Channel.empty() - ch_salmon_index = params.salmon_index ? Channel.fromPath(params.salmon_index) - : Channel.empty() - ch_star_index = params.star_index ? Channel.fromPath(params.star_index).map {it -> [[id:it[0].simpleName], it]}.collect() - : Channel.empty() - ch_transcript_fasta = params.transcript_fasta ? Channel.fromPath(params.transcript_fasta) - : Channel.empty() - ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_subsample_bed = params.subsample_bed ? Channel.fromPath(params.subsample_bed).collect() - : Channel.empty() + // // Optional + // ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map {it -> [[id:it[0].simpleName], it]}.collect() + // : BUILD_REFERENCES.out.fasta.map {it -> [[id:it[0].simpleName], it]}.collect() + // ch_gtf = params.gtf ? 
Channel.fromPath(params.gtf).map {it -> [[id:it[0].simpleName], it]}.collect() + // : downloads.gtf.map {it -> [[id:it[0].simpleName], it]}.collect() + // ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache) + // : Channel.empty().mix(downloads.vep_cache) + // ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files) + // : Channel.empty().mix(downloads.vep_plugin) + // ch_fai = params.fai ? Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() + // : Channel.empty() + // ch_gene_panel_clinical_filter = params.gene_panel_clinical_filter ? Channel.fromPath(params.gene_panel_clinical_filter).collect() + // : Channel.empty() + // ch_ref_drop_annot_file = params.reference_drop_annot_file ? Channel.fromPath(params.reference_drop_annot_file).collect() + // : Channel.empty() + // ch_ref_drop_count_file = params.reference_drop_count_file ? Channel.fromPath(params.reference_drop_count_file).collect() + // : Channel.empty() + // ch_ref_drop_splice_folder = params.reference_drop_splice_folder ? Channel.fromPath(params.reference_drop_splice_folder).collect() + // : Channel.empty() + // ch_salmon_index = params.salmon_index ? Channel.fromPath(params.salmon_index) + // : Channel.empty() + // ch_star_index = params.star_index ? Channel.fromPath(params.star_index).map {it -> [[id:it[0].simpleName], it]}.collect() + // : Channel.empty() + // ch_transcript_fasta = params.transcript_fasta ? Channel.fromPath(params.transcript_fasta) + // : Channel.empty() + // ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[id:it[0].simpleName], it] }.collect() + // : Channel.empty() + // ch_subsample_bed = params.subsample_bed ? 
Channel.fromPath(params.subsample_bed).collect() + // : Channel.empty() // // Create channel from input file provided through params.input // - Channel - .fromSamplesheet("input") - .map { - meta, fastq_1, fastq_2, strandedness -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { - validateInputSamplesheet(it) - } - .branch { - meta, fastqs -> - single : fastqs.size() == 1 - return [ meta, fastqs.flatten() ] - multiple: fastqs.size() > 1 - return [ meta, fastqs.flatten() ] - } - .set { ch_fastq } - - // - // MODULE: Concatenate FastQ files from same sample if required - // - CAT_FASTQ ( - ch_fastq.multiple - ) - .reads - .mix(ch_fastq.single) - .set { ch_cat_fastq } - ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) - - - // - // QC from FASTQ files - // - FASTQC ( - ch_cat_fastq - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions) - - SALMON_QUANT( ch_reads_all, ch_salmon_index.map{ meta, index -> index }, ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') - ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) - - // - // Trimming - // - TRIM_WORKFLOW ( - ch_cat_fastq - ) - ch_reads = TRIM_WORKFLOW.out.trimmed_reads - ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + // Channel + // .fromSamplesheet("input") + // .map { + // meta, fastq_1, fastq_2, strandedness -> + // if (!fastq_2) { + // return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + // } else { + // return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + // } + // } + // .groupTuple() + // .map { + // validateInputSamplesheet(it) + // } + // .branch { + // meta, fastqs -> + // single : fastqs.size() == 1 + // return [ meta, fastqs.flatten() ] + // multiple: 
fastqs.size() > 1 + // return [ meta, fastqs.flatten() ] + // } + // .set { ch_fastq } + + // // + // // MODULE: Concatenate FastQ files from same sample if required + // // + // CAT_FASTQ ( + // ch_fastq.multiple + // ) + // .reads + // .mix(ch_fastq.single) + // .set { ch_cat_fastq } + // ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) + + + // // + // // QC from FASTQ files + // // + // FASTQC ( + // ch_cat_fastq + // ) + // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + // ch_versions = ch_versions.mix(FASTQC.out.versions) + + // SALMON_QUANT( ch_reads_all, BUILD_REFERENCES.out.ch_salmon_index.map{ meta, index -> index }, BUILD_REFERENCES.out.ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') + // ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.zip.collect{it[1]}) + // ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) + + // // + // // Trimming + // // + // TRIM_WORKFLOW ( + // ch_cat_fastq + // ) + // ch_reads = TRIM_WORKFLOW.out.trimmed_reads + // ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) // // From 7a889713da5ee69696daf159cb45d94e577cc483 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 16 Oct 2024 09:27:26 +0200 Subject: [PATCH 10/41] ensembl to gencode, removing chrgtf --- conf/modules.config | 11 +- modules.json | 10 ++ modules/local/ensembl/main.nf | 55 ------- modules/local/fusioncatcher/build/main.nf | 12 +- modules/local/gencode_download/main.nf | 49 +++++++ modules/local/hgnc/main.nf | 2 +- modules/local/starfusion/download/main.nf | 1 - .../nf-core/arriba/download/environment.yml | 5 + modules/nf-core/arriba/download/main.nf | 46 ++++++ modules/nf-core/arriba/download/meta.yml | 30 ++++ .../arriba/download/tests/main.nf.test | 35 +++++ .../arriba/download/tests/main.nf.test.snap | 35 +++++ modules/nf-core/gunzip/environment.yml | 7 + modules/nf-core/gunzip/main.nf | 55 +++++++ modules/nf-core/gunzip/meta.yml | 47 ++++++ 
modules/nf-core/gunzip/tests/main.nf.test | 121 ++++++++++++++++ .../nf-core/gunzip/tests/main.nf.test.snap | 134 ++++++++++++++++++ modules/nf-core/gunzip/tests/nextflow.config | 5 + modules/nf-core/gunzip/tests/tags.yml | 2 + nextflow.config | 19 ++- nextflow_schema.json | 19 +-- subworkflows/local/build_references.nf | 96 ++++++------- subworkflows/local/qc_workflow.nf | 2 +- subworkflows/local/starfusion_workflow.nf | 4 +- subworkflows/local/stringtie_workflow.nf | 6 +- workflows/rnafusion.nf | 10 +- 26 files changed, 663 insertions(+), 155 deletions(-) delete mode 100644 modules/local/ensembl/main.nf create mode 100644 modules/local/gencode_download/main.nf create mode 100644 modules/nf-core/arriba/download/environment.yml create mode 100644 modules/nf-core/arriba/download/main.nf create mode 100644 modules/nf-core/arriba/download/meta.yml create mode 100644 modules/nf-core/arriba/download/tests/main.nf.test create mode 100644 modules/nf-core/arriba/download/tests/main.nf.test.snap create mode 100644 modules/nf-core/gunzip/environment.yml create mode 100644 modules/nf-core/gunzip/main.nf create mode 100644 modules/nf-core/gunzip/meta.yml create mode 100644 modules/nf-core/gunzip/tests/main.nf.test create mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap create mode 100644 modules/nf-core/gunzip/tests/nextflow.config create mode 100644 modules/nf-core/gunzip/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index bbafa80d..af832498 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,10 +49,9 @@ process { ext.when = { !params.build_references } } - withName: '.*BUILD_REFERENCES:ENSEMBL_DOWNLOAD' { - ext.when = { !params.fasta || ! params.gtf } + withName: 'GENCODE_DOWNLOAD' { publishDir = [ - path: { "${params.genomes_base}/ensembl" }, + path: { "${params.genomes_base}/gencode" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] @@ -122,7 +121,7 @@ process { withName: 'GATK4_BEDTOINTERVALLIST' { publishDir = [ - path: { "${params.genomes_base}/ensembl" }, + path: { "${params.genomes_base}/gencode" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -139,7 +138,7 @@ process { withName: 'GTF_TO_REFFLAT' { publishDir = [ - path: { "${params.genomes_base}/ensembl" }, + path: { "${params.genomes_base}/gencode" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] @@ -209,7 +208,7 @@ process { withName: 'SAMTOOLS_FAIDX' { publishDir = [ - path: { "${params.genomes_base}/ensembl" }, + path: { "${params.genomes_base}/gencode" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] diff --git a/modules.json b/modules.json index 7f4cf0ef..886af975 100644 --- a/modules.json +++ b/modules.json @@ -15,6 +15,11 @@ "git_sha": "bbaeb3a3149def237b9d0acb9ab749ae98088e29", "installed_by": ["modules"] }, + "arriba/download": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "cat/cat": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -55,6 +60,11 @@ "git_sha": "6c996d7fbe0816dcbb68ce587ad5f873313682a1", "installed_by": ["modules"] }, + "gunzip": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", diff --git a/modules/local/ensembl/main.nf b/modules/local/ensembl/main.nf deleted file mode 100644 index 695ab312..00000000 --- a/modules/local/ensembl/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process ENSEMBL_DOWNLOAD { - tag "ensembl" - label 'process_low' - - conda "bioconda::gnu-wget=1.18" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : - 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" - - input: - val ensembl_version - val genome - val meta - - output: - tuple val(meta), path("Homo_sapiens.${genome}.${ensembl_version}.gtf") , emit: gtf - tuple val(meta), path("Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa") , emit: fasta - path "versions.yml" , emit: versions - - - script: - """ - if [ ${genome} == 'GRCh37' ]; then - wget ftp://ftp.ensembl.org/pub/grch37/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${genome}.87.gtf.gz -O Homo_sapiens.${genome}.${ensembl_version}.gtf.gz - wget ftp://ftp.ensembl.org/pub/grch37/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${genome}.dna.primary_assembly.fa.gz -O Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa.gz - - - else: - wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/gtf/homo_sapiens/Homo_sapiens.${genome}.${ensembl_version}.gtf.gz - wget ftp://ftp.ensembl.org/pub/release-${ensembl_version}/fasta/homo_sapiens/dna/Homo_sapiens.${genome}.dna.primary_assembly.fa.gz -O Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa.gz - - gunzip Homo_sapiens.${genome}.${ensembl_version}.gtf.gz - gunzip Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - - END_VERSIONS - """ - - stub: - """ - touch "Homo_sapiens.${genome}.${ensembl_version}.gtf" - touch "Homo_sapiens.${genome}.${ensembl_version}.dna.primary_assembly.fa" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; 
s/ Copyright.*\$//') - END_VERSIONS - """ - -} diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index 15f7afed..51153253 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -6,11 +6,11 @@ process FUSIONCATCHER_BUILD { container "docker.io/clinicalgenomics/fusioncatcher:1.33" input: - val ensembl_version + val genome_gencode_version output: - path "human_v${ensembl_version}" , emit: reference - path "versions.yml" , emit: versions + path "human_v${genome_gencode_version}" , emit: reference + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,7 +21,7 @@ process FUSIONCATCHER_BUILD { """ fusioncatcher-build.py \\ -g homo_sapiens \\ - -o human_v${ensembl_version} \\ + -o human_v${genome_gencode_version} \\ $args cat <<-END_VERSIONS > versions.yml @@ -32,8 +32,8 @@ process FUSIONCATCHER_BUILD { stub: """ - mkdir human_v${ensembl_version} - touch human_v${ensembl_version}/ensembl_fully_overlapping_genes.txt + mkdir human_v${genome_gencode_version} + touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt cat <<-END_VERSIONS > versions.yml "${task.process}": fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) diff --git a/modules/local/gencode_download/main.nf b/modules/local/gencode_download/main.nf new file mode 100644 index 00000000..e9416cf4 --- /dev/null +++ b/modules/local/gencode_download/main.nf @@ -0,0 +1,49 @@ +process GENCODE_DOWNLOAD { + tag "gencode_download" + label 'process_low' + + conda "bioconda::gnu-wget=1.18" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : + 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" + + input: + val genome_gencode_version + val genome + + output: + path "*.fa" , emit: fasta + path "*.gtf" , emit: gtf + path "versions.yml", emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def folder_gencode = genome.contains("38") ? "" : "${genome}_mapping/" + def gtf_file_name = genome.contains("38") ? "gencode.v${genome_gencode_version}.primary_assembly.annotation.gtf.gz" : "gencode.v${genome_gencode_version}lift37.annotation.gtf.gz" + """ + wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_${genome_gencode_version}/${folder_gencode}${genome}.primary_assembly.genome.fa.gz -O Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa.gz + gunzip Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa.gz + wget ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_${genome_gencode_version}/${folder_gencode}${gtf_file_name} -O Homo_sapiens_${genome}_${genome_gencode_version}.gtf.gz + gunzip Homo_sapiens_${genome}_${genome_gencode_version}.gtf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + + stub: + """ + touch Homo_sapiens_${genome}_${genome_gencode_version}_dna_primary_assembly.fa + touch Homo_sapiens_${genome}_${genome_gencode_version}.gtf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wget: \$(wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) + END_VERSIONS + """ + +} diff --git a/modules/local/hgnc/main.nf b/modules/local/hgnc/main.nf index 1b3808f6..6a055631 100644 --- a/modules/local/hgnc/main.nf +++ b/modules/local/hgnc/main.nf @@ -18,7 +18,7 @@ process HGNC_DOWNLOAD { script: """ - wget https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/tsv/hgnc_complete_set.txt + 
wget https://ftp.ebi.ac.uk/pub/databases/genenames/out_of_date_hgnc/tsv/hgnc_complete_set.txt date +%Y-%m-%d/%H:%M > HGNC-DB-timestamp.txt cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf index fedadfa0..17bfa72e 100644 --- a/modules/local/starfusion/download/main.nf +++ b/modules/local/starfusion/download/main.nf @@ -6,7 +6,6 @@ process STARFUSION_DOWNLOAD { output: path "ctat_genome_lib_build_dir/*" , emit: reference - path "ctat_genome_lib_build_dir/ref_annot.gtf", emit: chrgtf // TODO: move to my sourceforge diff --git a/modules/nf-core/arriba/download/environment.yml b/modules/nf-core/arriba/download/environment.yml new file mode 100644 index 00000000..d0883a0d --- /dev/null +++ b/modules/nf-core/arriba/download/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/download/main.nf b/modules/nf-core/arriba/download/main.nf new file mode 100644 index 00000000..96a4e336 --- /dev/null +++ b/modules/nf-core/arriba/download/main.nf @@ -0,0 +1,46 @@ +process ARRIBA_DOWNLOAD { + tag "arriba" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : + 'biocontainers/arriba:2.4.0--h0033a41_2' }" + + input: + + output: + path "*" , emit: reference + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz + tar -xzvf arriba_v2.4.0.tar.gz + rm arriba_v2.4.0.tar.gz + mv arriba_v2.4.0/database/* . 
+ rm -r arriba_v2.4.0 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba_download: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ + + stub: + """ + touch blacklist_hg38_GRCh38_v2.4.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.4.0.gff3 + touch cytobands_hg38_GRCh38_v2.4.0.tsv + touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz + touch protein_domains_hg38_GRCh38_v2.4.0.gff3 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + arriba_download: \$(arriba -h | grep 'Version:' 2>&1 | sed 's/Version:\s//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/arriba/download/meta.yml b/modules/nf-core/arriba/download/meta.yml new file mode 100644 index 00000000..531c66c2 --- /dev/null +++ b/modules/nf-core/arriba/download/meta.yml @@ -0,0 +1,30 @@ +name: arriba_download +description: Arriba is a command-line tool for the detection of gene fusions from + RNA-Seq data. +keywords: + - fusion + - arriba + - reference +tools: + - arriba: + description: Fast and accurate gene fusion detection from RNA-Seq data + homepage: https://github.com/suhrig/arriba + documentation: https://arriba.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/suhrig/arriba + doi: "10.1101/gr.257246.119" + licence: ["MIT"] + identifier: biotools:Arriba + +output: + - reference: + - "*": + type: directory + description: Folder with arriba references + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@peterpru" diff --git a/modules/nf-core/arriba/download/tests/main.nf.test b/modules/nf-core/arriba/download/tests/main.nf.test new file mode 100644 index 00000000..cccc95db --- /dev/null +++ b/modules/nf-core/arriba/download/tests/main.nf.test @@ -0,0 +1,35 @@ + +nextflow_process { + + name "Test Process ARRIBA_DOWNLOAD" + script "../main.nf" + process "ARRIBA_DOWNLOAD" + + tag "modules" + tag "modules_nfcore" + tag "arriba" + tag 
"arriba/download" + + test("test-arriba-download") { + + when { + process { + """ + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reference[0].collect { file(it).name }.toSorted(), + process.out.versions + ).match() + } + ) + } + } + +} diff --git a/modules/nf-core/arriba/download/tests/main.nf.test.snap b/modules/nf-core/arriba/download/tests/main.nf.test.snap new file mode 100644 index 00000000..eda3f706 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test-arriba-download": { + "content": [ + [ + "CREDITS", + "RefSeq_viral_genomes_v2.4.0.fa.gz", + "blacklist_hg19_hs37d5_GRCh37_v2.4.0.tsv.gz", + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "blacklist_mm10_GRCm38_v2.4.0.tsv.gz", + "blacklist_mm39_GRCm39_v2.4.0.tsv.gz", + "cytobands_hg19_hs37d5_GRCh37_v2.4.0.tsv", + "cytobands_hg38_GRCh38_v2.4.0.tsv", + "cytobands_mm10_GRCm38_v2.4.0.tsv", + "cytobands_mm39_GRCm39_v2.4.0.tsv", + "known_fusions_hg19_hs37d5_GRCh37_v2.4.0.tsv.gz", + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "known_fusions_mm10_GRCm38_v2.4.0.tsv.gz", + "known_fusions_mm39_GRCm39_v2.4.0.tsv.gz", + "protein_domains_hg19_hs37d5_GRCh37_v2.4.0.gff3", + "protein_domains_hg38_GRCh38_v2.4.0.gff3", + "protein_domains_mm10_GRCm38_v2.4.0.gff3", + "protein_domains_mm39_GRCm39_v2.4.0.gff3", + "versions.yml" + ], + [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-22T17:00:38.086459" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf 
b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..5e67e3b9 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,55 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..9066c035 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,47 @@ +name: gunzip +description: Compresses and decompresses files. 
+keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..776211ad --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..069967e7 --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + 
"test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ 
+gunzip: + - modules/nf-core/gunzip/** diff --git a/nextflow.config b/nextflow.config index db5a3f63..63369d6c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,7 +12,6 @@ params { // Input options input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' - build_references = false cosmic_username = null cosmic_passwd = null qiagen = false @@ -27,7 +26,7 @@ params { // Genome genome = 'GRCh38' genomes_base = "${params.outdir}/references" - ensembl_version = 112 + genome_gencode_version = 46 read_length = 100 starfusion_build = true genomes = [:] @@ -69,16 +68,16 @@ params { download_refs = true // Path to references - fasta = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa" - fai = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.dna.primary_assembly.fa.fai" - gtf = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf" - refflat = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.gtf.refflat" - rrna_intervals = "${params.genomes_base}/ensembl/Homo_sapiens.${params.genome}.${params.ensembl_version}.interval_list" - ensembl_ref = "${params.genomes_base}/ensembl" + fasta = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa" + fai = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa.fai" + gtf = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.gtf" + refflat = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.gtf.refflat" + rrna_intervals = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.interval_list" + gencode_ref = "${params.genomes_base}/gencode" 
arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz" arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz" arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3" - fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v${params.ensembl_version}" + fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}" hgnc_ref = "${params.genomes_base}/hgnc/hgnc_complete_set.txt" hgnc_date = "${params.genomes_base}/hgnc/HGNC-DB-timestamp.txt" salmon_index = "${params.genomes_base}/salmon/salmon" @@ -91,7 +90,7 @@ params { salmon_index_stub_check = "${params.genomes_base}/salmon/salmon/complete_ref_lens.bin" starindex_ref_stub_check = "${params.genomes_base}/star/star/Genome" fusionreport_ref_stub_check = "${params.genomes_base}/fusion_report_db/mitelman.db" - fusioncatcher_ref_stub_check = "${params.genomes_base}/fusioncatcher/human_v${params.ensembl_version}/ensembl_fully_overlapping_genes.txt" + fusioncatcher_ref_stub_check = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}/ensembl_fully_overlapping_genes.txt" starfusion_ref_stub_check = "${params.genomes_base}/starfusion/Pfam-A.hmm" // Path to fusion outputs diff --git a/nextflow_schema.json b/nextflow_schema.json index 95238d65..be31bcc3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -62,11 +62,6 @@ "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" }, - "build_references": { - "type": "boolean", - "fa_icon": "far fa-file-code", - "description": "Specifies which analysis type for the pipeline - either build references or analyse data" - }, "cosmic_username": { "type": "string", "fa_icon": "far fa-file-code", @@ -82,7 +77,7 @@ "fa_icon": "far fa-file-code", "description": "Path to reference folder" }, - "ensembl_version": { + "genome_gencode_version": { "type": "integer", "fa_icon": "far fa-file-code", "description": "ensembl version", @@ -134,10 +129,10 @@ "fa_icon": "far fa-file-code", "description": "Download references instead of building them (for fusioncatcher and starfusion)" }, - "ensembl_ref": { + "gencode_ref": { "type": "string", "fa_icon": "far fa-file-code", - "description": "Path to ensembl references" + "description": "Path to gencode references" }, "fusioncatcher": { "type": "boolean", @@ -342,14 +337,6 @@ "description": "Path to GTF genome file.", "fa_icon": "far fa-file-code" }, - "chrgtf": { - "type": "string", - "format": "file-path", - "mimetype": "text/plain", - "pattern": "^\\S+\\.gtf?(\\.gz)?$", - "description": "Path to GTF genome file.", - "fa_icon": "far fa-file-code" - }, "refflat": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index cfb4258b..8017f634 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -5,7 +5,7 @@ */ include { ARRIBA_DOWNLOAD } from '../../modules/local/arriba/download/main' -include { ENSEMBL_DOWNLOAD } from '../../modules/local/ensembl/main' +include { GENCODE_DOWNLOAD } from '../../modules/local/gencode_download/main' include { FUSIONCATCHER_DOWNLOAD } from '../../modules/local/fusioncatcher/download/main' include { FUSIONCATCHER_BUILD } from '../../modules/local/fusioncatcher/build/main' include { FUSIONREPORT_DOWNLOAD } 
from '../../modules/local/fusionreport/download/main' @@ -37,26 +37,24 @@ include { GFFREAD } from '../../modules/nf-core/gffread/ workflow BUILD_REFERENCES { take: genome // channel: [mandatory] val(genome) - ensembl_version // channel: [mandatory] val(ensembl_version) + genome_gencode_version // channel: [mandatory] val(genome_gencode_version) main: ch_versions = Channel.empty() if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || !file(params.gtf).exists() || file(params.gtf).isEmpty()){ - fake_meta = [:] - fake_meta.id = "Homo_sapiens.${params.genome}.${params.ensembl_version}" - ENSEMBL_DOWNLOAD(params.ensembl_version, params.genome, fake_meta) - ch_versions = ENSEMBL_DOWNLOAD.out.versions - ch_fasta = ENSEMBL_DOWNLOAD.out.primary_assembly - ch_gtf = ENSEMBL_DOWNLOAD.out.gtf + GENCODE_DOWNLOAD(params.genome_gencode_version, params.genome) + ch_versions = GENCODE_DOWNLOAD.out.versions + ch_fasta = GENCODE_DOWNLOAD.out.fasta.map { that -> [[id:that.Name], that] } + ch_gtf = GENCODE_DOWNLOAD.out.gtf.map { that -> [[id:that.Name], that] } } else { ch_fasta = Channel.fromPath(params.fasta).map { that -> [[id:that.Name], that] } ch_gtf = Channel.fromPath(params.gtf).map { that -> [[id:that.Name], that] } } if (!file(params.fai).exists() || file(params.fai).isEmpty()){ - SAMTOOLS_FAIDX(ch_fasta, [[],[]]).fai + SAMTOOLS_FAIDX(ch_fasta, [[],[]]) ch_versions = SAMTOOLS_FAIDX.out.versions ch_fai = SAMTOOLS_FAIDX.out.fai } else { @@ -121,39 +119,39 @@ workflow BUILD_REFERENCES { // } - if ((params.fusioncatcher || params.all) && - (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || - !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { - if (params.download_refs) { - FUSIONCATCHER_DOWNLOAD(params.ensembl_version) - ch_fusioncatcher_ref = FUSIONCATCHER_DOWNLOAD.out.reference} - else { - FUSIONCATCHER_BUILD(params.ensembl_version) - ch_fusioncatcher_ref = 
FUSIONCATCHER_BUILD.out.reference} - } else { - ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } - } - - - if ((params.starfusion || params.all) && - (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || - !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { - if (params.download_refs) { - ch_starfusion_ref = STARFUSION_DOWNLOAD( ch_fasta, ch_gtf ).out.reference } - else { - ch_starfusion_ref = STARFUSION_BUILD( ch_fasta, ch_gtf ).out.reference } - } else { - ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} - - - if ((params.fusionreport || params.all) && - (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || - !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { - if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } - ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ).out.reference - } else { - ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } - } + // if ((params.fusioncatcher || params.all) && + // (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || + // !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { + // if (params.download_refs) { + // FUSIONCATCHER_DOWNLOAD(params.genome_gencode_version) + // ch_fusioncatcher_ref = FUSIONCATCHER_DOWNLOAD.out.reference} + // else { + // FUSIONCATCHER_BUILD(params.genome_gencode_version) + // ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference} + // } else { + // ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } + // } + + + // if 
((params.starfusion || params.all) && + // (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || + // !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { + // if (params.download_refs) { + // ch_starfusion_ref = STARFUSION_DOWNLOAD( ch_fasta, ch_gtf ).out.reference } + // else { + // ch_starfusion_ref = STARFUSION_BUILD( ch_fasta, ch_gtf ).out.reference } + // } else { + // ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} + + + // if ((params.fusionreport || params.all) && + // (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || + // !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { + // if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } + // ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ).out.reference + // } else { + // ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } + // } emit: ch_fasta @@ -162,16 +160,16 @@ workflow BUILD_REFERENCES { ch_hgnc_ref ch_hgnc_date - ch_rrna_interval - ch_refflat - ch_salmon_index - ch_starindex_ref + // ch_rrna_interval + // ch_refflat + // ch_salmon_index + // ch_starindex_ref // ch_arriba_ref_blacklist // ch_arriba_ref_known_fusions // ch_arriba_ref_protein_domains - ch_fusioncatcher_ref - ch_starfusion_ref - ch_fusionreport_ref + // ch_fusioncatcher_ref + // ch_starfusion_ref + // ch_fusionreport_ref versions = ch_versions } diff --git a/subworkflows/local/qc_workflow.nf b/subworkflows/local/qc_workflow.nf index d7958fec..177f42d2 100644 --- a/subworkflows/local/qc_workflow.nf +++ b/subworkflows/local/qc_workflow.nf @@ -11,7 +11,7 @@ workflow QC_WORKFLOW { ch_reads_all ch_bam_sorted ch_bam_sorted_indexed - ch_chrgtf + ch_gtf ch_refflat ch_fasta ch_fai 
diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index de99d8e5..d6a0668a 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -7,7 +7,7 @@ include { STARFUSION } from '../../mod workflow STARFUSION_WORKFLOW { take: reads - ch_chrgtf + ch_gtf ch_starindex_ref ch_fasta @@ -23,7 +23,7 @@ workflow STARFUSION_WORKFLOW { ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists:true))) .map { meta, reads, fusions -> [ meta, fusions ] } } else { - STAR_FOR_STARFUSION( reads, ch_starindex_ref, ch_chrgtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') + STAR_FOR_STARFUSION( reads, ch_starindex_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions) ch_align = STAR_FOR_STARFUSION.out.bam_sorted diff --git a/subworkflows/local/stringtie_workflow.nf b/subworkflows/local/stringtie_workflow.nf index 60bd4f38..d66ec0ee 100644 --- a/subworkflows/local/stringtie_workflow.nf +++ b/subworkflows/local/stringtie_workflow.nf @@ -5,14 +5,14 @@ include { STRINGTIE_MERGE } from '../../modules/nf-core/stringtie/merge/m workflow STRINGTIE_WORKFLOW { take: bam_sorted - ch_chrgtf + ch_gtf main: ch_versions = Channel.empty() ch_stringtie_gtf = Channel.empty() if ((params.stringtie || params.all) && !params.fusioninspector_only) { - STRINGTIE_STRINGTIE(bam_sorted, ch_chrgtf.map { meta, gtf -> [ gtf ]}) + STRINGTIE_STRINGTIE(bam_sorted, ch_gtf.map { meta, gtf -> [ gtf ]}) ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) STRINGTIE_STRINGTIE @@ -23,7 +23,7 @@ workflow STRINGTIE_WORKFLOW { ch_versions = ch_versions.mix(STRINGTIE_STRINGTIE.out.versions) - STRINGTIE_MERGE (stringtie_gtf, ch_chrgtf.map { meta, gtf -> [ gtf ]}) + STRINGTIE_MERGE (stringtie_gtf, ch_gtf.map { meta, gtf -> [ gtf ]}) ch_versions = ch_versions.mix(STRINGTIE_MERGE.out.versions) 
ch_stringtie_gtf = STRINGTIE_MERGE.out.gtf } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 67dedb4b..bb24df1f 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -65,7 +65,7 @@ workflow RNAFUSION { // Create references if necessary // - BUILD_REFERENCES(params.genome, params.ensembl_version) + BUILD_REFERENCES(params.genome, params.genome_gencode_version) ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) // // Optional @@ -177,7 +177,7 @@ workflow RNAFUSION { // //Run STAR fusion // STARFUSION_WORKFLOW ( // ch_reads_all, -// ch_chrgtf, +// ch_gtf, // ch_starindex_ref, // ch_fasta // ) @@ -194,7 +194,7 @@ workflow RNAFUSION { // //Run stringtie // STRINGTIE_WORKFLOW ( // STARFUSION_WORKFLOW.out.ch_bam_sorted, -// ch_chrgtf +// ch_gtf // ) // ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) @@ -218,7 +218,7 @@ workflow RNAFUSION { // FUSIONREPORT_WORKFLOW.out.report, // FUSIONREPORT_WORKFLOW.out.csv, // STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, -// ch_chrgtf, +// ch_gtf, // ch_arriba_ref_protein_domains, // ch_arriba_ref_cytobands, // ch_hgnc_ref, @@ -232,7 +232,7 @@ workflow RNAFUSION { // ch_reads_all, // STARFUSION_WORKFLOW.out.ch_bam_sorted, // STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, -// ch_chrgtf, +// ch_gtf, // ch_refflat, // ch_fasta, // ch_fai, From 0b1a0672a59a31918c9fddb54fdffc883768800d Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 13 Nov 2024 10:03:44 +0100 Subject: [PATCH 11/41] syntax --- conf/modules.config | 2 +- modules/local/arriba/visualisation/main.nf | 8 ++++---- modules/local/hgnc/main.nf | 2 -- modules/nf-core/arriba/download/main.nf | 2 -- modules/nf-core/arriba/main.nf | 20 ++++++++++---------- subworkflows/local/build_references.nf | 18 +++++++++--------- 6 files changed, 24 insertions(+), 28 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index af832498..232efdd1 100644 --- 
a/conf/modules.config +++ b/conf/modules.config @@ -63,7 +63,7 @@ process { withName: 'FASTQC' { ext.args = '--quiet' - ext.when = { !params.skip_qc and not !params.build_references } + ext.when = { !params.skip_qc && !params.build_references } publishDir = [ path: { "${params.outdir}/fastqc" }, mode: params.publish_dir_mode, diff --git a/modules/local/arriba/visualisation/main.nf b/modules/local/arriba/visualisation/main.nf index cc120119..f1aa097b 100644 --- a/modules/local/arriba/visualisation/main.nf +++ b/modules/local/arriba/visualisation/main.nf @@ -22,17 +22,17 @@ process ARRIBA_VISUALISATION { script: def args = task.ext.args ?: '' - def cytobands = cytobands ? " --cytobands=$cytobands" : "" + def arg_cytobands = cytobands ? " --cytobands=$cytobands" : "" + def arg_protein_domains = protein_domains ? "--proteinDomains=$protein_domains" : "" def prefix = task.ext.prefix ?: "${meta.id}" - def protein_domains = protein_domains ? "--proteinDomains=$protein_domains" : "" """ draw_fusions.R \\ --fusions=$fusions \\ --alignments=$bam \\ --output=${prefix}.pdf \\ --annotation=${gtf} \\ - $cytobands \\ - $protein_domains \\ + $arg_cytobands \\ + $arg_protein_domains \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/hgnc/main.nf b/modules/local/hgnc/main.nf index 6a055631..5a250f0f 100644 --- a/modules/local/hgnc/main.nf +++ b/modules/local/hgnc/main.nf @@ -7,8 +7,6 @@ process HGNC_DOWNLOAD { 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--h5bf99c6_5' : 'quay.io/biocontainers/gnu-wget:1.18--h5bf99c6_5' }" - input: - output: path "hgnc_complete_set.txt" , emit: hgnc_ref path "HGNC-DB-timestamp.txt" , emit: hgnc_date diff --git a/modules/nf-core/arriba/download/main.nf b/modules/nf-core/arriba/download/main.nf index 96a4e336..7f11898b 100644 --- a/modules/nf-core/arriba/download/main.nf +++ b/modules/nf-core/arriba/download/main.nf @@ -7,8 +7,6 @@ process ARRIBA_DOWNLOAD { 
'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : 'biocontainers/arriba:2.4.0--h0033a41_2' }" - input: - output: path "*" , emit: reference path "versions.yml" , emit: versions diff --git a/modules/nf-core/arriba/main.nf b/modules/nf-core/arriba/main.nf index 2537d05b..66809530 100644 --- a/modules/nf-core/arriba/main.nf +++ b/modules/nf-core/arriba/main.nf @@ -28,11 +28,11 @@ process ARRIBA { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def blacklist = blacklist ? "-b $blacklist" : "-f blacklist" - def known_fusions = known_fusions ? "-k $known_fusions" : "" - def structural_variants = structural_variants ? "-d $structual_variants" : "" - def tags = tags ? "-t $tags" : "" - def protein_domains = protein_domains ? "-p $protein_domains" : "" + def arg_blacklist = blacklist ? "-b $blacklist" : "-f blacklist" + def arg_known_fusions = known_fusions ? "-k $known_fusions" : "" + def arg_structural_variants = structural_variants ? "-d $structural_variants" : "" + def arg_tags = tags ? "-t $tags" : "" + def arg_protein_domains = protein_domains ? 
"-p $protein_domains" : "" """ arriba \\ @@ -41,11 +41,11 @@ process ARRIBA { -g $gtf \\ -o ${prefix}.fusions.tsv \\ -O ${prefix}.fusions.discarded.tsv \\ - $blacklist \\ - $known_fusions \\ - $structural_variants \\ - $tags \\ - $protein_domains \\ + $arg_blacklist \\ + $arg_known_fusions \\ + $arg_structural_variants \\ + $arg_tags \\ + $arg_protein_domains \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 8017f634..1d45e74c 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -133,15 +133,15 @@ workflow BUILD_REFERENCES { // } - // if ((params.starfusion || params.all) && - // (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || - // !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { - // if (params.download_refs) { - // ch_starfusion_ref = STARFUSION_DOWNLOAD( ch_fasta, ch_gtf ).out.reference } - // else { - // ch_starfusion_ref = STARFUSION_BUILD( ch_fasta, ch_gtf ).out.reference } - // } else { - // ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} + if ((params.starfusion || params.all) && + (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || + !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { + if (params.download_refs) { + ch_starfusion_ref = STARFUSION_DOWNLOAD( ch_fasta, ch_gtf ).out.reference } + else { + ch_starfusion_ref = STARFUSION_BUILD( ch_fasta, ch_gtf ).out.reference } + } else { + ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} // if ((params.fusionreport || params.all) && From f134b84c8a5977cee935aa5396eef504f6c278fd Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 13 Nov 2024 10:35:46 
+0100 Subject: [PATCH 12/41] merging --- conf/modules.config | 14 -------------- modules/nf-core/salmon/quant/main.nf | 2 +- workflows/rnafusion.nf | 1 + 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7afa2817..113c406c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -127,7 +127,6 @@ process { ] } -<<<<<<< HEAD withName: 'GFFREAD' { ext.args = { '-w' } publishDir = [ @@ -145,8 +144,6 @@ process { ] } -======= ->>>>>>> 22d0e803ca87786359cf582bfb4ac8114adde8fd withName: 'HGNC_DOWNLOAD' { publishDir = [ path: { "${params.genomes_base}/hgnc" }, @@ -369,20 +366,9 @@ process { ] } -<<<<<<< HEAD withName: 'TRIM_WORKFLOW:*' { ext.when = { !params.build_references } -======= - withName: 'UCSC_GTFTOGENEPRED' { - ext.args = "-genePredExt -geneNameAsName2" - publishDir = [ - path: { "${params.genomes_base}/ensembl" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] ->>>>>>> 22d0e803ca87786359cf582bfb4ac8114adde8fd - } withName: 'VCF_COLLECT' { ext.when = {!params.fusioninspector_only} diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf index 6c528b24..6d624e75 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -28,7 +28,7 @@ process SALMON_QUANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def reference = "--index $index" + def reference = "--index $index" def reads1 = [], reads2 = [] meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def input_reads = meta.single_end ? 
"-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 9b615d40..8f7f8ae8 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -39,6 +39,7 @@ include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW From 7a5a91700ad963723a0a837d7f8cdb212947e8fd Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 13 Nov 2024 14:20:16 +0100 Subject: [PATCH 13/41] cleanup --- conf/modules.config | 2 +- main.nf | 7 +- modules.json | 142 ++++++++++++------ modules/local/fusioncatcher/build/main.nf | 1 - modules/local/fusioncatcher/download/main.nf | 20 +-- modules/local/gencode_download/main.nf | 4 +- modules/local/get_rrna_transcripts.nf | 43 ++++++ modules/nf-core/arriba/arriba/environment.yml | 3 + modules/nf-core/arriba/arriba/main.nf | 29 ++-- modules/nf-core/arriba/arriba/meta.yml | 39 +---- .../nf-core/arriba/arriba/tests/main.nf.test | 107 +++++++++++++ .../arriba/arriba/tests/main.nf.test.snap | 55 +++++++ .../arriba/arriba/tests/nextflow.config | 8 + modules/nf-core/arriba/download/main.nf | 13 +- modules/nf-core/arriba/download/meta.yml | 32 +++- .../arriba/download/tests/main.nf.test | 29 +++- .../arriba/download/tests/main.nf.test.snap | 62 ++++---- .../nf-core/arriba/download/tests/tags.yml | 2 + modules/nf-core/gunzip/environment.yml | 7 - modules/nf-core/gunzip/main.nf | 55 ------- modules/nf-core/gunzip/meta.yml | 47 ------ modules/nf-core/gunzip/tests/main.nf.test | 121 --------------- .../nf-core/gunzip/tests/main.nf.test.snap | 134 ----------------- modules/nf-core/gunzip/tests/nextflow.config | 5 - 
modules/nf-core/gunzip/tests/tags.yml | 2 - modules/nf-core/salmon/quant/environment.yml | 2 +- modules/nf-core/salmon/quant/main.nf | 6 +- .../salmon/quant/tests/main.nf.test.snap | 64 ++++---- nextflow.config | 3 +- nextflow_schema.json | 19 ++- subworkflows/local/build_references.nf | 102 +++++++------ workflows/rnafusion.nf | 52 ++----- 32 files changed, 564 insertions(+), 653 deletions(-) create mode 100644 modules/local/get_rrna_transcripts.nf create mode 100644 modules/nf-core/arriba/arriba/tests/main.nf.test create mode 100644 modules/nf-core/arriba/arriba/tests/main.nf.test.snap create mode 100644 modules/nf-core/arriba/arriba/tests/nextflow.config create mode 100644 modules/nf-core/arriba/download/tests/tags.yml delete mode 100644 modules/nf-core/gunzip/environment.yml delete mode 100644 modules/nf-core/gunzip/main.nf delete mode 100644 modules/nf-core/gunzip/meta.yml delete mode 100644 modules/nf-core/gunzip/tests/main.nf.test delete mode 100644 modules/nf-core/gunzip/tests/main.nf.test.snap delete mode 100644 modules/nf-core/gunzip/tests/nextflow.config delete mode 100644 modules/nf-core/gunzip/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 113c406c..123e02b0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -368,7 +368,7 @@ process { withName: 'TRIM_WORKFLOW:*' { ext.when = { !params.build_references } - + } withName: 'VCF_COLLECT' { ext.when = {!params.fusioninspector_only} diff --git a/main.nf b/main.nf index ce7352f1..09216f56 100644 --- a/main.nf +++ b/main.nf @@ -41,6 +41,8 @@ include { RNAFUSION } from './workflows/rnafusion' // WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_RNAFUSION { + take: + samplesheet main: @@ -48,8 +50,7 @@ workflow NFCORE_RNAFUSION { // WORKFLOW: Run pipeline // - ch_samplesheet = Channel.value(file(params.input, checkIfExists: true)) - RNAFUSION(ch_samplesheet) + RNAFUSION(samplesheet) } /* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -75,7 +76,7 @@ workflow { // // WORKFLOW: Run main workflow // - NFCORE_RNAFUSION () + NFCORE_RNAFUSION (PIPELINE_INITIALISATION.out.samplesheet) // // SUBWORKFLOW: Run completion tasks diff --git a/modules.json b/modules.json index e71954a6..7e506548 100644 --- a/modules.json +++ b/modules.json @@ -8,152 +8,198 @@ "agat/convertspgff2tsv": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "arriba/arriba": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "git_sha": "7741dfc830e77a8ead2fcb50b01461ee09d0cdfe", + "installed_by": [ + "modules" + ] }, "arriba/download": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "git_sha": "467c202a876d26af544fa8c4b22a050a535462a7", + "installed_by": [ + "modules" + ] }, "bedops/convert2bed": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] - }, - "arriba/download": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastp": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/bedtointervallist": { "branch": 
"master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gatk4/markduplicates": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gffread": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] - }, - "gunzip": { - "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectinsertsizemetrics": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectrnaseqmetrics": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectwgsmetrics": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rrnatranscripts": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "salmon/index": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "salmon/quant": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": [ + "modules" 
+ ] }, "samtools/faidx": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "star/align": { "branch": "master", "git_sha": "30a97c755895b7dfe40a730b0695c554a10f1cdd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "star/genomegenerate": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "stringtie/merge": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "stringtie/stringtie": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ucsc/gtftogenepred": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -162,20 +208,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", - 
"installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index 51153253..f6ff0987 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -2,7 +2,6 @@ process FUSIONCATCHER_BUILD { tag "fusioncatcher_build" label 'process_medium' - conda "bioconda::fusioncatcher=1.33" container "docker.io/clinicalgenomics/fusioncatcher:1.33" input: diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index 916b4673..20e3681f 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -7,10 +7,10 @@ process FUSIONCATCHER_DOWNLOAD { input: - val ensembl_version + val genome_gencode_version output: - path "human_v${ensembl_version}" , emit: reference + path "human_v${genome_gencode_version}" , emit: reference path "versions.yml" , emit: versions when: @@ -24,12 +24,12 @@ process FUSIONCATCHER_DOWNLOAD { // def url = """ - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.aa - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ab - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ac - wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${ensembl_version}.tar.gz.ad - cat human_${ensembl_version}.tar.gz.* | tar xz - rm human_${ensembl_version}.tar* + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.aa + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ab + wget $args http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ac + wget $args 
http://sourceforge.net/projects/fusioncatcher/files/data/human_${genome_gencode_version}.tar.gz.ad + cat human_${genome_gencode_version}.tar.gz.* | tar xz + rm human_${genome_gencode_version}.tar* cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -39,8 +39,8 @@ process FUSIONCATCHER_DOWNLOAD { stub: """ - mkdir human_v${ensembl_version} - touch human_v${ensembl_version}/ensembl_fully_overlapping_genes.txt + mkdir human_v${genome_gencode_version} + touch human_v${genome_gencode_version}/ensembl_fully_overlapping_genes.txt cat <<-END_VERSIONS > versions.yml "${task.process}": fusioncatcher: \$(echo \$(fusioncatcher --version 2>&1)) diff --git a/modules/local/gencode_download/main.nf b/modules/local/gencode_download/main.nf index e9416cf4..1f466d26 100644 --- a/modules/local/gencode_download/main.nf +++ b/modules/local/gencode_download/main.nf @@ -31,7 +31,7 @@ process GENCODE_DOWNLOAD { cat <<-END_VERSIONS > versions.yml "${task.process}": - wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) END_VERSIONS """ @@ -42,7 +42,7 @@ process GENCODE_DOWNLOAD { cat <<-END_VERSIONS > versions.yml "${task.process}": - wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3 > versions.yml) + wget: \$(echo wget -V 2>&1 | grep "GNU Wget" | cut -d" " -f3) END_VERSIONS """ diff --git a/modules/local/get_rrna_transcripts.nf b/modules/local/get_rrna_transcripts.nf new file mode 100644 index 00000000..cb39b3e4 --- /dev/null +++ b/modules/local/get_rrna_transcripts.nf @@ -0,0 +1,43 @@ +process GET_RRNA_TRANSCRIPTS { + tag 'get_rrna_bed' + label 'process_low' + + conda "bioconda::pirate=1.0.4 bioconda::perl-bioperl=1.7.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pirate:1.0.4--hdfd78af_2' : + 'biocontainers/pirate:1.0.4--hdfd78af_2' }" + + input: + tuple val(meta), path(gtf) + + output: + path('rrna.gtf') , emit: rrnagtf + path('rrna.bed') , emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + $baseDir/bin/get_rrna_transcripts.py --gtf ${gtf} --output rrna.gtf + + $baseDir/bin/gtf2bed rrna.gtf > rrna.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + get_rrna_transcripts: v1.0 + END_VERSIONS + """ + + stub: + """ + touch rrna.gtf + touch rrna.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + get_rrna_transcripts: v1.0 + END_VERSIONS + """ +} diff --git a/modules/nf-core/arriba/arriba/environment.yml b/modules/nf-core/arriba/arriba/environment.yml index d0883a0d..c3a88ffb 100644 --- a/modules/nf-core/arriba/arriba/environment.yml +++ b/modules/nf-core/arriba/arriba/environment.yml @@ -1,5 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda + dependencies: - bioconda::arriba=2.4.0 diff --git a/modules/nf-core/arriba/arriba/main.nf b/modules/nf-core/arriba/arriba/main.nf index df6d4e39..75dbf93a 100644 --- a/modules/nf-core/arriba/arriba/main.nf +++ b/modules/nf-core/arriba/arriba/main.nf @@ -8,14 +8,13 @@ process ARRIBA_ARRIBA { 'biocontainers/arriba:2.4.0--h0033a41_2' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(bam) tuple val(meta2), path(fasta) tuple val(meta3), path(gtf) - tuple val(meta4), path(blacklist) - tuple val(meta5), path(known_fusions) - tuple val(meta6), path(structural_variants) - tuple val(meta7), path(tags) - tuple val(meta8), path(protein_domains) + path(blacklist) + path(known_fusions) + path(cytobands) + path(protein_domains) output: tuple val(meta), path("*.fusions.tsv") , emit: fusions @@ -28,11 +27,10 @@ process ARRIBA_ARRIBA 
{ script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def arg_blacklist = blacklist ? "-b $blacklist" : "-f blacklist" - def arg_known_fusions = known_fusions ? "-k $known_fusions" : "" - def arg_structural_variants = structural_variants ? "-d $structural_variants" : "" - def arg_tags = tags ? "-t $tags" : "" - def arg_protein_domains = protein_domains ? "-p $protein_domains" : "" + def blacklist = blacklist ? "-b $blacklist" : "-f blacklist" + def known_fusions = known_fusions ? "-k $known_fusions" : "" + def cytobands = cytobands ? "-d $cytobands" : "" + def protein_domains = protein_domains ? "-p $protein_domains" : "" """ arriba \\ @@ -41,11 +39,10 @@ process ARRIBA_ARRIBA { -g $gtf \\ -o ${prefix}.fusions.tsv \\ -O ${prefix}.fusions.discarded.tsv \\ - $arg_blacklist \\ - $arg_known_fusions \\ - $arg_structural_variants \\ - $arg_tags \\ - $arg_protein_domains \\ + $blacklist \\ + $known_fusions \\ + $cytobands \\ + $protein_domains \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/arriba/arriba/meta.yml b/modules/nf-core/arriba/arriba/meta.yml index f230dda1..91ba2103 100644 --- a/modules/nf-core/arriba/arriba/meta.yml +++ b/modules/nf-core/arriba/arriba/meta.yml @@ -43,48 +43,19 @@ input: type: file description: Annotation GTF file pattern: "*.{gtf}" - - - meta4: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - blacklist: + - - blacklist: type: file description: Blacklist file pattern: "*.{tsv}" - - - meta5: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - known_fusions: + - - known_fusions: type: file description: Known fusions file pattern: "*.{tsv}" - - - meta6: - type: map - description: | - Groovy Map containing reference information - e.g. 
[ id:'test' ] - - structural_variants: - type: file - description: Structural variants file - pattern: "*.{tsv}" - - - meta7: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - tags: + - - cytobands: type: file - description: Tags file + description: Cytobands file pattern: "*.{tsv}" - - - meta8: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - protein_domains: + - - protein_domains: type: file description: Protein domains file pattern: "*.{gff3}" diff --git a/modules/nf-core/arriba/arriba/tests/main.nf.test b/modules/nf-core/arriba/arriba/tests/main.nf.test new file mode 100644 index 00000000..4cff86e5 --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/main.nf.test @@ -0,0 +1,107 @@ + +nextflow_process { + + name "Test Process ARRIBA_ARRIBA" + script "../main.nf" + process "ARRIBA_ARRIBA" + + tag "modules" + tag "modules_nfcore" + tag "arriba" + tag "arriba/arriba" + tag "arriba/download" + tag "star/genomegenerate" + tag "star/align" + + setup { + config "./nextflow.config" + options "-stub" + run("ARRIBA_DOWNLOAD") { + script "../../../arriba/download/main.nf" + process { + """ + input[0] = 'GRCh38' + """ + } + } + run("STAR_GENOMEGENERATE") { + script "../../../star/genomegenerate/main.nf" + options "-stub" + process { + """ + input[0] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + run("STAR_ALIGN") { + script "../../../star/align/main.nf" + options "-stub" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true), 
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = STAR_GENOMEGENERATE.out.index + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = false + input[4] = 'illumina' + input[5] = false + """ + } + } + } + + test("homo_sapiens - paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'test_fasta' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + ]) + input[2] = Channel.of([ + [ id:'test_gtf' ], + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ] + ]) + input[3] = [] + + input[4] = [] + + input[5] = [] + + input[6] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/arriba/arriba/tests/main.nf.test.snap b/modules/nf-core/arriba/arriba/tests/main.nf.test.snap new file mode 100644 index 00000000..14d46f6b --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo_sapiens - paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "2": [ + "versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30" + ], + "fusions": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "fusions_fail": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260" + ] + ], + "versions": [ + "versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-08T15:41:23.945072" + } +} \ No newline at end of file diff --git a/modules/nf-core/arriba/arriba/tests/nextflow.config b/modules/nf-core/arriba/arriba/tests/nextflow.config new file mode 100644 index 00000000..d3d5b00f --- /dev/null +++ b/modules/nf-core/arriba/arriba/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: STAR_GENOMEGENERATE { + ext.args = '--genomeSAindexNbases 11' + } + withName: STAR_ALIGN { + ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50' + } +} diff --git a/modules/nf-core/arriba/download/main.nf b/modules/nf-core/arriba/download/main.nf index 7f11898b..c45120ad 100644 --- a/modules/nf-core/arriba/download/main.nf +++ b/modules/nf-core/arriba/download/main.nf @@ -7,16 +7,22 @@ process ARRIBA_DOWNLOAD { 'https://depot.galaxyproject.org/singularity/arriba:2.4.0--h0033a41_2' : 'biocontainers/arriba:2.4.0--h0033a41_2' }" + input: + val(genome) + output: - path "*" , emit: reference - path "versions.yml" , emit: versions + path "blacklist*${genome}*.tsv.gz" , emit: blacklist + path "cytobands*${genome}*.tsv" , emit: cytobands + path "protein_domains*${genome}*.gff3" , emit: protein_domains + path "known_fusions*${genome}*.tsv.gz" , emit: known_fusions + path "versions.yml" , 
emit: versions when: task.ext.when == null || task.ext.when script: """ - wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz + wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz --no-check-certificate tar -xzvf arriba_v2.4.0.tar.gz rm arriba_v2.4.0.tar.gz mv arriba_v2.4.0/database/* . @@ -34,7 +40,6 @@ process ARRIBA_DOWNLOAD { touch protein_domains_hg38_GRCh38_v2.4.0.gff3 touch cytobands_hg38_GRCh38_v2.4.0.tsv touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz - touch protein_domains_hg38_GRCh38_v2.4.0.gff3 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/arriba/download/meta.yml b/modules/nf-core/arriba/download/meta.yml index 531c66c2..bdf542eb 100644 --- a/modules/nf-core/arriba/download/meta.yml +++ b/modules/nf-core/arriba/download/meta.yml @@ -15,12 +15,34 @@ tools: licence: ["MIT"] identifier: biotools:Arriba +input: + - - genome: + type: string + description: hg38, hg19, GRCh38, GRCh37 for humans are accepted output: - - reference: - - "*": - type: directory - description: Folder with arriba references - pattern: "*" + - blacklist: + - blacklist*${genome}*.tsv.gz: + type: string + description: The blacklist removes recurrent alignment artifacts and transcripts + which are present in healthy tissue + pattern: ".tsv.gz" + - cytobands: + - cytobands*${genome}*.tsv: + type: file + description: Coordinates of the Giemsa staining bands. 
This information is used + to draw ideograms + pattern: ".tsv" + - protein_domains: + - protein_domains*${genome}*.gff3: + type: file + description: Protein domain annotations + patter: "*.gff3" + - known_fusions: + - known_fusions*${genome}*.tsv.gz: + type: file + description: Arriba is more sensitive to those fusions to improve the detection + rate of expected or highly relevant events, such as recurrent fusions + patter: "*.tsv.gz" - versions: - versions.yml: type: file diff --git a/modules/nf-core/arriba/download/tests/main.nf.test b/modules/nf-core/arriba/download/tests/main.nf.test index cccc95db..2c32c7a0 100644 --- a/modules/nf-core/arriba/download/tests/main.nf.test +++ b/modules/nf-core/arriba/download/tests/main.nf.test @@ -11,11 +11,10 @@ nextflow_process { tag "arriba/download" test("test-arriba-download") { - when { process { """ - + input[0] = 'GRCh38' """ } } @@ -23,11 +22,27 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - process.out.reference[0].collect { file(it).name }.toSorted(), - process.out.versions - ).match() - } + { assert snapshot(process.out).match() } + ) + } + + } + + test("download - stub") { + + options "-stub" + + when { + process { + """ + input[0] = 'GRCh38' + """ + } + } + + then { + assertAll( + { assert process.success } ) } } diff --git a/modules/nf-core/arriba/download/tests/main.nf.test.snap b/modules/nf-core/arriba/download/tests/main.nf.test.snap index eda3f706..fe9b18b9 100644 --- a/modules/nf-core/arriba/download/tests/main.nf.test.snap +++ b/modules/nf-core/arriba/download/tests/main.nf.test.snap @@ -1,35 +1,43 @@ { - "test-arriba-download": { + "download": { "content": [ - [ - "CREDITS", - "RefSeq_viral_genomes_v2.4.0.fa.gz", - "blacklist_hg19_hs37d5_GRCh37_v2.4.0.tsv.gz", - "blacklist_hg38_GRCh38_v2.4.0.tsv.gz", - "blacklist_mm10_GRCm38_v2.4.0.tsv.gz", - "blacklist_mm39_GRCm39_v2.4.0.tsv.gz", - "cytobands_hg19_hs37d5_GRCh37_v2.4.0.tsv", - "cytobands_hg38_GRCh38_v2.4.0.tsv", 
- "cytobands_mm10_GRCm38_v2.4.0.tsv", - "cytobands_mm39_GRCm39_v2.4.0.tsv", - "known_fusions_hg19_hs37d5_GRCh37_v2.4.0.tsv.gz", - "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", - "known_fusions_mm10_GRCm38_v2.4.0.tsv.gz", - "known_fusions_mm39_GRCm39_v2.4.0.tsv.gz", - "protein_domains_hg19_hs37d5_GRCh37_v2.4.0.gff3", - "protein_domains_hg38_GRCh38_v2.4.0.gff3", - "protein_domains_mm10_GRCm38_v2.4.0.gff3", - "protein_domains_mm39_GRCm39_v2.4.0.gff3", - "versions.yml" - ], - [ - "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" - ] + { + "0": [ + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz:md5,e3098a4be51aece78aede64b55c39318" + ], + "1": [ + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c" + ], + "2": [ + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978" + ], + "3": [ + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz:md5,4f00f81ccb5f4db283f1a22b8b0da67c" + ], + "4": [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ], + "blacklist": [ + "blacklist_hg38_GRCh38_v2.4.0.tsv.gz:md5,e3098a4be51aece78aede64b55c39318" + ], + "cytobands": [ + "cytobands_hg38_GRCh38_v2.4.0.tsv:md5,7bd504feefb33fcfc9be0517439a423c" + ], + "known_fusions": [ + "known_fusions_hg38_GRCh38_v2.4.0.tsv.gz:md5,4f00f81ccb5f4db283f1a22b8b0da67c" + ], + "protein_domains": [ + "protein_domains_hg38_GRCh38_v2.4.0.gff3:md5,43c387a784ebeed71b4147076cebf978" + ], + "versions": [ + "versions.yml:md5,98c69df5eaea5caf0b4af7b8d7af4893" + ] + } ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-22T17:00:38.086459" + "timestamp": "2024-10-08T11:12:17.010496" } } \ No newline at end of file diff --git a/modules/nf-core/arriba/download/tests/tags.yml b/modules/nf-core/arriba/download/tests/tags.yml new file mode 100644 index 00000000..f510bbf1 --- /dev/null +++ b/modules/nf-core/arriba/download/tests/tags.yml @@ -0,0 +1,2 @@ +arriba/download: + - "modules/nf-core/arriba/download/**" diff --git 
a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml deleted file mode 100644 index c7794856..00000000 --- a/modules/nf-core/gunzip/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -channels: - - conda-forge - - bioconda -dependencies: - - conda-forge::grep=3.11 - - conda-forge::sed=4.8 - - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf deleted file mode 100644 index 5e67e3b9..00000000 --- a/modules/nf-core/gunzip/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process GUNZIP { - tag "$archive" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : - 'nf-core/ubuntu:22.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$gunzip"), emit: gunzip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".$extension" - def prefix = task.ext.prefix ?: name - gunzip = prefix + ".$extension" - """ - # Not calling gunzip itself because it creates files - # with the original group ownership rather than the - # default one for that user / the work directory - gzip \\ - -cd \\ - $args \\ - $archive \\ - > $gunzip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] - def name = archive.toString() - '.gz' - ".$extension" - def prefix = task.ext.prefix ?: name - gunzip = prefix + ".$extension" - """ - touch $gunzip - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gunzip: \$(echo 
\$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml deleted file mode 100644 index 9066c035..00000000 --- a/modules/nf-core/gunzip/meta.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression - - decompression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] - identifier: "" -input: - - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - - meta: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - $gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" -maintainers: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" - - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test deleted file mode 100644 index 776211ad..00000000 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ /dev/null @@ -1,121 +0,0 @@ -nextflow_process { - - name "Test Process GUNZIP" - script "../main.nf" - process "GUNZIP" - tag "gunzip" - tag "modules_nfcore" - tag "modules" - - test("Should run without failures") { - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - 
- then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - prefix") { - - config './nextflow.config' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - stub") { - - options '-stub' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("Should run without failures - prefix - stub") { - - options '-stub' - config './nextflow.config' - - when { - params { - outdir = "$outputDir" - } - process { - """ - input[0] = Channel.of([ - [ id: 'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap deleted file mode 100644 index 069967e7..00000000 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ /dev/null @@ -1,134 +0,0 @@ -{ - "Should run without failures - prefix - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ], - "gunzip": [ - [ - { - "id": "test" - }, - 
"test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-25T11:35:10.861293" - }, - "Should run without failures - stub": { - "content": [ - { - "0": [ - [ - [ - - ], - "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ], - "gunzip": [ - [ - [ - - ], - "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-25T11:35:05.857145" - }, - "Should run without failures": { - "content": [ - { - "0": [ - [ - [ - - ], - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ], - "gunzip": [ - [ - [ - - ], - "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2023-10-17T15:35:37.690477896" - }, - "Should run without failures - prefix": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "1": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ], - "gunzip": [ - [ - { - "id": "test" - }, - "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" - ] - ], - "versions": [ - "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" - }, - "timestamp": "2024-06-25T11:33:32.921739" - } -} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config deleted file mode 100644 index dec77642..00000000 --- 
a/modules/nf-core/gunzip/tests/nextflow.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: GUNZIP { - ext.prefix = { "${meta.id}.xyz" } - } -} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml deleted file mode 100644 index fd3f6915..00000000 --- a/modules/nf-core/gunzip/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -gunzip: - - modules/nf-core/gunzip/** diff --git a/modules/nf-core/salmon/quant/environment.yml b/modules/nf-core/salmon/quant/environment.yml index 471164fa..b3f75777 100644 --- a/modules/nf-core/salmon/quant/environment.yml +++ b/modules/nf-core/salmon/quant/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::salmon=1.10.1 + - bioconda::salmon=1.10.3 diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf index 6d624e75..f1e3b5cd 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -4,8 +4,8 @@ process SALMON_QUANT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/salmon:1.10.1--h7e5ed60_0' : - 'biocontainers/salmon:1.10.1--h7e5ed60_0' }" + 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' : + 'biocontainers/salmon:1.10.3--h6dccd9a_2' }" input: tuple val(meta), path(reads) @@ -28,7 +28,7 @@ process SALMON_QUANT { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def reference = "--index $index" + def reference = "--index $index" def reads1 = [], reads2 = [] meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def input_reads = meta.single_end ? 
"-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" diff --git a/modules/nf-core/salmon/quant/tests/main.nf.test.snap b/modules/nf-core/salmon/quant/tests/main.nf.test.snap index 547ce2de..ea22a80c 100644 --- a/modules/nf-core/salmon/quant/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/quant/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "sarscov2 - single_end": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -15,15 +15,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:51:22.424672" + "timestamp": "2024-10-18T10:01:16.989080539" }, "sarscov2 - single_end stub": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -36,15 +36,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:51:32.292277" + "timestamp": "2024-10-18T10:01:29.340996235" }, "sarscov2 - single_end lib type A": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -57,15 +57,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:51:42.656382" + "timestamp": "2024-10-18T10:01:43.056167576" }, "sarscov2 - pair_end multiple": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -78,15 +78,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:52:33.03647" + "timestamp": "2024-10-18T10:03:05.500792631" }, "sarscov2 - pair_end multiple stub": { "content": [ [ - 
"versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -99,15 +99,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:52:43.388379" + "timestamp": "2024-10-18T10:03:26.428959203" }, "sarscov2 - single_end lib type A stub": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -120,15 +120,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:51:52.612664" + "timestamp": "2024-10-18T10:02:03.420850208" }, "sarscov2 - pair_end": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -141,15 +141,15 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:52:05.577881" + "timestamp": "2024-10-18T10:02:16.130074696" }, "sarscov2 - pair_end stub": { "content": [ [ - "versions.yml:md5,80eb3d2ad36960c7e9263f81ede9d263" + "versions.yml:md5,0d510d5db6398e2c8ca9443330740607" ], [ [ @@ -162,9 +162,9 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-14T09:52:15.286461" + "timestamp": "2024-10-18T10:02:39.470004547" } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 3e4e4b4d..98535a01 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,7 @@ params { // Input options input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' - build_references = false + references_only = false cosmic_username = null cosmic_passwd = null qiagen = false @@ -75,6 +75,7 @@ params { rrna_intervals = 
"${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}.interval_list" gencode_ref = "${params.genomes_base}/gencode" arriba_ref_blacklist = "${params.genomes_base}/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz" + arriba_ref_cytobands = "${params.genomes_base}/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv" arriba_ref_known_fusions = "${params.genomes_base}/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz" arriba_ref_protein_domains = "${params.genomes_base}/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3" fusioncatcher_ref = "${params.genomes_base}/fusioncatcher/human_v${params.genome_gencode_version}" diff --git a/nextflow_schema.json b/nextflow_schema.json index 480aa091..15915510 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -79,8 +79,7 @@ "genome_gencode_version": { "type": "integer", "fa_icon": "far fa-file-code", - "description": "ensembl version", - "default": 102 + "description": "gencode version" }, "starfusion_build": { "type": "boolean", @@ -108,6 +107,11 @@ "fa_icon": "far fa-file-code", "description": "Path to arriba reference blacklist" }, + "arriba_ref_cytobands": { + "type": "string", + "fa_icon": "far fa-file-code", + "description": "Path to arriba reference cytobands" + }, "arriba_ref_known_fusions": { "type": "string", "fa_icon": "far fa-file-code", @@ -307,9 +311,9 @@ "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", "properties": { - "genome": { - "type": "string", - "description": "Name of iGenomes reference.", + "references_only": { + "type": "boolean", + "description": "Skip running the analysis, only builds the references", "fa_icon": "fas fa-book" }, "fasta": { @@ -328,6 +332,11 @@ "description": "Path to FASTA genome index file.", "fa_icon": "far fa-file-code" }, + "genome": { + "type": "string", + "description": "Name of iGenomes reference.", + "fa_icon": "fas fa-book" + }, "gtf": { "type": "string", "format": "file-path", diff 
--git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 1d45e74c..0c6c9916 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -4,7 +4,6 @@ ======================================================================================== */ -include { ARRIBA_DOWNLOAD } from '../../modules/local/arriba/download/main' include { GENCODE_DOWNLOAD } from '../../modules/local/gencode_download/main' include { FUSIONCATCHER_DOWNLOAD } from '../../modules/local/fusioncatcher/download/main' include { FUSIONCATCHER_BUILD } from '../../modules/local/fusioncatcher/build/main' @@ -14,13 +13,14 @@ include { STARFUSION_BUILD } from '../../modules/local/starfusion include { STARFUSION_DOWNLOAD } from '../../modules/local/starfusion/download/main' include { GTF_TO_REFFLAT } from '../../modules/local/uscs/custom_gtftogenepred/main' include { RRNA_TRANSCRIPTS } from '../../modules/local/rrnatranscripts/main' -include { CONVERT2BED } from '../../modules/local/convert2bed/main' +include { GET_RRNA_TRANSCRIPTS } from '../../modules/local/get_rrna_transcripts' +// include { CONVERT2BED } from '../../modules/local/convert2bed/main' /* ======================================================================================== IMPORT NF-CORE MODULES/SUBWORKFLOWS ======================================================================================== */ - +include { ARRIBA_DOWNLOAD } from '../../modules/nf-core/arriba/download/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { STAR_GENOMEGENERATE } from '../../modules/nf-core/star/genomegenerate/main' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/gatk4/createsequencedictionary/main' @@ -73,9 +73,8 @@ workflow BUILD_REFERENCES { if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){ GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) - RRNA_TRANSCRIPTS(ch_gtf) - 
CONVERT2BED(RRNA_TRANSCRIPTS.out.rrna_gtf) - GATK4_BEDTOINTERVALLIST(CONVERT2BED.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict) + GET_RRNA_TRANSCRIPTS(ch_gtf) + GATK4_BEDTOINTERVALLIST(GET_RRNA_TRANSCRIPTS.out.bed.map { it -> [ [id:it.name], it ] }, GATK4_CREATESEQUENCEDICTIONARY.out.dict ) ch_rrna_interval = GATK4_BEDTOINTERVALLIST.out.interval_list } else { ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } @@ -106,31 +105,35 @@ workflow BUILD_REFERENCES { ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] } } -// if ((params.arriba || params.all) && -// (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || -// !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || -// !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { -// ARRIBA_DOWNLOAD() -// ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } -// ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } -// ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } -// } else { -// // TODO need to update the module to emit blacklist,knownfusions etc -// } - - - // if ((params.fusioncatcher || params.all) && - // (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || - // !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { - // if (params.download_refs) { - // FUSIONCATCHER_DOWNLOAD(params.genome_gencode_version) - // ch_fusioncatcher_ref = FUSIONCATCHER_DOWNLOAD.out.reference} - // else { - // FUSIONCATCHER_BUILD(params.genome_gencode_version) - // ch_fusioncatcher_ref = 
FUSIONCATCHER_BUILD.out.reference} - // } else { - // ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } - // } + if ((params.arriba || params.all) && + (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || + !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || + !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { + ARRIBA_DOWNLOAD(genome) + ch_arriba_ref_blacklist = ARRIBA_DOWNLOAD.out.blacklist + ch_arriba_ref_cytobands = ARRIBA_DOWNLOAD.out.cytobands + ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions + ch_arriba_ref_protein_domains = ARRIBA_DOWNLOAD.out.protein_domains + } else { + ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } + ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { that -> [[id:that.Name], that] } + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } + ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } + } + + + if ((params.fusioncatcher || params.all) && + (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || + !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { + if (params.download_refs) { + FUSIONCATCHER_DOWNLOAD(params.genome_gencode_version) + ch_fusioncatcher_ref = FUSIONCATCHER_DOWNLOAD.out.reference} + else { + FUSIONCATCHER_BUILD(params.genome_gencode_version) + ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference} + } else { + ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } + } if ((params.starfusion || params.all) && @@ -144,14 +147,14 @@ workflow 
BUILD_REFERENCES { ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} - // if ((params.fusionreport || params.all) && - // (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || - // !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { - // if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } - // ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ).out.reference - // } else { - // ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } - // } + if ((params.fusionreport || params.all) && + (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || + !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { + if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } + ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ).out.reference + } else { + ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } + } emit: ch_fasta @@ -160,16 +163,17 @@ workflow BUILD_REFERENCES { ch_hgnc_ref ch_hgnc_date - // ch_rrna_interval - // ch_refflat - // ch_salmon_index - // ch_starindex_ref - // ch_arriba_ref_blacklist - // ch_arriba_ref_known_fusions - // ch_arriba_ref_protein_domains - // ch_fusioncatcher_ref - // ch_starfusion_ref - // ch_fusionreport_ref + ch_rrna_interval + ch_refflat + ch_salmon_index + ch_starindex_ref + ch_arriba_ref_blacklist + ch_arriba_ref_cytobands + ch_arriba_ref_known_fusions + ch_arriba_ref_protein_domains + ch_fusioncatcher_ref + ch_starfusion_ref + ch_fusionreport_ref versions = ch_versions } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 
8f7f8ae8..5e34db97 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -4,41 +4,21 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// include { BUILD_REFERENCES } from '../subworkflows/local/build_references' -include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' -include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' -include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' -include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' -include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow' -include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' -include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' -include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' -include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' +// include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' +// include { ARRIBA_WORKFLOW } from 
'../subworkflows/local/arriba_workflow' +// include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' +// include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' +// include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow' +// include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' +// include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' +// include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' +// include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' + +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +// include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -54,7 +34,7 @@ workflow RNAFUSION { ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() + // ch_multiqc_files = Channel.empty() // // Create references if necessary @@ -268,7 +248,7 @@ workflow RNAFUSION { // QC_WORKFLOW ( // ch_reads_all, // STARFUSION_WORKFLOW.out.ch_bam_sorted, - // ch_chrgtf, + // ch_gtf, // ch_refflat, // ch_fasta, // ch_fai, From d18b303eb7a1644405fe877bc7a2a80f88197f19 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:45:10 +0100 Subject: [PATCH 14/41] update modules, remove chrgtf, adapt with meta/no meta --- .github/workflows/awsfulltest.yml | 18 +- .github/workflows/awstest.yml | 26 -- .github/workflows/ci.yml | 9 +- AAA_BBB.html | 0 README.md | 6 +- conf/modules.config | 6 +- docs/usage.md | 16 +- modules.json | 146 +++------ modules/local/fusionreport/detect/main.nf | 12 +- modules/nf-core/cat/fastq/environment.yml | 2 +- modules/nf-core/cat/fastq/main.nf | 4 +- 
.../nf-core/cat/fastq/tests/main.nf.test.snap | 100 +++--- .../collectinsertsizemetrics/environment.yml | 2 +- .../picard/collectinsertsizemetrics/main.nf | 4 +- .../tests/main.nf.test.snap | 14 +- .../collectrnaseqmetrics/environment.yml | 2 +- .../picard/collectrnaseqmetrics/main.nf | 4 +- .../collectrnaseqmetrics/tests/main.nf.test | 81 +++++ .../tests/main.nf.test.snap | 60 ++++ .../tests/nextflow.config | 13 + .../picard/collectwgsmetrics/environment.yml | 4 +- .../nf-core/picard/collectwgsmetrics/main.nf | 4 +- .../collectwgsmetrics/tests/main.nf.test.snap | 12 +- modules/nf-core/salmon/index/environment.yml | 2 +- modules/nf-core/salmon/index/main.nf | 4 +- .../salmon/index/tests/main.nf.test.snap | 16 +- modules/nf-core/salmon/quant/main.nf | 3 +- modules/nf-core/samtools/sort/main.nf | 10 +- .../nf-core/samtools/sort/tests/main.nf.test | 64 ++++ .../samtools/sort/tests/main.nf.test.snap | 99 +++++- modules/nf-core/samtools/view/environment.yml | 2 + modules/nf-core/samtools/view/main.nf | 4 +- modules/nf-core/star/align/environment.yml | 6 +- modules/nf-core/star/align/main.nf | 4 +- modules/nf-core/star/align/meta.yml | 111 ++++--- modules/nf-core/star/align/tests/main.nf.test | 36 +- .../star/align/tests/main.nf.test.snap | 190 +++-------- .../star/genomegenerate/environment.yml | 6 +- modules/nf-core/star/genomegenerate/main.nf | 4 +- .../genomegenerate/tests/main.nf.test.snap | 28 +- .../stringtie/stringtie/environment.yml | 2 +- modules/nf-core/stringtie/stringtie/main.nf | 4 +- .../stringtie/tests/main.nf.test.snap | 64 ++-- subworkflows/local/arriba_workflow.nf | 3 +- subworkflows/local/build_references.nf | 46 ++- subworkflows/local/fusionreport_workflow.nf | 2 +- test.fusionreport.tsv | 0 test.fusionreport_filtered.tsv | 0 test.fusions.csv | 0 test.fusions.json | 0 test_fusionreport_index.html | 0 tests/main.nf.test | 22 +- workflows/rnafusion.nf | 307 ++++++------------ 53 files changed, 785 insertions(+), 799 deletions(-) create mode 100644 
AAA_BBB.html create mode 100644 modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test create mode 100644 modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap create mode 100644 modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config create mode 100644 test.fusionreport.tsv create mode 100644 test.fusionreport_filtered.tsv create mode 100644 test.fusions.csv create mode 100644 test.fusions.json create mode 100644 test_fusionreport_index.html diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index fe30ca56..d1492d8b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -46,7 +46,6 @@ jobs: "cosmic_username": "${{ secrets.cosmic_username }}", "cosmic_passwd": "${{ secrets.cosmic_passwd }}", "all": true, - "build_references": true } profiles: test_full,aws_tower - uses: actions/upload-artifact@v4 @@ -55,19 +54,4 @@ jobs: path: | seqera_platform_action_*.log seqera_platform_action_*.json - - name: Launch run workflow via tower - uses: seqeralabs/action-tower-launch@v2 - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", - "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", - "cosmic_username": "${{ secrets.cosmic_username }}", - "cosmic_passwd": "${{ secrets.cosmic_passwd }}", - "all": true, - } - profiles: test_full,aws_tower + diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 8f6bfb01..78bdb317 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -26,8 +26,6 @@ jobs: "cosmic_username": "${{ secrets.cosmic_username }}", "cosmic_passwd": "${{ secrets.cosmic_passwd }}", "all": 
true, - "stub": true, - "build_references": true } profiles: test,aws_tower - uses: actions/upload-artifact@v4 @@ -36,27 +34,3 @@ jobs: path: | tower_action_*.log tower_action_*.json - - - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v2 - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/rnafusion/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}", - "genomes_base": "s3://${{ secrets.AWS_S3_BUCKET }}/rnafusion/results-${{ github.sha }}/references", - "cosmic_username": "${{ secrets.cosmic_username }}", - "cosmic_passwd": "${{ secrets.cosmic_passwd }}", - "all": true, - "stub": true - } - profiles: test,aws_tower - - uses: actions/upload-artifact@v4 - with: - name: Seqera Platform debug log file - path: | - seqera_platform_action_*.log - seqera_platform_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e1028ed8..2a87529d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,16 +83,11 @@ jobs: - name: Clean up Disk space uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: "Run pipeline build with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} \ - -stub --build_references \ + -stub \ --outdir /home/runner/work/rnafusion/rnafusion/results --all \ --genomes_base /home/runner/work/rnafusion/rnafusion/results/references \ --cosmic_username ${{ secrets.COSMIC_USERNAME }} --cosmic_passwd ${{ secrets.COSMIC_PASSWD }} - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ 
matrix.test_name }} | ${{ matrix.profile }}" - run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} -stub \ - --outdir /home/runner/work/rnafusion/rnafusion/results --all ${{ matrix.trim_parameters }} \ - --genomes_base /home/runner/work/rnafusion/rnafusion/results/references diff --git a/AAA_BBB.html b/AAA_BBB.html new file mode 100644 index 00000000..e69de29b diff --git a/README.md b/README.md index 41e2bc96..a5210091 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,9 @@ In rnafusion the full-sized test includes reference building and fusion detectio ### Build references -`--build_references` triggers a parallel workflow to build references, which is a prerequisite to running the pipeline: +`--references_only` triggers a workflow that ONLY builds references; without it, the references are built as part of the analysis run. Building the references involves: -1. Download ensembl fasta and gtf files +1. Download gencode fasta and gtf files 2. Create [STAR](https://github.com/alexdobin/STAR) index 3. Download [Arriba](https://github.com/suhrig/arriba) references 4. 
Download [FusionCatcher](https://github.com/ndaniel/fusioncatcher) references @@ -78,7 +78,7 @@ First, build the references: nextflow run nf-core/rnafusion \ -profile test, \ --outdir \ - --build_references \ + --references_only \ -stub ``` diff --git a/conf/modules.config b/conf/modules.config index 123e02b0..b07f5333 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -46,7 +46,7 @@ process { } withName: 'CAT_FASTQ' { - ext.when = { !params.build_references } + ext.when = { !params.references_only } } withName: 'GENCODE_DOWNLOAD' { @@ -63,7 +63,7 @@ process { withName: 'FASTQC' { ext.args = '--quiet' - ext.when = { !params.skip_qc && !params.build_references } + ext.when = { !params.skip_qc && !params.references_only } publishDir = [ path: { "${params.outdir}/fastqc" }, mode: params.publish_dir_mode, @@ -367,7 +367,7 @@ process { } withName: 'TRIM_WORKFLOW:*' { - ext.when = { !params.build_references } + ext.when = { !params.references_only } } withName: 'VCF_COLLECT' { diff --git a/docs/usage.md b/docs/usage.md index b7bc6794..86ede4dd 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,7 +10,7 @@ The pipeline is divided into two parts: 1. Download and build references -- specified with `--build_references` parameter +- specified with `--references_only` parameter - required only once before running the pipeline - **Important**: has to be run with each new release @@ -32,7 +32,7 @@ The rnafusion pipeline needs references for the fusion detection tools, so downl ```bash nextflow run nf-core/rnafusion \ -profile \ - --build_references --all \ + --references_only --all \ --cosmic_username --cosmic_passwd \ --genomes_base \ --outdir @@ -43,7 +43,7 @@ References for each tools can also be downloaded separately with: ```bash nextflow run nf-core/rnafusion \ -profile \ - --build_references -- -- ... \ + --references_only -- -- ... 
\ --cosmic_username --cosmic_passwd \ --genomes_base \ --outdir @@ -62,7 +62,7 @@ Use credentials from QIAGEN and add `--qiagen` ```bash nextflow run nf-core/rnafusion \ -profile \ - --build_references -- -- ... \ + --references_only -- -- ... \ --cosmic_username --cosmic_passwd \ --genomes_base \ --outdir --qiagen @@ -79,7 +79,7 @@ If process `FUSIONREPORT_DOWNLOAD` times out, it could be due to network restric ```bash nextflow run nf-core/rnafusion \ -profile \ - --build_references \ + --references_only \ --cosmic_username --cosmic_passwd \ --fusionreport \ --genomes_base \ @@ -91,7 +91,7 @@ Where the custom configuration could look like (adaptation to local machine nece ```text process { - withName: 'NFCORE_RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD' { + withName: 'NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD' { memory = '8.GB' cpus = 4 } @@ -158,7 +158,7 @@ nextflow run nf-core/rnafusion \ > **IMPORTANT: Either `--all` or `--`** is necessary to run detection tools -`--genomes_base` should be the path to the directory containing the folder `references/` that was built with `--build_references`. +`--genomes_base` should be the path to the directory containing the folder `references/` that was built with `--references_only`. Note that the pipeline will create the following files in your working directory: @@ -393,7 +393,7 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `test` - A profile with a complete configuration for automated testing - Includes links to test data so needs no other parameters - - Needs to run in two steps: with `--build_references` first and then without `--build_references` to run the analysis + - Needs to run in two steps: with `--references_only` first and then without `--references_only` to run the analysis - !!!! Run with `-stub` as all references need to be downloaded otherwise !!!! 
### `-resume` diff --git a/modules.json b/modules.json index 7e506548..358910d4 100644 --- a/modules.json +++ b/modules.json @@ -8,198 +8,142 @@ "agat/convertspgff2tsv": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "arriba/arriba": { "branch": "master", "git_sha": "7741dfc830e77a8ead2fcb50b01461ee09d0cdfe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "arriba/download": { "branch": "master", "git_sha": "467c202a876d26af544fa8c4b22a050a535462a7", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedops/convert2bed": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "a1abf90966a2a4016d3c3e41e228bfcbd4811ccc", + "installed_by": ["modules"] }, "fastp": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/markduplicates": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + 
"installed_by": ["modules"] }, "gffread": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "picard/collectinsertsizemetrics": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] }, "picard/collectrnaseqmetrics": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] }, "picard/collectwgsmetrics": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] }, "rrnatranscripts": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "salmon/index": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] }, "salmon/quant": { "branch": "master", "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": 
"b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", - "installed_by": [ - "modules" - ] + "git_sha": "669eb24fd82a9d3cb18ad0e73673ecb26827f683", + "installed_by": ["modules"] }, "star/align": { "branch": "master", - "git_sha": "30a97c755895b7dfe40a730b0695c554a10f1cdd", - "installed_by": [ - "modules" - ] + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "a5ad53288c79fa52c5ae708c317e09ec2dd149ab", + "installed_by": ["modules"] }, "stringtie/merge": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "stringtie/stringtie": { "branch": "master", - "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "git_sha": "49f4e50534fe4b64101e62ea41d5dc43b1324358", + "installed_by": ["modules"] }, "ucsc/gtftogenepred": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -208,26 +152,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at 
end of file +} diff --git a/modules/local/fusionreport/detect/main.nf b/modules/local/fusionreport/detect/main.nf index 8024d8f8..f2557bf0 100644 --- a/modules/local/fusionreport/detect/main.nf +++ b/modules/local/fusionreport/detect/main.nf @@ -8,17 +8,17 @@ process FUSIONREPORT { input: tuple val(meta), path(reads), path(arriba_fusions), path(starfusion_fusions), path(fusioncatcher_fusions) - tuple val(meta2), path(fusionreport_ref) + path(fusionreport_ref) val(tools_cutoff) output: - path "versions.yml" , emit: versions tuple val(meta), path("*fusionreport.tsv") , emit: fusion_list tuple val(meta), path("*fusionreport_filtered.tsv") , emit: fusion_list_filtered - tuple val(meta), path("*index.html") , emit: report + tuple val(meta), path("*index.html") , emit: report tuple val(meta), path("*_*.html") , optional:true, emit: html tuple val(meta), path("*.csv") , optional:true, emit: csv tuple val(meta), path("*.json") , optional:true, emit: json + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -58,7 +58,11 @@ process FUSIONREPORT { cat <<-END_VERSIONS > versions.yml "${task.process}": - fusion_report: \$(fusion_report --version | sed 's/fusion-report //') + fusion_report: sth END_VERSIONS """ } + // cat <<-END_VERSIONS > versions.yml + // "${task.process}": + // fusion_report: \$(fusion_report --version 2>&1 | sed 's/fusion-report //') + // END_VERSIONS diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml index c7eb9bd1..71e04c3d 100644 --- a/modules/nf-core/cat/fastq/environment.yml +++ b/modules/nf-core/cat/fastq/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - conda-forge::coreutils=8.30 + - conda-forge::coreutils=9.5 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index b68e5f91..4364a389 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -4,8 +4,8 @@ process CAT_FASTQ 
{ conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c2/c262fc09eca59edb5a724080eeceb00fb06396f510aefb229c2d2c6897e63975/data' : + 'community.wave.seqera.io/library/coreutils:9.5--ae99c88a9b28c264' }" input: tuple val(meta), path(reads, stageAs: "input*/*") diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index aec119a9..f8689a1c 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -24,15 +24,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-01-17T17:30:39.816981" + "timestamp": "2024-10-19T20:02:07.519211144" }, "test_cat_fastq_single_end_same_name": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -59,15 +59,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-01-17T17:32:35.229332" + "timestamp": "2024-10-19T20:02:31.618628921" }, "test_cat_fastq_single_end_single_file": { "content": [ @@ -82,7 +82,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + 
"versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -94,15 +94,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-01-17T17:34:00.058829" + "timestamp": "2024-10-19T20:02:57.904149581" }, "test_cat_fastq_paired_end_same_name": { "content": [ @@ -120,7 +120,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -135,15 +135,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-01-17T17:33:33.031555" + "timestamp": "2024-10-19T20:02:44.577183829" }, "test_cat_fastq_single_end - stub": { "content": [ @@ -158,7 +158,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -170,15 +170,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-05T12:07:28.244999" + "timestamp": "2024-10-19T20:03:10.603734777" }, "test_cat_fastq_paired_end_same_name - stub": { "content": [ @@ -196,7 +196,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -211,15 +211,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", 
+ "nextflow": "24.04.4" }, - "timestamp": "2024-07-05T12:07:57.070911" + "timestamp": "2024-10-19T20:03:46.041808828" }, "test_cat_fastq_single_end_same_name - stub": { "content": [ @@ -234,7 +234,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -246,15 +246,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-05T12:07:46.796254" + "timestamp": "2024-10-19T20:03:34.13865402" }, "test_cat_fastq_paired_end": { "content": [ @@ -272,7 +272,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -287,15 +287,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-01-17T17:32:02.270935" + "timestamp": "2024-10-19T20:02:19.64383573" }, "test_cat_fastq_paired_end - stub": { "content": [ @@ -313,7 +313,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -328,15 +328,15 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-05T12:07:37.807553" + "timestamp": "2024-10-19T20:03:22.597246066" }, "test_cat_fastq_single_end_single_file - stub": { "content": [ @@ -351,7 +351,7 @@ ] ], "1": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + 
"versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ], "reads": [ [ @@ -363,14 +363,14 @@ ] ], "versions": [ - "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + "versions.yml:md5,6ef4fd28546a005865b9454bbedbf81a" ] } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-07-05T12:14:51.861264" + "timestamp": "2024-10-19T20:03:58.44849001" } } \ No newline at end of file diff --git a/modules/nf-core/picard/collectinsertsizemetrics/environment.yml b/modules/nf-core/picard/collectinsertsizemetrics/environment.yml index ff4a85ed..1d715d56 100644 --- a/modules/nf-core/picard/collectinsertsizemetrics/environment.yml +++ b/modules/nf-core/picard/collectinsertsizemetrics/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::picard=3.2.0 + - bioconda::picard=3.3.0 diff --git a/modules/nf-core/picard/collectinsertsizemetrics/main.nf b/modules/nf-core/picard/collectinsertsizemetrics/main.nf index 1a49a7f0..c3014d80 100644 --- a/modules/nf-core/picard/collectinsertsizemetrics/main.nf +++ b/modules/nf-core/picard/collectinsertsizemetrics/main.nf @@ -4,8 +4,8 @@ process PICARD_COLLECTINSERTSIZEMETRICS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:3.2.0--hdfd78af_0' : - 'biocontainers/picard:3.2.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap index bcb5b455..cbe9329d 100644 --- a/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap +++ b/modules/nf-core/picard/collectinsertsizemetrics/tests/main.nf.test.snap @@ -9,14 +9,14 @@ ], "test.pdf", [ - "versions.yml:md5,4423bfe2194f61d919c382d2225b0835" + "versions.yml:md5,38d39e9882afe7ac015213c286745056" ] ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-30T12:23:29.392944" + "timestamp": "2024-10-18T10:55:45.769771444" }, "test-picard-collectinsertsizemetrics-stub": { "content": [ @@ -40,7 +40,7 @@ ] ], "2": [ - "versions.yml:md5,4423bfe2194f61d919c382d2225b0835" + "versions.yml:md5,38d39e9882afe7ac015213c286745056" ], "histogram": [ [ @@ -61,14 +61,14 @@ ] ], "versions": [ - "versions.yml:md5,4423bfe2194f61d919c382d2225b0835" + "versions.yml:md5,38d39e9882afe7ac015213c286745056" ] } ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-30T12:20:03.841884" + "timestamp": "2024-10-18T10:56:09.914953495" } } \ No newline at end of file diff --git a/modules/nf-core/picard/collectrnaseqmetrics/environment.yml b/modules/nf-core/picard/collectrnaseqmetrics/environment.yml index ff4a85ed..1d715d56 100644 --- a/modules/nf-core/picard/collectrnaseqmetrics/environment.yml +++ b/modules/nf-core/picard/collectrnaseqmetrics/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::picard=3.2.0 + - bioconda::picard=3.3.0 diff --git 
a/modules/nf-core/picard/collectrnaseqmetrics/main.nf b/modules/nf-core/picard/collectrnaseqmetrics/main.nf index 0c8f2cfa..eb80fdc7 100644 --- a/modules/nf-core/picard/collectrnaseqmetrics/main.nf +++ b/modules/nf-core/picard/collectrnaseqmetrics/main.nf @@ -4,8 +4,8 @@ process PICARD_COLLECTRNASEQMETRICS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.2.0--hdfd78af_0' : - 'biocontainers/picard:3.2.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test new file mode 100644 index 00000000..9ab18552 --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test @@ -0,0 +1,81 @@ +nextflow_process { + + name "Test Process PICARD_COLLECTRNASEQMETRICS" + script "../main.nf" + process "PICARD_COLLECTRNASEQMETRICS" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/collectrnaseqmetrics" + tag "ucsc/gtftogenepred" + + setup { + run("UCSC_GTFTOGENEPRED") { + script "../../../ucsc/gtftogenepred/main.nf" + process { + """ + input[0] = Channel.of([ + [ id:'test' ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true) ] + ]) + """ + } + } + } + + test("sarscov2 - fasta - gtf") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] } + input[2] = file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert file(process.out.metrics[0][1]).text.contains('CollectRnaSeqMetrics') }, + { assert snapshot( + process.out.versions, + process.out.pdf + ).match() } + ) + } + } + + test("sarscov2 - fasta - gtf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false, strandedness:'forward' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ] + input[1] = UCSC_GTFTOGENEPRED.out.refflat.map{ it[1] } + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap new file mode 100644 index 00000000..ad6503af --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2 - fasta - gtf": { + "content": [ + [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:39.199344417" + }, + "sarscov2 - fasta - gtf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,8d0867f89947c081711de09338138c2e" + ], + "metrics": [ + [ + { + "id": "test", + "single_end": false, + "strandedness": "forward" + }, + "test.rna_metrics:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pdf": [ + + ], + "versions": [ + 
"versions.yml:md5,8d0867f89947c081711de09338138c2e" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-18T16:10:57.248132065" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config new file mode 100644 index 00000000..bc82e10c --- /dev/null +++ b/modules/nf-core/picard/collectrnaseqmetrics/tests/nextflow.config @@ -0,0 +1,13 @@ +process { + withName:UCSC_GTFTOGENEPRED { + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" + } + withName:PICARD_COLLECTRNASEQMETRICS { + ext.args = { ( meta.strandedness == "forward" || meta.single_end ) ? + "--STRAND_SPECIFICITY FIRST_READ_TRANSCRIPTION_STRAND" : + meta.strandedness == "reverse" ? + "--STRAND_SPECIFICITY SECOND_READ_TRANSCRIPTION_STRAND" : + "--STRAND_SPECIFICITY NONE" + } + } +} diff --git a/modules/nf-core/picard/collectwgsmetrics/environment.yml b/modules/nf-core/picard/collectwgsmetrics/environment.yml index 58d52581..13265842 100644 --- a/modules/nf-core/picard/collectwgsmetrics/environment.yml +++ b/modules/nf-core/picard/collectwgsmetrics/environment.yml @@ -2,5 +2,5 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::picard=3.2.0 - - r::r-base + - bioconda::picard=3.3.0 + - conda-forge::r-base=4.4.1 diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf index 6002a7ca..39cf7d2b 100644 --- a/modules/nf-core/picard/collectwgsmetrics/main.nf +++ b/modules/nf-core/picard/collectwgsmetrics/main.nf @@ -4,8 +4,8 @@ process PICARD_COLLECTWGSMETRICS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:3.2.0--hdfd78af_0' : - 'biocontainers/picard:3.2.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.3.0--hdfd78af_0' : + 'biocontainers/picard:3.3.0--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap index f188382b..1958fcde 100644 --- a/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap +++ b/modules/nf-core/picard/collectwgsmetrics/tests/main.nf.test.snap @@ -3,26 +3,26 @@ "content": [ true, [ - "versions.yml:md5,06b5898fb06823b736c90e1dcebe75fe" + "versions.yml:md5,9927db69fdd55176be5cdbd427d000c2" ] ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-23T11:02:46.301176" + "timestamp": "2024-10-18T10:15:18.13771243" }, "test-picard-collectwgsmetrics": { "content": [ true, [ - "versions.yml:md5,06b5898fb06823b736c90e1dcebe75fe" + "versions.yml:md5,9927db69fdd55176be5cdbd427d000c2" ] ], "meta": { - "nf-test": "0.8.4", + "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-08-23T11:02:25.132069" + "timestamp": "2024-10-18T10:14:57.786056996" } } \ No newline at end of file diff --git a/modules/nf-core/salmon/index/environment.yml b/modules/nf-core/salmon/index/environment.yml index 471164fa..b3f75777 100644 --- a/modules/nf-core/salmon/index/environment.yml +++ b/modules/nf-core/salmon/index/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::salmon=1.10.1 + - bioconda::salmon=1.10.3 diff --git a/modules/nf-core/salmon/index/main.nf b/modules/nf-core/salmon/index/main.nf index e755d9a3..3d653c0d 100644 --- a/modules/nf-core/salmon/index/main.nf +++ b/modules/nf-core/salmon/index/main.nf @@ -4,8 +4,8 @@ process SALMON_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' 
&& !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/salmon:1.10.1--h7e5ed60_0' : - 'biocontainers/salmon:1.10.1--h7e5ed60_0' }" + 'https://depot.galaxyproject.org/singularity/salmon:1.10.3--h6dccd9a_2' : + 'biocontainers/salmon:1.10.3--h6dccd9a_2' }" input: path genome_fasta diff --git a/modules/nf-core/salmon/index/tests/main.nf.test.snap b/modules/nf-core/salmon/index/tests/main.nf.test.snap index 703e455c..e5899b51 100644 --- a/modules/nf-core/salmon/index/tests/main.nf.test.snap +++ b/modules/nf-core/salmon/index/tests/main.nf.test.snap @@ -2,25 +2,25 @@ "versions": { "content": [ [ - "versions.yml:md5,563eeafb4577be0b13801d7021c0bf42" + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2023-11-22T14:26:33.32036" + "timestamp": "2024-10-18T10:00:47.087293189" }, "versions stub": { "content": [ [ - "versions.yml:md5,563eeafb4577be0b13801d7021c0bf42" + "versions.yml:md5,85337fa0a286ea35073ee5260974e307" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-04-23T09:47:58.828124" + "timestamp": "2024-10-18T10:01:03.89824494" } } \ No newline at end of file diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf index f1e3b5cd..e2d27352 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -29,7 +29,8 @@ process SALMON_QUANT { prefix = task.ext.prefix ?: "${meta.id}" def reference = "--index $index" - def reads1 = [], reads2 = [] + def reads1 = [] + def reads2 = [] meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def input_reads = meta.single_end ? 
"-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" if (alignment_mode) { diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index acfd9252..caf3c61a 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -12,11 +12,11 @@ process SAMTOOLS_SORT { tuple val(meta2), path(fasta) output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test index c2ea9c72..b05e6691 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -39,6 +39,40 @@ nextflow_process { } } + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + 
process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + test("cram") { config "./nextflow_cram.config" @@ -98,6 +132,36 @@ nextflow_process { } } + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("cram - stub") { options "-stub" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap index 2d6b2900..469891fe 100644 --- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -159,6 +159,101 @@ }, "timestamp": "2024-09-16T08:50:19.061912443" }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + 
[ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" + }, "bam": { "content": [ [ @@ -185,8 +280,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.4" + "nextflow": "24.09.0" }, - "timestamp": "2024-09-16T08:49:43.971158333" + "timestamp": "2024-10-08T11:59:46.372244" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index 62054fc9..02cda6e6 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -4,5 +4,7 @@ channels: - conda-forge - bioconda dependencies: + # renovate: datasource=conda depName=bioconda/htslib - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 37e05cec..41fa3d6a 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : - 'biocontainers/samtools:1.21--h50ea8bc_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : + 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" input: tuple val(meta), path(input), path(index) diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml index 1debc4c9..7c57530a 100644 --- a/modules/nf-core/star/align/environment.yml +++ b/modules/nf-core/star/align/environment.yml @@ -3,7 +3,7 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.18 - - bioconda::samtools=1.18 - - bioconda::star=2.7.10a + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/align/main.nf b/modules/nf-core/star/align/main.nf index b5bc9ddf..417071ba 100644 --- a/modules/nf-core/star/align/main.nf +++ b/modules/nf-core/star/align/main.nf @@ -4,8 +4,8 @@ process STAR_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" input: tuple val(meta), path(reads, stageAs: "input*/*") diff --git a/modules/nf-core/star/align/meta.yml b/modules/nf-core/star/align/meta.yml index d9cba2aa..5cfe763e 100644 --- a/modules/nf-core/star/align/meta.yml +++ b/modules/nf-core/star/align/meta.yml @@ -56,27 +56,30 @@ input: output: - log_final: - meta: - type: file - description: STAR final log file - pattern: "*Log.final.out" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*Log.final.out": type: file description: STAR final log file pattern: "*Log.final.out" - log_out: - meta: - type: file - description: STAR lot out file - pattern: "*Log.out" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*Log.out": type: file description: STAR lot out file pattern: "*Log.out" - log_progress: - meta: - type: file - description: STAR log progress file - pattern: "*Log.progress.out" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*Log.progress.out": type: file description: STAR log progress file @@ -88,57 +91,70 @@ output: pattern: "versions.yml" - bam: - meta: - type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - "*d.out.bam": type: file description: Output BAM file containing read alignments pattern: "*.{bam}" - bam_sorted: - meta: - type: file - description: Sorted BAM file of read alignments (optional) - pattern: "*sortedByCoord.out.bam" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - ${prefix}.sortedByCoord.out.bam: type: file description: Sorted BAM file of read alignments (optional) pattern: "*sortedByCoord.out.bam" - bam_sorted_aligned: - - meta: {} - - ${prefix}.Aligned.sortedByCoord.out.bam: {} - - bam_transcript: - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.Aligned.sortedByCoord.out.bam: type: file - description: Output BAM file of transcriptome alignment (optional) - pattern: "*toTranscriptome.out.bam" + description: Sorted BAM file of read alignments (optional) + pattern: "*.Aligned.sortedByCoord.out.bam" + - bam_transcript: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*toTranscriptome.out.bam": type: file description: Output BAM file of transcriptome alignment (optional) pattern: "*toTranscriptome.out.bam" - bam_unsorted: - meta: - type: file - description: Unsorted BAM file of read alignments (optional) - pattern: "*Aligned.unsort.out.bam" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*Aligned.unsort.out.bam": type: file description: Unsorted BAM file of read alignments (optional) pattern: "*Aligned.unsort.out.bam" - fastq: - meta: - type: file - description: Unmapped FastQ files (optional) - pattern: "*fastq.gz" + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - "*fastq.gz": type: file description: Unmapped FastQ files (optional) pattern: "*fastq.gz" - tab: - meta: - type: file - description: STAR output tab file(s) (optional) - pattern: "*.tab" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*.tab": type: file description: STAR output tab file(s) (optional) @@ -149,12 +165,9 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - pattern: "*.SJ.out.tab" - "*.SJ.out.tab": - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + type: file + description: STAR output splice junction tab file pattern: "*.SJ.out.tab" - read_per_gene_tab: - meta: @@ -162,18 +175,16 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - pattern: "*.ReadsPerGene.out.tab" - "*.ReadsPerGene.out.tab": + type: file + description: STAR output read per gene tab file + pattern: "*.ReadsPerGene.out.tab" + - junction: + - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - pattern: "*.ReadsPerGene.out.tab" - - junction: - - meta: - type: file - description: STAR chimeric junction output file (optional) - pattern: "*.out.junction" - "*.out.junction": type: file description: STAR chimeric junction output file (optional) @@ -186,25 +197,25 @@ output: e.g. [ id:'test', single_end:false ] pattern: "*.out.sam" - "*.out.sam": + type: file + description: STAR output SAM file(s) (optional) + pattern: "*.out.sam" + - wig: + - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - pattern: "*.out.sam" - - wig: - - meta: - type: file - description: STAR output wiggle format file(s) (optional) - pattern: "*.wig" - "*.wig": type: file description: STAR output wiggle format file(s) (optional) pattern: "*.wig" - bedgraph: - meta: - type: file - description: STAR output bedGraph format file(s) (optional) - pattern: "*.bg" + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - "*.bg": type: file description: STAR output bedGraph format file(s) (optional) diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test index 2d9f72dd..a62c17db 100644 --- a/modules/nf-core/star/align/tests/main.nf.test +++ b/modules/nf-core/star/align/tests/main.nf.test @@ -56,13 +56,10 @@ nextflow_process { file(process.out.log_final[0][1]).name, file(process.out.log_out[0][1]).name, file(process.out.log_progress[0][1]).name, - process.out.bam, - process.out.bam_sorted, - process.out.bam_transcript, - process.out.bam_unsorted, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), process.out.bedgraph, process.out.fastq, - process.out.junction, process.out.read_per_gene_tab, process.out.sam, process.out.spl_junc_tab, @@ -124,13 +121,10 @@ nextflow_process { file(process.out.log_final[0][1]).name, file(process.out.log_out[0][1]).name, file(process.out.log_progress[0][1]).name, - process.out.bam, - process.out.bam_sorted, - process.out.bam_transcript, - process.out.bam_unsorted, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), process.out.bedgraph, process.out.fastq, - process.out.junction, process.out.read_per_gene_tab, process.out.sam, process.out.spl_junc_tab, @@ -192,13 +186,9 @@ nextflow_process { file(process.out.log_final[0][1]).name, file(process.out.log_out[0][1]).name, file(process.out.log_progress[0][1]).name, - 
process.out.bam, - process.out.bam_sorted, - process.out.bam_transcript, - process.out.bam_unsorted, + bam(process.out.bam[0][1]).getReadsMD5(), process.out.bedgraph, process.out.fastq, - process.out.junction, process.out.read_per_gene_tab, process.out.sam, process.out.spl_junc_tab, @@ -260,13 +250,10 @@ nextflow_process { file(process.out.log_final[0][1]).name, file(process.out.log_out[0][1]).name, file(process.out.log_progress[0][1]).name, - process.out.bam, - process.out.bam_sorted, - process.out.bam_transcript, - process.out.bam_unsorted, + file(process.out.junction[0][1]).name, + bam(process.out.bam[0][1]).getReadsMD5(), process.out.bedgraph, process.out.fastq, - process.out.junction, process.out.read_per_gene_tab, process.out.sam, process.out.spl_junc_tab, @@ -330,13 +317,10 @@ nextflow_process { file(process.out.log_final[0][1]).name, file(process.out.log_out[0][1]).name, file(process.out.log_progress[0][1]).name, - process.out.bam, - process.out.bam_sorted, - process.out.bam_transcript, - process.out.bam_unsorted, + bam(process.out.bam[0][1]).getReadsMD5(), + bam(process.out.bam_sorted_aligned[0][1]).getReadsMD5(), process.out.bedgraph, process.out.fastq, - process.out.junction, process.out.read_per_gene_tab, process.out.sam, process.out.spl_junc_tab, @@ -606,4 +590,4 @@ nextflow_process { ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/star/align/tests/main.nf.test.snap b/modules/nf-core/star/align/tests/main.nf.test.snap index 0da28d37..b533fb8b 100644 --- a/modules/nf-core/star/align/tests/main.nf.test.snap +++ b/modules/nf-core/star/align/tests/main.nf.test.snap @@ -97,7 +97,7 @@ ] ], "3": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "4": [ [ @@ -307,7 +307,7 @@ ] ], "versions": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "wig": [ [ @@ -322,9 +322,9 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": 
"24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:39:15.440397" + "timestamp": "2024-10-20T17:09:08.738074176" }, "homo_sapiens - paired_end - arriba - stub": { "content": [ @@ -424,7 +424,7 @@ ] ], "3": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "4": [ [ @@ -634,7 +634,7 @@ ] ], "versions": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "wig": [ [ @@ -649,33 +649,17 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:40:15.472109" + "timestamp": "2024-10-20T17:09:36.122131869" }, "homo_sapiens - single_end": { "content": [ "test.Log.final.out", "test.Log.out", "test.Log.progress.out", - [ - [ - { - "id": "test", - "single_end": true - }, - "test.Aligned.sortedByCoord.out.bam:md5,c6cfaccaf91bc7fdabed3cfe236d4535" - ] - ], - [ - - ], - [ - - ], - [ - - ], + "9f76be49a6607613a64f760101bdddce", + "9f76be49a6607613a64f760101bdddce", [ [ { @@ -696,9 +680,6 @@ ], [ - ], - [ - ], [ [ @@ -722,38 +703,22 @@ ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:23:08.001162" + "timestamp": "2024-10-20T17:01:22.197991909" }, "homo_sapiens - paired_end": { "content": [ "test.Log.final.out", "test.Log.out", "test.Log.progress.out", - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Aligned.sortedByCoord.out.bam:md5,b9ee1c607e07323bc1652ef3babb543f" - ] - ], - [ - - ], - [ - - ], - [ - - ], + "db9a8324b5163b025bcc0c33e848486", + "db9a8324b5163b025bcc0c33e848486", [ [ { @@ -774,9 +739,6 @@ ], [ - ], - [ - ], [ [ @@ -800,14 +762,14 @@ ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ] ], "meta": { "nf-test": "0.9.0", - 
"nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:24:46.295219" + "timestamp": "2024-10-20T17:02:06.988663857" }, "homo_sapiens - paired_end - multiple - stub": { "content": [ @@ -907,7 +869,7 @@ ] ], "3": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "4": [ [ @@ -1117,7 +1079,7 @@ ] ], "versions": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "wig": [ [ @@ -1132,33 +1094,17 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:41:07.310866" + "timestamp": "2024-10-20T17:10:12.005468781" }, "homo_sapiens - paired_end - multiple": { "content": [ "test.Log.final.out", "test.Log.out", "test.Log.progress.out", - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Aligned.sortedByCoord.out.bam:md5,ab07c21d63ab0a6c07d171d213c81d5a" - ] - ], - [ - - ], - [ - - ], - [ - - ], + "3e54e45f5dc3e9c1f2fc55bc41531a87", + "3e54e45f5dc3e9c1f2fc55bc41531a87", [ [ { @@ -1179,9 +1125,6 @@ ], [ - ], - [ - ], [ [ @@ -1205,14 +1148,14 @@ ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:38:46.605044" + "timestamp": "2024-10-20T17:08:54.877286681" }, "homo_sapiens - paired_end - stub": { "content": [ @@ -1312,7 +1255,7 @@ ] ], "3": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "4": [ [ @@ -1522,7 +1465,7 @@ ] ], "versions": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "wig": [ [ @@ -1537,24 +1480,17 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:39:51.595873" + "timestamp": 
"2024-10-20T17:09:20.911466345" }, "homo_sapiens - paired_end - starfusion": { "content": [ "test.Log.final.out", "test.Log.out", "test.Log.progress.out", - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Aligned.out.bam:md5,bcad07b838f6762fc01eea52b5cd3f84" - ] - ], + "test.Chimeric.out.junction", + "caee9dcda13882d4913456973c25b57a", [ ], @@ -1566,24 +1502,6 @@ ], [ - ], - [ - - ], - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Chimeric.out.junction:md5,c10ef219f4a30e83711b995bc5e40dba" - ] - ], - [ - - ], - [ - ], [ [ @@ -1607,41 +1525,21 @@ ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:35:43.874508" + "timestamp": "2024-10-20T17:07:25.0639914" }, "homo_sapiens - paired_end - arriba": { "content": [ "test.Log.final.out", "test.Log.out", "test.Log.progress.out", - [ - [ - { - "id": "test", - "single_end": false - }, - "test.Aligned.out.bam:md5,c1b1747f5873f2d17762725636e891d5" - ] - ], - [ - - ], - [ - - ], - [ - - ], - [ - - ], + "1a3abe88fb2490589c58497d39921bcc", [ ], @@ -1676,14 +1574,14 @@ ], [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-10-01T11:28:32.599223" + "timestamp": "2024-10-20T17:04:00.685784211" }, "homo_sapiens - paired_end - starfusion - stub": { "content": [ @@ -1783,7 +1681,7 @@ ] ], "3": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "4": [ [ @@ -1993,7 +1891,7 @@ ] ], "versions": [ - "versions.yml:md5,2e6b6d8809f5a17f38f4d27c45dcb22f" + "versions.yml:md5,a149bba1dbb5194560abdd813c7848e3" ], "wig": [ [ @@ -2008,8 +1906,8 @@ ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": 
"24.04.4" }, - "timestamp": "2024-10-01T11:40:42.29249" + "timestamp": "2024-10-20T17:09:53.173671551" } } \ No newline at end of file diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml index 1debc4c9..7c57530a 100644 --- a/modules/nf-core/star/genomegenerate/environment.yml +++ b/modules/nf-core/star/genomegenerate/environment.yml @@ -3,7 +3,7 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.18 - - bioconda::samtools=1.18 - - bioconda::star=2.7.10a + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 + - bioconda::star=2.7.11b - conda-forge::gawk=5.1.0 diff --git a/modules/nf-core/star/genomegenerate/main.nf b/modules/nf-core/star/genomegenerate/main.nf index b8855715..8f0c67e7 100644 --- a/modules/nf-core/star/genomegenerate/main.nf +++ b/modules/nf-core/star/genomegenerate/main.nf @@ -4,8 +4,8 @@ process STAR_GENOMEGENERATE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' : - 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:ded3841da0194af2701c780e9b3d653a85d27489-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b425bc2a95806d878993f9a66dae3ae80ac2dafff4c208c5ae01b7a90a32fa91/data' : + 'community.wave.seqera.io/library/star_samtools_htslib_gawk:10c6e8c834460019' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap index 207f4b4f..3db25678 100644 --- a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap +++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap @@ -3,14 +3,14 @@ "content": [ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]", [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T14:55:35.478401" + "timestamp": "2024-10-19T20:37:47.410432728" }, "fasta_gtf_stub": { "content": [ @@ -41,7 +41,7 @@ ] ], "1": [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" ], "index": [ [ @@ -69,15 +69,15 @@ ] ], "versions": [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T14:55:57.247585" + "timestamp": "2024-10-19T20:38:09.165234795" }, "fasta_stub": { "content": [ @@ -101,7 +101,7 @@ ] ], "1": [ - 
"versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" ], "index": [ [ @@ -122,27 +122,27 @@ ] ], "versions": [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T14:56:07.01742" + "timestamp": "2024-10-19T20:38:19.530862664" }, "fasta": { "content": [ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]", [ - "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f" + "versions.yml:md5,14b05d04c9eca568e9ed4888aaf26fa6" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T14:55:45.48784" + "timestamp": "2024-10-19T20:37:58.667436398" } } \ No newline at end of file diff --git a/modules/nf-core/stringtie/stringtie/environment.yml b/modules/nf-core/stringtie/stringtie/environment.yml index 0556de41..906b7486 100644 --- a/modules/nf-core/stringtie/stringtie/environment.yml +++ b/modules/nf-core/stringtie/stringtie/environment.yml @@ -2,4 +2,4 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::stringtie=2.2.1 + - bioconda::stringtie=2.2.3 diff --git a/modules/nf-core/stringtie/stringtie/main.nf b/modules/nf-core/stringtie/stringtie/main.nf index 6e25ba27..4635c8c5 100644 --- a/modules/nf-core/stringtie/stringtie/main.nf +++ b/modules/nf-core/stringtie/stringtie/main.nf @@ -4,8 +4,8 @@ process STRINGTIE_STRINGTIE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/stringtie:2.2.1--hecb563c_2' : - 'biocontainers/stringtie:2.2.1--hecb563c_2' }" + 'https://depot.galaxyproject.org/singularity/stringtie:2.2.3--h43eeafb_0' : + 'biocontainers/stringtie:2.2.3--h43eeafb_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap index 124dd4cb..d4645de3 100644 --- a/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap +++ b/modules/nf-core/stringtie/stringtie/tests/main.nf.test.snap @@ -31,18 +31,18 @@ "id": "test", "strandedness": "forward" }, - "test.transcripts.gtf:md5,f56cf8aba2c4a5673bc7963ba5f12d04" + "test.transcripts.gtf:md5,37154e7bda96544f24506ee902bb561d" ] ], [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:33:44.299962" + "timestamp": "2024-10-18T09:56:50.294157199" }, "sarscov2 [bam] - forward strandedness": { "content": [ @@ -61,18 +61,18 @@ "id": "test", "strandedness": "forward" }, - "test.transcripts.gtf:md5,569137af5be452413086b50653a97203" + "test.transcripts.gtf:md5,6087dfc9700a52d9e4a1ae3fcd1d1dfd" ] ], [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:33:35.177738" + "timestamp": "2024-10-18T09:56:39.4249133" }, "sarscov2 [bam] - forward strandedness - stub": { "content": [ @@ -114,7 +114,7 @@ ] ], "4": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ], "abundance": [ [ @@ -153,15 +153,15 @@ ] ], "versions": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] } ], "meta": { 
"nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:36:32.885078" + "timestamp": "2024-10-18T09:57:23.008470065" }, "sarscov2 [bam] - forward strandedness + reference annotation - stub": { "content": [ @@ -203,7 +203,7 @@ ] ], "4": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ], "abundance": [ [ @@ -242,15 +242,15 @@ ] ], "versions": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:36:43.325777" + "timestamp": "2024-10-18T09:57:33.622824981" }, "sarscov2 [bam] - reverse strandedness + reference annotation - stub": { "content": [ @@ -292,7 +292,7 @@ ] ], "4": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ], "abundance": [ [ @@ -331,15 +331,15 @@ ] ], "versions": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:37:06.085936" + "timestamp": "2024-10-18T09:57:55.803421433" }, "sarscov2 [bam] - reverse strandedness - stub": { "content": [ @@ -381,7 +381,7 @@ ] ], "4": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ], "abundance": [ [ @@ -420,15 +420,15 @@ ] ], "versions": [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] } ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:36:53.837578" + "timestamp": "2024-10-18T09:57:44.825389635" }, "sarscov2 [bam] - reverse strandedness + reference annotation": { "content": [ @@ -462,18 +462,18 @@ "id": "test", 
"strandedness": "reverse" }, - "test.transcripts.gtf:md5,bb346053a8c156b803b055133376c7fa" + "test.transcripts.gtf:md5,fbabb4e3888bbede67f11f692e484880" ] ], [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:34:03.114695" + "timestamp": "2024-10-18T09:57:11.793664242" }, "sarscov2 [bam] - reverse strandedness": { "content": [ @@ -492,17 +492,17 @@ "id": "test", "strandedness": "reverse" }, - "test.transcripts.gtf:md5,31c34aec2bf36bb0ea3c16c2afeeeb1f" + "test.transcripts.gtf:md5,01d6da00a3c458420841e57427297183" ] ], [ - "versions.yml:md5,3410e8ac349d18c85ddee89337851d38" + "versions.yml:md5,06593ea00cc35bf06f2de2753e0c3913" ] ], "meta": { "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nextflow": "24.04.4" }, - "timestamp": "2024-07-22T12:33:52.874479" + "timestamp": "2024-10-18T09:57:01.166309777" } } \ No newline at end of file diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf index ce142a74..fd2ec963 100644 --- a/subworkflows/local/arriba_workflow.nf +++ b/subworkflows/local/arriba_workflow.nf @@ -11,6 +11,7 @@ workflow ARRIBA_WORKFLOW { ch_fasta ch_starindex_ref ch_arriba_ref_blacklist + ch_arriba_ref_cytobands ch_arriba_ref_known_fusions ch_arriba_ref_protein_domains @@ -28,7 +29,7 @@ workflow ARRIBA_WORKFLOW { .map { meta, reads, fusions -> [ meta, fusions ] } ch_arriba_fusion_fail = ch_dummy_file } else { - ARRIBA_ARRIBA ( STAR_FOR_ARRIBA.out.bam, ch_fasta, ch_gtf, ch_arriba_ref_blacklist, ch_arriba_ref_known_fusions, [[],[]], [[],[]], ch_arriba_ref_protein_domains ) + ARRIBA_ARRIBA ( STAR_FOR_ARRIBA.out.bam, ch_fasta, ch_gtf, ch_arriba_ref_blacklist, ch_arriba_ref_known_fusions, ch_arriba_ref_cytobands, ch_arriba_ref_protein_domains ) ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions diff 
--git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 0c6c9916..3de5af74 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -35,9 +35,6 @@ include { GFFREAD } from '../../modules/nf-core/gffread/ */ workflow BUILD_REFERENCES { - take: - genome // channel: [mandatory] val(genome) - genome_gencode_version // channel: [mandatory] val(genome_gencode_version) main: ch_versions = Channel.empty() @@ -82,7 +79,7 @@ workflow BUILD_REFERENCES { if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){ GTF_TO_REFFLAT(ch_gtf) - ch_refflat = GTF_TO_REFFLAT.out.refflat + ch_refflat = GTF_TO_REFFLAT.out.refflat.map { that -> [[id:that.Name], that] } } else { ch_refflat = Channel.fromPath(params.refflat).map { that -> [[id:that.Name], that] } } @@ -109,58 +106,55 @@ workflow BUILD_REFERENCES { (!file(params.arriba_ref_blacklist).exists() || file(params.arriba_ref_blacklist).isEmpty() || !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { - ARRIBA_DOWNLOAD(genome) + ARRIBA_DOWNLOAD(params.genome) ch_arriba_ref_blacklist = ARRIBA_DOWNLOAD.out.blacklist ch_arriba_ref_cytobands = ARRIBA_DOWNLOAD.out.cytobands ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions ch_arriba_ref_protein_domains = ARRIBA_DOWNLOAD.out.protein_domains } else { - ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { that -> [[id:that.Name], that] } - ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { that -> [[id:that.Name], that] } - ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { that -> [[id:that.Name], that] } - ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { that -> [[id:that.Name], that] } + 
ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist) + ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands) + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions) + ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains) } if ((params.fusioncatcher || params.all) && (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { - if (params.download_refs) { - FUSIONCATCHER_DOWNLOAD(params.genome_gencode_version) - ch_fusioncatcher_ref = FUSIONCATCHER_DOWNLOAD.out.reference} - else { FUSIONCATCHER_BUILD(params.genome_gencode_version) - ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference} - } else { - ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref).map { that -> [[id:that.Name], that] } - } + ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference + } + else { + ch_fusioncatcher_ref = Channel.fromPath(params.fusioncatcher_ref) + } if ((params.starfusion || params.all) && (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { - if (params.download_refs) { - ch_starfusion_ref = STARFUSION_DOWNLOAD( ch_fasta, ch_gtf ).out.reference } - else { - ch_starfusion_ref = STARFUSION_BUILD( ch_fasta, ch_gtf ).out.reference } - } else { - ch_starfusion_ref = Channel.fromPath(params.starfusion_ref).map { that -> [[id:that.Name], that] }} + STARFUSION_BUILD(ch_fasta, ch_gtf) + ch_starfusion_ref = STARFUSION_BUILD.out.reference + } + else { + ch_starfusion_ref = Channel.fromPath(params.starfusion_ref) + } if ((params.fusionreport || params.all) && (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || !file(params.fusionreport_ref_stub_check).exists() || 
file(params.fusionreport_ref_stub_check).isEmpty())) { if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } - ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD( params.cosmic_username, params.cosmic_passwd ).out.reference + FUSIONREPORT_DOWNLOAD(params.cosmic_username, params.cosmic_passwd) + ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.reference } else { - ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } + ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref) } emit: ch_fasta ch_gtf ch_fai - ch_hgnc_ref ch_hgnc_date ch_rrna_interval diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf index 09ec9965..f8fc90dc 100644 --- a/subworkflows/local/fusionreport_workflow.nf +++ b/subworkflows/local/fusionreport_workflow.nf @@ -23,7 +23,7 @@ workflow FUSIONREPORT_WORKFLOW { FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff) ch_fusion_list = FUSIONREPORT.out.fusion_list ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered - ch_versions = ch_versions.mix(FUSIONREPORT.out.versions) + // ch_versions = ch_versions.mix(FUSIONREPORT.out.versions) ch_report = FUSIONREPORT.out.report ch_csv = FUSIONREPORT.out.csv } else { diff --git a/test.fusionreport.tsv b/test.fusionreport.tsv new file mode 100644 index 00000000..e69de29b diff --git a/test.fusionreport_filtered.tsv b/test.fusionreport_filtered.tsv new file mode 100644 index 00000000..e69de29b diff --git a/test.fusions.csv b/test.fusions.csv new file mode 100644 index 00000000..e69de29b diff --git a/test.fusions.json b/test.fusions.json new file mode 100644 index 00000000..e69de29b diff --git a/test_fusionreport_index.html b/test_fusionreport_index.html new file mode 100644 index 00000000..e69de29b diff --git a/tests/main.nf.test b/tests/main.nf.test index 715335d8..31c71b6f 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -5,11 
+5,10 @@ nextflow_pipeline { tag "pipeline" tag "pipeline_rnafusion" - test("Run build references with profile test") { + test("Run fusion detection with profile test") { when { params { - build_references = true outdir = "results" genome_base = "references" max_cpus = 2 @@ -21,25 +20,6 @@ nextflow_pipeline { } } - then { - assertAll( - { assert workflow.success } - ) - } - } - test("Run fusion detection with profile test") { - - when { - params { - outdir = "results" - genome_base = "references" - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' - } - } - then { assertAll( { assert workflow.success } diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 5e34db97..238bcc3a 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -5,20 +5,20 @@ */ include { BUILD_REFERENCES } from '../subworkflows/local/build_references' -// include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' -// include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' -// include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' -// include { STARFUSION_WORKFLOW } from '../subworkflows/local/starfusion_workflow' -// include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow' -// include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' -// include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' -// include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' -// include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { TRIM_WORKFLOW } from '../subworkflows/local/trim_workflow' +include { ARRIBA_WORKFLOW } from '../subworkflows/local/arriba_workflow' +include { QC_WORKFLOW } from '../subworkflows/local/qc_workflow' +include { STARFUSION_WORKFLOW } from 
'../subworkflows/local/starfusion_workflow' +include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringtie_workflow' +include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' +include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' +include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' +include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' -// include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' +include { SALMON_QUANT } from '../modules/nf-core/salmon/quant/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,204 +34,123 @@ workflow RNAFUSION { ch_versions = Channel.empty() - // ch_multiqc_files = Channel.empty() + ch_multiqc_files = Channel.empty() // // Create references if necessary // - BUILD_REFERENCES(params.genome, params.genome_gencode_version) + BUILD_REFERENCES() ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) - // // Optional - // ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map {it -> [[id:it[0].simpleName], it]}.collect() - // : BUILD_REFERENCES.out.fasta.map {it -> [[id:it[0].simpleName], it]}.collect() - // ch_gtf = params.gtf ? Channel.fromPath(params.gtf).map {it -> [[id:it[0].simpleName], it]}.collect() - // : downloads.gtf.map {it -> [[id:it[0].simpleName], it]}.collect() - // ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache) - // : Channel.empty().mix(downloads.vep_cache) - // ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files) - // : Channel.empty().mix(downloads.vep_plugin) - // ch_fai = params.fai ? 
Channel.fromPath(params.fai).map {it -> [[id:it[0].simpleName], it]}.collect() - // : Channel.empty() - // ch_gene_panel_clinical_filter = params.gene_panel_clinical_filter ? Channel.fromPath(params.gene_panel_clinical_filter).collect() - // : Channel.empty() - // ch_ref_drop_annot_file = params.reference_drop_annot_file ? Channel.fromPath(params.reference_drop_annot_file).collect() - // : Channel.empty() - // ch_ref_drop_count_file = params.reference_drop_count_file ? Channel.fromPath(params.reference_drop_count_file).collect() - // : Channel.empty() - // ch_ref_drop_splice_folder = params.reference_drop_splice_folder ? Channel.fromPath(params.reference_drop_splice_folder).collect() - // : Channel.empty() - // ch_salmon_index = params.salmon_index ? Channel.fromPath(params.salmon_index) - // : Channel.empty() - // ch_star_index = params.star_index ? Channel.fromPath(params.star_index).map {it -> [[id:it[0].simpleName], it]}.collect() - // : Channel.empty() - // ch_transcript_fasta = params.transcript_fasta ? Channel.fromPath(params.transcript_fasta) - // : Channel.empty() - // ch_sequence_dict = params.sequence_dict ? Channel.fromPath(params.sequence_dict).map{ it -> [[id:it[0].simpleName], it] }.collect() - // : Channel.empty() - // ch_subsample_bed = params.subsample_bed ? 
Channel.fromPath(params.subsample_bed).collect() - // : Channel.empty() // - // Create channel from input file provided through params.input + // QC from FASTQ files // - // Channel - // .fromSamplesheet("input") - // .map { - // meta, fastq_1, fastq_2, strandedness -> - // if (!fastq_2) { - // return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - // } else { - // return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - // } - // } - // .groupTuple() - // .map { - // validateInputSamplesheet(it) - // } - // .branch { - // meta, fastqs -> - // single : fastqs.size() == 1 - // return [ meta, fastqs.flatten() ] - // multiple: fastqs.size() > 1 - // return [ meta, fastqs.flatten() ] - // } - // .set { ch_fastq } + FASTQC ( + ch_samplesheet + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) + ch_versions = ch_versions.mix(FASTQC.out.versions) - // // - // // MODULE: Concatenate FastQ files from same sample if required - // // - // CAT_FASTQ ( - // ch_fastq.multiple - // ) - // .reads - // .mix(ch_fastq.single) - // .set { ch_cat_fastq } - // ch_versions = ch_versions.mix(CAT_FASTQ.out.versions) - - - // // - // // QC from FASTQ files - // // - // FASTQC ( - // ch_cat_fastq - // ) - // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - // ch_versions = ch_versions.mix(FASTQC.out.versions) - // SALMON_QUANT( ch_reads_all, BUILD_REFERENCES.out.ch_salmon_index.map{ meta, index -> index }, BUILD_REFERENCES.out.ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') - // ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.zip.collect{it[1]}) + // + // Trimming + // + TRIM_WORKFLOW ( + ch_samplesheet + ) + ch_reads = TRIM_WORKFLOW.out.trimmed_reads + ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + + // SALMON_QUANT( ch_reads, BUILD_REFERENCES.out.ch_salmon_index.map{ meta, index -> index }, BUILD_REFERENCES.out.ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') + // ch_multiqc_files = 
ch_multiqc_files.mix(SALMON_QUANT.out.zi.collect{it[1]}) // ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) - // // - // // Trimming - // // - // TRIM_WORKFLOW ( - // ch_cat_fastq - // ) - // ch_reads = TRIM_WORKFLOW.out.trimmed_reads - // ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) + // - // MODULE: Run FastQC + // SUBWORKFLOW: Run STAR alignment and Arriba // - // FASTQC ( - // ch_samplesheet - // ) - // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - // ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + ARRIBA_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_starindex_ref, + BUILD_REFERENCES.out.ch_arriba_ref_blacklist, + BUILD_REFERENCES.out.ch_arriba_ref_cytobands, + BUILD_REFERENCES.out.ch_arriba_ref_known_fusions, + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains + ) + ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) + + +//Run STAR fusion + STARFUSION_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_starindex_ref, + BUILD_REFERENCES.out.ch_fasta + ) + ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) + + +//Run fusioncatcher + FUSIONCATCHER_WORKFLOW ( + ch_reads + ) + ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) + + +//Run stringtie + STRINGTIE_WORKFLOW ( + STARFUSION_WORKFLOW.out.ch_bam_sorted, + BUILD_REFERENCES.out.ch_gtf + ) + ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) + + + //Run fusion-report + FUSIONREPORT_WORKFLOW ( + ch_reads, + BUILD_REFERENCES.out.ch_fusionreport_ref, + ARRIBA_WORKFLOW.out.fusions, + STARFUSION_WORKFLOW.out.fusions, + FUSIONCATCHER_WORKFLOW.out.fusions + ) + ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) + + + + //Run fusionInpector + FUSIONINSPECTOR_WORKFLOW ( + ch_reads, + FUSIONREPORT_WORKFLOW.out.fusion_list, + FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, + 
FUSIONREPORT_WORKFLOW.out.report, + FUSIONREPORT_WORKFLOW.out.csv, + STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, + BUILD_REFERENCES.out.ch_arriba_ref_cytobands, + BUILD_REFERENCES.out.ch_hgnc_ref, + BUILD_REFERENCES.out.ch_hgnc_date + ) + ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) - // TRIM_WORKFLOW ( - // ch_samplesheet + // //QC + // QC_WORKFLOW ( + // ch_reads, + // STARFUSION_WORKFLOW.out.ch_bam_sorted, + // STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, + // BUILD_REFERENCES.out.ch_gtf, + // BUILD_REFERENCES.out.ch_refflat, + // BUILD_REFERENCES.out.ch_fasta, + // BUILD_REFERENCES.out.ch_fai, + // BUILD_REFERENCES.out.ch_rrna_interval // ) - // ch_reads_fusioncatcher = TRIM_WORKFLOW.out.ch_reads_fusioncatcher - // ch_reads_all = TRIM_WORKFLOW.out.ch_reads_all - // ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) - - -// // -// // SUBWORKFLOW: Run STAR alignment and Arriba -// // -// ARRIBA_WORKFLOW ( -// ch_reads_all, -// ch_gtf, -// ch_fasta, -// ch_starindex_ensembl_ref, -// ch_arriba_ref_blacklist, -// ch_arriba_ref_known_fusions, -// ch_arriba_ref_protein_domains -// ) -// ch_versions = ch_versions.mix(ARRIBA_WORKFLOW.out.versions) - - -// //Run STAR fusion -// STARFUSION_WORKFLOW ( -// ch_reads_all, -// ch_gtf, -// ch_starindex_ref, -// ch_fasta -// ) -// ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) - - -// //Run fusioncatcher -// FUSIONCATCHER_WORKFLOW ( -// ch_reads_fusioncatcher -// ) -// ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) - - -// //Run stringtie -// STRINGTIE_WORKFLOW ( -// STARFUSION_WORKFLOW.out.ch_bam_sorted, -// ch_gtf -// ) -// ch_versions = ch_versions.mix(STRINGTIE_WORKFLOW.out.versions) - - -// //Run fusion-report -// FUSIONREPORT_WORKFLOW ( -// ch_reads_all, -// ch_fusionreport_ref, -// ARRIBA_WORKFLOW.out.fusions, -// STARFUSION_WORKFLOW.out.fusions, -// 
FUSIONCATCHER_WORKFLOW.out.fusions -// ) -// ch_versions = ch_versions.mix(FUSIONREPORT_WORKFLOW.out.versions) - - - -// //Run fusionInpector -// FUSIONINSPECTOR_WORKFLOW ( -// ch_reads_all, -// FUSIONREPORT_WORKFLOW.out.fusion_list, -// FUSIONREPORT_WORKFLOW.out.fusion_list_filtered, -// FUSIONREPORT_WORKFLOW.out.report, -// FUSIONREPORT_WORKFLOW.out.csv, -// STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, -// ch_gtf, -// ch_arriba_ref_protein_domains, -// ch_arriba_ref_cytobands, -// ch_hgnc_ref, -// ch_hgnc_date -// ) -// ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) - - -// //QC -// QC_WORKFLOW ( -// ch_reads_all, -// STARFUSION_WORKFLOW.out.ch_bam_sorted, -// STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, -// ch_gtf, -// ch_refflat, -// ch_fasta, -// ch_fai, -// ch_rrna_interval -// ) -// ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) + // ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) // // // // Collate and save software versions @@ -244,18 +163,6 @@ workflow RNAFUSION { // newLine: true // ).set { ch_collated_versions } - // //QC - // QC_WORKFLOW ( - // ch_reads_all, - // STARFUSION_WORKFLOW.out.ch_bam_sorted, - // ch_gtf, - // ch_refflat, - // ch_fasta, - // ch_fai, - // ch_rrna_interval - // ) - // ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) - // // // // Collate and save software versions From 786b9bf4ec84fd6e2571ddd034b9923151cd4128 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 14 Nov 2024 15:45:31 +0100 Subject: [PATCH 15/41] remove unnecessary file --- AAA_BBB.html | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 AAA_BBB.html diff --git a/AAA_BBB.html b/AAA_BBB.html deleted file mode 100644 index e69de29b..00000000 From 460b6450ced2459dc4f2ce8abc5368dc5bc872c3 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 29 Nov 2024 09:18:26 +0100 Subject: [PATCH 16/41] add parameter 
interpretation in modules.config --- conf/modules.config | 18 +- main.nf | 6 +- modules/local/fusionreport/detect/main.nf | 6 +- modules/local/fusionreport/download/main.nf | 2 +- .../utils_nfcore_rnafusion_pipeline/main.nf | 3 +- workflows/rnafusion.nf | 182 +++++++++--------- 6 files changed, 104 insertions(+), 113 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b07f5333..3b176c87 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -36,7 +36,7 @@ process { } withName: 'ARRIBA_VISUALISATION' { - ext.when = { !params.fusioninspector_only && (params.starfusion || params.all) } + ext.when = { {!params.fusioninspector_only} && ({params.starfusion} || {params.all}) } ext.prefix = { "${meta.id}_combined_fusions_arriba_visualisation" } publishDir = [ path: { "${params.outdir}/arriba_visualisation" }, @@ -63,7 +63,7 @@ process { withName: 'FASTQC' { ext.args = '--quiet' - ext.when = { !params.skip_qc && !params.references_only } + ext.when = { {!params.skip_qc} && {!params.references_only} } publishDir = [ path: { "${params.outdir}/fastqc" }, mode: params.publish_dir_mode, @@ -73,7 +73,7 @@ process { withName: 'FASTQC_FOR_FASTP' { ext.args = '--quiet' - ext.when = ( !params.skip_qc) + ext.when = { !params.skip_qc } ext.prefix = { "${meta.id}_trimmed" } publishDir = [ path: { "${params.outdir}/fastqc_for_fastp" }, @@ -96,7 +96,7 @@ process { withName: 'FUSIONINSPECTOR' { ext.when = { !params.skip_vis } - ext.args = { params.fusioninspector_limitSjdbInsertNsj != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' } + ext.args = { ${params.fusioninspector_limitSjdbInsertNsj} != 1000000 ? "--STAR_xtra_params \"--limitSjdbInsertNsj ${params.fusioninspector_limitSjdbInsertNsj}\"" : '' } ext.args2 = '--annotate --examine_coding_effect' } @@ -111,7 +111,7 @@ process { } withName: 'FUSIONREPORT_DOWNLOAD' { - ext.args = { params.qiagen ? "--qiagen" : "" } + ext.args = { ${params.qiagen} ? 
"--qiagen" : "" } publishDir = [ path: { "${params.genomes_base}/fusion_report_db" }, mode: params.publish_dir_mode, @@ -153,7 +153,7 @@ process { } withName: 'MULTIQC' { ext.when = { !params.skip_qc } - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = {params.multiqc_title} ? "--title \"$params.multiqc_title\"" : '' publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, @@ -162,12 +162,12 @@ process { } withName: 'PICARD_COLLECTRNASEQMETRICS' { - ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + ext.when = { {!params.skip_qc} && {!params.fusioninspector_only} && ( {params.starfusion} || {params.all}) } } withName: 'GATK4_MARKDUPLICATES' { - ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + ext.when = { {!params.skip_qc} && {!params.fusioninspector_only} && ( {params.starfusion}|| {params.all}) } publishDir = [ path: { "${params.outdir}/picard" }, mode: params.publish_dir_mode, @@ -176,7 +176,7 @@ process { } withName: 'PICARD_COLLECTINSERTSIZEMETRICS' { - ext.when = { !params.skip_qc && !params.fusioninspector_only && (params.starfusion || params.all) } + ext.when = { ${!params.skip_qc} && ${!params.fusioninspector_only} && (${params.starfusion} || ${params.all}) } ext.prefix = { "${meta.id}_collectinsertsize"} publishDir = [ path: { "${params.outdir}/picard" }, diff --git a/main.nf b/main.nf index 09216f56..78781bed 100644 --- a/main.nf +++ b/main.nf @@ -51,6 +51,9 @@ workflow NFCORE_RNAFUSION { // RNAFUSION(samplesheet) + + emit: + multiqc_report = RNAFUSION.out.multiqc_report } /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,10 +70,8 @@ workflow { PIPELINE_INITIALISATION ( params.version, params.validate_params, - params.monochrome_logs, args, params.outdir, - params.input ) // @@ -88,6 +89,7 @@ workflow { params.outdir, 
params.monochrome_logs, params.hook_url, + NFCORE_RNAFUSION.out.multiqc_report, ) } diff --git a/modules/local/fusionreport/detect/main.nf b/modules/local/fusionreport/detect/main.nf index f2557bf0..ec7a076c 100644 --- a/modules/local/fusionreport/detect/main.nf +++ b/modules/local/fusionreport/detect/main.nf @@ -58,11 +58,7 @@ process FUSIONREPORT { cat <<-END_VERSIONS > versions.yml "${task.process}": - fusion_report: sth + fusion_report: \$(fusion_report --version | sed 's/fusion-report //') END_VERSIONS """ } - // cat <<-END_VERSIONS > versions.yml - // "${task.process}": - // fusion_report: \$(fusion_report --version 2>&1 | sed 's/fusion-report //') - // END_VERSIONS diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf index 971db1b7..04e069ee 100644 --- a/modules/local/fusionreport/download/main.nf +++ b/modules/local/fusionreport/download/main.nf @@ -10,7 +10,7 @@ process FUSIONREPORT_DOWNLOAD { val(passwd) output: - path "*" , emit: reference + path "*.db" , emit: reference path "versions.yml" , emit: versions script: diff --git a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf index ae62c8d4..b217a88d 100644 --- a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf @@ -30,10 +30,8 @@ workflow PIPELINE_INITIALISATION { take: version // boolean: Display version and exit validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved - input // string: Path to input samplesheet main: @@ -114,6 +112,7 @@ workflow PIPELINE_COMPLETION { outdir // path: Path to output directory where results will be published monochrome_logs // boolean: Disable 
ANSI colour codes in log output hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report main: summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 238bcc3a..e14ac799 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -13,6 +13,10 @@ include { STRINGTIE_WORKFLOW } from '../subworkflows/local/stringti include { FUSIONCATCHER_WORKFLOW } from '../subworkflows/local/fusioncatcher_workflow' include { FUSIONINSPECTOR_WORKFLOW } from '../subworkflows/local/fusioninspector_workflow' include { FUSIONREPORT_WORKFLOW } from '../subworkflows/local/fusionreport_workflow' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { validateInputSamplesheet } from '../subworkflows/local/utils_nfcore_rnafusion_pipeline' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' @@ -63,9 +67,10 @@ workflow RNAFUSION { ch_reads = TRIM_WORKFLOW.out.trimmed_reads ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) - // SALMON_QUANT( ch_reads, BUILD_REFERENCES.out.ch_salmon_index.map{ meta, index -> index }, BUILD_REFERENCES.out.ch_gtf.map{ meta, gtf -> gtf }, [], false, 'A') - // ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.zi.collect{it[1]}) - // ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) + + SALMON_QUANT( ch_reads, BUILD_REFERENCES.out.ch_salmon_index.map{ it -> it[1] }, BUILD_REFERENCES.out.ch_gtf.map{ it -> it[1] }, [], false, 'A') + ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.json_info.collect{it[1]}) + ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) // @@ -139,97 +144,86 @@ workflow RNAFUSION { 
ch_versions = ch_versions.mix(FUSIONINSPECTOR_WORKFLOW.out.versions) - // //QC - // QC_WORKFLOW ( - // ch_reads, - // STARFUSION_WORKFLOW.out.ch_bam_sorted, - // STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, - // BUILD_REFERENCES.out.ch_gtf, - // BUILD_REFERENCES.out.ch_refflat, - // BUILD_REFERENCES.out.ch_fasta, - // BUILD_REFERENCES.out.ch_fai, - // BUILD_REFERENCES.out.ch_rrna_interval - // ) - // ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) - -// // -// // Collate and save software versions -// // -// softwareVersionsToYAML(ch_versions) -// .collectFile( -// storeDir: "${params.outdir}/pipeline_info", -// name: 'nf_core_pipeline_software_mqc_versions.yml', -// sort: true, -// newLine: true -// ).set { ch_collated_versions } - - - // // - // // Collate and save software versions - // // - // softwareVersionsToYAML(ch_versions) - // .collectFile( - // storeDir: "${params.outdir}/pipeline_info", - // name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', - // sort: true, - // newLine: true - // ).set { ch_collated_versions } - - // // - // // MODULE: MultiQC - // // - // ch_multiqc_config = Channel.fromPath( - // "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - // ch_multiqc_custom_config = params.multiqc_config ? - // Channel.fromPath(params.multiqc_config, checkIfExists: true) : - // Channel.empty() - // ch_multiqc_logo = params.multiqc_logo ? - // Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - // Channel.empty() - - // summary_params = paramsSummaryMap( - // workflow, parameters_schema: "nextflow_schema.json") - // ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - // ch_multiqc_files = ch_multiqc_files.mix( - // ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - // ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
- // file(params.multiqc_methods_description, checkIfExists: true) : - // file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - // ch_methods_description = Channel.value( - // methodsDescriptionText(ch_multiqc_custom_methods_description)) - - // ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - // ch_multiqc_files = ch_multiqc_files.mix( - // ch_methods_description.collectFile( - // name: 'methods_description_mqc.yaml', - // sort: true - // ) - // ) - // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_html.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastp_json.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.ch_fastqc_trimmed.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([])) - // ch_multiqc_files = ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([])) - - // MULTIQC ( - // ch_multiqc_files.collect(), - // ch_multiqc_config.toList(), - // ch_multiqc_custom_config.toList(), - // ch_multiqc_logo.toList(), - // [], - // [] - // ) - - - - // emit: - // multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - // versions = ch_versions // channel: [ path(versions.yml) ] + //QC + QC_WORKFLOW ( + ch_reads, + STARFUSION_WORKFLOW.out.ch_bam_sorted, + 
STARFUSION_WORKFLOW.out.ch_bam_sorted_indexed, + BUILD_REFERENCES.out.ch_gtf, + BUILD_REFERENCES.out.ch_refflat, + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_fai, + BUILD_REFERENCES.out.ch_rrna_interval + ) + ch_versions = ch_versions.mix(QC_WORKFLOW.out.versions) + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + + + // + // MODULE: MultiQC + // + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? + Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
+ file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.fastp_html.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.fastp_json.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(TRIM_WORKFLOW.out.fastqc_trimmed.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_stats.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(STARFUSION_WORKFLOW.out.star_gene_count.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.rnaseq_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.duplicate_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(QC_WORKFLOW.out.insertsize_metrics.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(FUSIONINSPECTOR_WORKFLOW.out.ch_arriba_visualisation.collect{it[1]}.ifEmpty([])) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList(), + [], + [] + ) + + + + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } From d290fcdc0513cf1fdfc5f97c4e06d43d02829d8f Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 6 Dec 2024 10:56:21 +0100 Subject: 
[PATCH 17/41] update fusioncatcher container --- modules/local/fusioncatcher/build/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/fusioncatcher/build/main.nf b/modules/local/fusioncatcher/build/main.nf index f6ff0987..4aaaf504 100644 --- a/modules/local/fusioncatcher/build/main.nf +++ b/modules/local/fusioncatcher/build/main.nf @@ -2,7 +2,7 @@ process FUSIONCATCHER_BUILD { tag "fusioncatcher_build" label 'process_medium' - container "docker.io/clinicalgenomics/fusioncatcher:1.33" + container "docker.io/rannickscilifelab/fusioncatcher:1.34" input: val genome_gencode_version From be6e10262720181536a80ca56fe12a79227ef815 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:42:59 +0100 Subject: [PATCH 18/41] fix linting issues --- modules.json | 6 +- .../local/get_rrna_transcript/environment.yml | 6 ++ .../main.nf} | 10 +-- .../local/uscs/custom_gtftogenepred/main.nf | 3 +- modules/nf-core/salmon/quant/main.nf | 3 +- subworkflows/local/build_references.nf | 22 +++-- subworkflows/local/fusionreport_workflow.nf | 2 +- .../nf-core/utils_nextflow_pipeline/main.nf | 2 + .../tests/main.workflow.nf.test | 10 ++- .../nf-core/utils_nfcore_pipeline/main.nf | 89 +++++-------------- .../tests/main.function.nf.test | 46 ++++------ .../tests/main.function.nf.test.snap | 30 ------- .../utils_nfschema_plugin/tests/main.nf.test | 4 +- 13 files changed, 86 insertions(+), 147 deletions(-) create mode 100644 modules/local/get_rrna_transcript/environment.yml rename modules/local/{get_rrna_transcripts.nf => get_rrna_transcript/main.nf} (79%) diff --git a/modules.json b/modules.json index 48fa1f6b..b41d5fa5 100644 --- a/modules.json +++ b/modules.json @@ -152,17 +152,17 @@ "nf-core": { "utils_nextflow_pipeline": { "branch": "master", - "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", + "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", "installed_by": ["subworkflows"] }, 
"utils_nfcore_pipeline": { "branch": "master", - "git_sha": "1b6b9a3338d011367137808b49b923515080e3ba", + "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "bbd5a41f4535a8defafe6080e00ea74c45f4f96c", + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", "installed_by": ["subworkflows"] } } diff --git a/modules/local/get_rrna_transcript/environment.yml b/modules/local/get_rrna_transcript/environment.yml new file mode 100644 index 00000000..66b65c3a --- /dev/null +++ b/modules/local/get_rrna_transcript/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::pirate=1.0.5 + - bioconda::perl-bioperl=1.7.8 diff --git a/modules/local/get_rrna_transcripts.nf b/modules/local/get_rrna_transcript/main.nf similarity index 79% rename from modules/local/get_rrna_transcripts.nf rename to modules/local/get_rrna_transcript/main.nf index cb39b3e4..5331f534 100644 --- a/modules/local/get_rrna_transcripts.nf +++ b/modules/local/get_rrna_transcript/main.nf @@ -2,17 +2,17 @@ process GET_RRNA_TRANSCRIPTS { tag 'get_rrna_bed' label 'process_low' - conda "bioconda::pirate=1.0.4 bioconda::perl-bioperl=1.7.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pirate:1.0.4--hdfd78af_2' : - 'biocontainers/pirate:1.0.4--hdfd78af_2' }" + 'https://depot.galaxyproject.org/singularity/pirate:1.0.5--hdfd78af_0' : + 'biocontainers/pirate:1.0.5--hdfd78af_0' }" input: tuple val(meta), path(gtf) output: - path('rrna.gtf') , emit: rrnagtf - path('rrna.bed') , emit: bed + tuple val(meta), path('rrna.gtf') , emit: rrnagtf + tuple val(meta), path('rrna.bed') , emit: bed path "versions.yml" , emit: versions when: diff --git a/modules/local/uscs/custom_gtftogenepred/main.nf b/modules/local/uscs/custom_gtftogenepred/main.nf index 53a74e3d..9cc15765 100644 --- a/modules/local/uscs/custom_gtftogenepred/main.nf +++ b/modules/local/uscs/custom_gtftogenepred/main.nf @@ -11,7 +11,8 @@ process GTF_TO_REFFLAT { tuple val(meta), path (gtf) output: - path('*.refflat'), emit: refflat + path('*.refflat') , emit: refflat + path "versions.yml" , emit: versions script: def genepred = gtf + '.genepred' diff --git a/modules/nf-core/salmon/quant/main.nf b/modules/nf-core/salmon/quant/main.nf index e2d27352..f1e3b5cd 100644 --- a/modules/nf-core/salmon/quant/main.nf +++ b/modules/nf-core/salmon/quant/main.nf @@ -29,8 +29,7 @@ process SALMON_QUANT { prefix = task.ext.prefix ?: "${meta.id}" def reference = "--index $index" - def reads1 = [] - def reads2 = [] + def reads1 = [], reads2 = [] meta.single_end ? [reads].flatten().each{reads1 << it} : reads.eachWithIndex{ v, ix -> ( ix & 1 ? reads2 : reads1) << v } def input_reads = meta.single_end ? 
"-r ${reads1.join(" ")}" : "-1 ${reads1.join(" ")} -2 ${reads2.join(" ")}" if (alignment_mode) { diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 7d942601..8685feeb 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -5,15 +5,13 @@ */ include { GENCODE_DOWNLOAD } from '../../modules/local/gencode_download/main' -include { FUSIONCATCHER_DOWNLOAD } from '../../modules/local/fusioncatcher/download/main' include { FUSIONCATCHER_BUILD } from '../../modules/local/fusioncatcher/build/main' include { FUSIONREPORT_DOWNLOAD } from '../../modules/local/fusionreport/download/main' include { HGNC_DOWNLOAD } from '../../modules/local/hgnc/main' include { STARFUSION_BUILD } from '../../modules/local/starfusion/build/main' -include { STARFUSION_DOWNLOAD } from '../../modules/local/starfusion/download/main' include { GTF_TO_REFFLAT } from '../../modules/local/uscs/custom_gtftogenepred/main' -include { GET_RRNA_TRANSCRIPTS } from '../../modules/local/get_rrna_transcripts' -// include { CONVERT2BED } from '../../modules/local/convert2bed/main' +include { GET_RRNA_TRANSCRIPTS } from '../../modules/local/get_rrna_transcript/main' + /* ======================================================================================== IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -41,7 +39,7 @@ workflow BUILD_REFERENCES { if (!file(params.fasta).exists() || file(params.fasta).isEmpty() || !file(params.gtf).exists() || file(params.gtf).isEmpty()){ GENCODE_DOWNLOAD(params.genome_gencode_version, params.genome) - ch_versions = GENCODE_DOWNLOAD.out.versions + ch_versions = ch_versions.mix(GENCODE_DOWNLOAD.out.versions) ch_fasta = GENCODE_DOWNLOAD.out.fasta.map { that -> [[id:that.Name], that] } ch_gtf = GENCODE_DOWNLOAD.out.gtf.map { that -> [[id:that.Name], that] } } else { @@ -51,7 +49,7 @@ workflow BUILD_REFERENCES { if (!file(params.fai).exists() || file(params.fai).isEmpty()){ SAMTOOLS_FAIDX(ch_fasta, 
[[],[]]) - ch_versions = SAMTOOLS_FAIDX.out.versions + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) ch_fai = SAMTOOLS_FAIDX.out.fai } else { ch_fai = Channel.fromPath(params.fai).map { that -> [[id:that.Name], that] } @@ -60,6 +58,7 @@ workflow BUILD_REFERENCES { if ((!file(params.hgnc_ref).exists() || file(params.hgnc_ref).isEmpty() || !file(params.hgnc_date).exists() || file(params.hgnc_date).isEmpty()) && !params.skip_vcf){ HGNC_DOWNLOAD( ) + ch_versions = ch_versions.mix(HGNC_DOWNLOAD.out.versions) ch_hgnc_ref = HGNC_DOWNLOAD.out.hgnc_ref ch_hgnc_date = HGNC_DOWNLOAD.out.hgnc_date } else { @@ -69,8 +68,11 @@ workflow BUILD_REFERENCES { if (!file(params.rrna_intervals).exists() || file(params.rrna_intervals).isEmpty()){ GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) + ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) GET_RRNA_TRANSCRIPTS(ch_gtf) + ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions) GATK4_BEDTOINTERVALLIST(GET_RRNA_TRANSCRIPTS.out.bed.map { it -> [ [id:it.name], it ] }, GATK4_CREATESEQUENCEDICTIONARY.out.dict ) + ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) ch_rrna_interval = GATK4_BEDTOINTERVALLIST.out.interval_list } else { ch_rrna_interval = Channel.fromPath(params.rrna_intervals).map { that -> [[id:that.Name], that] } @@ -78,6 +80,7 @@ workflow BUILD_REFERENCES { if (!file(params.refflat).exists() || file(params.refflat).isEmpty()){ GTF_TO_REFFLAT(ch_gtf) + ch_versions = ch_versions.mix(GTF_TO_REFFLAT.out.versions) ch_refflat = GTF_TO_REFFLAT.out.refflat.map { that -> [[id:that.Name], that] } } else { ch_refflat = Channel.fromPath(params.refflat).map { that -> [[id:that.Name], that] } @@ -86,7 +89,9 @@ workflow BUILD_REFERENCES { if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() || !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc GFFREAD(ch_gtf, ch_fasta.map{ meta, fasta -> 
[ fasta ] }) + ch_versions = ch_versions.mix(GFFREAD.out.versions) SALMON_INDEX(ch_fasta.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) + ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) ch_salmon_index = SALMON_INDEX.out.index } else { ch_salmon_index = Channel.fromPath(params.salmon_index) @@ -96,6 +101,7 @@ workflow BUILD_REFERENCES { (!file(params.starindex_ref).exists() || file(params.starindex_ref).isEmpty() || !file(params.starindex_ref_stub_check).exists() || file(params.starindex_ref_stub_check).isEmpty() )) { STAR_GENOMEGENERATE(ch_fasta, ch_gtf) + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) ch_starindex_ref = STAR_GENOMEGENERATE.out.index } else { ch_starindex_ref = Channel.fromPath(params.starindex_ref).map { that -> [[id:that.Name], that] } @@ -106,6 +112,7 @@ workflow BUILD_REFERENCES { !file(params.arriba_ref_known_fusions).exists() || file(params.arriba_ref_known_fusions).isEmpty() || !file(params.arriba_ref_protein_domains).exists() || file(params.arriba_ref_protein_domains).isEmpty())) { ARRIBA_DOWNLOAD(params.genome) + ch_versions = ch_versions.mix(ARRIBA_DOWNLOAD.out.versions) ch_arriba_ref_blacklist = ARRIBA_DOWNLOAD.out.blacklist ch_arriba_ref_cytobands = ARRIBA_DOWNLOAD.out.cytobands ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions @@ -122,6 +129,7 @@ workflow BUILD_REFERENCES { (!file(params.fusioncatcher_ref).exists() || file(params.fusioncatcher_ref).isEmpty() || !file(params.fusioncatcher_ref_stub_check).exists() || file(params.fusioncatcher_ref_stub_check).isEmpty() )) { FUSIONCATCHER_BUILD(params.genome_gencode_version) + ch_versions = ch_versions.mix(FUSIONCATCHER_BUILD.out.versions) ch_fusioncatcher_ref = FUSIONCATCHER_BUILD.out.reference } else { @@ -133,6 +141,7 @@ workflow BUILD_REFERENCES { (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || !file(params.starfusion_ref_stub_check).exists() || 
file(params.starfusion_ref_stub_check).isEmpty() )) { STARFUSION_BUILD(ch_fasta, ch_gtf) + ch_versions = ch_versions.mix(STARFUSION_BUILD.out.versions) ch_starfusion_ref = STARFUSION_BUILD.out.reference } else { @@ -145,6 +154,7 @@ workflow BUILD_REFERENCES { !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } FUSIONREPORT_DOWNLOAD(params.cosmic_username, params.cosmic_passwd) + ch_versions = ch_versions.mix(FUSIONREPORT_DOWNLOAD.out.versions) ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.reference } else { ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref) diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf index a61759f7..5c9bfde1 100644 --- a/subworkflows/local/fusionreport_workflow.nf +++ b/subworkflows/local/fusionreport_workflow.nf @@ -23,7 +23,7 @@ workflow FUSIONREPORT_WORKFLOW { FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff) ch_fusion_list = FUSIONREPORT.out.fusion_list ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered - // ch_versions = ch_versions.mix(FUSIONREPORT.out.versions) + ch_versions = ch_versions.mix(FUSIONREPORT.out.versions) ch_report = FUSIONREPORT.out.report ch_csv = FUSIONREPORT.out.csv } else { diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index 0fcbf7b3..d6e593e8 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -92,10 +92,12 @@ def checkCondaChannels() { channels = config.channels } catch (NullPointerException e) { + log.debug(e) log.warn("Could not verify conda channel configuration.") return null } catch (IOException e) { + log.debug(e) log.warn("Could not verify conda channel configuration.") return null } diff --git 
a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test index ca964ce8..02dbf094 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -52,10 +52,12 @@ nextflow_workflow { } then { - assertAll( - { assert workflow.success }, - { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } - ) + expect { + with(workflow) { + assert success + assert "nextflow_workflow v9.9.9" in stdout + } + } } } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index 5cb7bafe..bfd25876 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -56,21 +56,6 @@ def checkProfileProvided(nextflow_cli_args) { } } -// -// Citation string for pipeline -// -def workflowCitation() { - def temp_doi_ref = "" - def manifest_doi = workflow.manifest.doi.tokenize(",") - // Handling multiple DOIs - // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers - // Removing ` ` since the manifest.doi is a string and not a proper list - manifest_doi.each { doi_ref -> - temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" - } - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + temp_doi_ref + "\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" -} - // // Generate workflow version string // @@ -150,33 +135,6 @@ def paramsSummaryMultiqc(summary_params) { return yaml_file_text } -// -// nf-core logo -// -def nfCoreLogo(monochrome_logs=true) { - def colors = logColours(monochrome_logs) as Map - String.format( - """\n - 
${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) -} - -// -// Return dashed line -// -def dashedLine(monochrome_logs=true) { - def colors = logColours(monochrome_logs) as Map - return "-${colors.dim}----------------------------------------------------${colors.reset}-" -} - // // ANSII colours used for terminal logging // @@ -245,28 +203,24 @@ def logColours(monochrome_logs=true) { return colorcodes } -// -// Attach the multiqc report to email -// -def attachMultiqcReport(multiqc_report) { - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") - } - mqc_report = mqc_report[0] - } +// Return a single report from an object that may be a Path or List +// +def getSingleReport(multiqc_reports) { + if (multiqc_reports instanceof Path) { + return multiqc_reports + } else if (multiqc_reports instanceof List) { + if (multiqc_reports.size() == 0) { + log.warn("[${workflow.manifest.name}] No reports found from process 'MULTIQC'") + return null + } else if (multiqc_reports.size() == 1) { + return multiqc_reports.first() + } else { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + return multiqc_reports.first() } + } else { + return null } - catch (Exception all) { - if (multiqc_report) { - 
log.warn("[${workflow.manifest.name}] Could not attach MultiQC report to summary email") - } - } - return mqc_report } // @@ -320,7 +274,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi email_fields['summary'] = summary << misc_fields // On success try attach the multiqc report - def mqc_report = attachMultiqcReport(multiqc_report) + def mqc_report = getSingleReport(multiqc_report) // Check if we are only sending emails on failure def email_address = email @@ -340,7 +294,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as MemoryUnit def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -351,14 +305,17 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi if (email_address) { try { if (plaintext_email) { -new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } + new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } // Try to send HTML e-mail using sendmail def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") sendmail_tf.withWriter { w -> w << sendmail_html } ['sendmail', '-t'].execute() << sendmail_html log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") } - catch (Exception all) { + catch (Exception 
msg) { + log.debug(msg.toString()) + log.debug("Trying with mail instead of sendmail") // Catch failures and try with plaintext def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] mail_cmd.execute() << email_html diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test index 1dc317f8..f117040c 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -41,26 +41,14 @@ nextflow_function { } } - test("Test Function workflowCitation") { - - function "workflowCitation" - - then { - assertAll( - { assert function.success }, - { assert snapshot(function.result).match() } - ) - } - } - - test("Test Function nfCoreLogo") { + test("Test Function without logColours") { - function "nfCoreLogo" + function "logColours" when { function { """ - input[0] = false + input[0] = true """ } } @@ -73,9 +61,8 @@ nextflow_function { } } - test("Test Function dashedLine") { - - function "dashedLine" + test("Test Function with logColours") { + function "logColours" when { function { @@ -93,14 +80,13 @@ nextflow_function { } } - test("Test Function without logColours") { - - function "logColours" + test("Test Function getSingleReport with a single file") { + function "getSingleReport" when { function { """ - input[0] = true + input[0] = file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) """ } } @@ -108,18 +94,22 @@ nextflow_function { then { assertAll( { assert function.success }, - { assert snapshot(function.result).match() } + { assert function.result.contains("test.tsv") } ) } } - test("Test Function with logColours") { - function "logColours" + test("Test Function getSingleReport with multiple files") { + function "getSingleReport" when { function { """ - input[0] = false + input[0] = [ + 
file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/network.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/expression.tsv', checkIfExists: true) + ] """ } } @@ -127,7 +117,9 @@ nextflow_function { then { assertAll( { assert function.success }, - { assert snapshot(function.result).match() } + { assert function.result.contains("test.tsv") }, + { assert !function.result.contains("network.tsv") }, + { assert !function.result.contains("expression.tsv") } ) } } diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap index 1037232c..02c67014 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -17,26 +17,6 @@ }, "timestamp": "2024-02-28T12:02:59.729647" }, - "Test Function nfCoreLogo": { - "content": [ - "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:10.562934" - }, - "Test Function workflowCitation": { - "content": [ - "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n 
https://github.com/nextflow_workflow/blob/master/CITATIONS.md" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:07.019761" - }, "Test Function without logColours": { "content": [ { @@ -95,16 +75,6 @@ }, "timestamp": "2024-02-28T12:03:17.969323" }, - "Test Function dashedLine": { - "content": [ - "-\u001b[2m----------------------------------------------------\u001b[0m-" - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T12:03:14.366181" - }, "Test Function with logColours": { "content": [ { diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 842dc432..8fb30164 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -42,7 +42,7 @@ nextflow_workflow { params { test_data = '' - outdir = 1 + outdir = null } workflow { @@ -94,7 +94,7 @@ nextflow_workflow { params { test_data = '' - outdir = 1 + outdir = null } workflow { From 41523f932dfd76f044d1519a5210336128a55cc6 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:43:53 +0100 Subject: [PATCH 19/41] prettier --- .github/workflows/awsfulltest.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index d1492d8b..baaa0461 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -54,4 +54,3 @@ jobs: path: | seqera_platform_action_*.log seqera_platform_action_*.json - From 86c299d54f63a9a387ded1e118868b287f1890e7 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Mon, 9 Dec 2024 15:41:31 +0100 Subject: [PATCH 20/41] update comments --- README.md | 2 +- docs/usage.md | 1 - modules/local/hgnc/main.nf | 1 - modules/local/starfusion/build/main.nf | 
1 + subworkflows/local/build_references.nf | 2 +- subworkflows/local/trim_workflow/main.nf | 3 +-- workflows/rnafusion.nf | 4 +--- 7 files changed, 5 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index aa873457..809de85f 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ In rnafusion the full-sized test includes reference building and fusion detectio ### Build references -`--references_only` triggers a workflow to ONLY build references, otherwise the references are built with the analysis run: +`--references_only` triggers a workflow to ONLY build references, otherwise the references are build when the analysis is run: 1. Download gencode fasta and gtf files 2. Create [STAR](https://github.com/alexdobin/STAR) index diff --git a/docs/usage.md b/docs/usage.md index 1b0db1c5..c383df54 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -397,7 +397,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - `test` - A profile with a complete configuration for automated testing - Includes links to test data so needs no other parameters - - Needs to run in two steps: with `--references_only` first and then without `--references_only` to run the analysis - !!!! Run with `-stub` as all references need to be downloaded otherwise !!!! 
### `-resume` diff --git a/modules/local/hgnc/main.nf b/modules/local/hgnc/main.nf index 2aa56038..aa5c077c 100644 --- a/modules/local/hgnc/main.nf +++ b/modules/local/hgnc/main.nf @@ -10,7 +10,6 @@ process HGNC_DOWNLOAD { output: path "hgnc_complete_set.txt" , emit: hgnc_ref path "HGNC-DB-timestamp.txt" , emit: hgnc_date - path "versions.yml" , emit: versions diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 6ac3c694..436a5e4f 100644 --- a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -10,6 +10,7 @@ process STARFUSION_BUILD { output: path "*" , emit: reference + path "versions.yml" , emit: versions script: def binPath = (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) ? "prep_genome_lib.pl" : "/usr/local/src/STAR-Fusion/ctat-genome-lib-builder/prep_genome_lib.pl" diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 8685feeb..2399acbd 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -94,7 +94,7 @@ workflow BUILD_REFERENCES { ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) ch_salmon_index = SALMON_INDEX.out.index } else { - ch_salmon_index = Channel.fromPath(params.salmon_index) + ch_salmon_index = Channel.fromPath({params.salmon_index}) } if ((params.starindex || params.all || params.starfusion || params.arriba) && diff --git a/subworkflows/local/trim_workflow/main.nf b/subworkflows/local/trim_workflow/main.nf index d2492116..845adbaf 100644 --- a/subworkflows/local/trim_workflow/main.nf +++ b/subworkflows/local/trim_workflow/main.nf @@ -7,7 +7,6 @@ workflow TRIM_WORKFLOW { take: reads // channel [ meta, [ fastq files ] ] - adapter_fasta // channel [ path ] main: ch_versions = Channel.empty() @@ -16,7 +15,7 @@ workflow TRIM_WORKFLOW { ch_fastqc_trimmed = Channel.empty() if ( {params.fastp_trim} ) { - FASTP(reads, adapter_fasta.ifEmpty( [] ), 
false, false, false) + FASTP(reads, {params.adapter_fasta}.ifEmpty( [] ), false, false, false) ch_versions = ch_versions.mix(FASTP.out.versions) FASTQC_FOR_FASTP(FASTP.out.reads) diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 52348721..ee0e1e57 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -66,7 +66,6 @@ workflow RNAFUSION { BUILD_REFERENCES() ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) - // // QC from FASTQ files // @@ -81,8 +80,7 @@ workflow RNAFUSION { // Trimming // TRIM_WORKFLOW ( - ch_samplesheet, - ch_adapter_fastp, + ch_samplesheet ) ch_reads = TRIM_WORKFLOW.out.ch_reads_all ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) From 35b5d9cb50a489c6c576bc2aa93f7cd5a6991e6c Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:44:24 +0100 Subject: [PATCH 21/41] fix i/o in channels --- conf/modules.config | 8 +++---- subworkflows/local/build_references.nf | 6 +++--- subworkflows/local/trim_workflow/main.nf | 2 +- workflows/rnafusion.nf | 27 +++++------------------- 4 files changed, 13 insertions(+), 30 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d20ca0f2..d021ae36 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,10 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] - withName: '.*' { - ext.when = { !params.references_only || task.process.contains('BUILD_REFERENCES') } - } - withName: 'ARRIBA_ARRIBA' { publishDir = [ path: { "${params.outdir}/arriba" }, @@ -371,4 +367,8 @@ process { withName: 'VCF_COLLECT' { ext.when = { {!params.fusioninspector_only} && {!params.skip_vcf} } } + + withName: '.*' { + ext.when = { !params.references_only || task.process.contains('BUILD_REFERENCES') } + } } diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 2399acbd..e158f566 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -71,7 +71,7 @@ workflow BUILD_REFERENCES { ch_versions = ch_versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) GET_RRNA_TRANSCRIPTS(ch_gtf) ch_versions = ch_versions.mix(GET_RRNA_TRANSCRIPTS.out.versions) - GATK4_BEDTOINTERVALLIST(GET_RRNA_TRANSCRIPTS.out.bed.map { it -> [ [id:it.name], it ] }, GATK4_CREATESEQUENCEDICTIONARY.out.dict ) + GATK4_BEDTOINTERVALLIST(GET_RRNA_TRANSCRIPTS.out.bed, GATK4_CREATESEQUENCEDICTIONARY.out.dict ) ch_versions = ch_versions.mix(GATK4_BEDTOINTERVALLIST.out.versions) ch_rrna_interval = GATK4_BEDTOINTERVALLIST.out.interval_list } else { @@ -153,9 +153,9 @@ workflow BUILD_REFERENCES { (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } - FUSIONREPORT_DOWNLOAD(params.cosmic_username, params.cosmic_passwd) + FUSIONREPORT_DOWNLOAD() ch_versions = ch_versions.mix(FUSIONREPORT_DOWNLOAD.out.versions) - ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.reference + ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.mitelman.map{ it.parent } } else { ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref) } diff --git 
a/subworkflows/local/trim_workflow/main.nf b/subworkflows/local/trim_workflow/main.nf index 845adbaf..512a07b5 100644 --- a/subworkflows/local/trim_workflow/main.nf +++ b/subworkflows/local/trim_workflow/main.nf @@ -15,7 +15,7 @@ workflow TRIM_WORKFLOW { ch_fastqc_trimmed = Channel.empty() if ( {params.fastp_trim} ) { - FASTP(reads, {params.adapter_fasta}.ifEmpty( [] ), false, false, false) + FASTP(reads, {params.adapter_fasta}, false, false, false) ch_versions = ch_versions.mix(FASTP.out.versions) FASTQC_FOR_FASTP(FASTP.out.reads) diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index ee0e1e57..cf358533 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -38,27 +38,10 @@ workflow RNAFUSION { main: - ch_starindex_ref = params.starfusion_build ? Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.starfusion_ref}/ref_genome.fa.star.idx").map { it -> [[id:it.Name], it] }.collect() - ch_starindex_ensembl_ref = Channel.fromPath(params.starindex_ref).map { it -> [[id:it.Name], it] }.collect() - ch_refflat = params.starfusion_build ? Channel.fromPath(params.refflat).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.gtf.refflat").map { it -> [[id:it.Name], it] }.collect() - ch_rrna_interval = params.starfusion_build ? Channel.fromPath(params.rrna_intervals).map { it -> [[id:it.Name], it] }.collect() : Channel.fromPath("${params.ensembl_ref}/ref_annot.interval_list").map { it -> [[id:it.Name], it] }.collect() - ch_adapter_fastp = params.adapter_fasta ? 
Channel.fromPath(params.adapter_fasta, checkIfExists: true) : Channel.empty() - ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { it -> [[id:it.Name], it] }.collect() - ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { it -> [[id:it.Name], it] }.collect() - ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { it -> [[id:it.Name], it] }.collect() - ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { it -> [[id:it.Name], it] }.collect() - ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { it -> [[id:it.Name], it] }.collect() - ch_hgnc_ref = Channel.fromPath(params.hgnc_ref).map { it -> [[id:it.Name], it] }.collect() - ch_hgnc_date = Channel.fromPath(params.hgnc_date).map { it -> [[id:it.Name], it] }.collect() - ch_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it.Name], it] }.collect() - ch_gtf = Channel.fromPath(params.gtf).map { it -> [[id:it.Name], it] }.collect() - ch_salmon_index = Channel.fromPath(params.salmon_index).map { it -> [[id:it.Name], it] }.collect() - ch_fai = Channel.fromPath(params.fai).map { it -> [[id:it.Name], it] }.collect() - - ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() + // // Create references if necessary // @@ -66,6 +49,7 @@ workflow RNAFUSION { BUILD_REFERENCES() ch_versions = ch_versions.mix(BUILD_REFERENCES.out.versions) + // // QC from FASTQ files // @@ -85,7 +69,6 @@ workflow RNAFUSION { ch_reads = TRIM_WORKFLOW.out.ch_reads_all ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) - SALMON_QUANT( ch_reads, BUILD_REFERENCES.out.ch_salmon_index.map{ it -> it[1] }, BUILD_REFERENCES.out.ch_gtf.map{ it -> it[1] }, [], false, 'A') ch_multiqc_files = ch_multiqc_files.mix(SALMON_QUANT.out.json_info.collect{it[1]}) ch_versions = ch_versions.mix(SALMON_QUANT.out.versions) @@ -108,7 +91,7 @@ workflow RNAFUSION { ch_versions = 
ch_versions.mix(ARRIBA_WORKFLOW.out.versions) -//Run STAR fusion + //Run STAR fusion STARFUSION_WORKFLOW ( ch_reads, BUILD_REFERENCES.out.ch_gtf, @@ -118,14 +101,14 @@ workflow RNAFUSION { ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions) -//Run fusioncatcher + //Run fusioncatcher FUSIONCATCHER_WORKFLOW ( ch_reads ) ch_versions = ch_versions.mix(FUSIONCATCHER_WORKFLOW.out.versions) -//Run stringtie + //Run stringtie STRINGTIE_WORKFLOW ( STARFUSION_WORKFLOW.out.ch_bam_sorted, BUILD_REFERENCES.out.ch_gtf From 24a46103dd3a07ae43f22b094e2c7da943126c29 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:36:53 +0100 Subject: [PATCH 22/41] test build removed from ci as build_references happens before the run --- .github/workflows/ci.yml | 1 - conf/test_build.config | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71ff7245..a510d72d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,7 +39,6 @@ jobs: - "latest-stable" test_profile: - "test_stub" - - "test_build" compute_profile: - "docker" - "singularity" diff --git a/conf/test_build.config b/conf/test_build.config index 616d734f..26002327 100644 --- a/conf/test_build.config +++ b/conf/test_build.config @@ -15,7 +15,7 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - build_references = true + references_only = true input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' no_cosmic = true all = true From 76cf2aaffc1630665ec3ad9e1534549eefaa2002 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:20:11 +0100 Subject: [PATCH 23/41] update snapshots, add meta --- conf/modules.config | 2 +- conf/test.config | 2 + modules/local/fusionreport/detect/main.nf | 2 +- 
modules/local/fusionreport/download/main.nf | 20 +- subworkflows/local/build_references.nf | 6 +- subworkflows/local/fusionreport_workflow.nf | 1 - .../utils_nfcore_rnafusion_pipeline/main.nf | 2 +- test.xml | 1 + tests/test_cosmic.nf.test.snap | 68 +++ tests/test_stub.nf.test.snap | 459 +++++++++++++++++- 10 files changed, 540 insertions(+), 23 deletions(-) create mode 100644 test.xml create mode 100644 tests/test_cosmic.nf.test.snap diff --git a/conf/modules.config b/conf/modules.config index d021ae36..bbbee5c2 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -111,7 +111,7 @@ process { ext.args = { {params.no_cosmic} ? "--no-cosmic" : " --cosmic_usr ${params.cosmic_username} --cosmic_passwd ${params.cosmic_passwd}" } ext.args2 = { params.qiagen ? "--qiagen" : "" } publishDir = [ - path: { "${params.genomes_base}/fusion_report_db" }, + path: { "${params.genomes_base}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/test.config b/conf/test.config index 3cafa252..1902de0a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,4 +16,6 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv' + all = true + no_cosmic = true } diff --git a/modules/local/fusionreport/detect/main.nf b/modules/local/fusionreport/detect/main.nf index 5482d4bf..56a29ab4 100644 --- a/modules/local/fusionreport/detect/main.nf +++ b/modules/local/fusionreport/detect/main.nf @@ -7,7 +7,7 @@ process FUSIONREPORT { input: tuple val(meta), path(reads), path(arriba_fusions), path(starfusion_fusions), path(fusioncatcher_fusions) - path(fusionreport_ref) + tuple val(meta2), path(fusionreport_ref) val(tools_cutoff) output: diff --git a/modules/local/fusionreport/download/main.nf b/modules/local/fusionreport/download/main.nf index b19a6446..a2c2a96b 100644 --- a/modules/local/fusionreport/download/main.nf +++ 
b/modules/local/fusionreport/download/main.nf @@ -6,17 +6,17 @@ process FUSIONREPORT_DOWNLOAD { container "docker.io/clinicalgenomics/fusion-report:3.1.0" output: - tuple val(meta), path("fusionreport_dbs"), emit: fusionreport_db + tuple val(meta), path("fusion_report_db"), emit: fusionreport_ref path "versions.yml" , emit: versions script: - meta = [id: 'fusionreport_dbs'] + meta = [id: 'fusion_report_db'] def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' """ fusion_report download $args ./ - mkdir fusionreport_dbs - mv *.txt *.log *.db fusionreport_dbs + mkdir fusion_report_db + mv *.txt *.log *.db fusion_report_db cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -27,12 +27,12 @@ process FUSIONREPORT_DOWNLOAD { stub: meta = [id: 'fusionreport_dbs'] """ - mkdir fusionreport_dbs - touch fusionreport_dbs/cosmic.db - touch fusionreport_dbs/fusiongdb2.db - touch fusionreport_dbs/mitelman.db - touch fusionreport_dbs/DB-timestamp.txt - touch fusionreport_dbs/fusion_report.log + mkdir fusion_report_db + touch fusion_report_db/cosmic.db + touch fusion_report_db/fusiongdb2.db + touch fusion_report_db/mitelman.db + touch fusion_report_db/DB-timestamp.txt + touch fusion_report_db/fusion_report.log cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index e158f566..8e9cc4b1 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -152,12 +152,12 @@ workflow BUILD_REFERENCES { if ((params.fusionreport || params.all) && (!file(params.fusionreport_ref).exists() || file(params.fusionreport_ref).isEmpty() || !file(params.fusionreport_ref_stub_check).exists() || file(params.fusionreport_ref_stub_check).isEmpty())) { - if (!params.cosmic_username || !params.cosmic_passwd) { exit 1, 'COSMIC username and/or password missing' } + if (!params.no_cosmic && (!params.cosmic_username || !params.cosmic_passwd)) { exit 1, 'COSMIC 
username and/or password missing' } FUSIONREPORT_DOWNLOAD() ch_versions = ch_versions.mix(FUSIONREPORT_DOWNLOAD.out.versions) - ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.mitelman.map{ it.parent } + ch_fusionreport_ref = FUSIONREPORT_DOWNLOAD.out.fusionreport_ref } else { - ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref) + ch_fusionreport_ref = Channel.fromPath(params.fusionreport_ref).map { that -> [[id:that.Name], that] } } emit: diff --git a/subworkflows/local/fusionreport_workflow.nf b/subworkflows/local/fusionreport_workflow.nf index 5c9bfde1..80fe7337 100644 --- a/subworkflows/local/fusionreport_workflow.nf +++ b/subworkflows/local/fusionreport_workflow.nf @@ -19,7 +19,6 @@ workflow FUSIONREPORT_WORKFLOW { .join(arriba_fusions, remainder: true) .join(starfusion_fusions, remainder: true) .join(fusioncatcher_fusions, remainder: true) - FUSIONREPORT(reads_fusions, fusionreport_ref, params.tools_cutoff) ch_fusion_list = FUSIONREPORT.out.fusion_list ch_fusion_list_filtered = FUSIONREPORT.out.fusion_list_filtered diff --git a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf index 4b46c5c0..1f905a44 100644 --- a/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_rnafusion_pipeline/main.nf @@ -157,7 +157,7 @@ def validateInputParameters() { genomeExistsError() if (params.no_cosmic) { - log.warn("Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD`") + log.warn("Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT`") } } diff --git a/test.xml b/test.xml new file mode 100644 index 00000000..e8f1835b --- /dev/null +++ b/test.xml @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/test_cosmic.nf.test.snap b/tests/test_cosmic.nf.test.snap new file mode 100644 index 00000000..dc3ab947 --- /dev/null +++ b/tests/test_cosmic.nf.test.snap @@ -0,0 +1,68 @@ +{ + "test cosmic no fastp trim 
build": { + "content": [ + 0, + { + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T12:42:49.558429" + }, + "test cosmic no fastp trim": { + "content": [ + 0, + { + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T12:42:06.51355" + }, + "test cosmic with fastp trim": { + "content": [ + 0, + { + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, + [ + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.10.2" + }, + "timestamp": "2024-12-10T12:41:17.841938" + } +} \ No newline at end of file diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index 3b83c240..febef3c8 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -1,22 +1,131 @@ { "stub test no fastp trim": { "content": [ - 4, + 31, { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "FASTP": { + "fastp": "0.23.4" + }, "FASTQC": { "fastqc": "0.12.1" }, + "FASTQC_FOR_FASTP": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER": { + "fusioncatcher": 1.33 + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "fusioncatcher.py 1.35" + }, "FUSIONREPORT": { "fusion_report": "2.1.5" }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + 
"get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.7.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.7.0" + }, + "STAR_FOR_ARRIBA": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, "Workflow": { "nf-core/rnafusion": "v4.0.0dev" } }, [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "fastp", + "fastp/test.fastp.html", + "fastp/test.fastp.json", + "fastp/test.fastp.log", + "fastp/test_1.fastp.fastq.gz", + "fastp/test_2.fastp.fastq.gz", "fastqc", "fastqc/test.html", "fastqc/test.zip", + "fastqc_for_fastp", + "fastqc_for_fastp/test_trimmed.html", + "fastqc_for_fastp/test_trimmed.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", "fusionreport", "fusionreport/test", "fusionreport/test/AAA_BBB.html", @@ -25,28 +134,156 @@ "fusionreport/test/test.fusions.csv", "fusionreport/test/test.fusions.json", "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + 
"get/rrna.gtf", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_plots", "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + 
"references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "references/starfusion", + "references/starfusion/ctat_genome_lib_build_dir", + "references/starfusion/ref_annot.cdna.fa", "salmon", "salmon/test", "salmon/test_lib_format_counts.json", - "salmon/test_meta_info.json" + "salmon/test_meta_info.json", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + 
"star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-03T19:27:00.717876194" + "timestamp": "2024-12-10T14:12:48.526275" }, "stub test with fastp trim": { "content": [ - 6, + 31, { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, "FASTP": { "fastp": "0.23.4" }, @@ -56,14 +293,96 @@ "FASTQC_FOR_FASTP": { "fastqc": "0.12.1" }, + "FUSIONCATCHER": { + "fusioncatcher": 1.33 + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "fusioncatcher.py 1.35" + }, "FUSIONREPORT": { "fusion_report": "2.1.5" }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + 
"GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.7.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.7.0" + }, + "STAR_FOR_ARRIBA": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, "Workflow": { "nf-core/rnafusion": "v4.0.0dev" } }, [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", "fastp", "fastp/test.fastp.html", "fastp/test.fastp.json", @@ -76,6 +395,12 @@ "fastqc_for_fastp", "fastqc_for_fastp/test_trimmed.html", "fastqc_for_fastp/test_trimmed.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", "fusionreport", "fusionreport/test", "fusionreport/test/AAA_BBB.html", @@ -84,22 +409,144 @@ "fusionreport/test/test.fusions.csv", "fusionreport/test/test.fusions.json", "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + 
"get/rrna.bed", + "get/rrna.gtf", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_plots", "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + 
"references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "references/starfusion", + "references/starfusion/ctat_genome_lib_build_dir", + "references/starfusion/ref_annot.cdna.fa", "salmon", "salmon/test", "salmon/test_lib_format_counts.json", - "salmon/test_meta_info.json" + "salmon/test_meta_info.json", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + 
"star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-03T19:26:43.375635514" + "timestamp": "2024-12-10T14:10:38.383967" } } \ No newline at end of file From 56ed001b0fce281ba4375db685969541cc70e7af Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:25:59 +0100 Subject: [PATCH 24/41] remove trash files --- test.fusionreport.tsv | 0 test.fusionreport_filtered.tsv | 0 test.fusions.csv | 0 test.fusions.json | 0 test.xml | 1 - test_fusionreport_index.html | 0 6 files changed, 1 deletion(-) delete mode 100644 test.fusionreport.tsv delete mode 100644 test.fusionreport_filtered.tsv delete mode 100644 test.fusions.csv delete mode 100644 test.fusions.json delete mode 100644 test.xml delete 
mode 100644 test_fusionreport_index.html diff --git a/test.fusionreport.tsv b/test.fusionreport.tsv deleted file mode 100644 index e69de29b..00000000 diff --git a/test.fusionreport_filtered.tsv b/test.fusionreport_filtered.tsv deleted file mode 100644 index e69de29b..00000000 diff --git a/test.fusions.csv b/test.fusions.csv deleted file mode 100644 index e69de29b..00000000 diff --git a/test.fusions.json b/test.fusions.json deleted file mode 100644 index e69de29b..00000000 diff --git a/test.xml b/test.xml deleted file mode 100644 index e8f1835b..00000000 --- a/test.xml +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/test_fusionreport_index.html b/test_fusionreport_index.html deleted file mode 100644 index e69de29b..00000000 From 23c4014a6a22839223540a7dd98426fb89cd31d7 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:24:20 +0100 Subject: [PATCH 25/41] use stubs --- .github/workflows/awstest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 78bdb317..977adb49 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -26,6 +26,7 @@ jobs: "cosmic_username": "${{ secrets.cosmic_username }}", "cosmic_passwd": "${{ secrets.cosmic_passwd }}", "all": true, + "stub": true } profiles: test,aws_tower - uses: actions/upload-artifact@v4 From d52f0a055fb82ae8def0b400c016820af38bf222 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:24:42 +0100 Subject: [PATCH 26/41] update changelog, first step --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 690024b2..1980cd86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated to nf-core/tools 3.0.2 
[#504](https://github.com/nf-core/rnafusion/pull/504) - Remove local module `RRNA_TRANSCRIPTS` (replaced by nf-core module) [#541](https://github.com/nf-core/rnafusion/pull/541) - Allow fastq files without a dot before .fn(.gz)/.fastq(.gz) files [#548](https://github.com/nf-core/rnafusion/pull/548) - + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" GTF_TO_REFFLAT ### Fixed - Fixed some Nextflow run-commands in the docs [#491](https://github.com/nf-core/rnafusion/pull/491) From 5409f8c2566342a4d2debe406ef4e19b52231d74 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:25:02 +0100 Subject: [PATCH 27/41] update GTF_TO_REFFLAT options, update snapshots --- conf/modules.config | 1 + modules/local/fusioncatcher/build/meta.yml | 3 +- modules/local/fusioncatcher/download/main.nf | 2 +- modules/local/starfusion/download/main.nf | 2 +- nextflow.config | 3 +- tests/test_build.nf.test.snap | 33 +++----------------- tests/test_cosmic.nf.test.snap | 22 ++++--------- 7 files changed, 15 insertions(+), 51 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index bbbee5c2..3c376545 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -144,6 +144,7 @@ process { } withName: 'GTF_TO_REFFLAT' { + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" publishDir = [ path: { "${params.genomes_base}/gencode" }, mode: params.publish_dir_mode, diff --git a/modules/local/fusioncatcher/build/meta.yml b/modules/local/fusioncatcher/build/meta.yml index 40421a4e..202be7e1 100644 --- a/modules/local/fusioncatcher/build/meta.yml +++ b/modules/local/fusioncatcher/build/meta.yml @@ -1,4 +1,4 @@ -name: fusioncatcher_download +name: fusioncatcher_build description: Build genome for fusioncatcher keywords: - sort @@ -19,7 +19,6 @@ output: - reference: type: directory description: Path to fusioncatcher references - pattern: "*" authors: - "@praveenraj2018, @rannick" diff --git 
a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index 7afdb4c6..ed97c834 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -20,7 +20,7 @@ process FUSIONCATCHER_DOWNLOAD { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - // TODO: move to my sourceforge + // TODO: move to S3 // def url = """ diff --git a/modules/local/starfusion/download/main.nf b/modules/local/starfusion/download/main.nf index 85bb39c1..6ef1df63 100644 --- a/modules/local/starfusion/download/main.nf +++ b/modules/local/starfusion/download/main.nf @@ -8,7 +8,7 @@ process STARFUSION_DOWNLOAD { path "ctat_genome_lib_build_dir/*" , emit: reference - // TODO: move to my sourceforge + // TODO: move to S3 script: """ wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/__genome_libs_StarFv1.10/GRCh38_gencode_v37_CTAT_lib_Mar012021.plug-n-play.tar.gz --no-check-certificate diff --git a/nextflow.config b/nextflow.config index 31e75497..db9eb141 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,13 +61,12 @@ params { // Skip steps skip_qc = false - skip_vcf = false skip_vis = false skip_vcf = false skip_salmon_index = false // Download references option - download_refs = true + download_refs = false // Path to references fasta = "${params.genomes_base}/gencode/Homo_sapiens_${params.genome}_${params.genome_gencode_version}_dna_primary_assembly.fa" diff --git a/tests/test_build.nf.test.snap b/tests/test_build.nf.test.snap index 16b6c007..5332634b 100644 --- a/tests/test_build.nf.test.snap +++ b/tests/test_build.nf.test.snap @@ -2,46 +2,21 @@ "test_build": { "content": [ [ - "bedops", - "bedops/Homo_sapiens.GRCh38.102_rrna_intervals.gtf.bed", - "gatk4", - "gatk4/Homo_sapiens.GRCh38.102.dna.primary_assembly.dict", - "gffread", - "gffread/Homo_sapiens.GRCh38.102.fasta", "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "references", - 
"references/ensembl", - "references/ensembl/Homo_sapiens.GRCh38.102.chr.gtf", - "references/ensembl/Homo_sapiens.GRCh38.102.dna.primary_assembly.fa", - "references/ensembl/Homo_sapiens.GRCh38.102.dna.primary_assembly.fa.fai", - "references/ensembl/Homo_sapiens.GRCh38.102.genepred", - "references/ensembl/Homo_sapiens.GRCh38.102.gtf", - "references/ensembl/Homo_sapiens.GRCh38.102.refflat", - "references/ensembl/Homo_sapiens.GRCh38.102_rrna_intervals.gtf.interval_list", "references/hgnc", "references/hgnc/HGNC-DB-timestamp.txt", - "references/hgnc/hgnc_complete_set.txt", - "rrnatranscripts", - "rrnatranscripts/Homo_sapiens.GRCh38.102_rrna_intervals.gtf" + "references/hgnc/hgnc_complete_set.txt" ], [ - "Homo_sapiens.GRCh38.102_rrna_intervals.gtf.bed:md5,5bc3ccc76735ae46699a75269f0ea65b", - "Homo_sapiens.GRCh38.102.dna.primary_assembly.dict:md5,092f1bf29fce906ff7b5ece02b4b21c8", - "Homo_sapiens.GRCh38.102.fasta:md5,d1f17b045dc60c49f2cc29e30006afc0", - "Homo_sapiens.GRCh38.102.chr.gtf:md5,0069687307852c63596b4d2ebdbbaf0c", - "Homo_sapiens.GRCh38.102.dna.primary_assembly.fa.fai:md5,d527f3eb6b664020cf4d882b5820056f", - "Homo_sapiens.GRCh38.102.genepred:md5,59c577ca4ab033c0ce1feac1e387bcab", - "Homo_sapiens.GRCh38.102.gtf:md5,defac755cd9aa4e82ec33398c27745ef", - "Homo_sapiens.GRCh38.102.refflat:md5,ef095e13743811c31d44752c32e9673e", - "Homo_sapiens.GRCh38.102_rrna_intervals.gtf.interval_list:md5,0abf61877f65247b15c438d605d85599", - "hgnc_complete_set.txt:md5,a563a2f8432ec0ab7d3dc74d769102b8", - "Homo_sapiens.GRCh38.102_rrna_intervals.gtf:md5,744bf505deb50837b15441e808cad345" + "hgnc_complete_set.txt:md5,a563a2f8432ec0ab7d3dc74d769102b8" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-09T13:33:28.517098377" + "timestamp": "2024-12-10T15:09:04.076558" } } \ No newline at end of file diff --git a/tests/test_cosmic.nf.test.snap b/tests/test_cosmic.nf.test.snap index dc3ab947..b8e6d4a5 100644 --- a/tests/test_cosmic.nf.test.snap +++ 
b/tests/test_cosmic.nf.test.snap @@ -24,14 +24,9 @@ "test cosmic no fastp trim": { "content": [ 0, - { - "Workflow": { - "nf-core/rnafusion": "v4.0.0dev" - } - }, + null, [ - "pipeline_info", - "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + "pipeline_info" ], [ @@ -41,19 +36,14 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-10T12:42:06.51355" + "timestamp": "2024-12-10T15:09:53.130295" }, "test cosmic with fastp trim": { "content": [ 0, - { - "Workflow": { - "nf-core/rnafusion": "v4.0.0dev" - } - }, + null, [ - "pipeline_info", - "pipeline_info/nf_core_pipeline_software_mqc_versions.yml" + "pipeline_info" ], [ @@ -63,6 +53,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-10T12:41:17.841938" + "timestamp": "2024-12-10T15:09:29.277399" } } \ No newline at end of file From ef5ab96f749a1dd7d6c32f54f39fa395892c7a20 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:00:35 +0100 Subject: [PATCH 28/41] update star in snapshot --- tests/test_stub.nf.test.snap | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index febef3c8..0c4a3578 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -80,17 +80,17 @@ "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": null, + "star": 2.7.11b, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": null, + "star": 2.7.11b, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": null, + "star": 2.7.11b, "samtools": 1.2, "gawk": "5.1.0" }, @@ -355,17 +355,17 @@ "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": null, + "star": 2.7.11b, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": null, + "star": 2.7.11b, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": null, + "star": 2.7.11b, "samtools": 1.2, "gawk": "5.1.0" }, 
@@ -549,4 +549,4 @@ }, "timestamp": "2024-12-10T14:10:38.383967" } -} \ No newline at end of file +} From 117884c783977964fe3ee9196ebbc44944f2eaa3 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:39:08 +0100 Subject: [PATCH 29/41] add human_gencode_filter to starfusion build --- CHANGELOG.md | 3 +- modules/local/fusioncatcher/download/main.nf | 1 - modules/local/starfusion/build/main.nf | 1 + subworkflows/local/arriba_workflow.nf | 2 +- test.xml | 240 +++++++++++++++++++ 5 files changed, 244 insertions(+), 3 deletions(-) create mode 100644 test.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 1980cd86..a1444c08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated to nf-core/tools 3.0.2 [#504](https://github.com/nf-core/rnafusion/pull/504) - Remove local module `RRNA_TRANSCRIPTS` (replaced by nf-core module) [#541](https://github.com/nf-core/rnafusion/pull/541) - Allow fastq files without a dot before .fn(.gz)/.fastq(.gz) files [#548](https://github.com/nf-core/rnafusion/pull/548) - ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" GTF_TO_REFFLAT + ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" GTF_TO_REFFLAT + ### Fixed - Fixed some Nextflow run-commands in the docs [#491](https://github.com/nf-core/rnafusion/pull/491) diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index ed97c834..03c0a1f3 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -19,7 +19,6 @@ process FUSIONCATCHER_DOWNLOAD { script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' // TODO: move to S3 // def url = diff --git a/modules/local/starfusion/build/main.nf b/modules/local/starfusion/build/main.nf index 436a5e4f..10485ac0 100644 --- 
a/modules/local/starfusion/build/main.nf +++ b/modules/local/starfusion/build/main.nf @@ -33,6 +33,7 @@ process STARFUSION_BUILD { --pfam_db Pfam-A.hmm \\ --dfam_db homo_sapiens_dfam.hmm \\ --max_readlength $params.read_length \\ + --human_gencode_filter \\ --CPU $task.cpus cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf index 5d844678..5575f5c0 100644 --- a/subworkflows/local/arriba_workflow.nf +++ b/subworkflows/local/arriba_workflow.nf @@ -52,7 +52,7 @@ workflow ARRIBA_WORKFLOW { } else { ch_arriba_fusions = reads.combine(Channel.value( file(ch_dummy_file, checkIfExists:true ) ) ) - .map { meta, reads, fusions -> [ meta, fusions ] } + .map { it -> [ it[0], it[2] ] } ch_arriba_fusion_fail = ch_dummy_file } diff --git a/test.xml b/test.xml new file mode 100644 index 00000000..2aed29e1 --- /dev/null +++ b/test.xml @@ -0,0 +1,240 @@ +Nextflow stdout: + +N E X T F L O W ~ version 24.10.2 +Launching `/Users/annick.renevey/Projects/rnafusion/tests/../main.nf` [elated_pasteur] DSL2 - revision: dc78081240 + +------------------------------------------------------ + ,--./,-. 
+ ___ __ __ __ ___ /,-._.--~' + |\ | |__ __ / ` / \ |__) |__ } { + | \| | \__, \__/ | \ |___ \`-._,-`-, + `._,._,' + nf-core/rnafusion 4.0.0dev +------------------------------------------------------ +Input/output options + input : https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv + outdir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output + genomes_base : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references + genome_gencode_version : 46 + starfusion_build : true + all : true + arriba_ref_blacklist : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz + arriba_ref_cytobands : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv + arriba_ref_known_fusions : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz + arriba_ref_protein_domains : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3 + gencode_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode + fusioncatcher_limitSjdbInsertNsj : 2000000 + fusioncatcher_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusioncatcher/human_v46 + fusioncatcher_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt + fusioninspector_limitSjdbInsertNsj: 1000000 + fusionreport_ref : 
/Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusion_report_db + fusionreport_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusion_report_db/mitelman.db + hgnc_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/hgnc/hgnc_complete_set.txt + hgnc_date : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/hgnc/HGNC-DB-timestamp.txt + salmon_index : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/salmon/salmon + salmon_index_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/salmon/salmon/complete_ref_lens.bin + starfusion_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/starfusion/ctat_genome_lib_build_dir + starfusion_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/starfusion/Pfam-A.hmm + starindex_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/star + starindex_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/star/star/Genome + tools_cutoff : 1 + +Read trimming options + fastp_trim : true + adapter_fasta : [] + +Alignment compression options + cram : [] + +Reference genome options + fasta : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa + fai : 
/Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa.fai + genome : GRCh38 + gtf : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46.gtf + refflat : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46.gtf.refflat + rrna_intervals : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46.interval_list + no_cosmic : true + +Institutional config options + config_profile_name : Test profile + config_profile_description : Minimal test dataset to check pipeline function + +Core Nextflow options + runName : elated_pasteur + containerEngine : docker + launchDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0 + workDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/work + projectDir : /Users/annick.renevey/Projects/rnafusion + userName : annick.renevey + profile : test,docker + configFiles :  + +!! Only displaying parameters that differ from the pipeline defaults !! +------------------------------------------------------ +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/rnafusion/blob/master/CITATIONS.md + +WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected. +HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`. 
+ +WARN: Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT` +[13/a75f51] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONCATCHER_BUILD (fusioncatcher_build) +[78/f27e8a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:HGNC_DOWNLOAD (hgnc) +[c9/d109b6] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:ARRIBA_DOWNLOAD (arriba) +[1b/cb8505] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GENCODE_DOWNLOAD (gencode_download) +[61/738ba4] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD (fusionreport) +[6c/ccb83a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FASTQC (test) +[52/593378] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTP (test) +[18/18b5f3] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SAMTOOLS_FAIDX (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) +[38/36e8dd] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_CREATESEQUENCEDICTIONARY (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) +[b3/7aad17] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GET_RRNA_TRANSCRIPTS (get_rrna_bed) +[53/e4d111] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STAR_GENOMEGENERATE (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) +[37/d0930c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GFFREAD (Homo_sapiens.GRCh38.46.gtf) +[26/aa9b36] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GTF_TO_REFFLAT (Homo_sapiens.GRCh38.46.gtf) +[18/66b9c3] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD (star-fusion) +[05/6e1246] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTQC_FOR_FASTP (test) +[83/52bcc1] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONCATCHER_WORKFLOW:FUSIONCATCHER (test) +[5b/c0d7bc] Submitted process > 
NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_BEDTOINTERVALLIST (Homo_sapiens.GRCh38.46.gtf) +[24/c5d0b9] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STAR_FOR_STARFUSION (test) +[43/8be8df] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:STAR_FOR_ARRIBA (test) +[f9/87735b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SALMON_INDEX (Homo_sapiens.GRCh38.46.gtf.fasta) +[b4/64a1cd] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTRNASEQMETRICS (test) +[cc/99a46b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTINSERTSIZEMETRICS (test) +[0b/18a022] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STARFUSION (test) +[aa/000cc3] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:SAMTOOLS_INDEX_FOR_STARFUSION (test) +[3d/4dfbba] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_STRINGTIE (test) +[10/6bc883] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:GATK4_MARKDUPLICATES (test) +[74/397e36] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:ARRIBA_ARRIBA (test) +[df/483739] Submitted process > NFCORE_RNAFUSION:RNAFUSION:SALMON_QUANT (test) +[92/376a4b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_MERGE (1) +[b6/26ca21] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONREPORT_WORKFLOW:FUSIONREPORT (test) +[51/2177ee] Submitted process > NFCORE_RNAFUSION:RNAFUSION:MULTIQC +-[nf-core/rnafusion] Pipeline completed successfully- +Nextflow stderr: + +Nextflow stdout: + +N E X T F L O W ~ version 24.10.2 +Launching `/Users/annick.renevey/Projects/rnafusion/tests/../main.nf` [naughty_becquerel] DSL2 - revision: dc78081240 + +------------------------------------------------------ + ,--./,-. 
+ ___ __ __ __ ___ /,-._.--~' + |\ | |__ __ / ` / \ |__) |__ } { + | \| | \__, \__/ | \ |___ \`-._,-`-, + `._,._,' + nf-core/rnafusion 4.0.0dev +------------------------------------------------------ +Input/output options + input : https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv + outdir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output + genomes_base : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references + genome_gencode_version : 46 + starfusion_build : true + all : true + arriba_ref_blacklist : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz + arriba_ref_cytobands : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv + arriba_ref_known_fusions : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz + arriba_ref_protein_domains : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3 + gencode_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode + fusioncatcher_limitSjdbInsertNsj : 2000000 + fusioncatcher_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusioncatcher/human_v46 + fusioncatcher_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt + fusioninspector_limitSjdbInsertNsj: 1000000 + fusionreport_ref : 
/Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusion_report_db + fusionreport_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusion_report_db/mitelman.db + hgnc_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/hgnc/hgnc_complete_set.txt + hgnc_date : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/hgnc/HGNC-DB-timestamp.txt + salmon_index : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/salmon/salmon + salmon_index_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/salmon/salmon/complete_ref_lens.bin + starfusion_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/starfusion/ctat_genome_lib_build_dir + starfusion_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/starfusion/Pfam-A.hmm + starindex_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/star + starindex_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/star/star/Genome + tools_cutoff : 1 + +Read trimming options + adapter_fasta : [] + +Alignment compression options + cram : [] + +Reference genome options + fasta : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa + fai : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa.fai + genome : 
GRCh38 + gtf : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46.gtf + refflat : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46.gtf.refflat + rrna_intervals : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46.interval_list + no_cosmic : true + +Institutional config options + config_profile_name : Test profile + config_profile_description : Minimal test dataset to check pipeline function + +Core Nextflow options + runName : naughty_becquerel + containerEngine : docker + launchDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1 + workDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/work + projectDir : /Users/annick.renevey/Projects/rnafusion + userName : annick.renevey + profile : test,docker + configFiles :  + +!! Only displaying parameters that differ from the pipeline defaults !! +------------------------------------------------------ +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/rnafusion/blob/master/CITATIONS.md + +WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected. +HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`. 
+ +WARN: Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT` +[8d/ee20bb] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:HGNC_DOWNLOAD (hgnc) +[d6/69323a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GENCODE_DOWNLOAD (gencode_download) +[b9/5c89ae] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD (fusionreport) +[43/355279] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONCATCHER_BUILD (fusioncatcher_build) +[94/cf96bf] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:ARRIBA_DOWNLOAD (arriba) +[e4/d33580] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_CREATESEQUENCEDICTIONARY (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) +[db/fa3dd4] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SAMTOOLS_FAIDX (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) +[90/118532] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FASTQC (test) +[b9/6593b4] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTP (test) +[b4/28611c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GTF_TO_REFFLAT (Homo_sapiens.GRCh38.46.gtf) +[d0/9b767c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GET_RRNA_TRANSCRIPTS (get_rrna_bed) +[c2/fd36fd] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GFFREAD (Homo_sapiens.GRCh38.46.gtf) +[76/b9b243] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STAR_GENOMEGENERATE (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) +[d6/350166] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD (star-fusion) +[89/c38359] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTQC_FOR_FASTP (test) +[92/431af8] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONCATCHER_WORKFLOW:FUSIONCATCHER (test) +[43/c4079e] Submitted process > 
NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_BEDTOINTERVALLIST (Homo_sapiens.GRCh38.46.gtf) +[e6/3c5e19] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SALMON_INDEX (Homo_sapiens.GRCh38.46.gtf.fasta) +[71/f9cbec] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:STAR_FOR_ARRIBA (test) +[96/59d525] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STAR_FOR_STARFUSION (test) +[99/6d4d21] Submitted process > NFCORE_RNAFUSION:RNAFUSION:SALMON_QUANT (test) +[9c/fd7ac5] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:ARRIBA_ARRIBA (test) +[29/6b167b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_STRINGTIE (test) +[39/c6c23e] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STARFUSION (test) +[35/474e2f] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:SAMTOOLS_INDEX_FOR_STARFUSION (test) +[66/b0ff6a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:GATK4_MARKDUPLICATES (test) +[63/a246b6] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTRNASEQMETRICS (test) +[41/50018c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTINSERTSIZEMETRICS (test) +[8c/96974e] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_MERGE (1) +[fe/e581e9] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONREPORT_WORKFLOW:FUSIONREPORT (test) +[99/47d296] Submitted process > NFCORE_RNAFUSION:RNAFUSION:MULTIQC +-[nf-core/rnafusion] Pipeline completed successfully- +Nextflow stderr: + + \ No newline at end of file From 3788788d1b26dd07dd9cfb85c8cb1091f6630f8e Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:44:52 +0100 Subject: [PATCH 30/41] remove test.xml --- test.xml | 240 ------------------------------------------------------- 1 file changed, 240 deletions(-) delete mode 100644 test.xml diff --git a/test.xml b/test.xml 
deleted file mode 100644 index 2aed29e1..00000000 --- a/test.xml +++ /dev/null @@ -1,240 +0,0 @@ -Nextflow stdout: - -N E X T F L O W ~ version 24.10.2 -Launching `/Users/annick.renevey/Projects/rnafusion/tests/../main.nf` [elated_pasteur] DSL2 - revision: dc78081240 - ------------------------------------------------------- - ,--./,-. - ___ __ __ __ ___ /,-._.--~' - |\ | |__ __ / ` / \ |__) |__ } { - | \| | \__, \__/ | \ |___ \`-._,-`-, - `._,._,' - nf-core/rnafusion 4.0.0dev ------------------------------------------------------- -Input/output options - input : https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv - outdir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output - genomes_base : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references - genome_gencode_version : 46 - starfusion_build : true - all : true - arriba_ref_blacklist : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz - arriba_ref_cytobands : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv - arriba_ref_known_fusions : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz - arriba_ref_protein_domains : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3 - gencode_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode - fusioncatcher_limitSjdbInsertNsj : 2000000 - fusioncatcher_ref : 
/Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusioncatcher/human_v46 - fusioncatcher_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt - fusioninspector_limitSjdbInsertNsj: 1000000 - fusionreport_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusion_report_db - fusionreport_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/fusion_report_db/mitelman.db - hgnc_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/hgnc/hgnc_complete_set.txt - hgnc_date : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/hgnc/HGNC-DB-timestamp.txt - salmon_index : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/salmon/salmon - salmon_index_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/salmon/salmon/complete_ref_lens.bin - starfusion_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/starfusion/ctat_genome_lib_build_dir - starfusion_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/starfusion/Pfam-A.hmm - starindex_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/star - starindex_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/star/star/Genome - tools_cutoff : 1 - -Read trimming options - fastp_trim : true - adapter_fasta : [] - -Alignment 
compression options - cram : [] - -Reference genome options - fasta : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa - fai : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa.fai - genome : GRCh38 - gtf : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46.gtf - refflat : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46.gtf.refflat - rrna_intervals : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/output/references/gencode/Homo_sapiens_GRCh38_46.interval_list - no_cosmic : true - -Institutional config options - config_profile_name : Test profile - config_profile_description : Minimal test dataset to check pipeline function - -Core Nextflow options - runName : elated_pasteur - containerEngine : docker - launchDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0 - workDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/acc69f529568702c4fabd8a30cf1fcb0/work - projectDir : /Users/annick.renevey/Projects/rnafusion - userName : annick.renevey - profile : test,docker - configFiles :  - -!! Only displaying parameters that differ from the pipeline defaults !! ------------------------------------------------------- -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/rnafusion/blob/master/CITATIONS.md - -WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected. -HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`. 
- -WARN: Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT` -[13/a75f51] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONCATCHER_BUILD (fusioncatcher_build) -[78/f27e8a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:HGNC_DOWNLOAD (hgnc) -[c9/d109b6] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:ARRIBA_DOWNLOAD (arriba) -[1b/cb8505] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GENCODE_DOWNLOAD (gencode_download) -[61/738ba4] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD (fusionreport) -[6c/ccb83a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FASTQC (test) -[52/593378] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTP (test) -[18/18b5f3] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SAMTOOLS_FAIDX (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) -[38/36e8dd] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_CREATESEQUENCEDICTIONARY (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) -[b3/7aad17] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GET_RRNA_TRANSCRIPTS (get_rrna_bed) -[53/e4d111] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STAR_GENOMEGENERATE (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) -[37/d0930c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GFFREAD (Homo_sapiens.GRCh38.46.gtf) -[26/aa9b36] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GTF_TO_REFFLAT (Homo_sapiens.GRCh38.46.gtf) -[18/66b9c3] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD (star-fusion) -[05/6e1246] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTQC_FOR_FASTP (test) -[83/52bcc1] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONCATCHER_WORKFLOW:FUSIONCATCHER (test) -[5b/c0d7bc] Submitted process > 
NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_BEDTOINTERVALLIST (Homo_sapiens.GRCh38.46.gtf) -[24/c5d0b9] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STAR_FOR_STARFUSION (test) -[43/8be8df] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:STAR_FOR_ARRIBA (test) -[f9/87735b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SALMON_INDEX (Homo_sapiens.GRCh38.46.gtf.fasta) -[b4/64a1cd] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTRNASEQMETRICS (test) -[cc/99a46b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTINSERTSIZEMETRICS (test) -[0b/18a022] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STARFUSION (test) -[aa/000cc3] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:SAMTOOLS_INDEX_FOR_STARFUSION (test) -[3d/4dfbba] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_STRINGTIE (test) -[10/6bc883] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:GATK4_MARKDUPLICATES (test) -[74/397e36] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:ARRIBA_ARRIBA (test) -[df/483739] Submitted process > NFCORE_RNAFUSION:RNAFUSION:SALMON_QUANT (test) -[92/376a4b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_MERGE (1) -[b6/26ca21] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONREPORT_WORKFLOW:FUSIONREPORT (test) -[51/2177ee] Submitted process > NFCORE_RNAFUSION:RNAFUSION:MULTIQC --[nf-core/rnafusion] Pipeline completed successfully- -Nextflow stderr: - -Nextflow stdout: - -N E X T F L O W ~ version 24.10.2 -Launching `/Users/annick.renevey/Projects/rnafusion/tests/../main.nf` [naughty_becquerel] DSL2 - revision: dc78081240 - ------------------------------------------------------- - ,--./,-. 
- ___ __ __ __ ___ /,-._.--~' - |\ | |__ __ / ` / \ |__) |__ } { - | \| | \__, \__/ | \ |___ \`-._,-`-, - `._,._,' - nf-core/rnafusion 4.0.0dev ------------------------------------------------------- -Input/output options - input : https://raw.githubusercontent.com/nf-core/test-datasets/rnafusion/testdata/human/samplesheet_valid.csv - outdir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output - genomes_base : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references - genome_gencode_version : 46 - starfusion_build : true - all : true - arriba_ref_blacklist : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz - arriba_ref_cytobands : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv - arriba_ref_known_fusions : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz - arriba_ref_protein_domains : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3 - gencode_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode - fusioncatcher_limitSjdbInsertNsj : 2000000 - fusioncatcher_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusioncatcher/human_v46 - fusioncatcher_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt - fusioninspector_limitSjdbInsertNsj: 1000000 - fusionreport_ref : 
/Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusion_report_db - fusionreport_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/fusion_report_db/mitelman.db - hgnc_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/hgnc/hgnc_complete_set.txt - hgnc_date : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/hgnc/HGNC-DB-timestamp.txt - salmon_index : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/salmon/salmon - salmon_index_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/salmon/salmon/complete_ref_lens.bin - starfusion_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/starfusion/ctat_genome_lib_build_dir - starfusion_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/starfusion/Pfam-A.hmm - starindex_ref : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/star - starindex_ref_stub_check : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/star/star/Genome - tools_cutoff : 1 - -Read trimming options - adapter_fasta : [] - -Alignment compression options - cram : [] - -Reference genome options - fasta : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa - fai : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46_dna_primary_assembly.fa.fai - genome : 
GRCh38 - gtf : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46.gtf - refflat : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46.gtf.refflat - rrna_intervals : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/output/references/gencode/Homo_sapiens_GRCh38_46.interval_list - no_cosmic : true - -Institutional config options - config_profile_name : Test profile - config_profile_description : Minimal test dataset to check pipeline function - -Core Nextflow options - runName : naughty_becquerel - containerEngine : docker - launchDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1 - workDir : /Users/annick.renevey/Projects/rnafusion/.nf-test/tests/5d4f02e50caba01d5f317164e187f7c1/work - projectDir : /Users/annick.renevey/Projects/rnafusion - userName : annick.renevey - profile : test,docker - configFiles :  - -!! Only displaying parameters that differ from the pipeline defaults !! ------------------------------------------------------- -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/rnafusion/blob/master/CITATIONS.md - -WARN: nf-core pipelines do not accept positional arguments. The positional argument `true` has been detected. -HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`. 
- -WARN: Skipping COSMIC DB download from `FUSIONREPORT_DOWNLOAD` and skip using it in `FUSIONREPORT` -[8d/ee20bb] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:HGNC_DOWNLOAD (hgnc) -[d6/69323a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GENCODE_DOWNLOAD (gencode_download) -[b9/5c89ae] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONREPORT_DOWNLOAD (fusionreport) -[43/355279] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:FUSIONCATCHER_BUILD (fusioncatcher_build) -[94/cf96bf] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:ARRIBA_DOWNLOAD (arriba) -[e4/d33580] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_CREATESEQUENCEDICTIONARY (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) -[db/fa3dd4] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SAMTOOLS_FAIDX (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) -[90/118532] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FASTQC (test) -[b9/6593b4] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTP (test) -[b4/28611c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GTF_TO_REFFLAT (Homo_sapiens.GRCh38.46.gtf) -[d0/9b767c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GET_RRNA_TRANSCRIPTS (get_rrna_bed) -[c2/fd36fd] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GFFREAD (Homo_sapiens.GRCh38.46.gtf) -[76/b9b243] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STAR_GENOMEGENERATE (Homo_sapiens.GRCh38.46_dna_primary_assembly.fa) -[d6/350166] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD (star-fusion) -[89/c38359] Submitted process > NFCORE_RNAFUSION:RNAFUSION:TRIM_WORKFLOW:FASTQC_FOR_FASTP (test) -[92/431af8] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONCATCHER_WORKFLOW:FUSIONCATCHER (test) -[43/c4079e] Submitted process > 
NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:GATK4_BEDTOINTERVALLIST (Homo_sapiens.GRCh38.46.gtf) -[e6/3c5e19] Submitted process > NFCORE_RNAFUSION:RNAFUSION:BUILD_REFERENCES:SALMON_INDEX (Homo_sapiens.GRCh38.46.gtf.fasta) -[71/f9cbec] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:STAR_FOR_ARRIBA (test) -[96/59d525] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STAR_FOR_STARFUSION (test) -[99/6d4d21] Submitted process > NFCORE_RNAFUSION:RNAFUSION:SALMON_QUANT (test) -[9c/fd7ac5] Submitted process > NFCORE_RNAFUSION:RNAFUSION:ARRIBA_WORKFLOW:ARRIBA_ARRIBA (test) -[29/6b167b] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_STRINGTIE (test) -[39/c6c23e] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:STARFUSION (test) -[35/474e2f] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STARFUSION_WORKFLOW:SAMTOOLS_INDEX_FOR_STARFUSION (test) -[66/b0ff6a] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:GATK4_MARKDUPLICATES (test) -[63/a246b6] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTRNASEQMETRICS (test) -[41/50018c] Submitted process > NFCORE_RNAFUSION:RNAFUSION:QC_WORKFLOW:PICARD_COLLECTINSERTSIZEMETRICS (test) -[8c/96974e] Submitted process > NFCORE_RNAFUSION:RNAFUSION:STRINGTIE_WORKFLOW:STRINGTIE_MERGE (1) -[fe/e581e9] Submitted process > NFCORE_RNAFUSION:RNAFUSION:FUSIONREPORT_WORKFLOW:FUSIONREPORT (test) -[99/47d296] Submitted process > NFCORE_RNAFUSION:RNAFUSION:MULTIQC --[nf-core/rnafusion] Pipeline completed successfully- -Nextflow stderr: - - \ No newline at end of file From 8e9be7cd6f83b175bdc15ef0a3fc8f9c89a33897 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:45:38 +0100 Subject: [PATCH 31/41] update snap --- tests/test_stub.nf.test.snap | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_stub.nf.test.snap 
b/tests/test_stub.nf.test.snap index 0c4a3578..6cefb329 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -80,17 +80,17 @@ "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": 2.7.11b, + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": 2.7.11b, + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": 2.7.11b, + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, @@ -272,7 +272,7 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-10T14:12:48.526275" + "timestamp": "2024-12-11T12:43:59.534889" }, "stub test with fastp trim": { "content": [ @@ -355,17 +355,17 @@ "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": 2.7.11b, + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": 2.7.11b, + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": 2.7.11b, + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, @@ -547,6 +547,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-10T14:10:38.383967" + "timestamp": "2024-12-11T12:40:22.066494" } -} +} \ No newline at end of file From 5cdf95aabc8c45e9687889558db88eddb603c925 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:23:10 +0100 Subject: [PATCH 32/41] try updating snapshot again --- tests/test_stub.nf.test.snap | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index 6cefb329..83c76dd7 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -80,17 +80,17 @@ "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": null, + "star": "2.7.11b", "samtools": 
1.2, "gawk": "5.1.0" }, @@ -355,17 +355,17 @@ "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, @@ -549,4 +549,4 @@ }, "timestamp": "2024-12-11T12:40:22.066494" } -} \ No newline at end of file +} From fa128b6805a6884a7ff29fbe30159e8fa229a5a3 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:16:28 +0100 Subject: [PATCH 33/41] avoid using meta for remapping --- subworkflows/local/arriba_workflow.nf | 2 +- subworkflows/local/starfusion_workflow.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/arriba_workflow.nf b/subworkflows/local/arriba_workflow.nf index 5575f5c0..10ddc16a 100644 --- a/subworkflows/local/arriba_workflow.nf +++ b/subworkflows/local/arriba_workflow.nf @@ -33,7 +33,7 @@ workflow ARRIBA_WORKFLOW { ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions - ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ meta, file -> return file} + ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ it -> it[1]} } if (params.cram.contains('arriba') ){ diff --git a/subworkflows/local/starfusion_workflow.nf b/subworkflows/local/starfusion_workflow.nf index d6a0668a..f70396f5 100644 --- a/subworkflows/local/starfusion_workflow.nf +++ b/subworkflows/local/starfusion_workflow.nf @@ -21,7 +21,7 @@ workflow STARFUSION_WORKFLOW { if ((params.starfusion || params.all || params.stringtie) && !params.fusioninspector_only) { if (params.starfusion_fusions){ ch_starfusion_fusions = reads.combine(Channel.value(file(params.starfusion_fusions, checkIfExists:true))) - .map { meta, reads, fusions -> [ meta, fusions ] } + .map { it -> [ it[0], it[2] ] } } 
else { STAR_FOR_STARFUSION( reads, ch_starindex_ref, ch_gtf, params.star_ignore_sjdbgtf, '', params.seq_center ?: '') ch_versions = ch_versions.mix(STAR_FOR_STARFUSION.out.versions) @@ -51,7 +51,7 @@ workflow STARFUSION_WORKFLOW { } else { ch_starfusion_fusions = reads.combine(Channel.value(file(ch_dummy_file, checkIfExists:true))) - .map { meta, reads, fusions -> [ meta, fusions ] } + .map { it -> [ it[0], it[2] ] } ch_star_stats = Channel.empty() ch_star_gene_count = Channel.empty() } From 17ef23354947b4965c84131f9eb927bc323d1cbc Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:49:22 +0100 Subject: [PATCH 34/41] use custom container for fusioncatcher, fix typo --- modules/local/fusioncatcher/detect/main.nf | 3 +- modules/local/fusioncatcher/download/main.nf | 2 +- tests/test_stub.nf.test.snap | 198 ++----------------- workflows/rnafusion.nf | 4 +- 4 files changed, 19 insertions(+), 188 deletions(-) diff --git a/modules/local/fusioncatcher/detect/main.nf b/modules/local/fusioncatcher/detect/main.nf index 9a092846..e753ff85 100644 --- a/modules/local/fusioncatcher/detect/main.nf +++ b/modules/local/fusioncatcher/detect/main.nf @@ -3,7 +3,8 @@ process FUSIONCATCHER { label 'process_high' conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f" + + container "docker.io/rannickscilifelab/fusioncatcher:1.34" input: tuple val(meta), path(fasta) diff --git a/modules/local/fusioncatcher/download/main.nf b/modules/local/fusioncatcher/download/main.nf index 03c0a1f3..e04c24f6 100644 --- a/modules/local/fusioncatcher/download/main.nf +++ b/modules/local/fusioncatcher/download/main.nf @@ -3,7 +3,7 @@ process FUSIONCATCHER_DOWNLOAD { label 'process_medium' conda "${moduleDir}/environment.yml" - container "community.wave.seqera.io/library/fusioncatcher:1.33--4733482b637ef92f" + container "docker.io/rannickscilifelab/fusioncatcher:1.34" input: diff 
--git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index 83c76dd7..e3a8bb73 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -1,11 +1,8 @@ { "stub test no fastp trim": { "content": [ - 31, + 20, { - "ARRIBA_ARRIBA": { - "arriba": "2.4.0" - }, "ARRIBA_DOWNLOAD": { "arriba_download": "2.4.0" }, @@ -19,14 +16,11 @@ "fastqc": "0.12.1" }, "FUSIONCATCHER": { - "fusioncatcher": 1.33 + "fusioncatcher": 1.35 }, "FUSIONCATCHER_BUILD": { "fusioncatcher": "fusioncatcher.py 1.35" }, - "FUSIONREPORT": { - "fusion_report": "2.1.5" - }, "FUSIONREPORT_DOWNLOAD": { "fusion_report": "2.1.5" }, @@ -36,10 +30,6 @@ "GATK4_CREATESEQUENCEDICTIONARY": { "gatk4": "4.6.1.0" }, - "GATK4_MARKDUPLICATES": { - "gatk4": "4.5.0.0", - "samtools": "1.19.2" - }, "GENCODE_DOWNLOAD": { "wget": null }, @@ -55,59 +45,32 @@ "HGNC_DOWNLOAD": { "wget": null }, - "PICARD_COLLECTINSERTSIZEMETRICS": { - "picard": "3.3.0" - }, - "PICARD_COLLECTRNASEQMETRICS": { - "picard": "3.3.0" - }, - "SALMON_INDEX": { - "salmon": "1.10.3" - }, - "SALMON_QUANT": { - "salmon": "1.10.3" - }, "SAMTOOLS_FAIDX": { "samtools": 1.21 }, - "SAMTOOLS_INDEX_FOR_STARFUSION": { - "samtools": 1.21 - }, - "STARFUSION": { - "STAR-Fusion": "1.7.0" - }, "STARFUSION_BUILD": { "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": "2.7.11b", + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": "2.7.11b", + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": "2.7.11b", + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, - "STRINGTIE_MERGE": { - "stringtie": "2.2.1" - }, - "STRINGTIE_STRINGTIE": { - "stringtie": "2.2.3" - }, "Workflow": { "nf-core/rnafusion": "v4.0.0dev" } }, [ - "arriba", - "arriba/test.arriba.fusions.discarded.tsv", - "arriba/test.arriba.fusions.tsv", "fastp", "fastp/test.fastp.html", "fastp/test.fastp.json", @@ -126,32 +89,11 @@ "fusioncatcher/test.fusioncatcher.fusion-genes.txt", 
"fusioncatcher/test.fusioncatcher.log", "fusioncatcher/test.fusioncatcher.summary.txt", - "fusionreport", - "fusionreport/test", - "fusionreport/test/AAA_BBB.html", - "fusionreport/test/test.fusionreport.tsv", - "fusionreport/test/test.fusionreport_filtered.tsv", - "fusionreport/test/test.fusions.csv", - "fusionreport/test/test.fusions.json", - "fusionreport/test/test_fusionreport_index.html", "gatk4", "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", "get", "get/rrna.bed", "get/rrna.gtf", - "multiqc", - "multiqc/multiqc_data", - "multiqc/multiqc_plots", - "multiqc/multiqc_report.html", - "picard", - "picard/test.bai", - "picard/test.bam", - "picard/test.bam.metrics", - "picard/test.cram", - "picard/test.cram.crai", - "picard/test.rna_metrics", - "picard/test_collectinsertsize.pdf", - "picard/test_collectinsertsize.txt", "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "references", @@ -214,10 +156,6 @@ "references/starfusion", "references/starfusion/ctat_genome_lib_build_dir", "references/starfusion/ref_annot.cdna.fa", - "salmon", - "salmon/test", - "salmon/test_lib_format_counts.json", - "salmon/test_meta_info.json", "star_for_arriba", "star_for_arriba/test.Aligned.sortedByCoord.out.bam", "star_for_arriba/test.Aligned.unsort.out.bam", @@ -249,38 +187,23 @@ "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", "star_for_starfusion/test.out.sam", "star_for_starfusion/test.sortedByCoord.out.bam", - "star_for_starfusion/test.sortedByCoord.out.bam.bai", "star_for_starfusion/test.tab", "star_for_starfusion/test.toTranscriptome.out.bam", "star_for_starfusion/test.unmapped_1.fastq.gz", "star_for_starfusion/test.unmapped_2.fastq.gz", - "star_for_starfusion/testXd.out.bam", - "starfusion", - "starfusion/test.starfusion.abridged.coding_effect.tsv", - "starfusion/test.starfusion.abridged.tsv", - "starfusion/test.starfusion.fusion_predictions.tsv", - "stringtie", - "stringtie/[:]", - "stringtie/[:]/stringtie.merged.gtf", - 
"stringtie/test.ballgown", - "stringtie/test.coverage.gtf", - "stringtie/test.gene.abundance.txt", - "stringtie/test.transcripts.gtf" + "star_for_starfusion/testXd.out.bam" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T12:43:59.534889" + "timestamp": "2024-12-17T16:44:02.121512" }, "stub test with fastp trim": { "content": [ - 31, + 19, { - "ARRIBA_ARRIBA": { - "arriba": "2.4.0" - }, "ARRIBA_DOWNLOAD": { "arriba_download": "2.4.0" }, @@ -294,14 +217,11 @@ "fastqc": "0.12.1" }, "FUSIONCATCHER": { - "fusioncatcher": 1.33 + "fusioncatcher": 1.35 }, "FUSIONCATCHER_BUILD": { "fusioncatcher": "fusioncatcher.py 1.35" }, - "FUSIONREPORT": { - "fusion_report": "2.1.5" - }, "FUSIONREPORT_DOWNLOAD": { "fusion_report": "2.1.5" }, @@ -311,10 +231,6 @@ "GATK4_CREATESEQUENCEDICTIONARY": { "gatk4": "4.6.1.0" }, - "GATK4_MARKDUPLICATES": { - "gatk4": "4.5.0.0", - "samtools": "1.19.2" - }, "GENCODE_DOWNLOAD": { "wget": null }, @@ -330,59 +246,27 @@ "HGNC_DOWNLOAD": { "wget": null }, - "PICARD_COLLECTINSERTSIZEMETRICS": { - "picard": "3.3.0" - }, - "PICARD_COLLECTRNASEQMETRICS": { - "picard": "3.3.0" - }, - "SALMON_INDEX": { - "salmon": "1.10.3" - }, - "SALMON_QUANT": { - "salmon": "1.10.3" - }, "SAMTOOLS_FAIDX": { "samtools": 1.21 }, - "SAMTOOLS_INDEX_FOR_STARFUSION": { - "samtools": 1.21 - }, - "STARFUSION": { - "STAR-Fusion": "1.7.0" - }, "STARFUSION_BUILD": { "STAR-Fusion": "1.7.0" }, "STAR_FOR_ARRIBA": { - "star": "2.7.11b", - "samtools": 1.2, - "gawk": "5.1.0" - }, - "STAR_FOR_STARFUSION": { - "star": "2.7.11b", + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": "2.7.11b", + "star": null, "samtools": 1.2, "gawk": "5.1.0" }, - "STRINGTIE_MERGE": { - "stringtie": "2.2.1" - }, - "STRINGTIE_STRINGTIE": { - "stringtie": "2.2.3" - }, "Workflow": { "nf-core/rnafusion": "v4.0.0dev" } }, [ - "arriba", - "arriba/test.arriba.fusions.discarded.tsv", - "arriba/test.arriba.fusions.tsv", "fastp", 
"fastp/test.fastp.html", "fastp/test.fastp.json", @@ -401,32 +285,11 @@ "fusioncatcher/test.fusioncatcher.fusion-genes.txt", "fusioncatcher/test.fusioncatcher.log", "fusioncatcher/test.fusioncatcher.summary.txt", - "fusionreport", - "fusionreport/test", - "fusionreport/test/AAA_BBB.html", - "fusionreport/test/test.fusionreport.tsv", - "fusionreport/test/test.fusionreport_filtered.tsv", - "fusionreport/test/test.fusions.csv", - "fusionreport/test/test.fusions.json", - "fusionreport/test/test_fusionreport_index.html", "gatk4", "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", "get", "get/rrna.bed", "get/rrna.gtf", - "multiqc", - "multiqc/multiqc_data", - "multiqc/multiqc_plots", - "multiqc/multiqc_report.html", - "picard", - "picard/test.bai", - "picard/test.bam", - "picard/test.bam.metrics", - "picard/test.cram", - "picard/test.cram.crai", - "picard/test.rna_metrics", - "picard/test_collectinsertsize.pdf", - "picard/test_collectinsertsize.txt", "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "references", @@ -452,23 +315,6 @@ "references/hgnc", "references/hgnc/HGNC-DB-timestamp.txt", "references/hgnc/hgnc_complete_set.txt", - "references/salmon", - "references/salmon/salmon", - "references/salmon/salmon/complete_ref_lens.bin", - "references/salmon/salmon/ctable.bin", - "references/salmon/salmon/ctg_offsets.bin", - "references/salmon/salmon/duplicate_clusters.tsv", - "references/salmon/salmon/info.json", - "references/salmon/salmon/mphf.bin", - "references/salmon/salmon/pos.bin", - "references/salmon/salmon/pre_indexing.log", - "references/salmon/salmon/rank.bin", - "references/salmon/salmon/refAccumLengths.bin", - "references/salmon/salmon/ref_indexing.log", - "references/salmon/salmon/reflengths.bin", - "references/salmon/salmon/refseq.bin", - "references/salmon/salmon/seq.bin", - "references/salmon/salmon/versionInfo.json", "references/star", "references/star/Genome", "references/star/Log.out", @@ -489,10 +335,6 @@ 
"references/starfusion", "references/starfusion/ctat_genome_lib_build_dir", "references/starfusion/ref_annot.cdna.fa", - "salmon", - "salmon/test", - "salmon/test_lib_format_counts.json", - "salmon/test_meta_info.json", "star_for_arriba", "star_for_arriba/test.Aligned.sortedByCoord.out.bam", "star_for_arriba/test.Aligned.unsort.out.bam", @@ -524,29 +366,17 @@ "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", "star_for_starfusion/test.out.sam", "star_for_starfusion/test.sortedByCoord.out.bam", - "star_for_starfusion/test.sortedByCoord.out.bam.bai", "star_for_starfusion/test.tab", "star_for_starfusion/test.toTranscriptome.out.bam", "star_for_starfusion/test.unmapped_1.fastq.gz", "star_for_starfusion/test.unmapped_2.fastq.gz", - "star_for_starfusion/testXd.out.bam", - "starfusion", - "starfusion/test.starfusion.abridged.coding_effect.tsv", - "starfusion/test.starfusion.abridged.tsv", - "starfusion/test.starfusion.fusion_predictions.tsv", - "stringtie", - "stringtie/[:]", - "stringtie/[:]/stringtie.merged.gtf", - "stringtie/test.ballgown", - "stringtie/test.coverage.gtf", - "stringtie/test.gene.abundance.txt", - "stringtie/test.transcripts.gtf" + "star_for_starfusion/testXd.out.bam" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T12:40:22.066494" + "timestamp": "2024-12-17T16:42:11.182735" } -} +} \ No newline at end of file diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index d992bf58..91742e1b 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -90,7 +90,7 @@ workflow RNAFUSION { BUILD_REFERENCES.out.ch_arriba_ref_blacklist, BUILD_REFERENCES.out.ch_arriba_ref_cytobands, BUILD_REFERENCES.out.ch_arriba_ref_known_fusions, - BUILD_REFERENCES.out.ch_arriba_ref_protein_domains + BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, params.arriba, // boolean params.all, // boolean params.fusioninspector_only, // boolean @@ -139,7 +139,7 @@ workflow RNAFUSION { - //Run fusionInpector + //Run 
fusionInspector FUSIONINSPECTOR_WORKFLOW ( ch_reads, FUSIONREPORT_WORKFLOW.out.fusion_list, From f8dd5169afb714f6f823ba64a3a58099ba695407 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:27:34 +0100 Subject: [PATCH 35/41] update changelog --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a52f01f4..33c5b0cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,9 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove double nested folder introduced in [#577](https://github.com/nf-core/rnafusion/pull/577), [#581](https://github.com/nf-core/rnafusion/pull/581) - Use docker.io and galaxy containers for fusioncatcher and starfusion (incl. fusioninspector) instead of wave as they are not functional on wave [#588](https://github.com/nf-core/rnafusion/pull/588) - Update STAR-Fusion to 1.14 [#588](https://github.com/nf-core/rnafusion/pull/588) - - ext.args = "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" GTF_TO_REFFLAT - +- Use -genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons for GTF_TO_REFFLAT[#505](https://github.com/nf-core/rnafusion/pull/505) ### Fixed From d62c4b5d1f3e691497956dfe77574cc3f7c95dc7 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:54:27 +0100 Subject: [PATCH 36/41] update changelog, retrofit trim_workflow --- CHANGELOG.md | 17 +++++++++++++---- subworkflows/local/trim_workflow/main.nf | 6 ++++-- workflows/rnafusion.nf | 4 +++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 33c5b0cc..51fef20e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add nf-test to local subworkflow: `TRIM_WORKFLOW` [#572](https://github.com/nf-core/rnafusion/pull/572) - Add 
nf-test to local module: `FUSIONREPORT_DETECT`. Improve `FUSIONREPORT_DOWNLOAD` module [#577](https://github.com/nf-core/rnafusion/pull/577) - Add nf-test to local subworkflow: `ARRIBA_WORKFLOW` [#578](https://github.com/nf-core/rnafusion/pull/578) +- Add parameter `--references_only` when no data should be analysed, but only the references should be built [#505](https://github.com/nf-core/rnafusion/pull/505) ### Changed @@ -31,7 +32,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove double nested folder introduced in [#577](https://github.com/nf-core/rnafusion/pull/577), [#581](https://github.com/nf-core/rnafusion/pull/581) - Use docker.io and galaxy containers for fusioncatcher and starfusion (incl. fusioninspector) instead of wave as they are not functional on wave [#588](https://github.com/nf-core/rnafusion/pull/588) - Update STAR-Fusion to 1.14 [#588](https://github.com/nf-core/rnafusion/pull/588) -- Use -genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons for GTF_TO_REFFLAT[#505](https://github.com/nf-core/rnafusion/pull/505) +- Use "-genePredExt -geneNameAsName2 -ignoreGroupsWithoutExons" (to mimic gms/tomte) for GTF_TO_REFFLAT [#505](https://github.com/nf-core/rnafusion/pull/505) +- Integrate reference building in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) +- Move from ensembl to gencode base [#505](https://github.com/nf-core/rnafusion/pull/505) +- Update from ensembl 102 to gencode 46 default references [#505](https://github.com/nf-core/rnafusion/pull/505) ### Fixed @@ -46,12 +50,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - Remove fusionGDB from documentation and fusion-report download stubs [#503](https://github.com/nf-core/rnafusion/pull/503) +- Removed test-build as reference building gets integrated in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) +- Removed parameter `--build_references` + ### 
Parameters -| Old parameter | New parameter | -| ------------- | ------------- | -| | `--no_cosmic` | +| Old parameter | New parameter | +| -------------------- | ------------------- | +| | `--no_cosmic` | +| `--build_references` | `--references_only` | + ## v3.0.2 - [2024-04-10] diff --git a/subworkflows/local/trim_workflow/main.nf b/subworkflows/local/trim_workflow/main.nf index 512a07b5..d548c8ae 100644 --- a/subworkflows/local/trim_workflow/main.nf +++ b/subworkflows/local/trim_workflow/main.nf @@ -7,6 +7,8 @@ workflow TRIM_WORKFLOW { take: reads // channel [ meta, [ fastq files ] ] + adapter_fasta // channel [ path ] + fastp_trim // boolean main: ch_versions = Channel.empty() @@ -14,8 +16,8 @@ workflow TRIM_WORKFLOW { ch_fastp_json = Channel.empty() ch_fastqc_trimmed = Channel.empty() - if ( {params.fastp_trim} ) { - FASTP(reads, {params.adapter_fasta}, false, false, false) + if ( fastp_trim ) { + FASTP(reads, adapter_fasta.ifEmpty( [] ), false, false, false) ch_versions = ch_versions.mix(FASTP.out.versions) FASTQC_FOR_FASTP(FASTP.out.reads) diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 91742e1b..a755311c 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -64,7 +64,9 @@ workflow RNAFUSION { // Trimming // TRIM_WORKFLOW ( - ch_samplesheet + ch_samplesheet, + params.adapter_fasta, + params.fastp_trim ) ch_reads = TRIM_WORKFLOW.out.ch_reads_all ch_versions = ch_versions.mix(TRIM_WORKFLOW.out.versions) From de5d6d5aea8306d2853fef7c788f7c20f7a50ee8 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 20 Dec 2024 09:57:45 +0100 Subject: [PATCH 37/41] add species as parameter --- CHANGELOG.md | 4 +- nextflow.config | 1 + subworkflows/local/build_references.nf | 6 +- tests/test_stub.nf.test.snap | 368 +------------------------ 4 files changed, 13 insertions(+), 366 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51fef20e..1838af9e 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -53,15 +53,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removed test-build as reference building gets integrated in the main workflow [#505](https://github.com/nf-core/rnafusion/pull/505) - Removed parameter `--build_references` - ### Parameters -| Old parameter | New parameter | +| Old parameter | New parameter | | -------------------- | ------------------- | | | `--no_cosmic` | | `--build_references` | `--references_only` | - ## v3.0.2 - [2024-04-10] ### Added diff --git a/nextflow.config b/nextflow.config index a1e746dd..a61462e3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,6 +31,7 @@ params { starfusion_build = true genomes = [:] fusion_annot_lib = "https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz" // path to dat.gz CTAT genome lib // TODO: Update to latest with s3 link when available + species = "human" // Filtering tools_cutoff = 1 diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 8e9cc4b1..56fd8eb2 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -88,9 +88,9 @@ workflow BUILD_REFERENCES { if (!file(params.salmon_index).exists() || file(params.salmon_index).isEmpty() || !file(params.salmon_index_stub_check).exists() || file(params.salmon_index_stub_check).isEmpty()){ // add condition for qc - GFFREAD(ch_gtf, ch_fasta.map{ meta, fasta -> [ fasta ] }) + GFFREAD(ch_gtf, ch_fasta.map{ it -> it[1] }) ch_versions = ch_versions.mix(GFFREAD.out.versions) - SALMON_INDEX(ch_fasta.map{ meta, fasta -> [ fasta ] }, GFFREAD.out.gffread_fasta.map{ meta, gffread_fasta -> [ gffread_fasta ] }) + SALMON_INDEX(ch_fasta.map{ it -> it[1] }, GFFREAD.out.gffread_fasta.map{ it -> it[1] }) ch_versions = ch_versions.mix(SALMON_INDEX.out.versions) ch_salmon_index = SALMON_INDEX.out.index } else { @@ -140,7 +140,7 @@ workflow BUILD_REFERENCES { if 
((params.starfusion || params.all) && (!file(params.starfusion_ref).exists() || file(params.starfusion_ref).isEmpty() || !file(params.starfusion_ref_stub_check).exists() || file(params.starfusion_ref_stub_check).isEmpty() )) { - STARFUSION_BUILD(ch_fasta, ch_gtf) + STARFUSION_BUILD(ch_fasta, ch_gtf, params.fusion_annot_lib, params.species) ch_versions = ch_versions.mix(STARFUSION_BUILD.out.versions) ch_starfusion_ref = STARFUSION_BUILD.out.reference } diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index e3a8bb73..f661aac6 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -1,382 +1,30 @@ { "stub test no fastp trim": { "content": [ - 20, - { - "ARRIBA_DOWNLOAD": { - "arriba_download": "2.4.0" - }, - "FASTP": { - "fastp": "0.23.4" - }, - "FASTQC": { - "fastqc": "0.12.1" - }, - "FASTQC_FOR_FASTP": { - "fastqc": "0.12.1" - }, - "FUSIONCATCHER": { - "fusioncatcher": 1.35 - }, - "FUSIONCATCHER_BUILD": { - "fusioncatcher": "fusioncatcher.py 1.35" - }, - "FUSIONREPORT_DOWNLOAD": { - "fusion_report": "2.1.5" - }, - "GATK4_BEDTOINTERVALLIST": { - "gatk4": "4.6.1.0" - }, - "GATK4_CREATESEQUENCEDICTIONARY": { - "gatk4": "4.6.1.0" - }, - "GENCODE_DOWNLOAD": { - "wget": null - }, - "GET_RRNA_TRANSCRIPTS": { - "get_rrna_transcripts": "v1.0" - }, - "GFFREAD": { - "gffread": "0.12.7" - }, - "GTF_TO_REFFLAT": { - "gtfToGenePred": 377 - }, - "HGNC_DOWNLOAD": { - "wget": null - }, - "SAMTOOLS_FAIDX": { - "samtools": 1.21 - }, - "STARFUSION_BUILD": { - "STAR-Fusion": "1.7.0" - }, - "STAR_FOR_ARRIBA": { - "star": null, - "samtools": 1.2, - "gawk": "5.1.0" - }, - "STAR_FOR_STARFUSION": { - "star": null, - "samtools": 1.2, - "gawk": "5.1.0" - }, - "STAR_GENOMEGENERATE": { - "star": null, - "samtools": 1.2, - "gawk": "5.1.0" - }, - "Workflow": { - "nf-core/rnafusion": "v4.0.0dev" - } - }, + 0, + null, [ - "fastp", - "fastp/test.fastp.html", - "fastp/test.fastp.json", - "fastp/test.fastp.log", - "fastp/test_1.fastp.fastq.gz", - 
"fastp/test_2.fastp.fastq.gz", - "fastqc", - "fastqc/test.html", - "fastqc/test.zip", - "fastqc_for_fastp", - "fastqc_for_fastp/test_trimmed.html", - "fastqc_for_fastp/test_trimmed.zip", - "fusioncatcher", - "fusioncatcher/human_v46", - "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", - "fusioncatcher/test.fusioncatcher.fusion-genes.txt", - "fusioncatcher/test.fusioncatcher.log", - "fusioncatcher/test.fusioncatcher.summary.txt", - "gatk4", - "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", - "get", - "get/rrna.bed", - "get/rrna.gtf", - "pipeline_info", - "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", - "references", - "references/arriba", - "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", - "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", - "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", - "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", - "references/fusion_report_db", - "references/fusion_report_db/DB-timestamp.txt", - "references/fusion_report_db/cosmic.db", - "references/fusion_report_db/fusion_report.log", - "references/fusion_report_db/fusiongdb2.db", - "references/fusion_report_db/mitelman.db", - "references/gencode", - "references/gencode/Homo_sapiens.GRCh38.46.gtf", - "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", - "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", - "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", - "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", - "references/gffread", - "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", - "references/hgnc", - "references/hgnc/HGNC-DB-timestamp.txt", - "references/hgnc/hgnc_complete_set.txt", - "references/salmon", - "references/salmon/salmon", - "references/salmon/salmon/complete_ref_lens.bin", - "references/salmon/salmon/ctable.bin", - "references/salmon/salmon/ctg_offsets.bin", - "references/salmon/salmon/duplicate_clusters.tsv", - 
"references/salmon/salmon/info.json", - "references/salmon/salmon/mphf.bin", - "references/salmon/salmon/pos.bin", - "references/salmon/salmon/pre_indexing.log", - "references/salmon/salmon/rank.bin", - "references/salmon/salmon/refAccumLengths.bin", - "references/salmon/salmon/ref_indexing.log", - "references/salmon/salmon/reflengths.bin", - "references/salmon/salmon/refseq.bin", - "references/salmon/salmon/seq.bin", - "references/salmon/salmon/versionInfo.json", - "references/star", - "references/star/Genome", - "references/star/Log.out", - "references/star/SA", - "references/star/SAindex", - "references/star/chrLength.txt", - "references/star/chrName.txt", - "references/star/chrNameLength.txt", - "references/star/chrStart.txt", - "references/star/exonGeTrInfo.tab", - "references/star/exonInfo.tab", - "references/star/geneInfo.tab", - "references/star/genomeParameters.txt", - "references/star/sjdbInfo.txt", - "references/star/sjdbList.fromGTF.out.tab", - "references/star/sjdbList.out.tab", - "references/star/transcriptInfo.tab", - "references/starfusion", - "references/starfusion/ctat_genome_lib_build_dir", - "references/starfusion/ref_annot.cdna.fa", - "star_for_arriba", - "star_for_arriba/test.Aligned.sortedByCoord.out.bam", - "star_for_arriba/test.Aligned.unsort.out.bam", - "star_for_arriba/test.Chimeric.out.junction", - "star_for_arriba/test.Log.final.out", - "star_for_arriba/test.Log.out", - "star_for_arriba/test.Log.progress.out", - "star_for_arriba/test.ReadsPerGene.out.tab", - "star_for_arriba/test.SJ.out.tab", - "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", - "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", - "star_for_arriba/test.out.sam", - "star_for_arriba/test.sortedByCoord.out.bam", - "star_for_arriba/test.tab", - "star_for_arriba/test.toTranscriptome.out.bam", - "star_for_arriba/test.unmapped_1.fastq.gz", - "star_for_arriba/test.unmapped_2.fastq.gz", - "star_for_arriba/testXd.out.bam", - "star_for_starfusion", - 
"star_for_starfusion/test.Aligned.sortedByCoord.out.bam", - "star_for_starfusion/test.Aligned.unsort.out.bam", - "star_for_starfusion/test.Chimeric.out.junction", - "star_for_starfusion/test.Log.final.out", - "star_for_starfusion/test.Log.out", - "star_for_starfusion/test.Log.progress.out", - "star_for_starfusion/test.ReadsPerGene.out.tab", - "star_for_starfusion/test.SJ.out.tab", - "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", - "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", - "star_for_starfusion/test.out.sam", - "star_for_starfusion/test.sortedByCoord.out.bam", - "star_for_starfusion/test.tab", - "star_for_starfusion/test.toTranscriptome.out.bam", - "star_for_starfusion/test.unmapped_1.fastq.gz", - "star_for_starfusion/test.unmapped_2.fastq.gz", - "star_for_starfusion/testXd.out.bam" + "pipeline_info" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-17T16:44:02.121512" + "timestamp": "2024-12-20T09:49:41.743056" }, "stub test with fastp trim": { "content": [ - 19, - { - "ARRIBA_DOWNLOAD": { - "arriba_download": "2.4.0" - }, - "FASTP": { - "fastp": "0.23.4" - }, - "FASTQC": { - "fastqc": "0.12.1" - }, - "FASTQC_FOR_FASTP": { - "fastqc": "0.12.1" - }, - "FUSIONCATCHER": { - "fusioncatcher": 1.35 - }, - "FUSIONCATCHER_BUILD": { - "fusioncatcher": "fusioncatcher.py 1.35" - }, - "FUSIONREPORT_DOWNLOAD": { - "fusion_report": "2.1.5" - }, - "GATK4_BEDTOINTERVALLIST": { - "gatk4": "4.6.1.0" - }, - "GATK4_CREATESEQUENCEDICTIONARY": { - "gatk4": "4.6.1.0" - }, - "GENCODE_DOWNLOAD": { - "wget": null - }, - "GET_RRNA_TRANSCRIPTS": { - "get_rrna_transcripts": "v1.0" - }, - "GFFREAD": { - "gffread": "0.12.7" - }, - "GTF_TO_REFFLAT": { - "gtfToGenePred": 377 - }, - "HGNC_DOWNLOAD": { - "wget": null - }, - "SAMTOOLS_FAIDX": { - "samtools": 1.21 - }, - "STARFUSION_BUILD": { - "STAR-Fusion": "1.7.0" - }, - "STAR_FOR_ARRIBA": { - "star": null, - "samtools": 1.2, - "gawk": "5.1.0" - }, - "STAR_GENOMEGENERATE": { - 
"star": null, - "samtools": 1.2, - "gawk": "5.1.0" - }, - "Workflow": { - "nf-core/rnafusion": "v4.0.0dev" - } - }, + 0, + null, [ - "fastp", - "fastp/test.fastp.html", - "fastp/test.fastp.json", - "fastp/test.fastp.log", - "fastp/test_1.fastp.fastq.gz", - "fastp/test_2.fastp.fastq.gz", - "fastqc", - "fastqc/test.html", - "fastqc/test.zip", - "fastqc_for_fastp", - "fastqc_for_fastp/test_trimmed.html", - "fastqc_for_fastp/test_trimmed.zip", - "fusioncatcher", - "fusioncatcher/human_v46", - "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", - "fusioncatcher/test.fusioncatcher.fusion-genes.txt", - "fusioncatcher/test.fusioncatcher.log", - "fusioncatcher/test.fusioncatcher.summary.txt", - "gatk4", - "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", - "get", - "get/rrna.bed", - "get/rrna.gtf", - "pipeline_info", - "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", - "references", - "references/arriba", - "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", - "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", - "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", - "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", - "references/fusion_report_db", - "references/fusion_report_db/DB-timestamp.txt", - "references/fusion_report_db/cosmic.db", - "references/fusion_report_db/fusion_report.log", - "references/fusion_report_db/fusiongdb2.db", - "references/fusion_report_db/mitelman.db", - "references/gencode", - "references/gencode/Homo_sapiens.GRCh38.46.gtf", - "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", - "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", - "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", - "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", - "references/gffread", - "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", - "references/hgnc", - "references/hgnc/HGNC-DB-timestamp.txt", - "references/hgnc/hgnc_complete_set.txt", - 
"references/star", - "references/star/Genome", - "references/star/Log.out", - "references/star/SA", - "references/star/SAindex", - "references/star/chrLength.txt", - "references/star/chrName.txt", - "references/star/chrNameLength.txt", - "references/star/chrStart.txt", - "references/star/exonGeTrInfo.tab", - "references/star/exonInfo.tab", - "references/star/geneInfo.tab", - "references/star/genomeParameters.txt", - "references/star/sjdbInfo.txt", - "references/star/sjdbList.fromGTF.out.tab", - "references/star/sjdbList.out.tab", - "references/star/transcriptInfo.tab", - "references/starfusion", - "references/starfusion/ctat_genome_lib_build_dir", - "references/starfusion/ref_annot.cdna.fa", - "star_for_arriba", - "star_for_arriba/test.Aligned.sortedByCoord.out.bam", - "star_for_arriba/test.Aligned.unsort.out.bam", - "star_for_arriba/test.Chimeric.out.junction", - "star_for_arriba/test.Log.final.out", - "star_for_arriba/test.Log.out", - "star_for_arriba/test.Log.progress.out", - "star_for_arriba/test.ReadsPerGene.out.tab", - "star_for_arriba/test.SJ.out.tab", - "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", - "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", - "star_for_arriba/test.out.sam", - "star_for_arriba/test.sortedByCoord.out.bam", - "star_for_arriba/test.tab", - "star_for_arriba/test.toTranscriptome.out.bam", - "star_for_arriba/test.unmapped_1.fastq.gz", - "star_for_arriba/test.unmapped_2.fastq.gz", - "star_for_arriba/testXd.out.bam", - "star_for_starfusion", - "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", - "star_for_starfusion/test.Aligned.unsort.out.bam", - "star_for_starfusion/test.Chimeric.out.junction", - "star_for_starfusion/test.Log.final.out", - "star_for_starfusion/test.Log.out", - "star_for_starfusion/test.Log.progress.out", - "star_for_starfusion/test.ReadsPerGene.out.tab", - "star_for_starfusion/test.SJ.out.tab", - "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", - 
"star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", - "star_for_starfusion/test.out.sam", - "star_for_starfusion/test.sortedByCoord.out.bam", - "star_for_starfusion/test.tab", - "star_for_starfusion/test.toTranscriptome.out.bam", - "star_for_starfusion/test.unmapped_1.fastq.gz", - "star_for_starfusion/test.unmapped_2.fastq.gz", - "star_for_starfusion/testXd.out.bam" + "pipeline_info" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-17T16:42:11.182735" + "timestamp": "2024-12-20T09:49:14.743338" } } \ No newline at end of file From bdae2931727b81c1b5a13d3c1deb894a84f1e07d Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:55:16 +0100 Subject: [PATCH 38/41] fix some issues with channels --- subworkflows/local/arriba_workflow/main.nf | 8 +- subworkflows/local/build_references.nf | 12 +- tests/test_stub.nf.test.snap | 246 ++++++++++++++++++++- workflows/rnafusion.nf | 2 +- 4 files changed, 253 insertions(+), 15 deletions(-) diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index 77e97f42..bb4c36a6 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -25,7 +25,7 @@ workflow ARRIBA_WORKFLOW { main: ch_versions = Channel.empty() ch_cram_index = Channel.empty() - ch_dummy_file = file("$baseDir/assets/dummy_file_arriba.txt", checkIfExists: true) + ch_dummy_file = file("$projectDir/assets/dummy_file_arriba.txt", checkIfExists: true) if (( arriba || all ) && !fusioninspector_only) { @@ -43,7 +43,7 @@ workflow ARRIBA_WORKFLOW { if ( arriba_fusions ) { ch_arriba_fusions = reads.combine( Channel.value( file( arriba_fusions, checkIfExists: true ) ) ) - .map { meta, reads, fusions -> [ meta, fusions ] } + .map { it -> [ it[0], it[2] ] } ch_arriba_fusion_fail = ch_dummy_file } else { @@ -61,7 +61,7 @@ workflow ARRIBA_WORKFLOW { ch_versions = 
ch_versions.mix(ARRIBA_ARRIBA.out.versions) ch_arriba_fusions = ARRIBA_ARRIBA.out.fusions - ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ meta, file -> return file } + ch_arriba_fusion_fail = ARRIBA_ARRIBA.out.fusions_fail.map{ it -> return it[1] } } if ( cram.contains('arriba') ) { @@ -83,7 +83,7 @@ workflow ARRIBA_WORKFLOW { ch_arriba_fusions = reads .combine(Channel.value( file(ch_dummy_file, checkIfExists: true ) ) ) - .map { meta, reads, fusions -> [ meta, fusions ] } + .map { it -> [ it[0], it[2] ] } ch_arriba_fusion_fail = ch_dummy_file } diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 56fd8eb2..34102d2d 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -118,10 +118,14 @@ workflow BUILD_REFERENCES { ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions ch_arriba_ref_protein_domains = ARRIBA_DOWNLOAD.out.protein_domains } else { - ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist) - ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands) - ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions) - ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains) + ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { file -> + [ id: file.baseName, path: file ]} + ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { file -> + [ id: file.baseName, path: file ]} + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { file -> + [ id: file.baseName, path: file ]} + ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { file -> + [ id: file.baseName, path: file ]} } diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index f661aac6..a60d6d13 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -4,27 
+4,261 @@ 0, null, [ - "pipeline_info" + ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-20T09:49:41.743056" + "timestamp": "2024-12-20T11:51:44.033131" }, "stub test with fastp trim": { "content": [ - 0, - null, + 18, + { + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "FASTP": { + "fastp": "0.23.4" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FASTQC_FOR_FASTP": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER": { + "fusioncatcher": 1.35 + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "fusioncatcher.py 1.35" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_FOR_ARRIBA": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, [ - "pipeline_info" + "fastp", + "fastp/test.fastp.html", + "fastp/test.fastp.json", + "fastp/test.fastp.log", + "fastp/test_1.fastp.fastq.gz", + "fastp/test_2.fastp.fastq.gz", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fastqc_for_fastp", + "fastqc_for_fastp/test_trimmed.html", + "fastqc_for_fastp/test_trimmed.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "pipeline_info", + 
"pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + "references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "star_for_arriba", + "star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + 
"star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + "star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + 
"starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + 
"starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-20T09:49:14.743338" + "timestamp": "2024-12-20T11:53:19.316547" } } \ No newline at end of file diff --git a/workflows/rnafusion.nf b/workflows/rnafusion.nf index a755311c..755be7b2 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -65,7 +65,7 @@ workflow RNAFUSION { // TRIM_WORKFLOW ( ch_samplesheet, - params.adapter_fasta, + Channel.value(params.adapter_fasta), params.fastp_trim ) ch_reads = TRIM_WORKFLOW.out.ch_reads_all From 509a792e1197aeb69b22bb5218b6830cca9761cd Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:03:55 +0100 Subject: [PATCH 39/41] add species to schema --- nextflow_schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index db35f97d..1f83adc1 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -375,6 +375,11 @@ "type": "string", "description": "Path to Fusion Annotation Library to be used in STARFUSION_BUILD.", "fa_icon": "far fa-file-code" + }, + "species": { + "type": "string", + "description": "Which species dfam should automatically download, default: human.", + "fa_icon": "far fa-file-code" } } }, From 8776835e747aaf65a0dbf5f493e5154b32cdbd11 Mon Sep 17 00:00:00 2001 From: 
Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 20 Dec 2024 13:24:33 +0100 Subject: [PATCH 40/41] fix erroneous addition of meta --- subworkflows/local/arriba_workflow/main.nf | 8 +- subworkflows/local/build_references.nf | 12 +- tests/test_stub.nf.test.snap | 431 ++++++++++++++++++++- 3 files changed, 432 insertions(+), 19 deletions(-) diff --git a/subworkflows/local/arriba_workflow/main.nf b/subworkflows/local/arriba_workflow/main.nf index bb4c36a6..5fa2ed2b 100644 --- a/subworkflows/local/arriba_workflow/main.nf +++ b/subworkflows/local/arriba_workflow/main.nf @@ -52,10 +52,10 @@ workflow ARRIBA_WORKFLOW { STAR_FOR_ARRIBA.out.bam, ch_fasta, ch_gtf, - ch_arriba_ref_blacklist.map{ it[1] }, - ch_arriba_ref_known_fusions.map{ it[1] }, - ch_arriba_ref_cytobands.map{ it[1] }, - ch_arriba_ref_protein_domains.map{ it[1] } + ch_arriba_ref_blacklist, + ch_arriba_ref_known_fusions, + ch_arriba_ref_cytobands, + ch_arriba_ref_protein_domains ) ch_versions = ch_versions.mix(ARRIBA_ARRIBA.out.versions) diff --git a/subworkflows/local/build_references.nf b/subworkflows/local/build_references.nf index 34102d2d..56fd8eb2 100644 --- a/subworkflows/local/build_references.nf +++ b/subworkflows/local/build_references.nf @@ -118,14 +118,10 @@ workflow BUILD_REFERENCES { ch_arriba_ref_known_fusions = ARRIBA_DOWNLOAD.out.known_fusions ch_arriba_ref_protein_domains = ARRIBA_DOWNLOAD.out.protein_domains } else { - ch_arriba_ref_blacklist = Channel.fromPath(params.arriba_ref_blacklist).map { file -> - [ id: file.baseName, path: file ]} - ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands).map { file -> - [ id: file.baseName, path: file ]} - ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions).map { file -> - [ id: file.baseName, path: file ]} - ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains).map { file -> - [ id: file.baseName, path: file ]} + ch_arriba_ref_blacklist = 
Channel.fromPath(params.arriba_ref_blacklist) + ch_arriba_ref_cytobands = Channel.fromPath(params.arriba_ref_cytobands) + ch_arriba_ref_known_fusions = Channel.fromPath(params.arriba_ref_known_fusions) + ch_arriba_ref_protein_domains = Channel.fromPath(params.arriba_ref_protein_domains) } diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index a60d6d13..e1892a6a 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -1,22 +1,343 @@ { "stub test no fastp trim": { "content": [ - 0, - null, + 29, + { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, + "ARRIBA_DOWNLOAD": { + "arriba_download": "2.4.0" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FUSIONCATCHER": { + "fusioncatcher": 1.35 + }, + "FUSIONCATCHER_BUILD": { + "fusioncatcher": "fusioncatcher.py 1.35" + }, + "FUSIONREPORT": { + "fusion_report": "2.1.5" + }, + "FUSIONREPORT_DOWNLOAD": { + "fusion_report": "2.1.5" + }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, + "GATK4_CREATESEQUENCEDICTIONARY": { + "gatk4": "4.6.1.0" + }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", + "samtools": "1.19.2" + }, + "GENCODE_DOWNLOAD": { + "wget": null + }, + "GET_RRNA_TRANSCRIPTS": { + "get_rrna_transcripts": "v1.0" + }, + "GFFREAD": { + "gffread": "0.12.7" + }, + "GTF_TO_REFFLAT": { + "gtfToGenePred": 377 + }, + "HGNC_DOWNLOAD": { + "wget": null + }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, + "SAMTOOLS_FAIDX": { + "samtools": 1.21 + }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.14.0" + }, + "STARFUSION_BUILD": { + "STAR-Fusion": "1.14.0" + }, + "STAR_FOR_ARRIBA": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_FOR_STARFUSION": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STAR_GENOMEGENERATE": { + 
"star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, + "Workflow": { + "nf-core/rnafusion": "v4.0.0dev" + } + }, [ - + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", + "fastqc", + "fastqc/test.html", + "fastqc/test.zip", + "fusioncatcher", + "fusioncatcher/human_v46", + "fusioncatcher/human_v46/ensembl_fully_overlapping_genes.txt", + "fusioncatcher/test.fusioncatcher.fusion-genes.txt", + "fusioncatcher/test.fusioncatcher.log", + "fusioncatcher/test.fusioncatcher.summary.txt", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", + "gatk4", + "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", + "get", + "get/rrna.bed", + "get/rrna.gtf", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", + "pipeline_info", + "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", + "references", + "references/arriba", + "references/arriba/blacklist_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/cytobands_hg38_GRCh38_v2.4.0.tsv", + "references/arriba/known_fusions_hg38_GRCh38_v2.4.0.tsv.gz", + "references/arriba/protein_domains_hg38_GRCh38_v2.4.0.gff3", + "references/fusion_report_db", + "references/fusion_report_db/DB-timestamp.txt", + "references/fusion_report_db/cosmic.db", + "references/fusion_report_db/fusion_report.log", + "references/fusion_report_db/fusiongdb2.db", + 
"references/fusion_report_db/mitelman.db", + "references/gencode", + "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", + "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", + "references/gffread", + "references/gffread/Homo_sapiens.GRCh38.46.gtf.fasta", + "references/hgnc", + "references/hgnc/HGNC-DB-timestamp.txt", + "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", + "references/star", + "references/star/Genome", + "references/star/Log.out", + "references/star/SA", + "references/star/SAindex", + "references/star/chrLength.txt", + "references/star/chrName.txt", + "references/star/chrNameLength.txt", + "references/star/chrStart.txt", + "references/star/exonGeTrInfo.tab", + "references/star/exonInfo.tab", + "references/star/geneInfo.tab", + "references/star/genomeParameters.txt", + "references/star/sjdbInfo.txt", + "references/star/sjdbList.fromGTF.out.tab", + "references/star/sjdbList.out.tab", + "references/star/transcriptInfo.tab", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", + "star_for_arriba", + 
"star_for_arriba/test.Aligned.sortedByCoord.out.bam", + "star_for_arriba/test.Aligned.unsort.out.bam", + "star_for_arriba/test.Chimeric.out.junction", + "star_for_arriba/test.Log.final.out", + "star_for_arriba/test.Log.out", + "star_for_arriba/test.Log.progress.out", + "star_for_arriba/test.ReadsPerGene.out.tab", + "star_for_arriba/test.SJ.out.tab", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_arriba/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_arriba/test.out.sam", + "star_for_arriba/test.sortedByCoord.out.bam", + "star_for_arriba/test.tab", + "star_for_arriba/test.toTranscriptome.out.bam", + "star_for_arriba/test.unmapped_1.fastq.gz", + "star_for_arriba/test.unmapped_2.fastq.gz", + "star_for_arriba/testXd.out.bam", + "star_for_starfusion", + "star_for_starfusion/test.Aligned.sortedByCoord.out.bam", + "star_for_starfusion/test.Aligned.unsort.out.bam", + "star_for_starfusion/test.Chimeric.out.junction", + "star_for_starfusion/test.Log.final.out", + "star_for_starfusion/test.Log.out", + "star_for_starfusion/test.Log.progress.out", + "star_for_starfusion/test.ReadsPerGene.out.tab", + "star_for_starfusion/test.SJ.out.tab", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.bg", + "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", + "star_for_starfusion/test.out.sam", + "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", + "star_for_starfusion/test.tab", + "star_for_starfusion/test.toTranscriptome.out.bam", + "star_for_starfusion/test.unmapped_1.fastq.gz", + "star_for_starfusion/test.unmapped_2.fastq.gz", + "star_for_starfusion/testXd.out.bam", + "starfusion", + "starfusion/ctat_genome_lib_build_dir", + "starfusion/ctat_genome_lib_build_dir/AnnotFilterRule.pm", + "starfusion/ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz", + "starfusion/ctat_genome_lib_build_dir/__chkpts", + "starfusion/ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok", + 
"starfusion/ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok", + "starfusion/ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.dat.gz", + "starfusion/ctat_genome_lib_build_dir/blast_pairs.idx", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.gz", + "starfusion/ctat_genome_lib_build_dir/fusion_annot_lib.idx", + "starfusion/ctat_genome_lib_build_dir/pfam_domains.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa", + 
"starfusion/ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cds", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa", + "starfusion/ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu", + "starfusion/ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed", + "starfusion/ctat_genome_lib_build_dir/ref_annot.pep", + "starfusion/ctat_genome_lib_build_dir/ref_annot.prot_info.dbm", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.fai", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.mm2", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ndb", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nhr", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nin", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.njs", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.not", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nsq", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.ntf", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.nto", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt", + 
"starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab", + "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", + "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-20T11:51:44.033131" + "timestamp": "2024-12-20T13:17:20.018293" }, "stub test with fastp trim": { "content": [ - 18, + 31, { + "ARRIBA_ARRIBA": { + "arriba": "2.4.0" + }, "ARRIBA_DOWNLOAD": { "arriba_download": "2.4.0" }, @@ -35,12 +356,22 @@ "FUSIONCATCHER_BUILD": { "fusioncatcher": "fusioncatcher.py 1.35" }, + "FUSIONREPORT": { + "fusion_report": "2.1.5" + }, "FUSIONREPORT_DOWNLOAD": { "fusion_report": "2.1.5" }, + "GATK4_BEDTOINTERVALLIST": { + "gatk4": "4.6.1.0" + }, "GATK4_CREATESEQUENCEDICTIONARY": { "gatk4": "4.6.1.0" }, + "GATK4_MARKDUPLICATES": { + "gatk4": "4.5.0.0", 
+ "samtools": "1.19.2" + }, "GENCODE_DOWNLOAD": { "wget": null }, @@ -56,9 +387,27 @@ "HGNC_DOWNLOAD": { "wget": null }, + "PICARD_COLLECTINSERTSIZEMETRICS": { + "picard": "3.3.0" + }, + "PICARD_COLLECTRNASEQMETRICS": { + "picard": "3.3.0" + }, + "SALMON_INDEX": { + "salmon": "1.10.3" + }, + "SALMON_QUANT": { + "salmon": "1.10.3" + }, "SAMTOOLS_FAIDX": { "samtools": 1.21 }, + "SAMTOOLS_INDEX_FOR_STARFUSION": { + "samtools": 1.21 + }, + "STARFUSION": { + "STAR-Fusion": "1.14.0" + }, "STARFUSION_BUILD": { "STAR-Fusion": "1.14.0" }, @@ -67,16 +416,30 @@ "samtools": 1.2, "gawk": "5.1.0" }, + "STAR_FOR_STARFUSION": { + "star": null, + "samtools": 1.2, + "gawk": "5.1.0" + }, "STAR_GENOMEGENERATE": { "star": null, "samtools": 1.2, "gawk": "5.1.0" }, + "STRINGTIE_MERGE": { + "stringtie": "2.2.1" + }, + "STRINGTIE_STRINGTIE": { + "stringtie": "2.2.3" + }, "Workflow": { "nf-core/rnafusion": "v4.0.0dev" } }, [ + "arriba", + "arriba/test.arriba.fusions.discarded.tsv", + "arriba/test.arriba.fusions.tsv", "fastp", "fastp/test.fastp.html", "fastp/test.fastp.json", @@ -95,11 +458,32 @@ "fusioncatcher/test.fusioncatcher.fusion-genes.txt", "fusioncatcher/test.fusioncatcher.log", "fusioncatcher/test.fusioncatcher.summary.txt", + "fusionreport", + "fusionreport/test", + "fusionreport/test/AAA_BBB.html", + "fusionreport/test/test.fusionreport.tsv", + "fusionreport/test/test.fusionreport_filtered.tsv", + "fusionreport/test/test.fusions.csv", + "fusionreport/test/test.fusions.json", + "fusionreport/test/test_fusionreport_index.html", "gatk4", "gatk4/Homo_sapiens.GRCh38.46_dna_primary_assembly.dict", "get", "get/rrna.bed", "get/rrna.gtf", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_plots", + "multiqc/multiqc_report.html", + "picard", + "picard/test.bai", + "picard/test.bam", + "picard/test.bam.metrics", + "picard/test.cram", + "picard/test.cram.crai", + "picard/test.rna_metrics", + "picard/test_collectinsertsize.pdf", + "picard/test_collectinsertsize.txt", "pipeline_info", 
"pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "references", @@ -116,6 +500,7 @@ "references/fusion_report_db/mitelman.db", "references/gencode", "references/gencode/Homo_sapiens.GRCh38.46.gtf", + "references/gencode/Homo_sapiens.GRCh38.46.gtf.interval_list", "references/gencode/Homo_sapiens.GRCh38.46.gtf.refflat", "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa", "references/gencode/Homo_sapiens.GRCh38.46_dna_primary_assembly.fa.fai", @@ -124,6 +509,23 @@ "references/hgnc", "references/hgnc/HGNC-DB-timestamp.txt", "references/hgnc/hgnc_complete_set.txt", + "references/salmon", + "references/salmon/salmon", + "references/salmon/salmon/complete_ref_lens.bin", + "references/salmon/salmon/ctable.bin", + "references/salmon/salmon/ctg_offsets.bin", + "references/salmon/salmon/duplicate_clusters.tsv", + "references/salmon/salmon/info.json", + "references/salmon/salmon/mphf.bin", + "references/salmon/salmon/pos.bin", + "references/salmon/salmon/pre_indexing.log", + "references/salmon/salmon/rank.bin", + "references/salmon/salmon/refAccumLengths.bin", + "references/salmon/salmon/ref_indexing.log", + "references/salmon/salmon/reflengths.bin", + "references/salmon/salmon/refseq.bin", + "references/salmon/salmon/seq.bin", + "references/salmon/salmon/versionInfo.json", "references/star", "references/star/Genome", "references/star/Log.out", @@ -141,6 +543,10 @@ "references/star/sjdbList.fromGTF.out.tab", "references/star/sjdbList.out.tab", "references/star/transcriptInfo.tab", + "salmon", + "salmon/test", + "salmon/test_lib_format_counts.json", + "salmon/test_meta_info.json", "star_for_arriba", "star_for_arriba/test.Aligned.sortedByCoord.out.bam", "star_for_arriba/test.Aligned.unsort.out.bam", @@ -172,6 +578,7 @@ "star_for_starfusion/test.Signal.UniqueMultiple.str1.out.wig", "star_for_starfusion/test.out.sam", "star_for_starfusion/test.sortedByCoord.out.bam", + "star_for_starfusion/test.sortedByCoord.out.bam.bai", "star_for_starfusion/test.tab", 
"star_for_starfusion/test.toTranscriptome.out.bam", "star_for_starfusion/test.unmapped_1.fastq.gz", @@ -252,13 +659,23 @@ "starfusion/ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab", "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat", "starfusion/ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm", - "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz" + "starfusion/ctat_genome_lib_build_dir/trans.blast.dat.gz", + "starfusion/test.starfusion.abridged.coding_effect.tsv", + "starfusion/test.starfusion.abridged.tsv", + "starfusion/test.starfusion.fusion_predictions.tsv", + "stringtie", + "stringtie/[:]", + "stringtie/[:]/stringtie.merged.gtf", + "stringtie/test.ballgown", + "stringtie/test.coverage.gtf", + "stringtie/test.gene.abundance.txt", + "stringtie/test.transcripts.gtf" ] ], "meta": { "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-20T11:53:19.316547" + "timestamp": "2024-12-20T13:15:06.974011" } } \ No newline at end of file From ab65540133a556fda772f776a3bc4391a1f12329 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Fri, 20 Dec 2024 14:02:55 +0100 Subject: [PATCH 41/41] fixes --- tests/test_stub.nf.test.snap | 78 +++++++++++++++++++++++++++++++----- workflows/rnafusion.nf | 6 +-- 2 files changed, 70 insertions(+), 14 deletions(-) diff --git a/tests/test_stub.nf.test.snap b/tests/test_stub.nf.test.snap index e1892a6a..0ac620e7 100644 --- a/tests/test_stub.nf.test.snap +++ b/tests/test_stub.nf.test.snap @@ -1,7 +1,7 @@ { "stub test no fastp trim": { "content": [ - 29, + 31, { "ARRIBA_ARRIBA": { "arriba": "2.4.0" @@ -9,6 +9,9 @@ "ARRIBA_DOWNLOAD": { "arriba_download": "2.4.0" }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, "FASTQC": { "fastqc": "0.12.1" }, @@ -74,17 +77,17 @@ "STAR-Fusion": "1.14.0" }, "STAR_FOR_ARRIBA": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, 
"STAR_FOR_STARFUSION": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, @@ -102,6 +105,31 @@ "arriba", "arriba/test.arriba.fusions.discarded.tsv", "arriba/test.arriba.fusions.tsv", + "ctatsplicing", + "ctatsplicing/arriba", + "ctatsplicing/arriba/test.cancer.introns", + "ctatsplicing/arriba/test.cancer.introns.prelim", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/arriba/test.chckpts", + "ctatsplicing/arriba/test.ctat-splicing.igv.html", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/arriba/test.igv.tracks", + "ctatsplicing/arriba/test.introns", + "ctatsplicing/arriba/test.introns.for_IGV.bed", + "ctatsplicing/starfusion", + "ctatsplicing/starfusion/test.cancer.introns", + "ctatsplicing/starfusion/test.cancer.introns.prelim", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/starfusion/test.chckpts", + "ctatsplicing/starfusion/test.ctat-splicing.igv.html", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/starfusion/test.igv.tracks", + "ctatsplicing/starfusion/test.introns", + "ctatsplicing/starfusion/test.introns.for_IGV.bed", "fastqc", "fastqc/test.html", "fastqc/test.zip", @@ -329,11 +357,11 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-20T13:17:20.018293" + "timestamp": "2024-12-20T14:00:19.311265" }, "stub test with fastp trim": { "content": [ - 31, + 33, { "ARRIBA_ARRIBA": { "arriba": "2.4.0" @@ -341,6 +369,9 @@ "ARRIBA_DOWNLOAD": { "arriba_download": "2.4.0" }, + "CTATSPLICING_STARTOCANCERINTRONS": { + "ctat-splicing": "0.0.2" + }, 
"FASTP": { "fastp": "0.23.4" }, @@ -412,17 +443,17 @@ "STAR-Fusion": "1.14.0" }, "STAR_FOR_ARRIBA": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_FOR_STARFUSION": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, "STAR_GENOMEGENERATE": { - "star": null, + "star": "2.7.11b", "samtools": 1.2, "gawk": "5.1.0" }, @@ -440,6 +471,31 @@ "arriba", "arriba/test.arriba.fusions.discarded.tsv", "arriba/test.arriba.fusions.tsv", + "ctatsplicing", + "ctatsplicing/arriba", + "ctatsplicing/arriba/test.cancer.introns", + "ctatsplicing/arriba/test.cancer.introns.prelim", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/arriba/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/arriba/test.chckpts", + "ctatsplicing/arriba/test.ctat-splicing.igv.html", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/arriba/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/arriba/test.igv.tracks", + "ctatsplicing/arriba/test.introns", + "ctatsplicing/arriba/test.introns.for_IGV.bed", + "ctatsplicing/starfusion", + "ctatsplicing/starfusion/test.cancer.introns", + "ctatsplicing/starfusion/test.cancer.introns.prelim", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam", + "ctatsplicing/starfusion/test.cancer_intron_reads.sorted.bam.bai", + "ctatsplicing/starfusion/test.chckpts", + "ctatsplicing/starfusion/test.ctat-splicing.igv.html", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam", + "ctatsplicing/starfusion/test.gene_reads.sorted.sifted.bam.bai", + "ctatsplicing/starfusion/test.igv.tracks", + "ctatsplicing/starfusion/test.introns", + "ctatsplicing/starfusion/test.introns.for_IGV.bed", "fastp", "fastp/test.fastp.html", "fastp/test.fastp.json", @@ -676,6 +732,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.2" }, - "timestamp": "2024-12-20T13:15:06.974011" + "timestamp": "2024-12-20T13:57:10.179877" } -} \ No newline at end of file +} diff --git 
a/workflows/rnafusion.nf b/workflows/rnafusion.nf index 3c98cf6b..857827aa 100644 --- a/workflows/rnafusion.nf +++ b/workflows/rnafusion.nf @@ -92,7 +92,7 @@ workflow RNAFUSION { BUILD_REFERENCES.out.ch_arriba_ref_cytobands, BUILD_REFERENCES.out.ch_arriba_ref_known_fusions, BUILD_REFERENCES.out.ch_arriba_ref_protein_domains, - ch_starfusion_ref, + BUILD_REFERENCES.out.ch_starfusion_ref, params.arriba, // boolean params.all, // boolean params.fusioninspector_only, // boolean @@ -110,8 +110,8 @@ workflow RNAFUSION { ch_reads, BUILD_REFERENCES.out.ch_gtf, BUILD_REFERENCES.out.ch_starindex_ref, - BUILD_REFERENCES.out.ch_fasta - ch_starfusion_ref + BUILD_REFERENCES.out.ch_fasta, + BUILD_REFERENCES.out.ch_starfusion_ref ) ch_versions = ch_versions.mix(STARFUSION_WORKFLOW.out.versions)