diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bb9d56..fc35e26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.0.0 - Moonwalking Mamba - [7 September 2023] + +### `Changed` + +1. Removed the alignment in favor of supplying BAM or CRAM files as input. This makes sure the annotation is closely related to the actual data it needs to be used on. + +### `Fixed` + +1. Improved handling of duplicate filenames + ## v0.1.0 - Dancing Panda - [4 July 2023] Initial release of CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index 585bcd3..4644533 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,9 @@ **CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq** is a bioinformatics pipeline for creating qDNAseq annotations -1. Trim FASTQ files to read lengths of 50 with Trimmomatic -2. Align the reads with BWA (aln and samse/sampe) -3. Create a mappability WIG file with GenMap -4. Convert the WIG to BigWig with UCSC WigToBigWig -5. Create the annotations using a custom R script +1. Create a mappability WIG file with GenMap +2. Convert the WIG to BigWig with UCSC WigToBigWig +3. Create the annotations using a custom R script ## Usage diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 84b4062..f555d25 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq + This report has been generated by the CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq analysis pipeline. 
report_section_order: "CenterForMedicalGeneticsGhent-nf-cmgg-qdnaseq-methods-description": diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 36d2407..aa49c49 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ -sample,fastq_1,fastq_2 -test1,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz -test2,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz, +cram,crai +https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/cram/test.cram,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/cram/test.cram.crai +https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/cram/test2.cram, diff --git a/assets/schema_input.json b/assets/schema_input.json index c9356ab..1daa466 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -7,23 +7,19 @@ "items": { "type": "object", "properties": { - "sample": { - "type": "string", - "meta": ["id"] - }, - "fastq_1": { + "cram": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^.*\\.fastq(\\.gz)?$" + "pattern": "^.*\\.(b|cr)am$" }, - "fastq_2": { + "crai": { "type": "string", "format": "file-path", "exists": true, - "pattern": "^.*\\.fastq(\\.gz)?$" + "pattern": "^.*\\.(b|cr)ai$" } }, - "required": ["fastq_1", "sample"] + "required": ["cram"] } } diff --git a/conf/modules.config b/conf/modules.config index e9df84b..67611f3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,14 +18,6 @@ if(!params.annotation_genome) { process { - withName: TRIMGALORE { - 
ext.args = "--hardtrim5 50" - } - - withName: BWA_ALN { - ext.args = "-n 2 -q 40" - } - withName: GAWK { ext.suffix = "sizes" ext.args2 = '\'{print $1"\t"$2}\'' @@ -37,6 +29,7 @@ process { } withName: CREATE_ANNOTATIONS { + stageInMode = "copy" // Because qdnaseq tries to fetch the indices from the link source publishDir = [ overwrite: true, enabled: true, diff --git a/conf/nf_test.config b/conf/nf_test.config index 963e210..aefe957 100644 --- a/conf/nf_test.config +++ b/conf/nf_test.config @@ -31,4 +31,5 @@ params { // Genome references genome = 'hg38' bin_sizes = "10,5" + species = "Hsapiens" } diff --git a/conf/test.config b/conf/test.config index fd57a5b..d6abd04 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,4 +28,5 @@ params { // Genome references genome = 'hg38' bin_sizes = "10,5" + species = "Hsapiens" } diff --git a/docs/parameters.md b/docs/parameters.md index 0630c53..032901f 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -22,9 +22,9 @@ Reference genome related files and options required for the workflow. | ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | | `genome` | Name of the genome. | `string` | | True | | | `annotation_genome` | The name of the genome used to create the annotations. This will default to the value supplied with --genome. | `string` | None | | | +| `species` | Name of the species. Needs to be in this format: Hsapiens (First name as a capital letter and last name as all lowercase letters) | `string` | | True | | | `fasta` | Path to FASTA genome file.
<details><summary>Help</summary><small>This parameter is _mandatory_ if `--genome` is not specified.</small></details>
| `string` | | | | | `fai` | Path to FASTA genome index file. | `string` | | | | -| `bwa` | The BWA index. | `string` | | | | | `blacklist` | The blacklist BED file. | `string` | | | | | `igenomes_base` | Directory / URL base for iGenomes references. | `string` | | | True | | `igenomes_ignore` | Do not load the iGenomes reference config.
<details><summary>Help</summary><small>Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.</small></details>
| `boolean` | | | True | diff --git a/docs/usage.md b/docs/usage.md index b7a0840..8c69fe9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,39 +12,18 @@ You will need to create a samplesheet with information about the samples you wou --input '[path to samplesheet file]' ``` -### Multiple runs of the same sample - -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: - -```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz -``` - ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. - -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. 
- ```console -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +cram,crai +test.cram,test.cram.crai +test2.cram, ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| Column | Description | +| ------ | ---------------------------------------------------- | +| `cram` | An input BAM or CRAM file to use for bin calculation | +| `crai` | The index for the BAM or CRAM file. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
diff --git a/main.nf b/main.nf index 70e38b5..7d5127c 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,6 @@ nextflow.enable.dsl = 2 params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') -params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/modules.json b/modules.json index a72f268..7abced0 100644 --- a/modules.json +++ b/modules.json @@ -5,32 +5,9 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { - "bwa/aln": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwa/aln/bwa-aln.diff" - }, - "bwa/index": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "bwa/sampe": { - "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwa/sampe/bwa-sampe.diff" - }, - "bwa/samse": { - "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"], - "patch": "modules/nf-core/bwa/samse/bwa-samse.diff" - }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", "installed_by": ["modules"] }, "gawk": { @@ -50,7 +27,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] }, "samtools/convert": { @@ -69,24 +46,20 @@ "installed_by": ["modules"], "patch": "modules/nf-core/samtools/index/samtools-index.diff" }, - "tabix/bgzip": { + "samtools/merge": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": 
"0460d316170f75f323111b4a2c0a2989f0c32013", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" }, - "trimgalore": { + "tabix/bgzip": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "ucsc/wigtobigwig": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] - }, - "untar": { - "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", "installed_by": ["modules"] } } diff --git a/modules/local/create_annotations/Dockerfile b/modules/local/create_annotations/Dockerfile index b8244d6..58f4f4a 100644 --- a/modules/local/create_annotations/Dockerfile +++ b/modules/local/create_annotations/Dockerfile @@ -1,14 +1,19 @@ FROM mambaorg/micromamba:1.4-focal -LABEL version="0.0.1" maintainer="Nicolas Vannieuwkerke " +LABEL version="0.0.3" maintainer="Nicolas Vannieuwkerke " RUN micromamba install -y --name base -c conda-forge -c bioconda -c defaults \ bioconductor-qdnaseq==1.34.0 \ bioconductor-biobase==2.58.0 \ + bioconductor-bsgenome==1.66.3 \ ucsc-bigwigaverageoverbed==377 \ r-biocmanager==1.30.21 \ r-xml==3.99_0.14 \ r-restfulr==0.0.15 \ bioconductor-rtracklayer==1.58.0 \ r-r.cache==0.16.0 \ + r-lsr==0.5.2 \ && micromamba clean --all --yes + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +ENV PATH "$MAMBA_ROOT_PREFIX/bin:$PATH" diff --git a/modules/local/create_annotations/main.nf b/modules/local/create_annotations/main.nf index 6a46ee6..de6a2a1 100644 --- a/modules/local/create_annotations/main.nf +++ b/modules/local/create_annotations/main.nf @@ -1,14 +1,15 @@ process CREATE_ANNOTATIONS { tag "$bin_size" - label 'process_single' + label 'process_medium' - container "quay.io/cmgg/qdnaseq:0.0.1" + container "cmgg/qdnaseq:0.0.4" input: val(bin_size) tuple val(meta), path(bams, stageAs:"bams/*"), path(bais, stageAs:"bams/*") tuple val(meta2), path(bigwig) tuple 
val(meta3), path(blacklist) + path(genomes) output: tuple val(meta), path("*.rda"), emit: annotation @@ -24,7 +25,7 @@ process CREATE_ANNOTATIONS { def prefix = task.ext.prefix ?: "${params.annotation_genome}.${bin_size}kbp" """ - touch ${prefix}.rda + touch ${params.annotation_genome}.${bin_size}kbp.rda cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/create_annotations/templates/create_annotations.R b/modules/local/create_annotations/templates/create_annotations.R index 505f916..4517bbe 100644 --- a/modules/local/create_annotations/templates/create_annotations.R +++ b/modules/local/create_annotations/templates/create_annotations.R @@ -6,12 +6,11 @@ library(BiocManager) library(QDNAseq) library(future) -BiocManager::install("BSgenome.Hsapiens.UCSC.${params.annotation_genome}") -library(BSgenome.Hsapiens.UCSC.${params.annotation_genome}) +library($genomes, lib.loc="$genomes") binsize <- ${bin_size} -bins <- createBins(bsgenome=BSgenome.Hsapiens.UCSC.${params.annotation_genome}, binSize=binsize) +bins <- createBins(bsgenome=$genomes, binSize=binsize) bins\$mappability <- calculateMappability( bins, bigWigFile="${bigwig}", @@ -24,7 +23,7 @@ bins\$residual <- NA bins\$use <- bins\$bases > 0 # -tg <- binReadCounts(bins, path="bams") +tg <- binReadCounts(bins, path="bams", chunkSize=1E7) bins\$residual <- iterateResiduals(tg) diff --git a/modules/local/get_bsgenome/main.nf b/modules/local/get_bsgenome/main.nf new file mode 100644 index 0000000..d019586 --- /dev/null +++ b/modules/local/get_bsgenome/main.nf @@ -0,0 +1,31 @@ +process GET_BSGENOME { + tag "$genome" + label 'process_single' + + container "cmgg/qdnaseq:0.0.4" + + input: + val(genome) + val(species) + env R_LIBS_USER + + output: + path("BSgenome.${species}.UCSC.${genome}") , emit: genome + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template "get_bsgenome.R" + + stub: + """ + mkdir BSgenome.${species}.UCSC.${genome} + + 
cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-biocmanager: 1.30.21 + END_VERSIONS + """ +} diff --git a/modules/local/get_bsgenome/templates/get_bsgenome.R b/modules/local/get_bsgenome/templates/get_bsgenome.R new file mode 100644 index 0000000..d122b3c --- /dev/null +++ b/modules/local/get_bsgenome/templates/get_bsgenome.R @@ -0,0 +1,12 @@ +#!/usr/bin/env Rscript + +# load required packages +library(BiocManager) + +dir.create("./BSgenome.${species}.UCSC.${genome}") + +install("BSgenome.${species}.UCSC.${genome}", lib="BSgenome.${species}.UCSC.${genome}") +# Write versions.yml, keyed by the name of the running process (same key the stub uses) +sink("versions.yml") +cat("\\"${task.process}\\":\n") +cat(" r-biocmanager: 1.30.21\n") diff --git a/modules/nf-core/bwa/aln/bwa-aln.diff b/modules/nf-core/bwa/aln/bwa-aln.diff deleted file mode 100644 index 6ccaf21..0000000 --- a/modules/nf-core/bwa/aln/bwa-aln.diff +++ /dev/null @@ -1,34 +0,0 @@ -Changes in module 'nf-core/bwa/aln' ---- modules/nf-core/bwa/aln/main.nf -+++ modules/nf-core/bwa/aln/main.nf -@@ -62,4 +62,28 @@ - END_VERSIONS - """ - } -+ -+ stub: -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ -+ if (meta.single_end) { -+ """ -+ touch ${prefix}.sai -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -+ END_VERSIONS -+ """ -+ } else { -+ """ -+ touch ${prefix}.1.sai -+ touch ${prefix}.1.sai2 -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -+ END_VERSIONS -+ """ -+ } - } - -************************************************************ diff --git a/modules/nf-core/bwa/aln/main.nf b/modules/nf-core/bwa/aln/main.nf deleted file mode 100644 index bc7a363..0000000 --- a/modules/nf-core/bwa/aln/main.nf +++ /dev/null @@ -1,89 +0,0 @@ -process BWA_ALN { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bwa=0.7.17" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--h5bf99c6_8' : - 'biocontainers/bwa:0.7.17--h5bf99c6_8' }" - - input: - tuple val(meta) , path(reads) - tuple val(meta2), path(index) - - output: - tuple val(meta), path("*.sai"), emit: sai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - - if (meta.single_end) { - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - - bwa aln \\ - $args \\ - -t $task.cpus \\ - -f ${prefix}.sai \\ - \$INDEX \\ - ${reads} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - } else { - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - - bwa aln \\ - $args \\ - -t $task.cpus \\ - -f ${prefix}.1.sai \\ - \$INDEX \\ - ${reads[0]} - - bwa aln \\ - $args \\ - -t $task.cpus \\ - -f ${prefix}.2.sai \\ - \$INDEX \\ - ${reads[1]} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - - if (meta.single_end) { - """ - touch ${prefix}.sai - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - } else { - """ - touch ${prefix}.1.sai - touch ${prefix}.1.sai2 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/bwa/aln/meta.yml b/modules/nf-core/bwa/aln/meta.yml deleted file mode 100644 index f4cc8f3..0000000 --- a/modules/nf-core/bwa/aln/meta.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: bwa_aln -description: Find SA coordinates of the input reads for bwa 
short-read mapping -keywords: - - bwa - - aln - - short-read - - align - - reference - - fasta - - map - - fastq -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://bio-bwa.sourceforge.net/ - doi: "10.1093/bioinformatics/btp324" - licence: ["GPL-3.0-or-later"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - sai: - type: file - description: Single or paired SA coordinate files - pattern: "*.sai" - -authors: - - "@jfy133" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf deleted file mode 100644 index 8d2e56d..0000000 --- a/modules/nf-core/bwa/index/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process BWA_INDEX { - tag "$fasta" - label 'process_single' - - conda "bioconda::bwa=0.7.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'biocontainers/bwa:0.7.17--hed695b0_7' }" - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path(bwa) , emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - mkdir bwa - bwa \\ - index \\ - $args \\ - -p bwa/${fasta.baseName} \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - """ - mkdir bwa - - touch bwa/genome.amb - touch bwa/genome.ann - touch bwa/genome.bwt - touch bwa/genome.pac - touch bwa/genome.sa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml deleted file mode 100644 index 2c6cfcd..0000000 --- a/modules/nf-core/bwa/index/meta.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: bwa_index -description: Create BWA index for reference genome -keywords: - - index - - fasta - - genome - - reference -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://www.htslib.org/doc/samtools.html - arxiv: arXiv:1303.3997 - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input genome fasta file -output: - - meta: - type: map - description: | - Groovy Map containing reference information. - e.g. 
[ id:'test', single_end:false ] - - index: - type: file - description: BWA genome index files - pattern: "*.{amb,ann,bwt,pac,sa}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@maxulysse" diff --git a/modules/nf-core/bwa/sampe/bwa-sampe.diff b/modules/nf-core/bwa/sampe/bwa-sampe.diff deleted file mode 100644 index a245279..0000000 --- a/modules/nf-core/bwa/sampe/bwa-sampe.diff +++ /dev/null @@ -1,23 +0,0 @@ -Changes in module 'nf-core/bwa/sampe' ---- modules/nf-core/bwa/sampe/main.nf -+++ modules/nf-core/bwa/sampe/main.nf -@@ -39,4 +39,17 @@ - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -+ -+ stub: -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ -+ """ -+ touch ${prefix}.bam -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -+ END_VERSIONS -+ """ - } - -************************************************************ diff --git a/modules/nf-core/bwa/sampe/main.nf b/modules/nf-core/bwa/sampe/main.nf deleted file mode 100644 index d3eb613..0000000 --- a/modules/nf-core/bwa/sampe/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process BWA_SAMPE { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : - 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" - - input: - tuple val(meta), path(reads), path(sai) - tuple val(meta2), path(index) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def read_group = meta.read_group ? "-r ${meta.read_group}" : "" - - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - - bwa sampe \\ - $args \\ - $read_group \\ - \$INDEX \\ - $sai \\ - $reads | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/sampe/meta.yml b/modules/nf-core/bwa/sampe/meta.yml deleted file mode 100644 index 0cefb96..0000000 --- a/modules/nf-core/bwa/sampe/meta.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: bwa_sampe -description: Convert paired-end bwa SA coordinate files to SAM format -keywords: - - bwa - - aln - - short-read - - align - - reference - - fasta - - map - - sam - - bam -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. 
- homepage: http://bio-bwa.sourceforge.net/ - documentation: http://bio-bwa.sourceforge.net/ - doi: "10.1093/bioinformatics/btp324" - licence: ["GPL-3.0-or-later"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information. - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: FASTQ files specified alongside meta in input channel. - pattern: "*.{fastq,fq}.gz" - - sai: - type: file - description: SAI file specified alongside meta and reads in input channel. - pattern: "*.sai" - - meta2: - type: map - description: | - Groovy Map containing reference information. - e.g. [ id:'test', single_end:false ] - - index: - type: directory - description: Directory containing BWA index files (amb,ann,bwt,pac,sa) from BWA_INDEX - pattern: "bwa/" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: BAM file - pattern: "*.bam" - -authors: - - "@jfy133" diff --git a/modules/nf-core/bwa/samse/bwa-samse.diff b/modules/nf-core/bwa/samse/bwa-samse.diff deleted file mode 100644 index 0f8c8c8..0000000 --- a/modules/nf-core/bwa/samse/bwa-samse.diff +++ /dev/null @@ -1,23 +0,0 @@ -Changes in module 'nf-core/bwa/samse' ---- modules/nf-core/bwa/samse/main.nf -+++ modules/nf-core/bwa/samse/main.nf -@@ -39,4 +39,17 @@ - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -+ -+ stub: -+ def prefix = task.ext.prefix ?: "${meta.id}" -+ -+ """ -+ touch ${prefix}.bam -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') -+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') -+ END_VERSIONS -+ """ - } - 
-************************************************************ diff --git a/modules/nf-core/bwa/samse/main.nf b/modules/nf-core/bwa/samse/main.nf deleted file mode 100644 index 10457a2..0000000 --- a/modules/nf-core/bwa/samse/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -process BWA_SAMSE { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : - 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" - - input: - tuple val(meta), path(reads), path(sai) - tuple val(meta2), path(index) - - output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def read_group = meta.read_group ? 
"-r ${meta.read_group}" : "" - - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` - - bwa samse \\ - $args \\ - $read_group \\ - \$INDEX \\ - $sai \\ - $reads | samtools sort -@ ${task.cpus - 1} -O bam - > ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - touch ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/samse/meta.yml b/modules/nf-core/bwa/samse/meta.yml deleted file mode 100644 index 1deab21..0000000 --- a/modules/nf-core/bwa/samse/meta.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: bwa_samse -description: Convert bwa SA coordinate file to SAM format -keywords: - - bwa - - aln - - short-read - - align - - reference - - fasta - - map - - sam - - bam - -tools: - - bwa: - description: | - BWA is a software package for mapping DNA sequences against - a large reference genome, such as the human genome. - homepage: http://bio-bwa.sourceforge.net/ - documentation: http://bio-bwa.sourceforge.net/ - doi: "10.1093/bioinformatics/btp324" - licence: ["GPL-3.0-or-later"] - -input: - - meta: - type: map - description: | - Groovy Map containing sample information. - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: FASTQ files specified alongside meta in input channel. - pattern: "*.{fastq,fq}.gz" - - sai: - type: file - description: SAI file specified alongside meta and reads in input channel. - pattern: "*.sai" - - meta2: - type: map - description: | - Groovy Map containing reference information. - e.g. 
[ id:'test', single_end:false ] - - index: - type: directory - description: Directory containing BWA index files (amb,ann,bwt,pac,sa) from BWA_INDEX - pattern: "bwa/" - -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - bam: - type: file - description: BAM file - pattern: "*.bam" - -authors: - - "@jfy133" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc8727..c9d014b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387b..65d7dd0 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf new file mode 100644 index 0000000..cc42549 --- /dev/null +++ b/modules/nf-core/samtools/merge/main.nf @@ -0,0 +1,56 @@ +process SAMTOOLS_MERGE { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input_files, stageAs: "?/*") + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.{bam,cram,sam}") , optional:true, emit: bam + tuple val(meta), path("*.csi") , optional:true, emit: csi + path "versions.yml" , emit: versions + + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = "bam" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ + --output-fmt BAM \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + $input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" + def file_type = input_files instanceof List ? 
input_files[0].getExtension() : input_files.getExtension() + """ + touch ${prefix}.${file_type} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml new file mode 100644 index 0000000..3a815f7 --- /dev/null +++ b/modules/nf-core/samtools/merge/meta.yml @@ -0,0 +1,73 @@ +name: samtools_merge +description: Merge BAM or CRAM file +keywords: + - merge + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_files: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of the reference file the CRAM was created with (optional) + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - cram: + type: file + description: CRAM file + pattern: "*.{cram}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" +authors: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/samtools-merge.diff b/modules/nf-core/samtools/merge/samtools-merge.diff new file mode 100644 index 0000000..7acd869 --- /dev/null +++ b/modules/nf-core/samtools/merge/samtools-merge.diff @@ -0,0 +1,34 @@ +Changes in module 'nf-core/samtools/merge' +--- modules/nf-core/samtools/merge/main.nf ++++ modules/nf-core/samtools/merge/main.nf +@@ -13,10 +13,9 @@ + tuple val(meta3), path(fai) + + output: +- tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam +- tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram +- tuple val(meta), path("*.csi") , optional:true, emit: csi +- path "versions.yml" , emit: versions ++ tuple val(meta), path("*.{bam,cram,sam}") , optional:true, emit: bam ++ tuple val(meta), path("*.csi") , optional:true, emit: csi ++ path "versions.yml" , emit: versions + + + when: +@@ -25,12 +24,13 @@ + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" +- def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() ++ def file_type = "bam" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + merge \\ + --threads ${task.cpus-1} \\ ++ --output-fmt BAM \\ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ + +************************************************************ diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf deleted file mode 100644 index dcb77ae..0000000 --- a/modules/nf-core/trimgalore/main.nf +++ /dev/null @@ -1,75 +0,0 @@ -process TRIMGALORE { - tag "$meta.id" - label 'process_high' - - conda "bioconda::trim-galore=0.6.7" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log , optional: true - tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html , optional: true - tuple val(meta), path("*.zip") , emit: zip , optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (meta.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 8) cores = 8 - } - - // Added soft-links to original fastqs for consistent naming in MultiQC - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def args_list = args.split("\\s(?=--)").toList() - args_list.removeAll { it.toLowerCase().contains('_r2 ') } - """ 
- [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - trim_galore \\ - ${args_list.join(' ')} \\ - --cores $cores \\ - --gzip \\ - ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - trim_galore \\ - $args \\ - --cores $cores \\ - --paired \\ - --gzip \\ - ${prefix}_1.fastq.gz \\ - ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') - cutadapt: \$(cutadapt --version) - END_VERSIONS - """ - } -} diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml deleted file mode 100644 index f84c4d7..0000000 --- a/modules/nf-core/trimgalore/meta.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: trimgalore -description: Trim FastQ files using Trim Galore! -keywords: - - trimming - - adapters - - sequencing adapters - - fastq -tools: - - trimgalore: - description: | - A wrapper tool around Cutadapt and FastQC to consistently apply quality - and adapter trimming to FastQ files, with some extra functionality for - MspI-digested RRBS-type (Reduced Representation Bisufite-Seq) libraries. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ - documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. 
-output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input adapter trimmed FastQ files of size 1 and 2 for - single-end and paired-end data, respectively. - pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" - - unpaired: - type: file - description: | - FastQ files containing unpaired reads from read 1 or read 2 - pattern: "*unpaired*.fq.gz" - - html: - type: file - description: FastQC report (optional) - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive (optional) - pattern: "*_{fastqc.zip}" - - log: - type: file - description: Trim Galore! trimming report - pattern: "*_{report.txt}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/ucsc/wigtobigwig/main.nf b/modules/nf-core/ucsc/wigtobigwig/main.nf index 493cf57..c5f215b 100644 --- a/modules/nf-core/ucsc/wigtobigwig/main.nf +++ b/modules/nf-core/ucsc/wigtobigwig/main.nf @@ -3,10 +3,10 @@ process UCSC_WIGTOBIGWIG { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::ucsc-wigtobigwig=377" + conda "bioconda::ucsc-wigtobigwig=447" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ucsc-wigtobigwig:377--h0b8a92a_2' : - 'biocontainers/ucsc-wigtobigwig:377--h0b8a92a_2' }" + 'https://depot.galaxyproject.org/singularity/ucsc-wigtobigwig:447--h2a80c09_1' : + 'biocontainers/ucsc-wigtobigwig:447--h2a80c09_1' }" input: tuple val(meta), path(wig) @@ -22,7 +22,7 @@ process UCSC_WIGTOBIGWIG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ wigToBigWig \\ $args \\ @@ -38,7 +38,7 @@ process UCSC_WIGTOBIGWIG { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ touch ${prefix}.bw diff --git a/modules/nf-core/ucsc/wigtobigwig/meta.yml b/modules/nf-core/ucsc/wigtobigwig/meta.yml index a597fde..470967d 100644 --- a/modules/nf-core/ucsc/wigtobigwig/meta.yml +++ b/modules/nf-core/ucsc/wigtobigwig/meta.yml @@ -3,6 +3,7 @@ description: Convert ascii format wig file to binary big wig format keywords: - wig - bigwig + - ucsc tools: - ucsc: description: | diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf deleted file mode 100644 index 8cd1856..0000000 --- a/modules/nf-core/untar/main.nf +++ /dev/null @@ -1,63 +0,0 @@ -process UNTAR { - tag "$archive" - label 'process_single' - - conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$prefix"), emit: untar - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) - - """ - mkdir $prefix - - ## Ensures --strip-components only applied when top level of tar contents is a directory - ## If just files or multiple directories, place all in prefix - if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then - tar \\ - -C $prefix --strip-components 1 \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - else - tar \\ - -C $prefix \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) - """ - mkdir $prefix - touch ${prefix}/file.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml deleted file mode 100644 index db241a6..0000000 --- a/modules/nf-core/untar/meta.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: untar -description: Extract files. -keywords: - - untar - - uncompress - - extract -tools: - - untar: - description: | - Extract tar.gz files. - documentation: https://www.gnu.org/software/tar/manual/ - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - archive: - type: file - description: File to be untar - pattern: "*.{tar}.{gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - untar: - type: directory - description: Directory containing contents of archive - pattern: "*/" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@matthdsm" - - "@jfy133" diff --git a/nextflow.config b/nextflow.config index e252e55..5080f7a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,6 +14,8 @@ params { blacklist = null bin_sizes = "1000,500,50,30,15,10,5,1" annotation_genome = null + species = null + // References genome = null @@ -238,7 +240,7 @@ manifest { description = """A nextflow pipeline for creating bin annotations for qDNAseq""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.1.0' + version = '1.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 8c57e12..b902b0d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -53,7 +53,7 @@ "type": "object", "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", - "required": ["genome"], + "required": ["genome", "species"], "properties": { "genome": { "type": "string", @@ -65,6 +65,11 @@ "default": null, "description": "The name of the genome used to create the annotations. This will default to the value supplied with --genome." }, + "species": { + "type": "string", + "description": "Name of the species. 
Needs to be in this format: Hsapiens (First name as a capital letter and last name as all lowercase letters)", + "fa_icon": "fas fa-book" + }, "fasta": { "type": "string", "format": "file-path", @@ -84,13 +89,6 @@ "description": "Path to FASTA genome index file.", "fa_icon": "far fa-file-code" }, - "bwa": { - "type": "string", - "format": "path", - "mimetype": "text/plain", - "description": "The BWA index.", - "fa_icon": "far fa-file-code" - }, "blacklist": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/prep_alignments/main.nf b/subworkflows/local/prep_alignments/main.nf index 01afd5e..81c410c 100644 --- a/subworkflows/local/prep_alignments/main.nf +++ b/subworkflows/local/prep_alignments/main.nf @@ -2,66 +2,73 @@ // Prepare the alignment files // -include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' -include { BWA_ALN } from '../../../modules/nf-core/bwa/aln/main' -include { BWA_SAMSE } from '../../../modules/nf-core/bwa/samse/main' -include { BWA_SAMPE } from '../../../modules/nf-core/bwa/sampe/main' +include { SAMTOOLS_MERGE } from '../../../modules/nf-core/samtools/merge/main' include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_CONVERT } from '../../../modules/nf-core/samtools/convert/main' workflow PREP_ALIGNMENTS { take: - ch_fastq // channel: [ val(meta), path(fastq_1), path(fastq_2)] - ch_bwa_index // channel: [ val(meta2), path(index) ] + ch_cram // channel: [ val(meta), path(cram), path(crai)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] main: ch_versions = Channel.empty() - TRIMGALORE( - ch_fastq - ) - ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) - - BWA_ALN( - TRIMGALORE.out.reads, - ch_bwa_index - ) - ch_versions = ch_versions.mix(BWA_ALN.out.versions.first()) - - ch_fastq - .join(BWA_ALN.out.sai, failOnDuplicate:true, failOnMismatch:true) - .branch { meta, reads, sai -> - single_end: 
meta.single_end - paired_end: !meta.single_end + ch_cram + .groupTuple() // No size needed here because it cannot create a bottleneck + .branch { meta, cram, crai -> + multiple: cram.size() > 1 + return [ meta, cram ] + single: cram.size() == 1 + return [ meta, cram[0], crai[0] ] } - .set { ch_sai } + .set { ch_merge_input} - BWA_SAMSE( - ch_sai.single_end, - ch_bwa_index + SAMTOOLS_MERGE( + ch_merge_input.multiple, + ch_fasta, + ch_fai ) - ch_versions = ch_versions.mix(BWA_SAMSE.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) + + SAMTOOLS_MERGE.out.bam.map { it + [[]] } + .mix(ch_merge_input.single) + .branch { meta, cram, crai -> + extension = cram.extension + cram: extension == "cram" + bam: extension == "bam" + } + .set { ch_convert_input } - BWA_SAMPE( - ch_sai.paired_end, - ch_bwa_index + SAMTOOLS_CONVERT( + ch_convert_input.cram, + ch_fasta.map { it[1] }, + ch_fai.map{ it[1] } ) - ch_versions = ch_versions.mix(BWA_SAMPE.out.versions.first()) - - BWA_SAMPE.out.bam - .mix(BWA_SAMSE.out.bam) - .set { ch_bams } + ch_versions = ch_versions.mix(SAMTOOLS_CONVERT.out.versions.first()) + + SAMTOOLS_CONVERT.out.alignment_index + .mix(ch_convert_input.bam) + .branch { meta, bam, bai -> + index: bai + no_index: !bai + return [ meta, bam ] + } + .set { ch_index_input } SAMTOOLS_INDEX( - ch_bams + ch_index_input.no_index ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_bams - .join(SAMTOOLS_INDEX.out.index) + ch_index_input.no_index + .join(SAMTOOLS_INDEX.out.index, failOnDuplicate:true, failOnMismatch:true) + .mix(ch_index_input.index) .map { meta, bam, bai -> - [ [id:"bams"], bam, bai] + [ [id:"bams"], bam, bai ] } .groupTuple() .collect() diff --git a/tests/inputs/samplesheet.csv b/tests/inputs/samplesheet.csv index 36d2407..aa49c49 100644 --- a/tests/inputs/samplesheet.csv +++ b/tests/inputs/samplesheet.csv @@ -1,3 +1,3 @@ -sample,fastq_1,fastq_2 
-test1,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz -test2,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz, +cram,crai +https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/cram/test.cram,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/cram/test.cram.crai +https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/cram/test2.cram, diff --git a/tests/main.nf.test b/tests/main.nf.test index 2ece124..456b331 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -10,12 +10,9 @@ nextflow_pipeline { def date = new java.text.SimpleDateFormat("yyyy-MM-dd").format(new Date()) assertAll( { assert workflow.success }, - { assert snapshot( - path("${outputDir}/annotations-${date}/hg38.5kbp.rda"), - path("${outputDir}/annotations-${date}/hg38.10kbp.rda"), - file("${outputDir}/multiqc_reports/multiqc_report.html").exists() - ).match() - } + { assert file("${outputDir}/annotations-${date}/hg38.5kbp.rda").exists() }, + { assert file("${outputDir}/annotations-${date}/hg38.10kbp.rda").exists() }, + { assert file("${outputDir}/multiqc_reports/multiqc_report.html").exists() } ) } diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap deleted file mode 100644 index 085b2d7..0000000 --- a/tests/main.nf.test.snap +++ /dev/null @@ -1,10 +0,0 @@ -{ - "Should run without failures": { - "content": [ - "hg38.5kbp.rda:md5,d41d8cd98f00b204e9800998ecf8427e", - "hg38.10kbp.rda:md5,d41d8cd98f00b204e9800998ecf8427e", - true - ], - "timestamp": "2023-07-04T09:02:29+0000" - } -} \ No 
newline at end of file diff --git a/workflows/qdnaseq.nf b/workflows/qdnaseq.nf index 27806b6..6d6826b 100644 --- a/workflows/qdnaseq.nf +++ b/workflows/qdnaseq.nf @@ -43,8 +43,7 @@ include { PREP_ALIGNMENTS } from '../subworkflows/local/prep_alignment include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' include { TABIX_BGZIP } from '../modules/nf-core/tabix/bgzip/main' -include { BWA_INDEX } from '../modules/nf-core/bwa/index/main' -include { UNTAR } from '../modules/nf-core/untar/main' +include { GET_BSGENOME } from '../modules/local/get_bsgenome/main' include { CREATE_ANNOTATIONS } from '../modules/local/create_annotations/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' @@ -107,44 +106,22 @@ workflow QDNASEQ { .collect() .set { ch_blacklist } - // BWA index - if(!params.bwa) { - BWA_INDEX( - ch_fasta - ) - ch_versions = ch_versions.mix(BWA_INDEX.out.versions) - - BWA_INDEX.out.index.set { ch_bwa_index } - } else { - ch_bwa_index_in = Channel.from([[id:"reference"], file(params.bwa, checkIfExists:true)]) - if(params.bwa.endswith("tar.gz")) { - UNTAR( - ch_bwa_index_in - ) - ch_versions = ch_versions.mix(UNTAR.out.versions) - - UNTAR.out.untar.set { ch_bwa_index } - } else { - ch_bwa_index_in.set { ch_bwa_index } - } - } - // Samplesheet Channel.fromSamplesheet("input", immutable_meta:false) - .map { meta, fastq_1, fastq_2 -> - new_meta = meta + [single_end:fastq_2 ? false : true] - output = fastq_2 ? 
[ new_meta, [fastq_1, fastq_2] ] : [ new_meta, fastq_1 ] - output + .map { cram, crai -> + meta = [id:cram.baseName] + [ meta, cram, crai ] } - .set { ch_fastq } + .set { ch_cram } // - // Prepare the aligment files + // Prepare the alignment files // PREP_ALIGNMENTS( - ch_fastq, - ch_bwa_index, + ch_cram, + ch_fasta, + ch_fai ) ch_versions = ch_versions.mix(PREP_ALIGNMENTS.out.versions) @@ -158,6 +135,17 @@ workflow QDNASEQ { ) ch_versions = ch_versions.mix(FASTA_MAPPABILITY_GENMAP.out.versions) + // + // Get the BSgenome for the genome + // + + GET_BSGENOME( + params.annotation_genome, + params.species, + "./BSgenome.${params.species}.UCSC.${params.annotation_genome}" + ) + ch_versions = ch_versions.mix(GET_BSGENOME.out.versions) + // // Create the qdnaseq annotations // @@ -166,7 +154,8 @@ workflow QDNASEQ { Channel.fromList(params.bin_sizes.tokenize(",")), PREP_ALIGNMENTS.out.bams, FASTA_MAPPABILITY_GENMAP.out.bigwig, - ch_blacklist + ch_blacklist, + GET_BSGENOME.out.genome.collect() ) ch_versions = ch_versions.mix(CREATE_ANNOTATIONS.out.versions.first())