diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7fcc7578a..13d3fcc5a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,8 +12,15 @@ on: - dev env: + NFT_DIFF: "pdiff" + NFT_DIFF_ARGS: "--line-numbers --width 120 --expand-tabs=2" + NFT_VER: "0.9.0" + NFT_WORKDIR: "~" NXF_ANSI_LOG: false - NFTEST_VER: "0.8.1" + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + SENTIEON_LICENSE_BASE64: ${{ secrets.SENTIEON_LICENSE_BASE64 }} + TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" # Cancel if a newer run is started concurrency: @@ -28,7 +35,7 @@ jobs: # Expose matched filters as job 'tags' output variable tags: ${{ steps.filter.outputs.changes }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - uses: frouioui/paths-filter@main id: filter @@ -63,14 +70,10 @@ jobs: - tags: "sentieon/haplotyper_skip_filter" - NXF_VER: "latest-everything" tags: "joint_germline" - env: - NXF_ANSI_LOG: false - TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" - SENTIEON_LICENSE_BASE64: ${{ secrets.SENTIEON_LICENSE_BASE64 }} steps: - name: Check out pipeline code - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Hash Github Workspace id: hash_workspace @@ -78,9 +81,9 @@ jobs: echo "digest=$(echo sarek3_${{ github.workspace }} | md5sum | cut -c 1-25)" >> $GITHUB_OUTPUT - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5 with: - python-version: "3.x" + python-version: "3.11" cache: "pip" cache-dependency-path: | **/requirements.txt @@ -93,30 +96,43 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Setup apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + - name: Set up Singularity 
if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-singularity@v5 - with: - singularity-version: 3.7.1 + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR - name: Set up miniconda if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 with: + miniconda-version: "latest" auto-update-conda: true - channels: conda-forge,bioconda,defaults - python-version: ${{ matrix.python-version }} + channels: conda-forge,bioconda + + - name: Conda setup + if: matrix.profile == 'conda' + run: | + conda clean -a + conda install -n base conda-libmamba-solver + conda config --set solver libmamba + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH - name: Cache test data id: cache-testdata - uses: actions/cache@v3 + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4 with: path: test-datasets/ key: ${{ steps.hash_workspace.outputs.digest }} - name: Check out test data if: steps.cache-testdata.outputs.cache-hit != 'true' - uses: actions/checkout@v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 with: repository: nf-core/test-datasets ref: sarek3 @@ -164,16 +180,17 @@ jobs: - name: Upload logs on failure if: failure() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4 with: name: logs-${{ matrix.profile }} path: | - /home/runner/pytest_workflow_*/*/.nextflow.log - /home/runner/pytest_workflow_*/*/log.out - /home/runner/pytest_workflow_*/*/log.err - /home/runner/pytest_workflow_*/*/work - !/home/runner/pytest_workflow_*/*/work/conda - !/home/runner/pytest_workflow_*/*/work/singularity + /home/ubuntu/pytest_workflow_*/*/.nextflow.log + /home/ubuntu/pytest_workflow_*/*/log.out + /home/ubuntu/pytest_workflow_*/*/log.err + /home/ubuntu/pytest_workflow_*/*/work + !/home/ubuntu/pytest_workflow_*/*/work/conda + 
!/home/ubuntu/pytest_workflow_*/*/work/singularity + !${{ github.workspace }}/.singularity nftest-changes: name: Check for changes (nf-test) @@ -182,15 +199,12 @@ jobs: tags: ${{ steps.filter.outputs.changes }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Combine all tags.yml files id: get_tags run: find . -name "tags.yml" -not -path "./.github/*" -exec cat {} + > .github/tags.yml - - name: debug - run: cat .github/tags.yml - - uses: frouioui/paths-filter@main id: filter with: @@ -224,14 +238,19 @@ jobs: - tags: "bwamem2/mem" - tags: "cat/cat" - tags: "cat/fastq" + - tags: "cnvkit/antitarget" - tags: "cnvkit/batch" - - tags: "deepvariant" + - tags: "cnvkit/reference" + - tags: "controlfreec/assesssignificance" + - tags: "controlfreec/freec" + - tags: "deepvariant/rundeepvariant" - tags: "dragmap/align" - tags: "dragmap/hashtable" - tags: "ensemblvep/download" - tags: "ensemblvep/vep" - tags: "fastp" - tags: "fastqc" + - tags: "fgbio/callmolecularconsensusreads" - tags: "fgbio/fastqtobam" - tags: "freebayes" - tags: "gatk4/applybqsr" @@ -245,15 +264,20 @@ jobs: - tags: "gatk4spark/applybqsr" - tags: "gatk4spark/markduplicates" - tags: "gawk" + - tags: "lofreq/callparallel" - tags: "minimap2/index" - tags: "minimap2/align" - tags: "mosdepth" - tags: "multiqc" + - tags: "ngscheckmate/ncm" - tags: "samblaster" - tags: "samtools/convert" - tags: "samtools/mpileup" - tags: "samtools/stats" + - tags: "sentieon/bwamem" + - tags: "sentieon/haplotyper" - tags: "snpeff/snpeff" + - tags: "spring/decompress" - tags: "strelka/germline" - tags: "strelka/somatic" - tags: "subworkflows/utils_nfvalidation_plugin" @@ -261,19 +285,11 @@ jobs: - tags: "tabix/tabix" - tags: "tiddit/sv" - tags: "untar" - - tags: "pipeline_sarek" - include: - - tags: "pipeline_sarek" - profile: "test,docker" - env: - NXF_ANSI_LOG: false - TEST_DATA_BASE: "${{ github.workspace }}/test-datasets" - SENTIEON_LICENSE_BASE64: ${{ 
secrets.SENTIEON_LICENSE_BASE64 }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - - uses: actions/setup-java@v3 + - uses: actions/setup-java@2dfa2011c5b2a0f1489bf9e433881c92c1631f88 # v4 with: distribution: "temurin" java-version: "17" @@ -283,20 +299,10 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Cache nf-test installation - id: cache-software - uses: actions/cache@v3 - with: - path: | - /usr/local/bin/nf-test - /home/runner/.nf-test/nf-test.jar - key: ${{ runner.os }}-${{ env.NFTEST_VER }}-nftest - - name: Install nf-test - if: steps.cache-software.outputs.cache-hit != 'true' - run: | - wget -qO- https://code.askimed.com/install/nf-test | bash - sudo mv nf-test /usr/local/bin/ + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} - name: Setup apptainer if: matrix.profile == 'singularity' @@ -309,14 +315,15 @@ jobs: mkdir -p $NXF_SINGULARITY_LIBRARYDIR - name: Set up miniconda - uses: conda-incubator/setup-miniconda@v2 + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 with: miniconda-version: "latest" auto-update-conda: true - channels: conda-forge,bioconda,defaults - python-version: ${{ matrix.python-version }} + channels: conda-forge,bioconda - name: Conda setup + if: matrix.profile == 'conda' run: | conda clean -a conda install -n base conda-libmamba-solver @@ -324,7 +331,7 @@ jobs: echo $(realpath $CONDA)/condabin >> $GITHUB_PATH echo $(realpath python) >> $GITHUB_PATH - # Set up secrets + # Set up secrets - name: Set up nextflow secrets if: env.SENTIEON_LICENSE_BASE64 != null run: | @@ -339,13 +346,15 @@ jobs: - name: Disk space cleanup uses: jlumbroso/free-disk-space@v1.3.1 - # Test the module + # Test the component - name: Run nf-test run: | nf-test test \ - --profile=${{ matrix.profile }} \ + --profile="+${{ matrix.profile }}" \ --tag ${{ matrix.tags }} \ --tap=test.tap \ + --junitxml=test.xml 
\ + --debug \ --verbose confirm-pass: diff --git a/.gitignore b/.gitignore index 9bd5a6ee00..c807bd5d3f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ testing* .nf-test* .nf-test/ test-datasets/ +test.tap +test.xml diff --git a/.nf-core.yml b/.nf-core.yml index 4abfaf5a37..e0b3aa1f76 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -6,16 +6,11 @@ lint: - .github/workflows/awsfulltest.yml - .github/workflows/awstest.yml - conf/modules.config - - lib/WorkflowMain.groovy - - lib/NfcoreTemplate.groovy - - lib/WorkflowSarek.groovy files_unchanged: - .gitignore - .github/PULL_REQUEST_TEMPLATE.md - assets/nf-core-sarek_logo_light.png - docs/images/nf-core-sarek_logo_dark.png - docs/images/nf-core-sarek_logo_light.png - - lib/NfcoreTemplate.groovy - - lib/NfcoreSchema.groovy modules_config: False template_strings: False diff --git a/CHANGELOG.md b/CHANGELOG.md index 2642bdccc2..55d93338b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,80 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [1620](https://github.com/nf-core/sarek/pull/1620) - Add `lofreq` as a tumor-only variant caller +- [1642](https://github.com/nf-core/sarek/pull/1642) - Back to dev +- [1653](https://github.com/nf-core/sarek/pull/1653) - Updates `sarek_subway` files with `lofreq` +- [1660](https://github.com/nf-core/sarek/pull/1660) - Add `--length_required` for minimal reads length with `FASTP` +- [1663](https://github.com/nf-core/sarek/pull/1663) - Massive conda modules update + +### Changed + +- [1669](https://github.com/nf-core/sarek/pull/1669) - Better nf-test pipeline level tests + +### Fixed + +- [1656](https://github.com/nf-core/sarek/pull/1656) - Retiring parameter `snpeff_genome` +- [1657](https://github.com/nf-core/sarek/pull/1657) - Update all actions used in the GHA CI +- [1661](https://github.com/nf-core/sarek/pull/1661) - nf-test pipeline level tests +- [1673](https://github.com/nf-core/sarek/pull/1673) - Print warning message
instead of silent error with Nextflow versions prior to 24.08.0-edge + ### Removed + ### Dependencies + | Dependency | Old version | New version | +| ------------- | ----------- | ----------- | +| `deepvariant` | 1.5.0 | 1.6.1 | +| `ensemblvep` | 111.0 | 112.0 | +| `fgbio` | 2.0.2 | 2.1.2 | +| `htslib` | 1.20 | 1.21 | +| `lofreq` | | 2.1.5 | +| `multiqc` | 1.21 | 1.25.1 | +| `samtools` | 1.20 | 1.21 | + ### Parameters + | Params | Status | +| --------------- | ------- | +| `snpeff_db` | Updated | +| `snpeff_genome` | Removed | + ## [3.4.4](https://github.com/nf-core/sarek/releases/tag/3.4.4) - Ruopsokjåkhå + Ruopsokjåkhå is another peak of the Pårte massif. + ### Added + - [1614](https://github.com/nf-core/sarek/pull/1614) - Back to dev +- [1639](https://github.com/nf-core/sarek/pull/1639) - Bump version to prepare release + ### Changed + - [1627](https://github.com/nf-core/sarek/pull/1627) - Correct tower reports/snpeff format + ### Fixed + - [1623](https://github.com/nf-core/sarek/pull/1623) - Update docs to clarify vep cache folder organisation +- [1628](https://github.com/nf-core/sarek/pull/1628) - Fix dbsnp channel mapping in germline variant calling subworkflow + ### Removed + ### Dependencies + | Dependency | Old version | New version | +| ---------- | ----------- | ----------- | + ### Parameters + ## [3.4.3](https://github.com/nf-core/sarek/releases/tag/3.4.3) - Loametjåhkkå + Loametjåhkkå is another one of the main peaks of the Pårte massif.
+ +### Added + - [#1502](https://github.com/nf-core/sarek/pull/1502) - export CNVs into VCF format in `bam_variant_calling_cnvkit` +- [#1534](https://github.com/nf-core/sarek/pull/1534), [#1573](https://github.com/nf-core/sarek/pull/1573) - Handling `.fastq.gz.spring` files as input +- [#1593](https://github.com/nf-core/sarek/pull/1593) - Prepare release `3.4.2` ### Changed @@ -18,14 +91,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1508](https://github.com/nf-core/sarek/pull/1508) - Sync `TEMPLATE` with `tools` `2.14.0` - [#1513](https://github.com/nf-core/sarek/pull/1513) - Back to dev - [#1518](https://github.com/nf-core/sarek/pull/1518) - Sync `TEMPLATE` with `tools` `2.14.1` +- [#1521](https://github.com/nf-core/sarek/pull/1521) - Minor code refactoring to simplify syntax in args handling +- [#1545](https://github.com/nf-core/sarek/pull/1545) - Update modules +- [#1552](https://github.com/nf-core/sarek/pull/1552) - Update samtools to v1.20 +- [#1545](https://github.com/nf-core/sarek/pull/1545) - Update modules +- [#1553](https://github.com/nf-core/sarek/pull/1553) - Update bcftools to v1.20 +- [#1557](https://github.com/nf-core/sarek/pull/1557) - Update ENSEMBLVEP cache to 111 ### Fixed +- [#1536](https://github.com/nf-core/sarek/pull/1536) - Correct typo `Strelka2` to `Strelka` +- [#1541](https://github.com/nf-core/sarek/pull/1541) - Getting bam and bai published in the same folder +- [#1542](https://github.com/nf-core/sarek/pull/1542) - Removing legacy configs of `CUSTOM_DUMPSOFTWAREVERSIONS` +- [#1547](https://github.com/nf-core/sarek/pull/1547) - Correct typo in help text in nextflow_schema.json +- [#1556](https://github.com/nf-core/sarek/pull/1556) - Fix display of some commands in `docs/usage.md` +- [#1563](https://github.com/nf-core/sarek/pull/1563) - Fix `vep_cache_path_full` so that `--refseq/--merged` will work for ENSEMBLVEP +- [#1570](https://github.com/nf-core/sarek/pull/1570) - Remove duplicated notes 
in FASTQC output docs +- [#1596](https://github.com/nf-core/sarek/pull/1596) - Fix haplotypecaller tests +- [#1597](https://github.com/nf-core/sarek/pull/1597) - Fix deepvariant tests +- [#1612](https://github.com/nf-core/sarek/pull/1612) - Remove empty output directories + ### Removed ### Dependencies -### Modules / Subworkflows +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `bcftools` | 1.18 | 1.20 | +| `bwa` | 0.7.17 | 0.7.18 | +| `cnvkit` | 0.9.10 | 0.9.11 | +| `htslib` | 1.19.1 | 1.20 | +| `samtools` | 1.19.2 | 1.20 | ### Parameters diff --git a/CITATIONS.md b/CITATIONS.md index 3f2e58836d..d9fc54eca3 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -150,6 +150,10 @@ > Danecek P, Auton A, Abecasis G, et al.: The variant call format and VCFtools. Bioinformatics. 2011 Aug 1;27(15):2156-8. doi: 10.1093/bioinformatics/btr330. Epub 2011 Jun 7. PubMed PMID: 21653522; PubMed Central PMCID: PMC3137218. +- [Lofreq](https://pubmed.ncbi.nlm.nih.gov/23066108/) + + > Wilm et al. LoFreq: A sequence-quality aware, ultra-sensitive variant caller for uncovering cell-population heterogeneity from high-throughput sequencing datasets. Nucleic Acids Res. 2012; 40(22):11189-201. 
+ ## R packages - [R](https://www.R-project.org/) diff --git a/README.md b/README.md index 64ff612627..37c9bd0a63 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ Depending on the options and samples provided, the pipeline can currently perfor - `Sentieon Haplotyper` - `Strelka2` - `TIDDIT` + - `Lofreq` - Variant filtering and annotation (`SnpEff`, `Ensembl VEP`, `BCFtools annotate`) - Summarise and represent QC (`MultiQC`) @@ -131,6 +132,7 @@ We thank the following people for their extensive assistance in the development - [Abhinav Sharma](https://github.com/abhi18av) - [Adam Talbot](https://github.com/adamrtalbot) - [Adrian Lärkeryd](https://github.com/adrlar) +- [Àitor Olivares](https://github.com/AitorPeseta) - [Alexander Peltzer](https://github.com/apeltzer) - [Alison Meynert](https://github.com/ameynert) - [Anders Sune Pedersen](https://github.com/asp8200) @@ -142,12 +144,15 @@ We thank the following people for their extensive assistance in the development - [Chela James](https://github.com/chelauk) - [David Mas-Ponte](https://github.com/davidmasp) - [Edmund Miller](https://github.com/edmundmiller) +- [Famke Bäuerle](https://github.com/famosab) - [Francesco Lescai](https://github.com/lescai) +- [Francisco Martínez](https://github.com/nevinwu) - [Gavin Mackenzie](https://github.com/GCJMackenzie) - [Gisela Gabernet](https://github.com/ggabernet) - [Grant Neilson](https://github.com/grantn5) - [gulfshores](https://github.com/gulfshores) - [Harshil Patel](https://github.com/drpatelh) +- [Hongwei Ye](https://github.com/YeHW) - [James A. 
Fellows Yates](https://github.com/jfy133) - [Jesper Eisfeldt](https://github.com/J35P312) - [Johannes Alneberg](https://github.com/alneberg) diff --git a/assets/schema_input.json b/assets/schema_input.json index ad0b39f586..ce010b51dd 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,17 +47,20 @@ "pattern": "^\\S+$", "unique": ["patient", "sample"], "anyOf": [ + { + "dependentRequired": ["bam"] + }, { "dependentRequired": ["fastq_1"] }, { - "dependentRequired": ["bam"] + "dependentRequired": ["spring_1"] } ], "meta": ["lane"] }, "fastq_1": { - "errorMessage": "FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "errorMessage": "Gzipped FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "anyOf": [ { "type": "string", @@ -72,7 +75,7 @@ "exists": true }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "errorMessage": "Gzipped FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "dependentRequired": ["fastq_1"], "anyOf": [ { @@ -87,6 +90,37 @@ "format": "file-path", "exists": true }, + "spring_1": { + "errorMessage": "Gzipped and spring-compressed FastQ file for reads 1 cannot contain spaces and must have extension '.fq.gz.spring' or '.fastq.gz.spring'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz.spring$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, + "spring_2": { + "errorMessage": "Gzipped and spring-compressed FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz.spring' or '.fastq.gz.spring'", + "dependentRequired": ["spring_1"], + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz.spring$" + }, + { + "type": "string", + "maxLength": 0 + } + ], + "format": "file-path", + "exists": true + }, "table": { 
"errorMessage": "Recalibration table cannot contain spaces and must have extension '.table'", "anyOf": [ diff --git a/conf/base.config b/conf/base.config index bec2b779f4..21ee2b49fc 100644 --- a/conf/base.config +++ b/conf/base.config @@ -54,7 +54,7 @@ process { withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } - withName: 'UNZIP.*|UNTAR.*|TABIX.*|BUILD_INTERVALS|CREATE_INTERVALS_BED|CUSTOM_DUMPSOFTWAREVERSIONS|VCFTOOLS|BCFTOOLS.*|SAMTOOLS_INDEX' { + withName: 'UNZIP.*|UNTAR.*|TABIX.*|BUILD_INTERVALS|CREATE_INTERVALS_BED|VCFTOOLS|BCFTOOLS.*|SAMTOOLS_INDEX' { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 1.GB * task.attempt, 'memory' ) } } diff --git a/conf/igenomes.config b/conf/igenomes.config index a3f57e6d7e..2ca4531a22 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -36,8 +36,7 @@ params { known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" - snpeff_db = '87' - snpeff_genome = 'GRCh37' + snpeff_db = 'GRCh37.87' vep_cache_version = '111' vep_genome = 'GRCh37' vep_species = 'homo_sapiens' @@ -73,9 +72,8 @@ params { pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" sentieon_dnascope_model = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Sentieon/SentieonDNAscopeModel1.1.model" - snpeff_db = '105' - snpeff_genome = 'GRCh38' - vep_cache_version = '110' + snpeff_db = 'GRCh38.105' + vep_cache_version = '111' 
vep_genome = 'GRCh38' vep_species = 'homo_sapiens' } @@ -84,8 +82,7 @@ params { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - snpeff_db = '87' - snpeff_genome = 'GRCh37' + snpeff_db = 'GRCh37.87' vep_cache_version = '111' vep_genome = 'GRCh37' vep_species = 'homo_sapiens' @@ -94,8 +91,7 @@ params { bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" - snpeff_db = '105' - snpeff_genome = 'GRCh38' + snpeff_db = 'GRCh38.105' vep_cache_version = '111' vep_genome = 'GRCh38' vep_species = 'homo_sapiens' @@ -118,8 +114,7 @@ params { known_indels_tbi = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/MouseGenomeProject/mgp.v5.merged.indels.dbSNP142.normed.vcf.gz.tbi" mappability = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Control-FREEC/GRCm38_68_mm10.gem" readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - snpeff_db = '99' - snpeff_genome = 'GRCm38' + snpeff_db = 'GRCm38.99' vep_cache_version = '102' vep_genome = 'GRCm38' vep_species = 'mus_musculus' @@ -138,8 +133,7 @@ params { bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - snpeff_db = '75' - snpeff_genome = 'UMD3.1' + snpeff_db = 'UMD3.1.75' vep_cache_version = '94' vep_genome = 'UMD3.1' vep_species = 'bos_taurus' @@ -147,8 +141,7 @@ 
params { 'WBcel235' { bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = '105' - snpeff_genome = 'WBcel235' + snpeff_db = 'WBcel235.105' vep_cache_version = '111' vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' @@ -157,8 +150,7 @@ params { bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - snpeff_db = '99' - snpeff_genome = 'CanFam3.1' + snpeff_db = 'CanFam3.1.99' vep_cache_version = '104' vep_genome = 'CanFam3.1' vep_species = 'canis_lupus_familiaris' @@ -215,8 +207,7 @@ params { 'R64-1-1' { bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = '105' - snpeff_genome = 'R64-1-1' + snpeff_db = 'R64-1-1.105' vep_cache_version = '111' vep_genome = 'R64-1-1' vep_species = 'saccharomyces_cerevisiae' @@ -243,8 +234,7 @@ params { 'hg38' { bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - snpeff_db = '105' - snpeff_genome = 'GRCh38' + snpeff_db = 'GRCh38.105' vep_cache_version = '111' vep_genome = 'GRCh38' vep_species = 'homo_sapiens' @@ -253,8 +243,7 @@ params { bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - snpeff_db = '87' - 
snpeff_genome = 'GRCh37' + snpeff_db = 'GRCh37.87' vep_cache_version = '111' vep_genome = 'GRCh37' vep_species = 'homo_sapiens' @@ -263,8 +252,7 @@ params { bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - snpeff_db = '99' - snpeff_genome = 'GRCm38' + snpeff_db = 'GRCm38.99' vep_cache_version = '102' vep_genome = 'GRCm38' vep_species = 'mus_musculus' @@ -334,9 +322,8 @@ params { known_indels_tbi = "${params.igenomes_base}/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" known_indels_vqsr = '--resource:mills,known=false,training=true,truth=true,prior=10.0 mills_and_1000G.indels.vcf.gz' ngscheckmate_bed = "${params.igenomes_base}/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" - snpeff_db = '105' - snpeff_genome = 'WBcel235' - vep_cache_version = '110' + snpeff_db = 'WBcel235.105' + vep_cache_version = '111' vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' } diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config index b02c1b3ef3..6459a686fa 100644 --- a/conf/modules/annotate.config +++ b/conf/modules/annotate.config @@ -64,12 +64,12 @@ process { // BCFTOOLS ANNOTATE if (params.tools && params.tools.split(',').contains('bcfann')) { withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_BCFTOOLS:BCFTOOLS_ANNOTATE' { - ext.args = { '--output-type z' } + ext.args = { '--output-type z --write-index=tbi' } ext.prefix = { input.baseName - '.vcf' + '_BCF.ann' } publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, - pattern: "*{gz}" + pattern: "*{gz,gz.tbi}" ] } } diff --git a/conf/modules/deepvariant.config b/conf/modules/deepvariant.config index 021990f7f6..ef5e31b796 100644 --- a/conf/modules/deepvariant.config 
+++ b/conf/modules/deepvariant.config @@ -15,8 +15,8 @@ process { - withName: 'DEEPVARIANT' { - ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } + withName: 'DEEPVARIANT_RUNDEEPVARIANT' { + ext.args = { params.wes ? "--model_type=WES" : "--model_type=WGS" } ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepvariant" : "${meta.id}.deepvariant.${intervals.baseName}" } ext.when = { params.tools && params.tools.split(',').contains('deepvariant') } publishDir = [ diff --git a/conf/modules/lofreq.config b/conf/modules/lofreq.config new file mode 100644 index 0000000000..253b252b3b --- /dev/null +++ b/conf/modules/lofreq.config @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +//LOFREQ + +process { + if (params.tools && params.tools.split(',').contains('lofreq')) { + + withName: "LOFREQ_CALLPARALLEL" { + ext.args = { "--call-indels" } + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.lofreq" : "${meta.id}.lofreq.${intervals.baseName}" } + ext.when = { params.tools && params.tools.split(',').contains('lofreq') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "lofreq/${meta.id}/${it}" } + ] + } + + withName:'VCFTOOLS_TSTV_COUNT'{ + errorStrategy = 'ignore' + } + + withName: 'MERGE_LOFREQ.*' { + ext.prefix = { "${meta.id}.lofreq" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/lofreq/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + } + +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index f24cb481b7..d87f97174f 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -18,15 +18,6 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/pipeline_info" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // QC withName: 'FASTQC' { ext.args = { '--quiet' } @@ -59,6 +50,13 @@ process { ] } + withName: 'NFCORE_SAREK:SAREK:SPRING_DECOMPRESS_.*' { + ext.prefix = { "${spring.simpleName}" } + publishDir = [ + enabled: false + ] + } + withName: 'MOSDEPTH' { ext.args = { !params.wes ? "-n --fast-mode --by 500" : ""} ext.prefix = { diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index aad4c982ad..c341af88df 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -39,9 +39,9 @@ process { ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/cnvkit" }, + path: { "${params.outdir}/reference" }, pattern: "*{bed}", - saveAs: { params.save_reference || params.build_only_index ? it : null } + saveAs: { params.save_reference || params.build_only_index ? 
"cnvkit/${it}" : null } ] } @@ -50,9 +50,9 @@ process { ext.when = { params.tools && params.tools.split(',').contains('cnvkit') && !params.cnvkit_reference } publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/cnvkit" }, + path: { "${params.outdir}/reference" }, pattern: "*{cnn}", - saveAs: { params.save_reference || params.build_only_index ? it : null } + saveAs: { params.save_reference || params.build_only_index ? "cnvkit/${it}" : null } ] } diff --git a/conf/modules/prepare_intervals.config b/conf/modules/prepare_intervals.config index 77814b5d42..815903b996 100644 --- a/conf/modules/prepare_intervals.config +++ b/conf/modules/prepare_intervals.config @@ -23,18 +23,18 @@ process { withName: 'CREATE_INTERVALS_BED' { publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/intervals" }, + path: { "${params.outdir}/reference" }, pattern: "*bed", - saveAs: { params.save_reference || params.build_only_index ? it : null } + saveAs: { params.save_reference || params.build_only_index ? "intervals/${it}" : null } ] } withName: 'GATK4_INTERVALLISTTOBED' { publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/intervals" }, + path: { "${params.outdir}/reference" }, pattern: "*bed", - saveAs: { params.save_reference || params.build_only_index ? it : null } + saveAs: { params.save_reference || params.build_only_index ? "intervals/${it}" : null } ] } @@ -42,9 +42,9 @@ process { ext.prefix = {"${meta.id}"} publishDir = [ mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/intervals" }, + path: { "${params.outdir}/reference" }, pattern: "*bed.gz", - saveAs: { params.save_reference || params.build_only_index ? it : null } + saveAs: { params.save_reference || params.build_only_index ? 
"intervals/${it}" : null } ] } } diff --git a/conf/modules/trimming.config b/conf/modules/trimming.config index 58be3b2539..0b7eff7b2d 100644 --- a/conf/modules/trimming.config +++ b/conf/modules/trimming.config @@ -16,14 +16,15 @@ process { withName: 'FASTP' { - ext.args = [ '', + ext.args = [ !params.trim_fastq ? '--disable_adapter_trimming' : '', // Disable adapter trimming params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : '', // Remove bp from the 5' end of read 1 params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : '', // Remove bp from the 5' end of read 2 params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : '', // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : '', // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed params.trim_nextseq ? '--trim_poly_g' : '', // Apply the --nextseq=X option, to trim based on quality after removing poly-G tails - params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : '' + params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : '', // Output by limiting lines of each file with this option + params.length_required > 0 ? "--length_required ${params.length_required}": '', // Reads shorter will be discarded ].join(' ').trim() publishDir = [ [ @@ -32,10 +33,10 @@ process { pattern: "*.{html,json,log}" ], [ - path: { "${params.outdir}/preprocessing/fastp/${meta.sample}/" }, + path: { "${params.outdir}/preprocessing/" }, mode: params.publish_dir_mode, pattern: "*.fastp.fastq.gz", - saveAs: { params.save_trimmed || params.save_split_fastqs ? it : null } + saveAs: { params.save_trimmed || params.save_split_fastqs ? 
"fastp/${meta.sample}/${it}" : null } ] ] } diff --git a/conf/modules/umi.config b/conf/modules/umi.config index 7973dd16d8..336a02088f 100644 --- a/conf/modules/umi.config +++ b/conf/modules/umi.config @@ -73,7 +73,7 @@ process { } withName: 'CALLUMICONSENSUS' { - ext.args = { '-M 1 -S Coordinate' } + ext.args = { '-S Coordinate' } ext.prefix = { "${meta.id}_umi-consensus" } publishDir = [ path: { "${params.outdir}/preprocessing/umi/${meta.sample}" }, diff --git a/conf/test.config b/conf/test.config index 9dd0bca957..81567aed43 100644 --- a/conf/test.config +++ b/conf/test.config @@ -19,7 +19,7 @@ params { max_time = '8.h' // Base directory for nf-core/modules test data - modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules' + modules_testdata_base_path = 's3://ngi-igenomes/testdata/nf-core/modules/' // Input data input = "${projectDir}/tests/csv/3.0/fastq_single.csv" diff --git a/conf/test/alignment_from_everything.config b/conf/test/alignment_from_everything.config new file mode 100644 index 0000000000..25abd66cdf --- /dev/null +++ b/conf/test/alignment_from_everything.config @@ -0,0 +1,15 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/sarek -profile test,<extra_test_profile>,<docker/singularity> --outdir <OUTDIR> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params { + input = "${projectDir}/tests/csv/3.0/bam_and_fastq_and_spring.csv" + tools = null +} diff --git a/conf/test/tools_germline_deepvariant.config b/conf/test/tools_germline_deepvariant.config new file mode 100644 index 0000000000..e50a48cbec --- /dev/null +++ b/conf/test/tools_germline_deepvariant.config @@ -0,0 +1,23 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/sarek -profile test,<extra_test_profile>,<docker/singularity> --outdir <OUTDIR> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +params { + input = "${projectDir}/tests/csv/3.0/mapped_single_cram.csv" + genome = null + igenomes_ignore = true + fasta = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta" + fasta_fai = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.fasta.fai" + intervals = "${params.modules_testdata_base_path}/genomics/homo_sapiens/genome/genome.bed" + nucleotides_per_second = 20 + step = 'variant_calling' + tools = null + wes = true +} diff --git a/conf/test/trimming.config b/conf/test/trimming.config index d904d17660..6786564037 100644 --- a/conf/test/trimming.config +++ b/conf/test/trimming.config @@ -14,6 +14,7 @@ params { clip_r2 = 1 three_prime_clip_r1 = 1 three_prime_clip_r2 = 1 + length_required = 50 tools = null trim_fastq = true } diff --git a/docs/images/sarek_subway.png b/docs/images/sarek_subway.png index 02937f57e5..a419afa6ac 100644 Binary files a/docs/images/sarek_subway.png and b/docs/images/sarek_subway.png differ diff --git 
a/docs/images/sarek_subway.svg b/docs/images/sarek_subway.svg index 2a7831b02b..af4544e18e 100644 --- a/docs/images/sarek_subway.svg +++ b/docs/images/sarek_subway.svg @@ -7,8 +7,8 @@ viewBox="7 0 314.58908 220.24502" version="1.1" id="svg5" - inkscape:version="1.3 (0e150ed, 2023-07-21)" - sodipodi:docname="sarek_subway(2).svg" + inkscape:version="1.3.2 (1:1.3.2+202404261509+091e20ef0f)" + sodipodi:docname="sarek_subway.svg" inkscape:export-filename="sarek_subway.png" inkscape:export-xdpi="600" inkscape:export-ydpi="600" @@ -31,14 +31,14 @@ inkscape:pageopacity="1" inkscape:pagecheckerboard="false" inkscape:document-units="mm" - showgrid="false" - inkscape:zoom="1.4142136" - inkscape:cx="521.84479" - inkscape:cy="348.60363" - inkscape:window-width="2560" - inkscape:window-height="1027" - inkscape:window-x="0" - inkscape:window-y="25" + showgrid="true" + inkscape:zoom="1" + inkscape:cx="621.99998" + inkscape:cy="435.49999" + inkscape:window-width="1854" + inkscape:window-height="1011" + inkscape:window-x="1986" + inkscape:window-y="32" inkscape:window-maximized="1" inkscape:current-layer="layer4" width="211mm" @@ -60,13 +60,13 @@ spacingy="1" spacingx="1" units="mm" - visible="false" />haplotypecallermantastrelka2tiddittidditmutect2ascatmsisensorprocontrolfreeccnvkitfreebayesmantastrelka2strelka2lofreqmpileupmpileupSentieon dnascopeMSI + id="tspan4735-8">MSI diff --git a/docs/images/sarek_workflow.png b/docs/images/sarek_workflow.png index 7fb4cd52c2..03709a89d6 100644 Binary files a/docs/images/sarek_workflow.png and b/docs/images/sarek_workflow.png differ diff --git a/docs/images/sarek_workflow.svg b/docs/images/sarek_workflow.svg index 5f4cbd2ddd..fc7e2a8f18 100644 --- a/docs/images/sarek_workflow.svg +++ b/docs/images/sarek_workflow.svg @@ -4,7 +4,7 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +Preprocessing Preprocessing + + + + + + + +bcftools annotate, snpeff, vep bcftools annotate, snpeff, vep + + +Annotation Annotation + + + + + + + +Reports 
Reports + + + + + + + + + + + + + +Variant Calling Variant Calling + + + strelka2 strelka2, lofreq• msisensorpro • msisensorpro + + +Germline Germline + deepvariant, freebayes + deepvariant, freebayes GATK GATK haplotypecaller, haplotypecaller, + mpileup, strelka2, mpileup, strelka2, + Sentieon haplotyper Sentieon haplotyper +• manta, tiddit • manta, tiddit +• cnvkit• cnvkit