From e06695f53e6eb5f8c9bfad8c7355e44398bbfd87 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 14:16:12 +0100 Subject: [PATCH 01/49] starting to reorganize conf files --- conf/modules/aligner.config | 80 +++++++++++++++++++ conf/{ => modules}/modules.config | 0 conf/{ => test}/test.config | 0 .../{ => test}/test_alignment_to_fastq.config | 0 conf/{ => test}/test_annotation.config | 0 conf/{ => test}/test_full.config | 0 conf/{ => test}/test_full_germline.config | 0 .../{ => test}/test_markduplicates_bam.config | 0 .../test_markduplicates_cram.config | 0 conf/{ => test}/test_no_intervals.config | 0 conf/{ => test}/test_pair.config | 0 .../test_prepare_recalibration_bam.config | 0 .../test_prepare_recalibration_cram.config | 0 conf/{ => test}/test_recalibrate_bam.config | 0 conf/{ => test}/test_recalibrate_cram.config | 0 conf/{ => test}/test_save_bam_mapped.config | 0 conf/{ => test}/test_skip_bqsr.config | 0 .../test_skip_markduplicates.config | 0 conf/{ => test}/test_split_fastq.config | 0 conf/{ => test}/test_targeted.config | 0 conf/{ => test}/test_tools.config | 0 conf/{ => test}/test_tools_germline.config | 0 conf/{ => test}/test_tools_somatic.config | 0 .../test_tools_somatic_ascat.config | 0 conf/{ => test}/test_tools_tumoronly.config | 0 conf/{ => test}/test_trimming.config | 0 conf/{ => test}/test_umi.config | 0 conf/{ => test}/test_use_gatk_spark.config | 0 .../test_variantcalling_channels.config | 0 nextflow.config | 57 ++++++------- 30 files changed, 109 insertions(+), 28 deletions(-) create mode 100644 conf/modules/aligner.config rename conf/{ => modules}/modules.config (100%) rename conf/{ => test}/test.config (100%) rename conf/{ => test}/test_alignment_to_fastq.config (100%) rename conf/{ => test}/test_annotation.config (100%) rename conf/{ => test}/test_full.config (100%) rename conf/{ => test}/test_full_germline.config (100%) rename conf/{ => test}/test_markduplicates_bam.config (100%) rename conf/{ => 
test}/test_markduplicates_cram.config (100%) rename conf/{ => test}/test_no_intervals.config (100%) rename conf/{ => test}/test_pair.config (100%) rename conf/{ => test}/test_prepare_recalibration_bam.config (100%) rename conf/{ => test}/test_prepare_recalibration_cram.config (100%) rename conf/{ => test}/test_recalibrate_bam.config (100%) rename conf/{ => test}/test_recalibrate_cram.config (100%) rename conf/{ => test}/test_save_bam_mapped.config (100%) rename conf/{ => test}/test_skip_bqsr.config (100%) rename conf/{ => test}/test_skip_markduplicates.config (100%) rename conf/{ => test}/test_split_fastq.config (100%) rename conf/{ => test}/test_targeted.config (100%) rename conf/{ => test}/test_tools.config (100%) rename conf/{ => test}/test_tools_germline.config (100%) rename conf/{ => test}/test_tools_somatic.config (100%) rename conf/{ => test}/test_tools_somatic_ascat.config (100%) rename conf/{ => test}/test_tools_tumoronly.config (100%) rename conf/{ => test}/test_trimming.config (100%) rename conf/{ => test}/test_umi.config (100%) rename conf/{ => test}/test_use_gatk_spark.config (100%) rename conf/{ => test}/test_variantcalling_channels.config (100%) diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config new file mode 100644 index 0000000000..2c0be33c7c --- /dev/null +++ b/conf/modules/aligner.config @@ -0,0 +1,80 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +process { + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/pipeline_info" }, + pattern: '*_versions.yml' + ] + } + +// MAPPING + + if (params.step == 'mapping') { + withName: "BWAMEM1_MEM" { + ext.when = { params.aligner == "bwa-mem" } + } + withName: "BWAMEM2_MEM" { + ext.when = { params.aligner == "bwa-mem2" } + } + + withName: "DRAGMAP_ALIGN" { + ext.when = { params.aligner == "dragmap" } + ext.args = { "--RGSM ${meta.read_group}" } + } + + withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { + // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof + // However if it's skipped, reads need to be coordinate-sorted + // Only name sort if Spark for Markduplicates + duplicate marking is not skipped + ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*bam", + // Only save if (save_mapped OR (no_markduplicates AND save_output_as_bam)) AND only a single BAM file per sample + saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? 
"mapped/${meta.id}/${it}" : null } + ] + } + + withName: "BWAMEM.*_MEM" { + // Using -B 3 for tumor samples + ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } + } + } + + withName: 'MERGE_BAM|INDEX_MERGE_BAM' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*{bam,bai}", + // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped )) + saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null } + ] + } + + withName: 'MERGE_BAM' { + ext.prefix = { "${meta.id}.sorted" } + } +} diff --git a/conf/modules.config b/conf/modules/modules.config similarity index 100% rename from conf/modules.config rename to conf/modules/modules.config diff --git a/conf/test.config b/conf/test/test.config similarity index 100% rename from conf/test.config rename to conf/test/test.config diff --git a/conf/test_alignment_to_fastq.config b/conf/test/test_alignment_to_fastq.config similarity index 100% rename from conf/test_alignment_to_fastq.config rename to conf/test/test_alignment_to_fastq.config diff --git a/conf/test_annotation.config b/conf/test/test_annotation.config similarity index 100% rename from conf/test_annotation.config rename to conf/test/test_annotation.config diff --git a/conf/test_full.config b/conf/test/test_full.config similarity index 100% rename from conf/test_full.config rename to conf/test/test_full.config diff --git a/conf/test_full_germline.config b/conf/test/test_full_germline.config similarity index 100% rename from conf/test_full_germline.config rename to conf/test/test_full_germline.config diff --git a/conf/test_markduplicates_bam.config b/conf/test/test_markduplicates_bam.config similarity index 100% rename from conf/test_markduplicates_bam.config rename to conf/test/test_markduplicates_bam.config 
diff --git a/conf/test_markduplicates_cram.config b/conf/test/test_markduplicates_cram.config similarity index 100% rename from conf/test_markduplicates_cram.config rename to conf/test/test_markduplicates_cram.config diff --git a/conf/test_no_intervals.config b/conf/test/test_no_intervals.config similarity index 100% rename from conf/test_no_intervals.config rename to conf/test/test_no_intervals.config diff --git a/conf/test_pair.config b/conf/test/test_pair.config similarity index 100% rename from conf/test_pair.config rename to conf/test/test_pair.config diff --git a/conf/test_prepare_recalibration_bam.config b/conf/test/test_prepare_recalibration_bam.config similarity index 100% rename from conf/test_prepare_recalibration_bam.config rename to conf/test/test_prepare_recalibration_bam.config diff --git a/conf/test_prepare_recalibration_cram.config b/conf/test/test_prepare_recalibration_cram.config similarity index 100% rename from conf/test_prepare_recalibration_cram.config rename to conf/test/test_prepare_recalibration_cram.config diff --git a/conf/test_recalibrate_bam.config b/conf/test/test_recalibrate_bam.config similarity index 100% rename from conf/test_recalibrate_bam.config rename to conf/test/test_recalibrate_bam.config diff --git a/conf/test_recalibrate_cram.config b/conf/test/test_recalibrate_cram.config similarity index 100% rename from conf/test_recalibrate_cram.config rename to conf/test/test_recalibrate_cram.config diff --git a/conf/test_save_bam_mapped.config b/conf/test/test_save_bam_mapped.config similarity index 100% rename from conf/test_save_bam_mapped.config rename to conf/test/test_save_bam_mapped.config diff --git a/conf/test_skip_bqsr.config b/conf/test/test_skip_bqsr.config similarity index 100% rename from conf/test_skip_bqsr.config rename to conf/test/test_skip_bqsr.config diff --git a/conf/test_skip_markduplicates.config b/conf/test/test_skip_markduplicates.config similarity index 100% rename from conf/test_skip_markduplicates.config 
rename to conf/test/test_skip_markduplicates.config diff --git a/conf/test_split_fastq.config b/conf/test/test_split_fastq.config similarity index 100% rename from conf/test_split_fastq.config rename to conf/test/test_split_fastq.config diff --git a/conf/test_targeted.config b/conf/test/test_targeted.config similarity index 100% rename from conf/test_targeted.config rename to conf/test/test_targeted.config diff --git a/conf/test_tools.config b/conf/test/test_tools.config similarity index 100% rename from conf/test_tools.config rename to conf/test/test_tools.config diff --git a/conf/test_tools_germline.config b/conf/test/test_tools_germline.config similarity index 100% rename from conf/test_tools_germline.config rename to conf/test/test_tools_germline.config diff --git a/conf/test_tools_somatic.config b/conf/test/test_tools_somatic.config similarity index 100% rename from conf/test_tools_somatic.config rename to conf/test/test_tools_somatic.config diff --git a/conf/test_tools_somatic_ascat.config b/conf/test/test_tools_somatic_ascat.config similarity index 100% rename from conf/test_tools_somatic_ascat.config rename to conf/test/test_tools_somatic_ascat.config diff --git a/conf/test_tools_tumoronly.config b/conf/test/test_tools_tumoronly.config similarity index 100% rename from conf/test_tools_tumoronly.config rename to conf/test/test_tools_tumoronly.config diff --git a/conf/test_trimming.config b/conf/test/test_trimming.config similarity index 100% rename from conf/test_trimming.config rename to conf/test/test_trimming.config diff --git a/conf/test_umi.config b/conf/test/test_umi.config similarity index 100% rename from conf/test_umi.config rename to conf/test/test_umi.config diff --git a/conf/test_use_gatk_spark.config b/conf/test/test_use_gatk_spark.config similarity index 100% rename from conf/test_use_gatk_spark.config rename to conf/test/test_use_gatk_spark.config diff --git a/conf/test_variantcalling_channels.config 
b/conf/test/test_variantcalling_channels.config similarity index 100% rename from conf/test_variantcalling_channels.config rename to conf/test/test_variantcalling_channels.config diff --git a/nextflow.config b/nextflow.config index 77f49e208a..8fe5f8ae43 100644 --- a/nextflow.config +++ b/nextflow.config @@ -203,35 +203,35 @@ profiles { executor.name = 'local' } // Basic test profile for CI - test { includeConfig 'conf/test.config' } + test { includeConfig 'conf/test/test.config' } // Extra test profiles for full tests on AWS - test_full { includeConfig 'conf/test_full.config' } - test_full_germline { includeConfig 'conf/test_full_germline.config' } + test_full { includeConfig 'conf/test/test_full.config' } + test_full_germline { includeConfig 'conf/test/test_full_germline.config' } // Extra test profiles for more complete CI - alignment_to_fastq { includeConfig 'conf/test_alignment_to_fastq.config' } - annotation { includeConfig 'conf/test_annotation.config' } - markduplicates_bam { includeConfig 'conf/test_markduplicates_bam.config' } - markduplicates_cram { includeConfig 'conf/test_markduplicates_cram.config' } - no_intervals { includeConfig 'conf/test_no_intervals.config' } - pair { includeConfig 'conf/test_pair.config' } - prepare_recalibration_bam { includeConfig 'conf/test_prepare_recalibration_bam.config' } - prepare_recalibration_cram { includeConfig 'conf/test_prepare_recalibration_cram.config' } - recalibrate_bam { includeConfig 'conf/test_recalibrate_bam.config' } - recalibrate_cram { includeConfig 'conf/test_recalibrate_cram.config' } - save_bam_mapped { includeConfig 'conf/test_save_bam_mapped.config' } - skip_bqsr { includeConfig 'conf/test_skip_bqsr.config' } - skip_markduplicates { includeConfig 'conf/test_skip_markduplicates.config' } - split_fastq { includeConfig 'conf/test_split_fastq.config' } - targeted { includeConfig 'conf/test_targeted.config' } - tools { includeConfig 'conf/test_tools.config' } - tools_germline { includeConfig 
'conf/test_tools_germline.config' } - tools_somatic { includeConfig 'conf/test_tools_somatic.config' } - tools_somatic_ascat { includeConfig 'conf/test_tools_somatic_ascat.config' } - tools_tumoronly { includeConfig 'conf/test_tools_tumoronly.config' } - trimming { includeConfig 'conf/test_trimming.config' } - umi { includeConfig 'conf/test_umi.config' } - use_gatk_spark { includeConfig 'conf/test_use_gatk_spark.config' } - variantcalling_channels { includeConfig 'conf/test_variantcalling_channels.config' } + alignment_to_fastq { includeConfig 'conf/test/test_alignment_to_fastq.config' } + annotation { includeConfig 'conf/test/test_annotation.config' } + markduplicates_bam { includeConfig 'conf/test/test_markduplicates_bam.config' } + markduplicates_cram { includeConfig 'conf/test/test_markduplicates_cram.config' } + no_intervals { includeConfig 'conf/test/test_no_intervals.config' } + pair { includeConfig 'conf/test/test_pair.config' } + prepare_recalibration_bam { includeConfig 'conf/test/test_prepare_recalibration_bam.config' } + prepare_recalibration_cram { includeConfig 'conf/test/test_prepare_recalibration_cram.config' } + recalibrate_bam { includeConfig 'conf/test/test_recalibrate_bam.config' } + recalibrate_cram { includeConfig 'conf/test/test_recalibrate_cram.config' } + save_bam_mapped { includeConfig 'conf/test/test_save_bam_mapped.config' } + skip_bqsr { includeConfig 'conf/test/test_skip_bqsr.config' } + skip_markduplicates { includeConfig 'conf/test/test_skip_markduplicates.config' } + split_fastq { includeConfig 'conf/test/test_split_fastq.config' } + targeted { includeConfig 'conf/test/test_targeted.config' } + tools { includeConfig 'conf/test/test_tools.config' } + tools_germline { includeConfig 'conf/test/test_tools_germline.config' } + tools_somatic { includeConfig 'conf/test/test_tools_somatic.config' } + tools_somatic_ascat { includeConfig 'conf/test/test_tools_somatic_ascat.config' } + tools_tumoronly { includeConfig 
'conf/test/test_tools_tumoronly.config' } + trimming { includeConfig 'conf/test/test_trimming.config' } + umi { includeConfig 'conf/test/test_umi.config' } + use_gatk_spark { includeConfig 'conf/test/test_use_gatk_spark.config' } + variantcalling_channels { includeConfig 'conf/test/test_variantcalling_channels.config' } } // Load igenomes.config if required @@ -285,7 +285,8 @@ manifest { } // Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +includeConfig 'conf/modules/modules.config' +includeConfig 'conf/modules/aligner.config' // Function to ensure that resource requirements don't go beyond // a maximum limit From 5fee072bf9a6840bdc9c2cf92380382714844ec9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 14:20:29 +0100 Subject: [PATCH 02/49] fix lint --- .nf-core.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.nf-core.yml b/.nf-core.yml index 4120c0234c..11d28f00cd 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -3,6 +3,9 @@ lint: files_unchanged: - assets/multiqc_config.yml - assets/nf-core-sarek_logo_light.png + - conf/modules.config + - conf/test.config + - conf/test_full.config - docs/images/nf-core-sarek_logo_dark.png - docs/images/nf-core-sarek_logo_light.png - lib/NfcoreTemplate.groovy From c2f6ee9b3623614134ea0fecd18290d04059118f Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 14:52:16 +0100 Subject: [PATCH 03/49] fix lint --- .nf-core.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 11d28f00cd..230681c724 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,11 +1,12 @@ repository_type: pipeline lint: - files_unchanged: - - assets/multiqc_config.yml - - assets/nf-core-sarek_logo_light.png + files_exist: - conf/modules.config - conf/test.config - conf/test_full.config + files_unchanged: + - assets/multiqc_config.yml + - assets/nf-core-sarek_logo_light.png - docs/images/nf-core-sarek_logo_dark.png - 
docs/images/nf-core-sarek_logo_light.png - lib/NfcoreTemplate.groovy From 2e728fc60bf69d4ec9c4028725594f9625424ee5 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 14:58:17 +0100 Subject: [PATCH 04/49] add first tests --- .github/workflows/ci.yml | 85 -------------- .github/workflows/pytest-workflow.yml | 111 ++++++++++++++++++ tests/configs/pytest_tags.yml | 14 +++ tests/test_aligner_bwamem.yml | 75 ++++++++++++ tests/test_aligner_bwamem2.yml | 75 ++++++++++++ ...t_aligner.yml => test_aligner_dragmap.yml} | 75 ------------ 6 files changed, 275 insertions(+), 160 deletions(-) create mode 100644 .github/workflows/pytest-workflow.yml create mode 100644 tests/configs/pytest_tags.yml create mode 100644 tests/test_aligner_bwamem.yml create mode 100644 tests/test_aligner_bwamem2.yml rename tests/{test_aligner.yml => test_aligner_dragmap.yml} (54%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a2d96f13b..0de1750e41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -107,88 +107,3 @@ jobs: /home/runner/pytest_workflow_*/*/work !/home/runner/pytest_workflow_*/*/work/conda !/home/runner/pytest_workflow_*/*/work/singularity - - test_all: - name: Run pipeline with test data (complete) - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/sarek') }}" - runs-on: ubuntu-latest - strategy: - # HACK Remove after DSL2 rewrite is done - fail-fast: false - matrix: - NXF_VER: - - "21.10.3" - test: - - "aligner" - - "alignment_to_fastq" - - "annotation" - - "cnvkit" - - "controlfreec" - - "deepvariant" - - "freebayes" - - "gatk4_spark" - - "haplotypecaller" - - "intervals" - - "manta" - - "markduplicates" - - "mpileup" - - "msisensorpro" - - "mutect2" - - "prepare_recalibration" - - "recalibrate" - - "save_mapped" - - "save_output_as_bam" - - "skip_markduplicates" - - "skip_qc" - - "split_fastq" - - "strelka" - 
- "strelkabp" - - "targeted" - - "tiddit" - - "trimming" - - "tumor_normal_pair" - - "umi" - - "variantcalling_channel" - profile: ["docker"] - steps: - - name: Check out pipeline code - uses: actions/checkout@v2 - - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: "3.x" - - - name: Install dependencies - run: python -m pip install --upgrade pip pytest-workflow - - - name: Run pipeline with tests settings - uses: Wandalen/wretry.action@v1.0.11 - with: - command: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.test }} --symlink --kwdof --git-aware --color=yes - attempt_limit: 3 - - - name: Output log on failure - if: failure() - run: | - sudo apt install bat > /dev/null - batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} - - - name: Upload logs on failure - if: failure() - uses: actions/upload-artifact@v2 - with: - name: logs-${{ matrix.profile }} - path: | - /home/runner/pytest_workflow_*/*/.nextflow.log - /home/runner/pytest_workflow_*/*/log.out - /home/runner/pytest_workflow_*/*/log.err - /home/runner/pytest_workflow_*/*/work - !/home/runner/pytest_workflow_*/*/work/conda - !/home/runner/pytest_workflow_*/*/work/singularity diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml new file mode 100644 index 0000000000..4f75ec6d2c --- /dev/null +++ b/.github/workflows/pytest-workflow.yml @@ -0,0 +1,111 @@ +name: pytest-workflow +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +on: + pull_request: + branches: + - dev + +env: + NXF_ANSI_LOG: false + +jobs: + changes: + name: Check for changes + runs-on: ubuntu-latest + outputs: + # Expose matched filters as job 'modules' output variable + tags: ${{ steps.filter.outputs.changes }} + steps: + - uses: actions/checkout@v2 + + - 
uses: dorny/paths-filter@v2 + id: filter + with: + filters: "tests/config/pytest_tags.yml" + + test_changes: + name: ${{ matrix.tags }} ${{ matrix.profile }} + runs-on: ubuntu-20.04 + needs: changes + if: needs.changes.outputs.tags != '[]' + strategy: + fail-fast: false + matrix: + tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] + profile: ["docker", "singularity", "conda"] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.x" + + - uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install Python dependencies + run: python -m pip install --upgrade pip pytest-workflow + + - uses: actions/cache@v2 + with: + path: /usr/local/bin/nextflow + key: ${{ runner.os }} + restore-keys: | + ${{ runner.os }}-nextflow- + + - name: Install Nextflow + env: + CAPSULE_LOG: none + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Set up Singularity + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-singularity@v5 + with: + singularity-version: 3.7.1 + + - name: Set up miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + channels: conda-forge,bioconda,defaults + python-version: ${{ matrix.python-version }} + + - name: Conda clean + if: matrix.profile == 'conda' + run: conda clean -a + + # Test the module + - name: Run pytest-workflow with tests settings + uses: Wandalen/wretry.action@v1.0.11 + with: + command: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.test }} --symlink --kwdof --git-aware --color=yes + attempt_limit: 3 + + - name: Output log on failure + if: failure() + run: | + sudo apt-get install bat > /dev/null + batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} + + - 
name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-${{ matrix.profile }} + path: | + /home/runner/pytest_workflow_*/*/.nextflow.log + /home/runner/pytest_workflow_*/*/log.out + /home/runner/pytest_workflow_*/*/log.err + /home/runner/pytest_workflow_*/*/work + !/home/runner/pytest_workflow_*/*/work/conda + !/home/runner/pytest_workflow_*/*/work/singularity diff --git a/tests/configs/pytest_tags.yml b/tests/configs/pytest_tags.yml new file mode 100644 index 0000000000..3f53b469dc --- /dev/null +++ b/tests/configs/pytest_tags.yml @@ -0,0 +1,14 @@ +bwamem: + - conf/modules/aligner.config + - modules/nf-core/bwa/mem/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + +bwamem2: + - conf/modules/aligner.config + - modules/nf-core/bwamem2/mem/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + +dragmap: + - conf/modules/aligner.config + - modules/nf-core/dragmap/align/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf diff --git a/tests/test_aligner_bwamem.yml b/tests/test_aligner_bwamem.yml new file mode 100644 index 0000000000..bf3950851c --- /dev/null +++ b/tests/test_aligner_bwamem.yml @@ -0,0 +1,75 @@ +- name: Run bwamem + command: nextflow run main.nf -profile test --aligner bwa-mem --save_reference + tags: + - aligner + - bwamem + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changing on reruns + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changing on reruns + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: 
results/preprocessing/recalibrated/test/test.recal.cram + # binary changing on reruns + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changing on reruns + - path: results/reference/bwamem/genome.fasta.0123 + md5sum: d73300d44f733bcdb7c988fc3ff3e3e9 + - path: results/reference/bwamem/genome.fasta.amb + md5sum: 1891c1de381b3a96d4e72f590fde20c1 + - path: results/reference/bwamem/genome.fasta.ann + md5sum: 2df4aa2d7580639fa0fcdbcad5e2e969 + - path: results/reference/bwamem/genome.fasta.bwt.2bit.64 + md5sum: cd4bdf496eab05228a50c45ee43c1ed0 + - path: results/reference/bwamem/genome.fasta.pac + md5sum: 8569fbdb2c98c6fb16dfa73d8eacb070 + - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi + md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 + - path: results/reference/dict/genome.dict + md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 + - path: results/reference/fai/genome.fasta.fai + md5sum: 3520cd30e1b100e55f578db9c855f685 + - path: results/reference/intervals/chr22_1-40001.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: results/reference/intervals/chr22_1-40001.bed.gz + md5sum: d3341fa28986c40b24fcc10a079dbb80 + - path: results/reference/intervals/genome.bed + md5sum: a87dc7d20ebca626f65cc16ff6c97a3e + - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi + md5sum: 1bb7ab8f22eb798efd796439d3b29b7a + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 
38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 diff --git a/tests/test_aligner_bwamem2.yml b/tests/test_aligner_bwamem2.yml new file mode 100644 index 0000000000..c93dab0339 --- /dev/null +++ b/tests/test_aligner_bwamem2.yml @@ -0,0 +1,75 @@ +- name: Run bwamem2 + command: nextflow run main.nf -profile test --aligner bwa-mem2 --save_reference + tags: + - aligner + - bwamem2 + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changing on reruns + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changing on reruns + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changing on reruns + - path: 
results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changing on reruns + - path: results/reference/bwamem2/genome.fasta.0123 + md5sum: d73300d44f733bcdb7c988fc3ff3e3e9 + - path: results/reference/bwamem2/genome.fasta.amb + md5sum: 1891c1de381b3a96d4e72f590fde20c1 + - path: results/reference/bwamem2/genome.fasta.ann + md5sum: 2df4aa2d7580639fa0fcdbcad5e2e969 + - path: results/reference/bwamem2/genome.fasta.bwt.2bit.64 + md5sum: cd4bdf496eab05228a50c45ee43c1ed0 + - path: results/reference/bwamem2/genome.fasta.pac + md5sum: 8569fbdb2c98c6fb16dfa73d8eacb070 + - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi + md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 + - path: results/reference/dict/genome.dict + md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 + - path: results/reference/fai/genome.fasta.fai + md5sum: 3520cd30e1b100e55f578db9c855f685 + - path: results/reference/intervals/chr22_1-40001.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: results/reference/intervals/chr22_1-40001.bed.gz + md5sum: d3341fa28986c40b24fcc10a079dbb80 + - path: results/reference/intervals/genome.bed + md5sum: a87dc7d20ebca626f65cc16ff6c97a3e + - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi + md5sum: 1bb7ab8f22eb798efd796439d3b29b7a + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: 
b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 diff --git a/tests/test_aligner.yml b/tests/test_aligner_dragmap.yml similarity index 54% rename from tests/test_aligner.yml rename to tests/test_aligner_dragmap.yml index 22b2406dc8..9950a62dcd 100644 --- a/tests/test_aligner.yml +++ b/tests/test_aligner_dragmap.yml @@ -1,78 +1,3 @@ -- name: Run bwa-mem2 - command: nextflow run main.nf -profile test --aligner bwa-mem2 --save_reference - tags: - - aligner - - bwa-mem2 - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changing on reruns - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changing on reruns - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changing on reruns - - path: 
results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changing on reruns - - path: results/reference/bwamem2/genome.fasta.0123 - md5sum: d73300d44f733bcdb7c988fc3ff3e3e9 - - path: results/reference/bwamem2/genome.fasta.amb - md5sum: 1891c1de381b3a96d4e72f590fde20c1 - - path: results/reference/bwamem2/genome.fasta.ann - md5sum: 2df4aa2d7580639fa0fcdbcad5e2e969 - - path: results/reference/bwamem2/genome.fasta.bwt.2bit.64 - md5sum: cd4bdf496eab05228a50c45ee43c1ed0 - - path: results/reference/bwamem2/genome.fasta.pac - md5sum: 8569fbdb2c98c6fb16dfa73d8eacb070 - - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi - md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 - - path: results/reference/dict/genome.dict - md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 - - path: results/reference/fai/genome.fasta.fai - md5sum: 3520cd30e1b100e55f578db9c855f685 - - path: results/reference/intervals/chr22_1-40001.bed - md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - - path: results/reference/intervals/chr22_1-40001.bed.gz - md5sum: d3341fa28986c40b24fcc10a079dbb80 - - path: results/reference/intervals/genome.bed - md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi - md5sum: 1bb7ab8f22eb798efd796439d3b29b7a - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: 
b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 - name: Run dragmap command: nextflow run main.nf -profile test,docker --aligner dragmap --save_reference tags: From a0173b8246ed0e42ee7729b68dae0dc0ad743245 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 15:00:24 +0100 Subject: [PATCH 05/49] fix path --- tests/{configs => config}/pytest_tags.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{configs => config}/pytest_tags.yml (100%) diff --git a/tests/configs/pytest_tags.yml b/tests/config/pytest_tags.yml similarity index 100% rename from tests/configs/pytest_tags.yml rename to tests/config/pytest_tags.yml From 72b4140cdc19d250bac3ea7c6a9ac3cd67d61d3b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 15:09:14 +0100 Subject: [PATCH 06/49] fix workflow --- .github/workflows/pytest-workflow.yml | 29 ++++++++++++++++----------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index 4f75ec6d2c..0857401604 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -2,30 +2,32 @@ name: pytest-workflow # This workflow runs the pipeline with the 
minimal test dataset to check that it completes without any syntax errors on: pull_request: - branches: - - dev + branches: [dev] -env: - NXF_ANSI_LOG: false +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true jobs: changes: name: Check for changes runs-on: ubuntu-latest outputs: - # Expose matched filters as job 'modules' output variable + # Expose matched filters as job 'tags' output variable tags: ${{ steps.filter.outputs.changes }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: dorny/paths-filter@v2 id: filter with: filters: "tests/config/pytest_tags.yml" - test_changes: - name: ${{ matrix.tags }} ${{ matrix.profile }} + test: runs-on: ubuntu-20.04 + + name: ${{ matrix.tags }} ${{ matrix.profile }} needs: changes if: needs.changes.outputs.tags != '[]' strategy: @@ -33,9 +35,11 @@ jobs: matrix: tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] profile: ["docker", "singularity", "conda"] + env: + NXF_ANSI_LOG: false steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v2 @@ -85,16 +89,17 @@ jobs: run: conda clean -a # Test the module - - name: Run pytest-workflow with tests settings + - name: Run pytest-workflow + # only use one thread for pytest-workflow to avoid race condition on conda cache. 
uses: Wandalen/wretry.action@v1.0.11 with: - command: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.test }} --symlink --kwdof --git-aware --color=yes + command: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof --git-aware --color=yes attempt_limit: 3 - name: Output log on failure if: failure() run: | - sudo apt-get install bat > /dev/null + sudo apt install bat > /dev/null batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err} - name: Upload logs on failure From 34965c0fab6c66100209c37858b88d2ed726086b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:03:21 +0100 Subject: [PATCH 07/49] fixing aligner tests --- conf/modules/aligner.config | 18 +-- conf/modules/modules.config | 174 +---------------------------- conf/modules/prepare_genome.config | 137 +++++++++++++++++++++++ nextflow.config | 1 + 4 files changed, 140 insertions(+), 190 deletions(-) create mode 100644 conf/modules/prepare_genome.config diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config index 2c0be33c7c..5a378698a1 100644 --- a/conf/modules/aligner.config +++ b/conf/modules/aligner.config @@ -11,25 +11,9 @@ ---------------------------------------------------------------------------------------- */ -process { - - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/pipeline_info" }, - pattern: '*_versions.yml' - ] - } - // MAPPING +process { if (params.step == 'mapping') { withName: "BWAMEM1_MEM" { ext.when = { params.aligner == "bwa-mem" } diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 096ebb8b37..6995896abc 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -28,128 +28,7 @@ process { ] } -// PREPARE_GENOME - - withName: 'BWAMEM1_INDEX' { - ext.when = { !params.bwa && params.step == "mapping" && params.aligner == "bwa-mem" } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "bwa" - ] - } - - withName: 'BWAMEM2_INDEX' { - ext.when = { !params.bwamem2 && params.step == "mapping" && params.aligner == "bwa-mem2" } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "bwamem2" - ] - } - - withName: 'DRAGMAP_HASHTABLE' { - ext.when = { !params.dragmap && params.step == "mapping" && params.aligner == "dragmap" } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference" }, - pattern: "dragmap" - ] - } - - withName: 'GATK4_CREATESEQUENCEDICTIONARY' { - ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dict" }, - pattern: "*dict" - ] - } - - withName: 'MSISENSORPRO_SCAN' { - ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/msi" }, - pattern: "*list" - ] - } - - withName: 
'SAMTOOLS_FAIDX' { - ext.when = { !params.fasta_fai && params.step != "annotate" } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/fai" }, - pattern: "*fai" - ] - } - - withName: 'TABIX_DBSNP' { - ext.when = { !params.dbsnp_tbi && params.dbsnp && (params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('mutect2')) } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/dbsnp" }, - pattern: "*vcf.gz.tbi" - ] - } - - withName: 'TABIX_GERMLINE_RESOURCE' { - ext.when = { !params.germline_resource_tbi && params.germline_resource && params.tools && params.tools.split(',').contains('mutect2') } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/germline_resource" }, - pattern: "*vcf.gz.tbi" - ] - } - - withName: 'TABIX_KNOWN_INDELS' { - ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && params.tools.split(',').contains('haplotypecaller')) ) } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/known_indels" }, - pattern: "*vcf.gz.tbi" - ] - } - - withName: 'TABIX_KNOWN_SNPS' { - ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && params.tools.split(',').contains('haplotypecaller')) ) } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/known_snps" }, - 
pattern: "*vcf.gz.tbi" - ] - } - - withName: 'TABIX_PON' { - ext.when = { !params.pon_tbi && params.pon && params.tools && params.tools.split(',').contains('mutect2') } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/pon" }, - pattern: "*vcf.gz.tbi" - ] - } - - withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { - ext.when = { params.tools && params.tools.split(',').contains('ascat')} - publishDir = [ - enabled: false - ] - } - - withName: 'UNTAR_CHR_DIR' { - ext.when = { params.tools && params.tools.split(',').contains('controlfreec')} - } +// PREPARE REFERENCE CNVKIT withName: 'CNVKIT_ANTITARGET' { ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } @@ -172,7 +51,6 @@ process { ] } - // PREPARE INTERVALS withName: 'CREATE_INTERVALS_BED' { @@ -369,56 +247,6 @@ process { ] } -// MAPPING - - if (params.step == 'mapping') { - withName: "BWAMEM1_MEM" { - ext.when = { params.aligner == "bwa-mem" } - } - withName: "BWAMEM2_MEM" { - ext.when = { params.aligner == "bwa-mem2" } - } - - withName: "DRAGMAP_ALIGN" { - ext.when = { params.aligner == "dragmap" } - ext.args = { "--RGSM ${meta.read_group}" } - } - - withName: "(BWAMEM.*_MEM|DRAGMAP_ALIGN)" { - // Markduplicates Spark NEEDS name-sorted reads or runtime goes through the roof - // However if it's skipped, reads need to be coordinate-sorted - // Only name sort if Spark for Markduplicates + duplicate marking is not skipped - ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.split(',').contains('markduplicates'))) ? '-n' : '' } - ext.prefix = { params.split_fastq > 1 ? 
"${meta.id}".concat('.').concat(reads.get(0).name.tokenize('.')[0]) : "${meta.id}.sorted" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*bam", - // Only save if (save_mapped OR (no_markduplicates AND save_output_as_bam)) AND only a single BAM file per sample - saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) && (meta.size * meta.numLanes == 1) ? "mapped/${meta.id}/${it}" : null } - ] - } - - withName: "BWAMEM.*_MEM" { - // Using -B 3 for tumor samples - ext.args = { meta.status == 1 ? "-K 100000000 -Y -B 3 -R ${meta.read_group}" : "-K 100000000 -Y -R ${meta.read_group}" } - } - } - - withName: 'MERGE_BAM|INDEX_MERGE_BAM' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*{bam,bai}", - // Only save if (save_output_as_bam AND (no_markduplicates OR save_mapped )) - saveAs: { (params.save_output_as_bam && (params.save_mapped || params.skip_tools && params.skip_tools.split(',').contains('markduplicates'))) ? "mapped/${meta.id}/${it}" : null } - ] - } - - withName: 'MERGE_BAM' { - ext.prefix = { "${meta.id}.sorted" } - } - // MARKDUPLICATES withName: 'CRAM_TO_BAM' { diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config new file mode 100644 index 0000000000..eebc9e9b7c --- /dev/null +++ b/conf/modules/prepare_genome.config @@ -0,0 +1,137 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_GENOME + +process { + withName: 'BWAMEM1_INDEX' { + ext.when = { !params.bwa && params.step == "mapping" && params.aligner == "bwa-mem" } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "bwa" + ] + } + + withName: 'BWAMEM2_INDEX' { + ext.when = { !params.bwamem2 && params.step == "mapping" && params.aligner == "bwa-mem2" } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "bwamem2" + ] + } + + withName: 'DRAGMAP_HASHTABLE' { + ext.when = { !params.dragmap && params.step == "mapping" && params.aligner == "dragmap" } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference" }, + pattern: "dragmap" + ] + } + + withName: 'GATK4_CREATESEQUENCEDICTIONARY' { + ext.when = { !params.dict && params.step != "annotate" && params.step != "controlfreec" } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/dict" }, + pattern: "*dict" + ] + } + + withName: 'MSISENSORPRO_SCAN' { + ext.when = { params.tools && params.tools.split(',').contains('msisensorpro') } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/msi" }, + pattern: "*list" + ] + } + + withName: 'SAMTOOLS_FAIDX' { + ext.when = { !params.fasta_fai && params.step != "annotate" } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/fai" }, + pattern: "*fai" + ] + } + + withName: 'TABIX_DBSNP' { + 
ext.when = { !params.dbsnp_tbi && params.dbsnp && (params.step == "mapping" || params.step == "markduplicates" || params.step == "prepare_recalibration") || params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('haplotypecaller') || params.tools.split(',').contains('mutect2')) } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/dbsnp" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_GERMLINE_RESOURCE' { + ext.when = { !params.germline_resource_tbi && params.germline_resource && params.tools && params.tools.split(',').contains('mutect2') } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/germline_resource" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_KNOWN_INDELS' { + ext.when = { !params.known_indels_tbi && params.known_indels && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && params.tools.split(',').contains('haplotypecaller')) ) } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/known_indels" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_KNOWN_SNPS' { + ext.when = { !params.known_snps_tbi && params.known_snps && (params.step == 'mapping' || params.step == "markduplicates" || params.step == 'prepare_recalibration' || (params.tools && params.tools.split(',').contains('haplotypecaller')) ) } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/known_snps" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'TABIX_PON' { + ext.when = { !params.pon_tbi && params.pon && params.tools && params.tools.split(',').contains('mutect2') } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { 
"${params.outdir}/reference/pon" }, + pattern: "*vcf.gz.tbi" + ] + } + + withName: 'UNZIP_ALLELES|UNZIP_LOCI|UNZIP_GC|UNZIP_RT' { + ext.when = { params.tools && params.tools.split(',').contains('ascat')} + publishDir = [ + enabled: false + ] + } + + withName: 'UNTAR_CHR_DIR' { + ext.when = { params.tools && params.tools.split(',').contains('controlfreec')} + } +} diff --git a/nextflow.config b/nextflow.config index 8fe5f8ae43..fe0e40f232 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,6 +287,7 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/modules.config' includeConfig 'conf/modules/aligner.config' +includeConfig 'conf/modules/prepare_genome.config' // Function to ensure that resource requirements don't go beyond // a maximum limit From 10850421f3bf7ef2a2db6f8cc41eb6765589ccce Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:17:14 +0100 Subject: [PATCH 08/49] fix aligner tests --- tests/test_aligner_bwamem.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_aligner_bwamem.yml b/tests/test_aligner_bwamem.yml index bf3950851c..feaacf5435 100644 --- a/tests/test_aligner_bwamem.yml +++ b/tests/test_aligner_bwamem.yml @@ -22,16 +22,16 @@ # binary changing on reruns - path: results/preprocessing/recalibrated/test/test.recal.cram.crai # binary changing on reruns - - path: results/reference/bwamem/genome.fasta.0123 - md5sum: d73300d44f733bcdb7c988fc3ff3e3e9 - - path: results/reference/bwamem/genome.fasta.amb + - path: results/reference/bwa/genome.amb md5sum: 1891c1de381b3a96d4e72f590fde20c1 - - path: results/reference/bwamem/genome.fasta.ann + - path: results/reference/bwa/genome.ann md5sum: 2df4aa2d7580639fa0fcdbcad5e2e969 - - path: results/reference/bwamem/genome.fasta.bwt.2bit.64 - md5sum: cd4bdf496eab05228a50c45ee43c1ed0 - - path: results/reference/bwamem/genome.fasta.pac + - path: results/reference/bwa/genome.bwt + md5sum: 
815eded87e4cb6b0f1daab5c4d6e30af + - path: results/reference/bwa/genome.pac md5sum: 8569fbdb2c98c6fb16dfa73d8eacb070 + - path: results/reference/bwa/genome.sa + md5sum: e7cff62b919448a3a3d0fe4aaf427594 - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 - path: results/reference/dict/genome.dict From 31229509a72781ff71fdd5e8d0ad9f9071d23607 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:27:11 +0100 Subject: [PATCH 09/49] update annotation tests --- conf/modules/annotate.config | 94 +++++++++++++++++++ conf/modules/modules.config | 80 ---------------- nextflow.config | 3 +- tests/config/pytest_tags.yml | 21 +++++ ...notation.yml => test_annotation_merge.yml} | 65 ------------- tests/test_annotation_snpeff.yml | 29 ++++++ tests/test_annotation_vep.yml | 36 +++++++ 7 files changed, 182 insertions(+), 146 deletions(-) create mode 100644 conf/modules/annotate.config rename tests/{test_annotation.yml => test_annotation_merge.yml} (55%) create mode 100644 tests/test_annotation_snpeff.yml create mode 100644 tests/test_annotation_vep.yml diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config new file mode 100644 index 0000000000..26671b16cb --- /dev/null +++ b/conf/modules/annotate.config @@ -0,0 +1,94 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// ANNOTATE + +process { + // SNPEFF + if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { + withName: 'SNPEFF' { + ext.prefix = { "${vcf.baseName.minus(".vcf")}_snpEff" } + ext.args = '-nodownload -canon -v' + if (!params.snpeff_cache) container = { params.snpeff_genome ? "nfcore/snpeff:${params.snpeff_version}.${params.snpeff_genome}" : "nfcore/snpeff:${params.snpeff_version}.${params.genome}" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/snpeff/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{csv,html,genes.txt}", + saveAs: { params.tools.split(',').contains('snpeff') ? it : null } + ] + ] + } + } + + // VEP + if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { + withName: 'ENSEMBLVEP' { + ext.args = { [ + '--everything --filter_common --per_gene --total_length --offline --format vcf', + (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', + (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', + (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', + (params.vep_spliceregion) ? '--plugin SpliceRegion' : '', + (params.vep_out_format) ? 
"--${params.vep_out_format}" : '--vcf' + ].join(' ').trim() } + // If just VEP: _VEP.ann.vcf + ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" } + if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:${params.vep_version}.${params.vep_genome}" : "nfcore/vep:${params.vep_version}.${params.genome}" } + publishDir = [ + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, + pattern: "*html" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{json,tab}" + ] + ] + } + } + + // SNPEFF THEN VEP + if (params.tools && params.tools.split(',').contains('merge')) { + withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_MERGE:ENSEMBLVEP" { + // If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab + ext.prefix = { "${vcf.baseName.minus(".ann.vcf")}_VEP" } + } + } + + // ALL ANNOTATION TOOLS + if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { + withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:.*:TABIX_BGZIPTABIX" { + ext.prefix = { input.baseName.minus(".vcf") } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}" + ] + } + } + + if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { + withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, + pattern: "*{gz,gz.tbi}", + saveAs: { params.tools.split(',').contains('snpeff') ? 
it : null } + ] + } + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 6995896abc..f821507185 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -1130,84 +1130,4 @@ process{ withName: 'VCFTOOLS_SUMMARY' { ext.args = "--FILTER-summary" } - -// ANNOTATE - - // SNPEFF - if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { - withName: 'SNPEFF' { - ext.prefix = { "${vcf.baseName.minus(".vcf")}_snpEff" } - ext.args = '-nodownload -canon -v' - if (!params.snpeff_cache) container = { params.snpeff_genome ? "nfcore/snpeff:${params.snpeff_version}.${params.snpeff_genome}" : "nfcore/snpeff:${params.snpeff_version}.${params.genome}" } - publishDir = [ - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/snpeff/${meta.variantcaller}/${meta.id}/" }, - pattern: "*{csv,html,genes.txt}", - saveAs: { params.tools.split(',').contains('snpeff') ? it : null } - ] - ] - } - } - - // VEP - if (params.tools && (params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { - withName: 'ENSEMBLVEP' { - ext.args = { [ - '--everything --filter_common --per_gene --total_length --offline --format vcf', - (params.vep_dbnsfp && params.dbnsfp && !params.dbnsfp_consequence) ? "--plugin dbNSFP,${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', - (params.vep_dbnsfp && params.dbnsfp && params.dbnsfp_consequence) ? "--plugin dbNSFP,'consequence=${params.dbnsfp_consequence}',${params.dbnsfp.split("/")[-1]},${params.dbnsfp_fields}" : '', - (params.vep_loftee) ? "--plugin LoF,loftee_path:/opt/conda/envs/nf-core-vep-${params.vep_version}/share/ensembl-vep-${params.vep_version}-0" : '', - (params.vep_spliceai && params.spliceai_snv && params.spliceai_indel) ? "--plugin SpliceAI,snv=${params.spliceai_snv.split("/")[-1]},indel=${params.spliceai_indel.split("/")[-1]}" : '', - (params.vep_spliceregion) ? 
'--plugin SpliceRegion' : '', - (params.vep_out_format) ? "--${params.vep_out_format}" : '--vcf' - ].join(' ').trim() } - // If just VEP: _VEP.ann.vcf - ext.prefix = { "${vcf.baseName.minus(".vcf")}_VEP" } - if (!params.vep_cache) container = { params.vep_genome ? "nfcore/vep:${params.vep_version}.${params.vep_genome}" : "nfcore/vep:${params.vep_version}.${params.genome}" } - publishDir = [ - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/EnsemblVEP/${meta.variantcaller}/${meta.id}/" }, - pattern: "*html" - ], - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, - pattern: "*{json,tab}" - ] - ] - } - } - - // SNPEFF THEN VEP - if (params.tools && params.tools.split(',').contains('merge')) { - withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_MERGE:ENSEMBLVEP" { - // If merge: Output file will have format *_snpEff_VEP.ann.vcf, *_snpEff_VEP.ann.json or *_snpEff_VEP.ann.tab - ext.prefix = { "${vcf.baseName.minus(".ann.vcf")}_VEP" } - } - } - - // ALL ANNOTATION TOOLS - if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('vep') || params.tools.split(',').contains('merge'))) { - withName: "NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:.*:TABIX_BGZIPTABIX" { - ext.prefix = { input.baseName.minus(".vcf") } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, - pattern: "*{gz,gz.tbi}" - ] - } - } - - if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { - withName: 'NFCORE_SAREK:SAREK:VCF_ANNOTATE_ALL:VCF_ANNOTATE_SNPEFF:TABIX_BGZIPTABIX' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/annotation/${meta.variantcaller}/${meta.id}/" }, - pattern: "*{gz,gz.tbi}", - saveAs: { params.tools.split(',').contains('snpeff') ? 
it : null } - ] - } - } } diff --git a/nextflow.config b/nextflow.config index fe0e40f232..611b161f16 100644 --- a/nextflow.config +++ b/nextflow.config @@ -286,8 +286,9 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/modules.config' -includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/prepare_genome.config' +includeConfig 'conf/modules/aligner.config' +includeConfig 'conf/modules/annotate.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 3f53b469dc..a46df9c30b 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -12,3 +12,24 @@ dragmap: - conf/modules/aligner.config - modules/nf-core/dragmap/align/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + +snpeff: + - conf/modules/annotate.config + - modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_snpeff/main.nf + +merge: + - conf/modules/annotate.config + - modules/nf-core/ensemblvep/main' + - modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_all/main.nf + - subworkflows/local/vcf_annotate_ensemblvep/main.nf + - subworkflows/local/vcf_annotate_snpeff/main.nf + +vep: + - conf/modules/annotate.config + - modules/nf-core/ensemblvep/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_ensemblvep/main.nf diff --git a/tests/test_annotation.yml b/tests/test_annotation_merge.yml similarity index 55% rename from tests/test_annotation.yml rename to tests/test_annotation_merge.yml index 487a2b270b..a10b50c112 100644 --- a/tests/test_annotation.yml +++ b/tests/test_annotation_merge.yml @@ -1,56 +1,3 @@ -- name: Run snpEff - command: nextflow run main.nf -profile test,annotation --tools snpeff - tags: - - annotation - - snpeff - files: - - path: 
results/annotation/test/test_snpEff.ann.vcf.gz - md5sum: 01f24fdd76f73eefd695beea7b3d3d8e - - path: results/annotation/test/test_snpEff.ann.vcf.gz.tbi - md5sum: 51e418d9be9bb33f1d4123493b15b6c9 - - path: results/multiqc - - path: results/reports/snpeff/test/snpEff_summary.html - # snpEff_summary.html changes md5sums on reruns. - contains: [" Genome total length ", " 100,286,402 ", " MT192765.1 "] - - path: results/reports/snpeff/test/test_snpEff.csv - # test_snpEff.csv changes md5sums on reruns. - contains: - [ - "Values , 50,100", - "Count , 1,8", - "Reference , 0", - "Het , 1", - "Hom , 8", - "Missing , 0", - "MT192765.1, Position,0,1", - "MT192765.1,Count,0,0", - ] - - path: results/reports/snpeff/test/test_snpEff.genes.txt - md5sum: 130536bf0237d7f3f746d32aaa32840a -- name: Run VEP - command: nextflow run main.nf -profile test,annotation --tools vep --skip_tools multiqc - tags: - - annotation - - vep - files: - - path: results/annotation/test/test_VEP.ann.vcf.gz - # binary changes md5sums on reruns. - - path: results/annotation/test/test_VEP.ann.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/reports/EnsemblVEP/test/test_VEP.summary.html - # test_VEP.summary.html changes md5sums on reruns. - contains: - [ - "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf", - "General statistics", - "Lines of input read", - "Variants processed", - "Variants filtered out", - "Novel / existing variants", - "Overlapped genes", - "Overlapped transcripts", - "Overlapped regulatory features", - ] - name: Run snpEff followed by VEP command: nextflow run main.nf -profile test,annotation --tools merge --skip_tools multiqc tags: @@ -123,15 +70,3 @@ # text-based file changes md5sums on reruns. 
- path: results/reports/snpeff/test/test_snpEff.genes.txt md5sum: 130536bf0237d7f3f746d32aaa32840a -- name: Run VEP with fasta - command: nextflow run main.nf -profile test,annotation --tools vep --vep_include_fasta --skip_tools multiqc - tags: - - annotation - - vep - files: - - path: results/annotation/test/test_VEP.ann.vcf.gz - # binary changes md5sums on reruns. - - path: results/annotation/test/test_VEP.ann.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/reports/EnsemblVEP/test/test_VEP.summary.html - # text-based file changes md5sums on reruns. diff --git a/tests/test_annotation_snpeff.yml b/tests/test_annotation_snpeff.yml new file mode 100644 index 0000000000..3fd4d64d69 --- /dev/null +++ b/tests/test_annotation_snpeff.yml @@ -0,0 +1,29 @@ +- name: Run snpEff + command: nextflow run main.nf -profile test,annotation --tools snpeff + tags: + - annotation + - snpeff + files: + - path: results/annotation/test/test_snpEff.ann.vcf.gz + md5sum: 01f24fdd76f73eefd695beea7b3d3d8e + - path: results/annotation/test/test_snpEff.ann.vcf.gz.tbi + md5sum: 51e418d9be9bb33f1d4123493b15b6c9 + - path: results/multiqc + - path: results/reports/snpeff/test/snpEff_summary.html + # snpEff_summary.html changes md5sums on reruns. + contains: [" Genome total length ", " 100,286,402 ", " MT192765.1 "] + - path: results/reports/snpeff/test/test_snpEff.csv + # test_snpEff.csv changes md5sums on reruns. 
+ contains: + [ + "Values , 50,100", + "Count , 1,8", + "Reference , 0", + "Het , 1", + "Hom , 8", + "Missing , 0", + "MT192765.1, Position,0,1", + "MT192765.1,Count,0,0", + ] + - path: results/reports/snpeff/test/test_snpEff.genes.txt + md5sum: 130536bf0237d7f3f746d32aaa32840a diff --git a/tests/test_annotation_vep.yml b/tests/test_annotation_vep.yml new file mode 100644 index 0000000000..569aed2e20 --- /dev/null +++ b/tests/test_annotation_vep.yml @@ -0,0 +1,36 @@ +- name: Run VEP + command: nextflow run main.nf -profile test,annotation --tools vep --skip_tools multiqc + tags: + - annotation + - vep + files: + - path: results/annotation/test/test_VEP.ann.vcf.gz + # binary changes md5sums on reruns. + - path: results/annotation/test/test_VEP.ann.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/reports/EnsemblVEP/test/test_VEP.summary.html + # test_VEP.summary.html changes md5sums on reruns. + contains: + [ + "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf", + "General statistics", + "Lines of input read", + "Variants processed", + "Variants filtered out", + "Novel / existing variants", + "Overlapped genes", + "Overlapped transcripts", + "Overlapped regulatory features", + ] +- name: Run VEP with fasta + command: nextflow run main.nf -profile test,annotation --tools vep --vep_include_fasta --skip_tools multiqc + tags: + - annotation + - vep + files: + - path: results/annotation/test/test_VEP.ann.vcf.gz + # binary changes md5sums on reruns. + - path: results/annotation/test/test_VEP.ann.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/reports/EnsemblVEP/test/test_VEP.summary.html + # text-based file changes md5sums on reruns. 
From 68da3deb12128730c193e83d36672bb3e6566b91 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:27:25 +0100 Subject: [PATCH 10/49] test only with docker for now --- .github/workflows/pytest-workflow.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index 0857401604..e5549f1ce2 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -34,7 +34,10 @@ jobs: fail-fast: false matrix: tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] - profile: ["docker", "singularity", "conda"] + # Only docker for now for faster testing while feature in dev + profile: ["docker"] + # TODO: Need to uncomment that and add exclude for some combination with conda (ie annotation) + # profile: ["docker", "singularity", "conda"] env: NXF_ANSI_LOG: false steps: From 4c05d1cc21dfdb6c2230570466204896cb0efcdb Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:32:07 +0100 Subject: [PATCH 11/49] testing out adding an extra layer of tags --- tests/config/pytest_tags.yml | 67 +++++++++++++++++------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index a46df9c30b..9e94c521b4 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -1,35 +1,32 @@ -bwamem: - - conf/modules/aligner.config - - modules/nf-core/bwa/mem/main.nf - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf - -bwamem2: - - conf/modules/aligner.config - - modules/nf-core/bwamem2/mem/main.nf - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf - -dragmap: - - conf/modules/aligner.config - - modules/nf-core/dragmap/align/main.nf - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf - -snpeff: - - conf/modules/annotate.config - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' - - 
subworkflows/local/vcf_annotate_snpeff/main.nf - -merge: - - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main' - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' - - subworkflows/local/vcf_annotate_all/main.nf - - subworkflows/local/vcf_annotate_ensemblvep/main.nf - - subworkflows/local/vcf_annotate_snpeff/main.nf - -vep: - - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main' - - modules/nf-core/tabix/bgziptabix/main' - - subworkflows/local/vcf_annotate_ensemblvep/main.nf +aligner: + bwamem: + - conf/modules/aligner.config + - modules/nf-core/bwa/mem/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + bwamem2: + - conf/modules/aligner.config + - modules/nf-core/bwamem2/mem/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + dragmap: + - conf/modules/aligner.config + - modules/nf-core/dragmap/align/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +annotate: + snpeff: + - conf/modules/annotate.config + - modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_snpeff/main.nf + merge: + - conf/modules/annotate.config + - modules/nf-core/ensemblvep/main' + - modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_all/main.nf + - subworkflows/local/vcf_annotate_ensemblvep/main.nf + - subworkflows/local/vcf_annotate_snpeff/main.nf + vep: + - conf/modules/annotate.config + - modules/nf-core/ensemblvep/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_ensemblvep/main.nf From 986fe9154b13dce30ae6558bfcc1c566afa02ebd Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:34:52 +0100 Subject: [PATCH 12/49] add info as comments instead --- tests/config/pytest_tags.yml | 65 ++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/tests/config/pytest_tags.yml 
b/tests/config/pytest_tags.yml index 9e94c521b4..88b36d04e5 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -1,32 +1,33 @@ -aligner: - bwamem: - - conf/modules/aligner.config - - modules/nf-core/bwa/mem/main.nf - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf - bwamem2: - - conf/modules/aligner.config - - modules/nf-core/bwamem2/mem/main.nf - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf - dragmap: - - conf/modules/aligner.config - - modules/nf-core/dragmap/align/main.nf - - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf -annotate: - snpeff: - - conf/modules/annotate.config - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' - - subworkflows/local/vcf_annotate_snpeff/main.nf - merge: - - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main' - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' - - subworkflows/local/vcf_annotate_all/main.nf - - subworkflows/local/vcf_annotate_ensemblvep/main.nf - - subworkflows/local/vcf_annotate_snpeff/main.nf - vep: - - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main' - - modules/nf-core/tabix/bgziptabix/main' - - subworkflows/local/vcf_annotate_ensemblvep/main.nf +# aligner +bwamem: + - conf/modules/aligner.config + - modules/nf-core/bwa/mem/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +bwamem2: + - conf/modules/aligner.config + - modules/nf-core/bwamem2/mem/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +dragmap: + - conf/modules/aligner.config + - modules/nf-core/dragmap/align/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + +# annotate +snpeff: + - conf/modules/annotate.config + - modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_snpeff/main.nf +merge: + - conf/modules/annotate.config + - modules/nf-core/ensemblvep/main' + - 
modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_all/main.nf + - subworkflows/local/vcf_annotate_ensemblvep/main.nf + - subworkflows/local/vcf_annotate_snpeff/main.nf +vep: + - conf/modules/annotate.config + - modules/nf-core/ensemblvep/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_ensemblvep/main.nf From b9de3cf141b76ffec0752c74891f1853f50705d1 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:35:49 +0100 Subject: [PATCH 13/49] sort files --- tests/config/pytest_tags.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 88b36d04e5..d9aafebdcd 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -13,11 +13,6 @@ dragmap: - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf # annotate -snpeff: - - conf/modules/annotate.config - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' - - subworkflows/local/vcf_annotate_snpeff/main.nf merge: - conf/modules/annotate.config - modules/nf-core/ensemblvep/main' @@ -26,6 +21,11 @@ merge: - subworkflows/local/vcf_annotate_all/main.nf - subworkflows/local/vcf_annotate_ensemblvep/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf +snpeff: + - conf/modules/annotate.config + - modules/nf-core/snpeff/main' + - modules/nf-core/tabix/bgziptabix/main' + - subworkflows/local/vcf_annotate_snpeff/main.nf vep: - conf/modules/annotate.config - modules/nf-core/ensemblvep/main' From 77157603efccd53d34ed1b7c3c0bf180b0b023ae Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:36:14 +0100 Subject: [PATCH 14/49] fix file names --- tests/config/pytest_tags.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index d9aafebdcd..43d52f3324 100644 --- a/tests/config/pytest_tags.yml 
+++ b/tests/config/pytest_tags.yml @@ -15,19 +15,19 @@ dragmap: # annotate merge: - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main' - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' + - modules/nf-core/ensemblvep/main.nf' + - modules/nf-core/snpeff/main.nf' + - modules/nf-core/tabix/bgziptabix/main.nf' - subworkflows/local/vcf_annotate_all/main.nf - subworkflows/local/vcf_annotate_ensemblvep/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf snpeff: - conf/modules/annotate.config - - modules/nf-core/snpeff/main' - - modules/nf-core/tabix/bgziptabix/main' + - modules/nf-core/snpeff/main.nf' + - modules/nf-core/tabix/bgziptabix/main.nf' - subworkflows/local/vcf_annotate_snpeff/main.nf vep: - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main' - - modules/nf-core/tabix/bgziptabix/main' + - modules/nf-core/ensemblvep/main.nf' + - modules/nf-core/tabix/bgziptabix/main.nf' - subworkflows/local/vcf_annotate_ensemblvep/main.nf From 630eab97b3aad484e2da5791d1a5dbb6053c279a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 16:45:33 +0100 Subject: [PATCH 15/49] alignement_to_fastq --- conf/modules/alignment_to_fastq.config | 85 +++++++++++++++++++ conf/modules/modules.config | 70 --------------- nextflow.config | 1 + tests/config/pytest_tags.yml | 9 ++ ..._remap.yml => test_alignment_to_fastq.yml} | 0 5 files changed, 95 insertions(+), 70 deletions(-) create mode 100644 conf/modules/alignment_to_fastq.config rename tests/{test_bam_remap.yml => test_alignment_to_fastq.yml} (100%) diff --git a/conf/modules/alignment_to_fastq.config b/conf/modules/alignment_to_fastq.config new file mode 100644 index 0000000000..b207e57740 --- /dev/null +++ b/conf/modules/alignment_to_fastq.config @@ -0,0 +1,85 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// BAM TO FASTQ + +process { + + withName: 'COLLATE_FASTQ_MAP' { + ext.args2 = '-N' + ext.prefix = {"${meta.id}.mapped"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'COLLATE_FASTQ_UNMAP' { + ext.args2 = '-N' + ext.prefix = {"${meta.id}.unmapped"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_MAP' { + ext.args = '-b -f1 -F12' + ext.prefix = {"${meta.id}.map_map"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_MAP_UNMAP' { + ext.args = '-b -f8 -F260' + ext.prefix = {"${meta.id}.map_unmap"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_MAP' { + ext.args = '-b -f4 -F264' + ext.prefix = {"${meta.id}.unmap_map"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' { + ext.args = '-b -f12 -F256' + ext.prefix = {"${meta.id}.unmap_unmap"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'SAMTOOLS_MERGE_UNMAP' { + ext.prefix = {"${meta.id}.merged_unmap"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + withName: 
'CAT_FASTQ' { + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index f821507185..e2bead0b04 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -81,76 +81,6 @@ process { ] } -// BAM TO FASTQ - - withName: 'COLLATE_FASTQ_MAP' { - ext.args2 = '-N' - ext.prefix = {"${meta.id}.mapped"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'COLLATE_FASTQ_UNMAP' { - ext.args2 = '-N' - ext.prefix = {"${meta.id}.unmapped"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'SAMTOOLS_VIEW_MAP_MAP' { - ext.args = '-b -f1 -F12' - ext.prefix = {"${meta.id}.map_map"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'SAMTOOLS_VIEW_MAP_UNMAP' { - ext.args = '-b -f8 -F260' - ext.prefix = {"${meta.id}.map_unmap"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'SAMTOOLS_VIEW_UNMAP_MAP' { - ext.args = '-b -f4 -F264' - ext.prefix = {"${meta.id}.unmap_map"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'SAMTOOLS_VIEW_UNMAP_UNMAP' { - ext.args = '-b -f12 -F256' - ext.prefix = {"${meta.id}.unmap_unmap"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'SAMTOOLS_MERGE_UNMAP' { - ext.prefix = {"${meta.id}.merged_unmap"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - withName: 'CAT_FASTQ' { - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - // TRIMMING withName: 'FASTP' { diff --git a/nextflow.config b/nextflow.config index 611b161f16..2196f44135 100644 --- a/nextflow.config +++ 
b/nextflow.config @@ -288,6 +288,7 @@ manifest { includeConfig 'conf/modules/modules.config' includeConfig 'conf/modules/prepare_genome.config' includeConfig 'conf/modules/aligner.config' +includeConfig 'conf/modules/alignment_to_fastq.config' includeConfig 'conf/modules/annotate.config' // Function to ensure that resource requirements don't go beyond diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 43d52f3324..cec43a857d 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -12,6 +12,15 @@ dragmap: - modules/nf-core/dragmap/align/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +# alignment_to_fastq +alignment_to_fastq: + - conf/modules/alignment_to_fastq.config + - modules/nf-core/cat/fastq/main' + - modules/nf-core/samtools/collatefastq/main' + - modules/nf-core/samtools/merge/main' + - modules/nf-core/samtools/view/main' + - subworkflows/local/bam_convert_samtools/main.nf + # annotate merge: - conf/modules/annotate.config diff --git a/tests/test_bam_remap.yml b/tests/test_alignment_to_fastq.yml similarity index 100% rename from tests/test_bam_remap.yml rename to tests/test_alignment_to_fastq.yml From 767c9ef4aee31b6b413c9975446d28b4036b2348 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 17:25:50 +0100 Subject: [PATCH 16/49] markduplicates --- conf/modules/markduplicates.config | 111 ++++++++++++++++++ conf/modules/modules.config | 95 --------------- nextflow.config | 3 +- ..._aligner_bwamem.yml => aligner_bwamem.yml} | 0 ...ligner_bwamem2.yml => aligner_bwamem2.yml} | 0 ...ligner_dragmap.yml => aligner_dragmap.yml} | 0 ...nt_to_fastq.yml => alignment_to_fastq.yml} | 0 ...otation_merge.yml => annotation_merge.yml} | 0 ...ation_snpeff.yml => annotation_snpeff.yml} | 0 ..._annotation_vep.yml => annotation_vep.yml} | 0 tests/config/pytest_tags.yml | 40 +++++-- ...icates.yml => markduplicates_from_bam.yml} | 52 -------- tests/markduplicates_from_cram.yml | 52 ++++++++ 
...st_gatk_spark.yml => test_gatk4_spark.yml} | 3 +- 14 files changed, 195 insertions(+), 161 deletions(-) create mode 100644 conf/modules/markduplicates.config rename tests/{test_aligner_bwamem.yml => aligner_bwamem.yml} (100%) rename tests/{test_aligner_bwamem2.yml => aligner_bwamem2.yml} (100%) rename tests/{test_aligner_dragmap.yml => aligner_dragmap.yml} (100%) rename tests/{test_alignment_to_fastq.yml => alignment_to_fastq.yml} (100%) rename tests/{test_annotation_merge.yml => annotation_merge.yml} (100%) rename tests/{test_annotation_snpeff.yml => annotation_snpeff.yml} (100%) rename tests/{test_annotation_vep.yml => annotation_vep.yml} (100%) rename tests/{test_markduplicates.yml => markduplicates_from_bam.yml} (51%) create mode 100644 tests/markduplicates_from_cram.yml rename tests/{test_gatk_spark.yml => test_gatk4_spark.yml} (97%) diff --git a/conf/modules/markduplicates.config b/conf/modules/markduplicates.config new file mode 100644 index 0000000000..3fdb6f8135 --- /dev/null +++ b/conf/modules/markduplicates.config @@ -0,0 +1,111 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +process { + +// MARKDUPLICATES + + withName: 'CRAM_TO_BAM' { + ext.args = "-b" + } + + withName: 'BAM_TO_CRAM' { + // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) + // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram + // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram + ext.args = "-C" + ext.prefix = { "${meta.id}.converted" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'BAM_TO_CRAM_MAPPING' { + // Run only when mapping should be saved as CRAM or when no MD is done + ext.when = (params.save_mapped && !params.save_output_as_bam) || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + // Never publish if BAM only should be published + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/mapped/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/markduplicates/${meta.id}" }, + pattern: "*metrics" + ] + } + + withName: 'GATK4_MARKDUPLICATES' { + ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' + ext.prefix = { "${meta.id}.md.cram" } + ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) } + publishDir = [ + [ + enabled: 
!params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ], + [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/" }, + pattern: "*metrics", + saveAs: { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) ? "markduplicates/${meta.id}/${it}" : null} + ] + ] + } + + withName: 'GATK4_MARKDUPLICATES_SPARK' { + ext.args = '--remove-sequencing-duplicates false -VS LENIENT' + ext.prefix = { "${meta.id}.md.cram" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + withName: 'INDEX_MARKDUPLICATES' { + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{cram,crai}" + ] + } + + + withName: 'NFCORE_SAREK:SAREK:CRAM_TO_BAM' { + ext.prefix = { "${meta.id}.md" } + ext.when = { params.save_output_as_bam } + publishDir = [ + enabled: params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, + pattern: "*{md.bam,md.bam.bai}" + ] + } + +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index e2bead0b04..6cdd790a0c 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -177,101 +177,6 @@ process { ] } -// MARKDUPLICATES - - withName: 'CRAM_TO_BAM' { - ext.args = "-b" - } - - withName: 'BAM_TO_CRAM' { - // BAM provided for step Markduplicates either run through MD or Convert -> then saved as sorted.cram (convert) or md.cram (md directly) - // BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram - // BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram - 
ext.args = "-C" - ext.prefix = { "${meta.id}.converted" } - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/converted/${meta.id}" }, - pattern: "*{cram,crai}" - ] - } - - withName: 'BAM_TO_CRAM_MAPPING' { - // Run only when mapping should be saved as CRAM or when no MD is done - ext.when = (params.save_mapped && !params.save_output_as_bam) || (params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) - ext.prefix = { "${meta.id}.sorted" } - publishDir = [ - // Never publish if BAM only should be published - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/mapped/${meta.id}/" }, - pattern: "*{cram,crai}" - ] - } - - withName: 'GATK4_ESTIMATELIBRARYCOMPLEXITY' { - ext.prefix = { "${meta.id}.md.cram" } - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/markduplicates/${meta.id}" }, - pattern: "*metrics" - ] - } - - withName: 'GATK4_MARKDUPLICATES' { - ext.args = '-REMOVE_DUPLICATES false -VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' - ext.prefix = { "${meta.id}.md.cram" } - ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) } - publishDir = [ - [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, - pattern: "*{cram,crai}" - ], - [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/reports/" }, - pattern: "*metrics", - saveAs: { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates_report')) ? 
"markduplicates/${meta.id}/${it}" : null} - ] - ] - } - - withName: 'GATK4_MARKDUPLICATES_SPARK' { - ext.args = '--remove-sequencing-duplicates false -VS LENIENT' - ext.prefix = { "${meta.id}.md.cram" } - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, - pattern: "*{cram,crai}" - ] - } - - withName: 'INDEX_MARKDUPLICATES' { - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, - pattern: "*{cram,crai}" - ] - } - - - withName: 'NFCORE_SAREK:SAREK:CRAM_TO_BAM' { - ext.prefix = { "${meta.id}.md" } - ext.when = { params.save_output_as_bam } - publishDir = [ - enabled: params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" }, - pattern: "*{md.bam,md.bam.bai}" - ] - } - // PREPARE_RECALIBRATION withName: 'GATK4_BASERECALIBRATOR|GATK4_BASERECALIBRATOR_SPARK' { diff --git a/nextflow.config b/nextflow.config index 2196f44135..0291773932 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,8 +287,9 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/modules.config' includeConfig 'conf/modules/prepare_genome.config' -includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/alignment_to_fastq.config' +includeConfig 'conf/modules/aligner.config' +includeConfig 'conf/modules/markduplicates.config' includeConfig 'conf/modules/annotate.config' // Function to ensure that resource requirements don't go beyond diff --git a/tests/test_aligner_bwamem.yml b/tests/aligner_bwamem.yml similarity index 100% rename from tests/test_aligner_bwamem.yml rename to tests/aligner_bwamem.yml diff --git a/tests/test_aligner_bwamem2.yml b/tests/aligner_bwamem2.yml similarity index 100% rename from tests/test_aligner_bwamem2.yml rename to 
tests/aligner_bwamem2.yml diff --git a/tests/test_aligner_dragmap.yml b/tests/aligner_dragmap.yml similarity index 100% rename from tests/test_aligner_dragmap.yml rename to tests/aligner_dragmap.yml diff --git a/tests/test_alignment_to_fastq.yml b/tests/alignment_to_fastq.yml similarity index 100% rename from tests/test_alignment_to_fastq.yml rename to tests/alignment_to_fastq.yml diff --git a/tests/test_annotation_merge.yml b/tests/annotation_merge.yml similarity index 100% rename from tests/test_annotation_merge.yml rename to tests/annotation_merge.yml diff --git a/tests/test_annotation_snpeff.yml b/tests/annotation_snpeff.yml similarity index 100% rename from tests/test_annotation_snpeff.yml rename to tests/annotation_snpeff.yml diff --git a/tests/test_annotation_vep.yml b/tests/annotation_vep.yml similarity index 100% rename from tests/test_annotation_vep.yml rename to tests/annotation_vep.yml diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index cec43a857d..057bc026ab 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -15,28 +15,46 @@ dragmap: # alignment_to_fastq alignment_to_fastq: - conf/modules/alignment_to_fastq.config - - modules/nf-core/cat/fastq/main' - - modules/nf-core/samtools/collatefastq/main' - - modules/nf-core/samtools/merge/main' - - modules/nf-core/samtools/view/main' + - modules/nf-core/cat/fastq/main.nf + - modules/nf-core/samtools/collatefastq/main.nf + - modules/nf-core/samtools/merge/main.nf + - modules/nf-core/samtools/view/main.nf - subworkflows/local/bam_convert_samtools/main.nf +# markduplicates +markduplicates: + - conf/modules/markduplicates.config + - modules/nf-core/gatk4/markduplicates/main.nf + - modules/nf-core/mosdepth/main.nf + - modules/nf-core/samtools/convert/main.nf + - modules/nf-core/samtools/index/main.nf + - modules/nf-core/samtools/stats/main.nf + - subworkflows/local/bam_markduplicates/main.nf + - subworkflows/local/cram_qc_mosdepth_samtools/main.nf + +# 
gatk4_spark +gatk4_spark: + - conf/modules/markduplicates.config + - modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf + - modules/nf-core/gatk4/markduplicatesspark/main.nf + - subworkflows/local/bam_markduplicates_spark/main.nf + # annotate merge: - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main.nf' - - modules/nf-core/snpeff/main.nf' - - modules/nf-core/tabix/bgziptabix/main.nf' + - modules/nf-core/ensemblvep/main.nf + - modules/nf-core/snpeff/main.nf + - modules/nf-core/tabix/bgziptabix/main.nf - subworkflows/local/vcf_annotate_all/main.nf - subworkflows/local/vcf_annotate_ensemblvep/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf snpeff: - conf/modules/annotate.config - - modules/nf-core/snpeff/main.nf' - - modules/nf-core/tabix/bgziptabix/main.nf' + - modules/nf-core/snpeff/main.nf + - modules/nf-core/tabix/bgziptabix/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf vep: - conf/modules/annotate.config - - modules/nf-core/ensemblvep/main.nf' - - modules/nf-core/tabix/bgziptabix/main.nf' + - modules/nf-core/ensemblvep/main.nf + - modules/nf-core/tabix/bgziptabix/main.nf - subworkflows/local/vcf_annotate_ensemblvep/main.nf diff --git a/tests/test_markduplicates.yml b/tests/markduplicates_from_bam.yml similarity index 51% rename from tests/test_markduplicates.yml rename to tests/markduplicates_from_bam.yml index 259203f354..d4e32f0fcf 100644 --- a/tests/test_markduplicates.yml +++ b/tests/markduplicates_from_bam.yml @@ -52,55 +52,3 @@ md5sum: a0ecedb6be28639e276d38e4ac18935b - path: results/reports/samtools/test/test.recal.cram.stats md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 -- name: Run markduplicates starting from CRAM - command: nextflow run main.nf -profile test,markduplicates_cram - tags: - - cram - - markduplicates - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 8e9408ef8d4f9e6e00e531268eebd42a - - path: results/csv/markduplicates_no_table.csv - md5sum: f8b1b25fec472453a98c3f7f0e3a7953 - - 
path: results/csv/recalibrated.csv - md5sum: 1888a924bc70bd80165a96ad641e22d6 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 9603b69fdc3b5090de2e0dd78bfcc4bf - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["testN 0 2820 2 2 0 828 0 0.293617 3807", "1.0 0.999986 1178 1178", "100.0 1.911145 0 0"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 8e875e20e3fb9cf288d68c1d223f6fd5 - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: 75e1ce7e55af51f4985fa91654a5ea2d - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: b23cf96942b2ada3f41172a9349a1175 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - # binary changes md5sums on reruns. - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - # binary changes md5sums on reruns. - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 8e875e20e3fb9cf288d68c1d223f6fd5 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 75e1ce7e55af51f4985fa91654a5ea2d - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: b23cf96942b2ada3f41172a9349a1175 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - # binary changes md5sums on reruns. - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - # binary changes md5sums on reruns. 
- - path: results/reports/samtools/test/test.md.cram.stats - md5sum: a0ecedb6be28639e276d38e4ac18935b - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 - - path: results/preprocessing/mapped/ - should_exist: false diff --git a/tests/markduplicates_from_cram.yml b/tests/markduplicates_from_cram.yml new file mode 100644 index 0000000000..56e6eb0276 --- /dev/null +++ b/tests/markduplicates_from_cram.yml @@ -0,0 +1,52 @@ +- name: Run markduplicates starting from CRAM + command: nextflow run main.nf -profile test,markduplicates_cram + tags: + - cram + - markduplicates + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 8e9408ef8d4f9e6e00e531268eebd42a + - path: results/csv/markduplicates_no_table.csv + md5sum: f8b1b25fec472453a98c3f7f0e3a7953 + - path: results/csv/recalibrated.csv + md5sum: 1888a924bc70bd80165a96ad641e22d6 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 9603b69fdc3b5090de2e0dd78bfcc4bf + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["testN 0 2820 2 2 0 828 0 0.293617 3807", "1.0 0.999986 1178 1178", "100.0 1.911145 0 0"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 8e875e20e3fb9cf288d68c1d223f6fd5 + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: 75e1ce7e55af51f4985fa91654a5ea2d + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: b23cf96942b2ada3f41172a9349a1175 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + # binary changes md5sums on reruns. + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + # binary changes md5sums on reruns. + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 8e875e20e3fb9cf288d68c1d223f6fd5 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 75e1ce7e55af51f4985fa91654a5ea2d + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: b23cf96942b2ada3f41172a9349a1175 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + # binary changes md5sums on reruns. + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + # binary changes md5sums on reruns. 
+ - path: results/reports/samtools/test/test.md.cram.stats + md5sum: a0ecedb6be28639e276d38e4ac18935b + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 + - path: results/preprocessing/mapped/ + should_exist: false diff --git a/tests/test_gatk_spark.yml b/tests/test_gatk4_spark.yml similarity index 97% rename from tests/test_gatk_spark.yml rename to tests/test_gatk4_spark.yml index e679023427..53dc42cca5 100644 --- a/tests/test_gatk_spark.yml +++ b/tests/test_gatk4_spark.yml @@ -1,7 +1,6 @@ -- name: Run default pipeline with gatk_spark +- name: Run default pipeline with gatk4_spark command: nextflow run main.nf -profile test,use_gatk_spark tags: - - gatk4 - gatk4_spark - preprocessing files: From 66d770bd6307149936bc844376d992056bc78118 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 17:35:08 +0100 Subject: [PATCH 17/49] rename files --- tests/{test_intervals.yml => intervals.yml} | 0 ..._only_paired_VC.yml => only_paired_VC.yml} | 0 tests/{test_pair.yml => pair.yml} | 0 .../{test_recalibrate.yml => recalibrate.yml} | 0 .../{test_save_mapped.yml => save_mapped.yml} | 0 ...ave_output_bam.yml => save_output_bam.yml} | 0 ...duplicates.yml => skip_markduplicates.yml} | 0 tests/{test_skip_qc.yml => skip_qc.yml} | 0 .../{test_split_fastq.yml => split_fastq.yml} | 0 tests/{test_targeted.yml => targeted.yml} | 0 ...ner_bwamem.yml => test_aligner_bwamem.yml} | 0 ...r_bwamem2.yml => test_aligner_bwamem2.yml} | 0 ...r_dragmap.yml => test_aligner_dragmap.yml} | 0 ..._fastq.yml => test_alignment_to_fastq.yml} | 0 ...on_merge.yml => test_annotation_merge.yml} | 0 ..._snpeff.yml => test_annotation_snpeff.yml} | 0 ...tation_vep.yml => test_annotation_vep.yml} | 0 ...m.yml => test_markduplicates_from_bam.yml} | 0 ....yml => test_markduplicates_from_cram.yml} | 0 tests/test_prepare_recalibration_from_bam.yml | 84 +++++++++++++++++++ ... 
test_prepare_recalibration_from_cram.yml} | 84 ------------------- tests/{test_tools.yml => tools.yml} | 0 ..._tools_manually.yml => tools_manually.yml} | 0 tests/{test_trimming.yml => trimming.yml} | 0 tests/{test_umi.yml => umi.yml} | 0 25 files changed, 84 insertions(+), 84 deletions(-) rename tests/{test_intervals.yml => intervals.yml} (100%) rename tests/{test_only_paired_VC.yml => only_paired_VC.yml} (100%) rename tests/{test_pair.yml => pair.yml} (100%) rename tests/{test_recalibrate.yml => recalibrate.yml} (100%) rename tests/{test_save_mapped.yml => save_mapped.yml} (100%) rename tests/{test_save_output_bam.yml => save_output_bam.yml} (100%) rename tests/{test_skip_markduplicates.yml => skip_markduplicates.yml} (100%) rename tests/{test_skip_qc.yml => skip_qc.yml} (100%) rename tests/{test_split_fastq.yml => split_fastq.yml} (100%) rename tests/{test_targeted.yml => targeted.yml} (100%) rename tests/{aligner_bwamem.yml => test_aligner_bwamem.yml} (100%) rename tests/{aligner_bwamem2.yml => test_aligner_bwamem2.yml} (100%) rename tests/{aligner_dragmap.yml => test_aligner_dragmap.yml} (100%) rename tests/{alignment_to_fastq.yml => test_alignment_to_fastq.yml} (100%) rename tests/{annotation_merge.yml => test_annotation_merge.yml} (100%) rename tests/{annotation_snpeff.yml => test_annotation_snpeff.yml} (100%) rename tests/{annotation_vep.yml => test_annotation_vep.yml} (100%) rename tests/{markduplicates_from_bam.yml => test_markduplicates_from_bam.yml} (100%) rename tests/{markduplicates_from_cram.yml => test_markduplicates_from_cram.yml} (100%) create mode 100644 tests/test_prepare_recalibration_from_bam.yml rename tests/{test_prepare_recalibration.yml => test_prepare_recalibration_from_cram.yml} (50%) rename tests/{test_tools.yml => tools.yml} (100%) rename tests/{test_tools_manually.yml => tools_manually.yml} (100%) rename tests/{test_trimming.yml => trimming.yml} (100%) rename tests/{test_umi.yml => umi.yml} (100%) diff --git a/tests/test_intervals.yml 
b/tests/intervals.yml similarity index 100% rename from tests/test_intervals.yml rename to tests/intervals.yml diff --git a/tests/test_only_paired_VC.yml b/tests/only_paired_VC.yml similarity index 100% rename from tests/test_only_paired_VC.yml rename to tests/only_paired_VC.yml diff --git a/tests/test_pair.yml b/tests/pair.yml similarity index 100% rename from tests/test_pair.yml rename to tests/pair.yml diff --git a/tests/test_recalibrate.yml b/tests/recalibrate.yml similarity index 100% rename from tests/test_recalibrate.yml rename to tests/recalibrate.yml diff --git a/tests/test_save_mapped.yml b/tests/save_mapped.yml similarity index 100% rename from tests/test_save_mapped.yml rename to tests/save_mapped.yml diff --git a/tests/test_save_output_bam.yml b/tests/save_output_bam.yml similarity index 100% rename from tests/test_save_output_bam.yml rename to tests/save_output_bam.yml diff --git a/tests/test_skip_markduplicates.yml b/tests/skip_markduplicates.yml similarity index 100% rename from tests/test_skip_markduplicates.yml rename to tests/skip_markduplicates.yml diff --git a/tests/test_skip_qc.yml b/tests/skip_qc.yml similarity index 100% rename from tests/test_skip_qc.yml rename to tests/skip_qc.yml diff --git a/tests/test_split_fastq.yml b/tests/split_fastq.yml similarity index 100% rename from tests/test_split_fastq.yml rename to tests/split_fastq.yml diff --git a/tests/test_targeted.yml b/tests/targeted.yml similarity index 100% rename from tests/test_targeted.yml rename to tests/targeted.yml diff --git a/tests/aligner_bwamem.yml b/tests/test_aligner_bwamem.yml similarity index 100% rename from tests/aligner_bwamem.yml rename to tests/test_aligner_bwamem.yml diff --git a/tests/aligner_bwamem2.yml b/tests/test_aligner_bwamem2.yml similarity index 100% rename from tests/aligner_bwamem2.yml rename to tests/test_aligner_bwamem2.yml diff --git a/tests/aligner_dragmap.yml b/tests/test_aligner_dragmap.yml similarity index 100% rename from 
tests/aligner_dragmap.yml rename to tests/test_aligner_dragmap.yml diff --git a/tests/alignment_to_fastq.yml b/tests/test_alignment_to_fastq.yml similarity index 100% rename from tests/alignment_to_fastq.yml rename to tests/test_alignment_to_fastq.yml diff --git a/tests/annotation_merge.yml b/tests/test_annotation_merge.yml similarity index 100% rename from tests/annotation_merge.yml rename to tests/test_annotation_merge.yml diff --git a/tests/annotation_snpeff.yml b/tests/test_annotation_snpeff.yml similarity index 100% rename from tests/annotation_snpeff.yml rename to tests/test_annotation_snpeff.yml diff --git a/tests/annotation_vep.yml b/tests/test_annotation_vep.yml similarity index 100% rename from tests/annotation_vep.yml rename to tests/test_annotation_vep.yml diff --git a/tests/markduplicates_from_bam.yml b/tests/test_markduplicates_from_bam.yml similarity index 100% rename from tests/markduplicates_from_bam.yml rename to tests/test_markduplicates_from_bam.yml diff --git a/tests/markduplicates_from_cram.yml b/tests/test_markduplicates_from_cram.yml similarity index 100% rename from tests/markduplicates_from_cram.yml rename to tests/test_markduplicates_from_cram.yml diff --git a/tests/test_prepare_recalibration_from_bam.yml b/tests/test_prepare_recalibration_from_bam.yml new file mode 100644 index 0000000000..018345eddd --- /dev/null +++ b/tests/test_prepare_recalibration_from_bam.yml @@ -0,0 +1,84 @@ +- name: Run prepare_recalibration starting from bam + command: nextflow run main.nf -profile test,prepare_recalibration_bam + tags: + - bam + - prepare_recalibration + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 90e2ab85d8af642d6548af448a9d4226 + - path: results/csv/recalibrated.csv + md5sum: 1888a924bc70bd80165a96ad641e22d6 + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns. 
+ - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: bdb8f185c35dd1eec7ce2f69bce57972 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 32ea70ef1b99def3dc900b4afd513a40 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: d034a60ae5c0768d67b9ba6442bd2212 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b3716e5cd1744610e69c29bd4ffad259 + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 71af990f7acad8bf24d37e88c4adcded + - path: results/preprocessing/mapped/ + should_exist: false + - path: results/preprocessing/markduplicates/ + should_exist: false +- name: Run prepare_recalibration starting from bam and skip baserecalibration + command: nextflow run main.nf -profile test,prepare_recalibration_bam,skip_bqsr --tools strelka + tags: + - bam + - prepare_recalibration + - preprocessing + files: + - path: results/csv/variantcalled.csv + md5sum: 4d0effd3d8dc2b814230a189e7ca9dba + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 + - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: ee7dafc8d941b8502a04a63dc3126fff + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: 9fe11e894f7567eb96b43c48593741a7 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/csv/recalibrated.csv + should_exist: false + - path: results/preprocessing/recal_table/test/test.recal.table + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/mosdepth + should_exist: false + - path: results/reports/samtools_stats + should_exist: false + - path: results/preprocessing/mapped/ + should_exist: false + - path: results/preprocessing/markduplicates/ + should_exist: false diff --git a/tests/test_prepare_recalibration.yml b/tests/test_prepare_recalibration_from_cram.yml similarity index 50% rename from tests/test_prepare_recalibration.yml rename to tests/test_prepare_recalibration_from_cram.yml index a2d94d03aa..07678ed14a 100644 --- a/tests/test_prepare_recalibration.yml +++ b/tests/test_prepare_recalibration_from_cram.yml @@ -1,41 +1,3 @@ -- name: Run prepare_recalibration starting from bam - command: nextflow 
run main.nf -profile test,prepare_recalibration_bam - tags: - - bam - - prepare_recalibration - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 90e2ab85d8af642d6548af448a9d4226 - - path: results/csv/recalibrated.csv - md5sum: 1888a924bc70bd80165a96ad641e22d6 - - path: results/multiqc - - path: results/preprocessing/converted/test/test.converted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/converted/test/test.converted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 35d89a3811aa31711fc9815b6b80e6ec - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: bdb8f185c35dd1eec7ce2f69bce57972 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 32ea70ef1b99def3dc900b4afd513a40 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: d034a60ae5c0768d67b9ba6442bd2212 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b3716e5cd1744610e69c29bd4ffad259 - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded - - path: results/preprocessing/mapped/ - should_exist: false - - path: results/preprocessing/markduplicates/ - should_exist: false - name: Run prepare_recalibration starting from cram command: nextflow run main.nf -profile test,prepare_recalibration_cram tags: @@ -68,52 +30,6 @@ should_exist: false - path: results/preprocessing/markduplicates/ should_exist: false -- name: Run prepare_recalibration starting from bam and skip baserecalibration - command: nextflow 
run main.nf -profile test,prepare_recalibration_bam,skip_bqsr --tools strelka - tags: - - bam - - prepare_recalibration - - preprocessing - files: - - path: results/csv/variantcalled.csv - md5sum: 4d0effd3d8dc2b814230a189e7ca9dba - - path: results/multiqc - - path: results/preprocessing/converted/test/test.converted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/converted/test/test.converted.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 - - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary - md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count - md5sum: ee7dafc8d941b8502a04a63dc3126fff - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - md5sum: 9fe11e894f7567eb96b43c48593741a7 - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/csv/recalibrated.csv - should_exist: false - - path: results/preprocessing/recal_table/test/test.recal.table - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - should_exist: false - - path: results/reports/mosdepth - should_exist: false - - path: results/reports/samtools_stats - should_exist: false - - path: results/preprocessing/mapped/ - should_exist: false - - path: results/preprocessing/markduplicates/ - should_exist: false - name: Run prepare_recalibration starting from cram and skip baserecalibration command: nextflow run main.nf -profile test,prepare_recalibration_cram,skip_bqsr --tools strelka tags: diff --git a/tests/test_tools.yml b/tests/tools.yml similarity index 100% rename from tests/test_tools.yml rename to tests/tools.yml diff --git a/tests/test_tools_manually.yml b/tests/tools_manually.yml similarity index 100% rename from tests/test_tools_manually.yml rename to tests/tools_manually.yml diff --git a/tests/test_trimming.yml b/tests/trimming.yml similarity index 100% rename from tests/test_trimming.yml rename to tests/trimming.yml diff --git a/tests/test_umi.yml b/tests/umi.yml similarity index 100% rename from tests/test_umi.yml rename to tests/umi.yml From 83ed74ae4944bc3576791a325648795d3ed59ded Mon Sep 17 00:00:00 2001 From: maxulysse Date: Wed, 2 Nov 2022 17:54:50 +0100 Subject: [PATCH 18/49] prepare_recalibration + recalibrate --- conf/modules/aligner.config | 2 + conf/modules/annotate.config | 1 + conf/modules/markduplicates.config | 5 +- conf/modules/modules.config | 69 ----------------------- conf/modules/prepare_genome.config | 1 + conf/modules/prepare_recalibration.config | 37 ++++++++++++ conf/modules/recalibrate.config | 61 ++++++++++++++++++++ nextflow.config | 2 + tests/config/pytest_tags.yml | 24 ++++++++ 9 files changed, 130 insertions(+), 72 deletions(-) create mode 100644 
conf/modules/prepare_recalibration.config create mode 100644 conf/modules/recalibrate.config diff --git a/conf/modules/aligner.config b/conf/modules/aligner.config index 5a378698a1..68e1cfdd28 100644 --- a/conf/modules/aligner.config +++ b/conf/modules/aligner.config @@ -14,10 +14,12 @@ // MAPPING process { + if (params.step == 'mapping') { withName: "BWAMEM1_MEM" { ext.when = { params.aligner == "bwa-mem" } } + withName: "BWAMEM2_MEM" { ext.when = { params.aligner == "bwa-mem2" } } diff --git a/conf/modules/annotate.config b/conf/modules/annotate.config index 26671b16cb..7d0b1eb523 100644 --- a/conf/modules/annotate.config +++ b/conf/modules/annotate.config @@ -14,6 +14,7 @@ // ANNOTATE process { + // SNPEFF if (params.tools && (params.tools.split(',').contains('snpeff') || params.tools.split(',').contains('merge'))) { withName: 'SNPEFF' { diff --git a/conf/modules/markduplicates.config b/conf/modules/markduplicates.config index 3fdb6f8135..08bfd85688 100644 --- a/conf/modules/markduplicates.config +++ b/conf/modules/markduplicates.config @@ -11,10 +11,10 @@ ---------------------------------------------------------------------------------------- */ -process { - // MARKDUPLICATES +process { + withName: 'CRAM_TO_BAM' { ext.args = "-b" } @@ -107,5 +107,4 @@ process { pattern: "*{md.bam,md.bam.bai}" ] } - } diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 6cdd790a0c..5e2339d7d7 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -177,75 +177,6 @@ process { ] } -// PREPARE_RECALIBRATION - - withName: 'GATK4_BASERECALIBRATOR|GATK4_BASERECALIBRATOR_SPARK' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*table", - saveAs: { meta.num_intervals > 1 ? 
null : "recal_table/${meta.id}/${it}" } - ] - } - - withName: 'GATK4_GATHERBQSRREPORTS' { - ext.prefix = {"${meta.id}.recal"} - ext.when = { meta.num_intervals > 1 } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/recal_table/${meta.id}/" }, - pattern: "*table", - ] - } - -// RECALIBRATE - - withName: 'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/" }, - pattern: "*cram", - saveAs: { meta.num_intervals > 1 ? null : "recalibrated/${meta.id}/${it}" } - ] - } - - - if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { - withName: 'NFCORE_SAREK:SAREK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { - ext.prefix = { "${meta.id}.recal" } - ext.when = { meta.num_intervals > 1 } - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, - pattern: "*cram" - ] - } - - withName: 'NFCORE_SAREK:SAREK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { - publishDir = [ - enabled: !params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, - pattern: "*{recal.cram,recal.cram.crai}" - ] - } - } - - withName: 'CRAM_TO_BAM_RECAL' { - ext.prefix = { "${meta.id}.recal" } - ext.when = { params.save_output_as_bam} - publishDir = [ - enabled: params.save_output_as_bam, - mode: params.publish_dir_mode, - path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, - pattern: "*{recal.bam,recal.bam.bai}" - ] - } - // 
QC withName: 'FASTQC' { diff --git a/conf/modules/prepare_genome.config b/conf/modules/prepare_genome.config index eebc9e9b7c..dbb88dd3d8 100644 --- a/conf/modules/prepare_genome.config +++ b/conf/modules/prepare_genome.config @@ -14,6 +14,7 @@ // PREPARE_GENOME process { + withName: 'BWAMEM1_INDEX' { ext.when = { !params.bwa && params.step == "mapping" && params.aligner == "bwa-mem" } publishDir = [ diff --git a/conf/modules/prepare_recalibration.config b/conf/modules/prepare_recalibration.config new file mode 100644 index 0000000000..cb394f0a2f --- /dev/null +++ b/conf/modules/prepare_recalibration.config @@ -0,0 +1,37 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE_RECALIBRATION + +process { + + withName: 'GATK4_BASERECALIBRATOR|GATK4_BASERECALIBRATOR_SPARK' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*table", + saveAs: { meta.num_intervals > 1 ? 
null : "recal_table/${meta.id}/${it}" } + ] + } + + withName: 'GATK4_GATHERBQSRREPORTS' { + ext.prefix = {"${meta.id}.recal"} + ext.when = { meta.num_intervals > 1 } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recal_table/${meta.id}/" }, + pattern: "*table", + ] + } +} diff --git a/conf/modules/recalibrate.config b/conf/modules/recalibrate.config new file mode 100644 index 0000000000..9602e33caf --- /dev/null +++ b/conf/modules/recalibrate.config @@ -0,0 +1,61 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// RECALIBRATE + +process { + + withName: 'GATK4_APPLYBQSR|GATK4_APPLYBQSR_SPARK' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.recal" : "${meta.id}_${intervals.simpleName}.recal" } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/" }, + pattern: "*cram", + saveAs: { meta.num_intervals > 1 ? 
null : "recalibrated/${meta.id}/${it}" } + ] + } + + if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) { + withName: 'NFCORE_SAREK:SAREK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { meta.num_intervals > 1 } + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*cram" + ] + } + + withName: 'NFCORE_SAREK:SAREK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' { + publishDir = [ + enabled: !params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.cram,recal.cram.crai}" + ] + } + } + + withName: 'CRAM_TO_BAM_RECAL' { + ext.prefix = { "${meta.id}.recal" } + ext.when = { params.save_output_as_bam} + publishDir = [ + enabled: params.save_output_as_bam, + mode: params.publish_dir_mode, + path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" }, + pattern: "*{recal.bam,recal.bam.bai}" + ] + } +} diff --git a/nextflow.config b/nextflow.config index 0291773932..bd5ed4c3bc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,6 +290,8 @@ includeConfig 'conf/modules/prepare_genome.config' includeConfig 'conf/modules/alignment_to_fastq.config' includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/markduplicates.config' +includeConfig 'conf/modules/prepare_recalibration.config' +includeConfig 'conf/modules/recalibrate.config' includeConfig 'conf/modules/annotate.config' // Function to ensure that resource requirements don't go beyond diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 057bc026ab..6b27e1bf31 100644 --- 
a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -32,11 +32,35 @@ markduplicates: - subworkflows/local/bam_markduplicates/main.nf - subworkflows/local/cram_qc_mosdepth_samtools/main.nf +# prepare_recalibration +prepare_recalibration: + - conf/modules/prepare_recalibration.config + - modules/nf-core/gatk4/baserecalibrator/main.nf + - modules/nf-core/gatk4/gatherbqsrreports/main.nf + - modules/nf-core/samtools/convert/main.nf + - subworkflows/local/bam_baserecalibrator/main.nf + +# recalibrate +recalibrate: + - conf/modules/recalibrate.config + - modules/nf-core/gatk4/applybqsr/main.nf + - modules/nf-core/samtools/convert/main.nf + - modules/nf-core/samtools/index/main.nf + - modules/nf-core/samtools/merge/main.nf + - subworkflows/local/bam_applybqsr/main.nf + - subworkflows/local/cram_merge_index_samtools/main.nf + # gatk4_spark gatk4_spark: - conf/modules/markduplicates.config + - conf/modules/prepare_recalibration.config + - conf/modules/recalibrate.config + - modules/nf-core/gatk4/applybqsrspark/main.nf + - modules/nf-core/gatk4/baserecalibratorspark/main.nf - modules/nf-core/gatk4/estimatelibrarycomplexity/main.nf - modules/nf-core/gatk4/markduplicatesspark/main.nf + - subworkflows/local/bam_applybqsr_spark/main.nf + - subworkflows/local/bam_baserecalibrator_spark/main.nf - subworkflows/local/bam_markduplicates_spark/main.nf # annotate From 9fc7b3178f75a1cc3e94eff7797d5e168001ba83 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 13:24:58 +0100 Subject: [PATCH 19/49] code polish + intervals tests --- conf/modules/modules.config | 30 ----------- conf/modules/prepare_intervals.config | 45 ++++++++++++++++ nextflow.config | 2 + subworkflows/local/prepare_intervals/main.nf | 2 - tests/config/pytest_tags.yml | 45 +++++++++++----- tests/test_alignment_to_fastq.yml | 1 + tests/test_annotation_vep.yml | 14 ++--- tests/test_default.yml | 2 + tests/test_intervals_create.yml | 51 +++++++++++++++++++ tests/test_markduplicates_from_bam.yml | 
2 +- tests/test_markduplicates_from_cram.yml | 2 +- .../{intervals.yml => test_no_intervals.yml} | 50 +----------------- tests/test_prepare_recalibration_from_bam.yml | 4 +- .../test_prepare_recalibration_from_cram.yml | 4 +- 14 files changed, 145 insertions(+), 109 deletions(-) create mode 100644 conf/modules/prepare_intervals.config create mode 100644 tests/test_intervals_create.yml rename tests/{intervals.yml => test_no_intervals.yml} (54%) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 5e2339d7d7..bf2d4a91f1 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -51,36 +51,6 @@ process { ] } -// PREPARE INTERVALS - - withName: 'CREATE_INTERVALS_BED' { - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/intervals" }, - pattern: "*bed" - ] - } - - withName: 'GATK4_INTERVALLISTTOBED' { - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/intervals" }, - pattern: "*bed" - ] - } - - withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' { - ext.prefix = {"${meta.id}"} - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/intervals" }, - pattern: "*bed.gz" - ] - } - // TRIMMING withName: 'FASTP' { diff --git a/conf/modules/prepare_intervals.config b/conf/modules/prepare_intervals.config new file mode 100644 index 0000000000..40b233a942 --- /dev/null +++ b/conf/modules/prepare_intervals.config @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. 
+ ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// PREPARE INTERVALS + +process { + + withName: 'CREATE_INTERVALS_BED' { + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed" + ] + } + + withName: 'GATK4_INTERVALLISTTOBED' { + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed" + ] + } + + withName: 'TABIX_BGZIPTABIX_INTERVAL_SPLIT' { + ext.prefix = {"${meta.id}"} + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/intervals" }, + pattern: "*bed.gz" + ] + } +} diff --git a/nextflow.config b/nextflow.config index bd5ed4c3bc..ab2f7a0e54 100644 --- a/nextflow.config +++ b/nextflow.config @@ -286,6 +286,8 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/modules.config' +// Load more modules specific config for DSL2 module specific options +includeConfig 'conf/modules/prepare_intervals.config' includeConfig 'conf/modules/prepare_genome.config' includeConfig 'conf/modules/alignment_to_fastq.config' includeConfig 'conf/modules/aligner.config' diff --git a/subworkflows/local/prepare_intervals/main.nf b/subworkflows/local/prepare_intervals/main.nf index 852b6ef05b..b579cab087 100644 --- a/subworkflows/local/prepare_intervals/main.nf +++ b/subworkflows/local/prepare_intervals/main.nf @@ -7,8 +7,6 @@ // A when clause condition is defined in the conf/modules.config to determine if the module should be run include { BUILD_INTERVALS } from 
'../../../modules/local/build_intervals/main' -include { CNVKIT_ANTITARGET } from '../../../modules/nf-core/cnvkit/antitarget/main' -include { CNVKIT_REFERENCE } from '../../../modules/nf-core/cnvkit/reference/main' include { CREATE_INTERVALS_BED } from '../../../modules/local/create_intervals_bed/main' include { GATK4_INTERVALLISTTOBED } from '../../../modules/nf-core/gatk4/intervallisttobed/main' include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_INTERVAL_SPLIT } from '../../../modules/nf-core/tabix/bgziptabix/main' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 6b27e1bf31..2b4f5ebe71 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -1,27 +1,32 @@ -# aligner +# preprocessing + +## alignment_to_fastq +alignment_to_fastq: + - conf/modules/alignment_to_fastq.config + - modules/nf-core/cat/fastq/main.nf + - modules/nf-core/samtools/collatefastq/main.nf + - modules/nf-core/samtools/merge/main.nf + - modules/nf-core/samtools/view/main.nf + - subworkflows/local/bam_convert_samtools/main.nf + +## aligner +### bwamem bwamem: - conf/modules/aligner.config - modules/nf-core/bwa/mem/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +### bwamem2 bwamem2: - conf/modules/aligner.config - modules/nf-core/bwamem2/mem/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf +### dragmap dragmap: - conf/modules/aligner.config - modules/nf-core/dragmap/align/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf -# alignment_to_fastq -alignment_to_fastq: - - conf/modules/alignment_to_fastq.config - - modules/nf-core/cat/fastq/main.nf - - modules/nf-core/samtools/collatefastq/main.nf - - modules/nf-core/samtools/merge/main.nf - - modules/nf-core/samtools/view/main.nf - - subworkflows/local/bam_convert_samtools/main.nf - -# markduplicates +## markduplicates markduplicates: - conf/modules/markduplicates.config - modules/nf-core/gatk4/markduplicates/main.nf @@ -32,7 +37,7 @@ 
markduplicates: - subworkflows/local/bam_markduplicates/main.nf - subworkflows/local/cram_qc_mosdepth_samtools/main.nf -# prepare_recalibration +## prepare_recalibration prepare_recalibration: - conf/modules/prepare_recalibration.config - modules/nf-core/gatk4/baserecalibrator/main.nf @@ -40,7 +45,7 @@ prepare_recalibration: - modules/nf-core/samtools/convert/main.nf - subworkflows/local/bam_baserecalibrator/main.nf -# recalibrate +## recalibrate recalibrate: - conf/modules/recalibrate.config - modules/nf-core/gatk4/applybqsr/main.nf @@ -50,7 +55,16 @@ recalibrate: - subworkflows/local/bam_applybqsr/main.nf - subworkflows/local/cram_merge_index_samtools/main.nf -# gatk4_spark +## intervals +intervals: + - conf/modules/prepare_intervals.config + - modules/local/build_intervals/main.nf + - modules/local/create_intervals_bed/main.nf + - modules/nf-core/gatk4/intervallisttobed/main.nf + - modules/nf-core/tabix/bgziptabix/main.nf + - subworkflows/local/prepare_intervals/main.nf + +## gatk4_spark gatk4_spark: - conf/modules/markduplicates.config - conf/modules/prepare_recalibration.config @@ -64,6 +78,7 @@ gatk4_spark: - subworkflows/local/bam_markduplicates_spark/main.nf # annotate +## merge merge: - conf/modules/annotate.config - modules/nf-core/ensemblvep/main.nf @@ -72,11 +87,13 @@ merge: - subworkflows/local/vcf_annotate_all/main.nf - subworkflows/local/vcf_annotate_ensemblvep/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf +## snpeff snpeff: - conf/modules/annotate.config - modules/nf-core/snpeff/main.nf - modules/nf-core/tabix/bgziptabix/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf +## vep vep: - conf/modules/annotate.config - modules/nf-core/ensemblvep/main.nf diff --git a/tests/test_alignment_to_fastq.yml b/tests/test_alignment_to_fastq.yml index b9cdf8ad8b..af5be3b592 100644 --- a/tests/test_alignment_to_fastq.yml +++ b/tests/test_alignment_to_fastq.yml @@ -2,6 +2,7 @@ command: nextflow run main.nf -profile test,alignment_to_fastq tags: - 
alignment_to_fastq + - input_bam files: - path: results/csv/markduplicates.csv md5sum: 0d6120bb99e92f6810343270711ca53e diff --git a/tests/test_annotation_vep.yml b/tests/test_annotation_vep.yml index 569aed2e20..e80b48fd9f 100644 --- a/tests/test_annotation_vep.yml +++ b/tests/test_annotation_vep.yml @@ -12,15 +12,7 @@ # test_VEP.summary.html changes md5sums on reruns. contains: [ - "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf", - "General statistics", - "Lines of input read", - "Variants processed", - "Variants filtered out", - "Novel / existing variants", - "Overlapped genes", - "Overlapped transcripts", - "Overlapped regulatory features", + "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf" ] - name: Run VEP with fasta command: nextflow run main.nf -profile test,annotation --tools vep --vep_include_fasta --skip_tools multiqc @@ -34,3 +26,7 @@ md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/reports/EnsemblVEP/test/test_VEP.summary.html # text-based file changes md5sums on reruns. 
+ contains: + [ + "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf" + ] diff --git a/tests/test_default.yml b/tests/test_default.yml index a106028b87..861d6e8c86 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -3,6 +3,8 @@ tags: - default - preprocessing + - strelka + - variant_calling files: - path: results/csv/markduplicates.csv md5sum: 0d6120bb99e92f6810343270711ca53e diff --git a/tests/test_intervals_create.yml b/tests/test_intervals_create.yml new file mode 100644 index 0000000000..09d951034f --- /dev/null +++ b/tests/test_intervals_create.yml @@ -0,0 +1,51 @@ +- name: Run intervals false pipeline + command: nextflow run main.nf -profile test --intervals false --save_reference + tags: + - intervals + - intervals_false + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reference/intervals/chr22_1-40001.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: results/reference/intervals/chr22_1-40001.bed.gz + md5sum: d3341fa28986c40b24fcc10a079dbb80 + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 diff --git a/tests/test_markduplicates_from_bam.yml b/tests/test_markduplicates_from_bam.yml index d4e32f0fcf..f9a18fe635 100644 --- a/tests/test_markduplicates_from_bam.yml +++ b/tests/test_markduplicates_from_bam.yml @@ -1,7 +1,7 @@ - name: Run markduplicates starting from BAM command: nextflow run main.nf -profile test,markduplicates_bam tags: - - bam + - input_bam - markduplicates - preprocessing files: diff --git a/tests/test_markduplicates_from_cram.yml 
b/tests/test_markduplicates_from_cram.yml index 56e6eb0276..25b3a450bc 100644 --- a/tests/test_markduplicates_from_cram.yml +++ b/tests/test_markduplicates_from_cram.yml @@ -1,7 +1,7 @@ - name: Run markduplicates starting from CRAM command: nextflow run main.nf -profile test,markduplicates_cram tags: - - cram + - input_cram - markduplicates - preprocessing files: diff --git a/tests/intervals.yml b/tests/test_no_intervals.yml similarity index 54% rename from tests/intervals.yml rename to tests/test_no_intervals.yml index 9d1d528623..5284709b51 100644 --- a/tests/intervals.yml +++ b/tests/test_no_intervals.yml @@ -1,51 +1,3 @@ -- name: Run intervals false pipeline - command: nextflow run main.nf -profile test --intervals false - tags: - - intervals - - intervals_false - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 - - name: Run default pipeline without intervals command: nextflow run main.nf -profile test,no_intervals tags: @@ -76,6 +28,8 @@ # binary changes md5sums on reruns. - path: results/preprocessing/recalibrated/test/test.recal.cram.crai # binary changes md5sums on reruns. 
+ - path: results/reference/intervals + should_exist: false - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] diff --git a/tests/test_prepare_recalibration_from_bam.yml b/tests/test_prepare_recalibration_from_bam.yml index 018345eddd..0124c63e07 100644 --- a/tests/test_prepare_recalibration_from_bam.yml +++ b/tests/test_prepare_recalibration_from_bam.yml @@ -1,7 +1,7 @@ - name: Run prepare_recalibration starting from bam command: nextflow run main.nf -profile test,prepare_recalibration_bam tags: - - bam + - input_bam - prepare_recalibration - preprocessing files: @@ -39,7 +39,7 @@ - name: Run prepare_recalibration starting from bam and skip baserecalibration command: nextflow run main.nf -profile test,prepare_recalibration_bam,skip_bqsr --tools strelka tags: - - bam + - input_bam - prepare_recalibration - preprocessing files: diff --git a/tests/test_prepare_recalibration_from_cram.yml b/tests/test_prepare_recalibration_from_cram.yml index 07678ed14a..234eeb7d06 100644 --- a/tests/test_prepare_recalibration_from_cram.yml +++ b/tests/test_prepare_recalibration_from_cram.yml @@ -1,7 +1,7 @@ - name: Run prepare_recalibration starting from cram command: nextflow run main.nf -profile test,prepare_recalibration_cram tags: - - cram + - input_cram - prepare_recalibration - preprocessing files: @@ -33,7 +33,7 @@ - name: Run prepare_recalibration starting from cram and skip baserecalibration command: nextflow run main.nf -profile test,prepare_recalibration_cram,skip_bqsr --tools strelka tags: - - cram + - input_cram - prepare_recalibration - preprocessing files: From a3f502cd6bdaf5752454c4128a3dc47ac403436b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 13:33:12 +0100 Subject: [PATCH 20/49] prettier . 
-w --- tests/test_annotation_vep.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tests/test_annotation_vep.yml b/tests/test_annotation_vep.yml index e80b48fd9f..6c55f8c3d1 100644 --- a/tests/test_annotation_vep.yml +++ b/tests/test_annotation_vep.yml @@ -11,9 +11,7 @@ - path: results/reports/EnsemblVEP/test/test_VEP.summary.html # test_VEP.summary.html changes md5sums on reruns. contains: - [ - "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf" - ] + ["Input filetest.vcf.gzOutput filetest_VEP.ann.vcf"] - name: Run VEP with fasta command: nextflow run main.nf -profile test,annotation --tools vep --vep_include_fasta --skip_tools multiqc tags: @@ -25,8 +23,6 @@ - path: results/annotation/test/test_VEP.ann.vcf.gz.tbi md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - path: results/reports/EnsemblVEP/test/test_VEP.summary.html - # text-based file changes md5sums on reruns. + # text-based file changes md5sums on reruns. contains: - [ - "Input filetest.vcf.gzOutput filetest_VEP.ann.vcf" - ] + ["Input filetest.vcf.gzOutput filetest_VEP.ann.vcf"] From c5a4531c9d41de3c46d9cea5a975a3dae53c2a09 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 13:36:40 +0100 Subject: [PATCH 21/49] recalibrate --- ...rate.yml => test_recalibrate_from_bam.yml} | 91 +------------------ tests/test_recalibrate_from_cram.yml | 87 ++++++++++++++++++ 2 files changed, 89 insertions(+), 89 deletions(-) rename tests/{recalibrate.yml => test_recalibrate_from_bam.yml} (51%) create mode 100644 tests/test_recalibrate_from_cram.yml diff --git a/tests/recalibrate.yml b/tests/test_recalibrate_from_bam.yml similarity index 51% rename from tests/recalibrate.yml rename to tests/test_recalibrate_from_bam.yml index e64114cbbf..468675abae 100644 --- a/tests/recalibrate.yml +++ b/tests/test_recalibrate_from_bam.yml @@ -1,7 +1,7 @@ - name: Run Recalibration starting from bam command: nextflow run main.nf -profile test,recalibrate_bam tags: - - bam + - input_bam - recalibrate - 
preprocessing files: @@ -32,40 +32,10 @@ should_exist: false - path: results/preprocessing/markduplicates/ should_exist: false -- name: Run Recalibration starting from cram - command: nextflow run main.nf -profile test,recalibrate_cram - tags: - - cram - - recalibrate - - preprocessing - files: - - path: results/csv/recalibrated.csv - md5sum: 1888a924bc70bd80165a96ad641e22d6 - - path: results/multiqc - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: bdb8f185c35dd1eec7ce2f69bce57972 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 32ea70ef1b99def3dc900b4afd513a40 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: d034a60ae5c0768d67b9ba6442bd2212 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b3716e5cd1744610e69c29bd4ffad259 - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a0841c41460d0d4bdc2a1a777ee7e7c2 - - path: results/preprocessing/mapped/ - should_exist: false - - path: results/preprocessing/markduplicates/ - should_exist: false - name: Run Recalibration starting from bam and skip baserecalibration command: nextflow run main.nf -profile test,recalibrate_bam,skip_bqsr --tools strelka tags: - - bam + - input_bam - recalibrate - preprocessing - variant_calling @@ -119,60 +89,3 @@ should_exist: false - path: results/preprocessing/markduplicates/ should_exist: false -- name: Run Recalibration starting from cram and skip baserecalibration - command: nextflow run main.nf -profile test,recalibrate_cram,skip_bqsr --tools strelka - tags: - - cram - - recalibrate - - preprocessing - - 
variant_calling - - strelka - files: - - path: results/csv/variantcalled.csv - md5sum: 4d0effd3d8dc2b814230a189e7ca9dba - - path: results/multiqc - - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 - - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary - md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count - md5sum: ee7dafc8d941b8502a04a63dc3126fff - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - # The text-based file test.strelka.variants.TsTv.qual has different md5sums on my linux system and the GitHub-test-server. - # I don't know exactly why that is, but it file contains some very small numbers written in scientific notation 4.00193e-322 which might change slightly on different systems. - # Instead I'll try some string testing - contains: ["0 0 0 -nan 3 4 0.75", "2 0 1 0 2 4 0.5", "5 1 1 1 2 3 0.666667"] - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/csv/recalibrated.csv - should_exist: false - - path: results/preprocessing/markduplicates/test/test.md.cram - should_exist: false - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - should_exist: false - - path: results/preprocessing/recal_table/test/test.recal.table - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - should_exist: false - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - should_exist: false - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - should_exist: false - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - should_exist: false - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - should_exist: false - - path: results/reports/samtools/test/test.recal.cram.stats - should_exist: false - - path: results/preprocessing/mapped/ - should_exist: false - - path: results/preprocessing/markduplicates/ - should_exist: false diff --git a/tests/test_recalibrate_from_cram.yml b/tests/test_recalibrate_from_cram.yml new file mode 100644 index 0000000000..5b5ce93a4c --- /dev/null +++ b/tests/test_recalibrate_from_cram.yml @@ -0,0 +1,87 @@ +- name: Run Recalibration starting from cram + command: nextflow run main.nf -profile test,recalibrate_cram + tags: + - input_cram + - recalibrate + - preprocessing + files: + - path: results/csv/recalibrated.csv + md5sum: 1888a924bc70bd80165a96ad641e22d6 + - path: results/multiqc + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: bdb8f185c35dd1eec7ce2f69bce57972 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 32ea70ef1b99def3dc900b4afd513a40 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: d034a60ae5c0768d67b9ba6442bd2212 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b3716e5cd1744610e69c29bd4ffad259 + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: a0841c41460d0d4bdc2a1a777ee7e7c2 + - path: results/preprocessing/mapped/ + should_exist: false + - path: results/preprocessing/markduplicates/ + should_exist: false +- name: Run Recalibration starting from cram and skip baserecalibration + command: nextflow run main.nf -profile test,recalibrate_cram,skip_bqsr --tools strelka + tags: + - input_cram + - recalibrate + - preprocessing + - variant_calling + - strelka + files: + - path: results/csv/variantcalled.csv + md5sum: 4d0effd3d8dc2b814230a189e7ca9dba + - path: results/multiqc + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 + - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: ee7dafc8d941b8502a04a63dc3126fff + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + # The text-based file test.strelka.variants.TsTv.qual has different md5sums on my linux system and the GitHub-test-server. + # I don't know exactly why that is, but the file contains some very small numbers written in scientific notation 4.00193e-322 which might change slightly on different systems.
+ # Instead I'll try some string testing + contains: ["0 0 0 -nan 3 4 0.75", "2 0 1 0 2 4 0.5", "5 1 1 1 2 3 0.666667"] + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/csv/recalibrated.csv + should_exist: false + - path: results/preprocessing/markduplicates/test/test.md.cram + should_exist: false + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + should_exist: false + - path: results/preprocessing/recal_table/test/test.recal.table + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + should_exist: false + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + should_exist: false + - path: results/reports/samtools/test/test.recal.cram.stats + should_exist: false + - path: results/preprocessing/mapped/ + should_exist: false + - path: results/preprocessing/markduplicates/ + should_exist: false From b6ea03ef6ea90c534ba516e1656896b44acf7f18 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 13:50:42 +0100 Subject: [PATCH 22/49] trimming --- conf/modules/modules.config | 28 --------------- conf/modules/trimming.config | 42 +++++++++++++++++++++++ nextflow.config | 1 + 
tests/config/pytest_tags.yml | 10 ++++++ tests/{trimming.yml => test_trimming.yml} | 0 5 files changed, 53 insertions(+), 28 deletions(-) create mode 100644 conf/modules/trimming.config rename tests/{trimming.yml => test_trimming.yml} (100%) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index bf2d4a91f1..e6ae70434f 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -51,34 +51,6 @@ process { ] } -// TRIMMING - - withName: 'FASTP' { - ext.args = [ "", - params.trim_fastq ?: "--disable_adapter_trimming", - params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1. - params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed. - params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : "", //Remove bp from the 5' end of read 2. - params.three_prime_clip_r2 > 0 ? "--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed. - params.trim_nextseq ? "--trim_poly_g" : "", // Apply the --nextseq=X option, to trim based on quality after removing poly-G tails. - params.split_fastq > 0 ? 
"--split_by_lines ${params.split_fastq * 4}" : "", - - ].join(" ").trim() - publishDir = [ - [ - path: { "${params.outdir}/reports/fastp/${meta.sample}" }, - mode: params.publish_dir_mode, - pattern: "*.{html,json,log}" - ], - [ - enabled: params.save_trimmed || params.save_split_fastqs, - path: { "${params.outdir}/preprocessing/fastp/${meta.sample}/" }, - mode: params.publish_dir_mode, - pattern: "*.fastp.fastq.gz" - ] - ] - } - // UMI Subworkflow withName: 'FASTQTOBAM' { diff --git a/conf/modules/trimming.config b/conf/modules/trimming.config new file mode 100644 index 0000000000..13aba3531d --- /dev/null +++ b/conf/modules/trimming.config @@ -0,0 +1,42 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TRIMMING + +process { + + withName: 'FASTP' { + ext.args = [ "", + params.trim_fastq ?: "--disable_adapter_trimming", + params.clip_r1 > 0 ? "--trim_front1 ${params.clip_r1}" : "", // Remove bp from the 5' end of read 1 + params.clip_r2 > 0 ? "--trim_front2 ${params.clip_r2}" : "", // Remove bp from the 5' end of read 2 + params.three_prime_clip_r1 > 0 ? "--trim_tail1 ${params.three_prime_clip_r1}" : "", // Remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed + params.three_prime_clip_r2 > 0 ? 
"--trim_tail2 ${params.three_prime_clip_r2}" : "", // Remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed + params.trim_nextseq ? "--trim_poly_g" : "", // Apply the --nextseq=X option, to trim based on quality after removing poly-G tails + params.split_fastq > 0 ? "--split_by_lines ${params.split_fastq * 4}" : "" + ].join(" ").trim() + publishDir = [ + [ + path: { "${params.outdir}/reports/fastp/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.{html,json,log}" + ], + [ + enabled: params.save_trimmed || params.save_split_fastqs, + path: { "${params.outdir}/preprocessing/fastp/${meta.sample}/" }, + mode: params.publish_dir_mode, + pattern: "*.fastp.fastq.gz" + ] + ] + } +} diff --git a/nextflow.config b/nextflow.config index ab2f7a0e54..17e5292996 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,6 +290,7 @@ includeConfig 'conf/modules/modules.config' includeConfig 'conf/modules/prepare_intervals.config' includeConfig 'conf/modules/prepare_genome.config' includeConfig 'conf/modules/alignment_to_fastq.config' +includeConfig 'conf/modules/trimming.config' includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/markduplicates.config' includeConfig 'conf/modules/prepare_recalibration.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 2b4f5ebe71..7663b8367e 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -9,17 +9,24 @@ alignment_to_fastq: - modules/nf-core/samtools/view/main.nf - subworkflows/local/bam_convert_samtools/main.nf +## trimming + - conf/modules/trimming.config + - modules/nf-core/fastp/main.nf + ## aligner + ### bwamem bwamem: - conf/modules/aligner.config - modules/nf-core/bwa/mem/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + ### bwamem2 bwamem2: - conf/modules/aligner.config - modules/nf-core/bwamem2/mem/main.nf - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + ### dragmap 
dragmap: - conf/modules/aligner.config @@ -78,6 +85,7 @@ gatk4_spark: - subworkflows/local/bam_markduplicates_spark/main.nf # annotate + ## merge merge: - conf/modules/annotate.config @@ -87,12 +95,14 @@ merge: - subworkflows/local/vcf_annotate_all/main.nf - subworkflows/local/vcf_annotate_ensemblvep/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf + ## snpeff snpeff: - conf/modules/annotate.config - modules/nf-core/snpeff/main.nf - modules/nf-core/tabix/bgziptabix/main.nf - subworkflows/local/vcf_annotate_snpeff/main.nf + ## vep vep: - conf/modules/annotate.config diff --git a/tests/trimming.yml b/tests/test_trimming.yml similarity index 100% rename from tests/trimming.yml rename to tests/test_trimming.yml From e14cc7f7b2e84bc6b51b9a16730eac6ceacb509a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 14:00:10 +0100 Subject: [PATCH 23/49] umi --- conf/modules/modules.config | 68 --------------------------- conf/modules/umi.config | 83 +++++++++++++++++++++++++++++++++ nextflow.config | 1 + tests/config/pytest_tags.yml | 15 ++++++ tests/{umi.yml => test_umi.yml} | 1 + 5 files changed, 100 insertions(+), 68 deletions(-) create mode 100644 conf/modules/umi.config rename tests/{umi.yml => test_umi.yml} (99%) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index e6ae70434f..ae0c693b1c 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -51,74 +51,6 @@ process { ] } -// UMI Subworkflow - - withName: 'FASTQTOBAM' { - ext.prefix = {"${meta.id}"} - ext.args = { "--read-structures $params.umi_read_structure" } - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: 'BAM2FASTQ' { - ext.args = '-T RX' - ext.when = { params.umi_read_structure } - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - if (params.umi_read_structure) { - withName: 
"NFCORE_SAREK:SAREK:FASTQ_CREATE_UMI_CONSENSUS_FGBIO:ALIGN_UMI:BWAMEM.*_MEM" { - ext.args = { "-K 100000000 -p -C -Y -R ${meta.read_group}" } - ext.args2 = '-bS' - ext.prefix = {"${meta.id}.umi_unsorted"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - - withName: "NFCORE_SAREK:SAREK:FASTQ_CREATE_UMI_CONSENSUS_FGBIO:ALIGN_UMI:DRAGMAP_ALIGN" { - ext.args2 = '-bS' - ext.prefix = {"${meta.id}.umi_unsorted"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - } - - withName: 'SAMBLASTER' { - ext.args = '-M --addMateTags' - ext.prefix = {"${meta.id}_unsorted_tagged"} - publishDir = [ - //specify to avoid publishing, overwritten otherwise - enabled: false - ] - } - withName: 'GROUPREADSBYUMI' { - publishDir = [ - [ path: { "${params.outdir}/reports/umi/" }, - mode: params.publish_dir_mode, - pattern: "*.{txt}" - ] - ] - } - - withName: 'CALLUMICONSENSUS' { - ext.args = '-M 1 -S Coordinate' - ext.prefix = {"${meta.id}_umi-consensus"} - publishDir = [ - path: { "${params.outdir}/preprocessing/umi/${meta.sample}" }, - mode: params.publish_dir_mode, - pattern: "*.{bam}" - ] - } - // QC withName: 'FASTQC' { diff --git a/conf/modules/umi.config b/conf/modules/umi.config new file mode 100644 index 0000000000..ed4f29570b --- /dev/null +++ b/conf/modules/umi.config @@ -0,0 +1,83 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// UMI + +process { + + withName: 'FASTQTOBAM' { + ext.prefix = {"${meta.id}"} + ext.args = { "--read-structures $params.umi_read_structure" } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: 'BAM2FASTQ' { + ext.args = '-T RX' + ext.when = { params.umi_read_structure } + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + if (params.umi_read_structure) { + withName: "NFCORE_SAREK:SAREK:FASTQ_CREATE_UMI_CONSENSUS_FGBIO:ALIGN_UMI:BWAMEM.*_MEM" { + ext.args = { "-K 100000000 -p -C -Y -R ${meta.read_group}" } + ext.args2 = '-bS' + ext.prefix = {"${meta.id}.umi_unsorted"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + + withName: "NFCORE_SAREK:SAREK:FASTQ_CREATE_UMI_CONSENSUS_FGBIO:ALIGN_UMI:DRAGMAP_ALIGN" { + ext.args2 = '-bS' + ext.prefix = {"${meta.id}.umi_unsorted"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + } + + withName: 'SAMBLASTER' { + ext.args = '-M --addMateTags' + ext.prefix = {"${meta.id}_unsorted_tagged"} + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } + withName: 'GROUPREADSBYUMI' { + publishDir = [ + [ path: { "${params.outdir}/reports/umi/" }, + mode: params.publish_dir_mode, + pattern: "*.{txt}" + ] + ] + } + + withName: 'CALLUMICONSENSUS' { + ext.args = '-M 1 -S Coordinate' + ext.prefix = {"${meta.id}_umi-consensus"} + publishDir = [ + path: { "${params.outdir}/preprocessing/umi/${meta.sample}" }, + mode: params.publish_dir_mode, + pattern: "*.{bam}" + ] + } +} diff --git a/nextflow.config b/nextflow.config index 17e5292996..78b9625c59 100644 --- a/nextflow.config +++ b/nextflow.config @@ -290,6 +290,7 @@ includeConfig 'conf/modules/modules.config' 
includeConfig 'conf/modules/prepare_intervals.config' includeConfig 'conf/modules/prepare_genome.config' includeConfig 'conf/modules/alignment_to_fastq.config' +includeConfig 'conf/modules/umi.config' includeConfig 'conf/modules/trimming.config' includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/markduplicates.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 7663b8367e..4535e34431 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -9,7 +9,22 @@ alignment_to_fastq: - modules/nf-core/samtools/view/main.nf - subworkflows/local/bam_convert_samtools/main.nf +## umi +umi: + - conf/modules/umi.config + - modules/nf-core/bwa/mem/main.nf + - modules/nf-core/bwamem2/mem/main.nf + - modules/nf-core/dragmap/align/main.nf + - modules/nf-core/fgbio/callmolecularconsensusreads/main.nf + - modules/nf-core/fgbio/fastqtobam/main.nf + - modules/nf-core/fgbio/groupreadsbyumi/main.nf + - modules/nf-core/samblaster/main.nf + - modules/nf-core/samtools/bam2fq/main.nf + - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf + - subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf + ## trimming +trimming: - conf/modules/trimming.config - modules/nf-core/fastp/main.nf diff --git a/tests/umi.yml b/tests/test_umi.yml similarity index 99% rename from tests/umi.yml rename to tests/test_umi.yml index b1d2b5b62d..5ddacea421 100644 --- a/tests/umi.yml +++ b/tests/test_umi.yml @@ -1,6 +1,7 @@ - name: Run UMI test command: nextflow run main.nf -profile test,umi tags: + - preprocessing - umi files: - path: results/preprocessing/umi/test/test-test_L1_umi-consensus.bam From 4a3685da4f251d683deef836ae49cd3d974308cd Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 14:27:57 +0100 Subject: [PATCH 24/49] default + trimming | split fastq > fastp --- tests/config/pytest_tags.yml | 4 +- tests/save_mapped.yml | 56 ---- tests/save_output_bam.yml | 101 ------ tests/skip_markduplicates.yml | 258 
--------------- tests/skip_qc.yml | 65 ---- tests/test_default.yml | 362 ++++++++++++++++++++++ tests/{split_fastq.yml => test_fastp.yml} | 58 ++++ tests/test_gatk4_spark.yml | 85 +++++ tests/test_markduplicates_from_bam.yml | 53 ++++ tests/test_markduplicates_from_cram.yml | 49 +++ tests/test_trimming.yml | 55 ---- 11 files changed, 609 insertions(+), 537 deletions(-) delete mode 100644 tests/save_mapped.yml delete mode 100644 tests/save_output_bam.yml delete mode 100644 tests/skip_markduplicates.yml delete mode 100644 tests/skip_qc.yml rename tests/{split_fastq.yml => test_fastp.yml} (51%) delete mode 100644 tests/test_trimming.yml diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 4535e34431..70208f0ba2 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -23,8 +23,8 @@ umi: - subworkflows/local/fastq_align_bwamem_mem2_dragmap/main.nf - subworkflows/local/fastq_create_umi_consensus_fgbio/main.nf -## trimming -trimming: +## fastp +fastp: - conf/modules/trimming.config - modules/nf-core/fastp/main.nf diff --git a/tests/save_mapped.yml b/tests/save_mapped.yml deleted file mode 100644 index 81be25b685..0000000000 --- a/tests/save_mapped.yml +++ /dev/null @@ -1,56 +0,0 @@ -- name: Run save_mapped - command: nextflow run main.nf -profile test --save_mapped - tags: - - preprocessing - - save_mapped - files: - - path: results/csv/mapped.csv - md5sum: ae97b7394ab53a6b60921ab06c713cd9 - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - # binary changes md5sums on reruns. 
- - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: 
results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 diff --git a/tests/save_output_bam.yml b/tests/save_output_bam.yml deleted file mode 100644 index 615b27a67d..0000000000 --- a/tests/save_output_bam.yml +++ /dev/null @@ -1,101 +0,0 @@ -- name: Run save_output_as_bam - command: nextflow run main.nf -profile test --save_output_as_bam - tags: - - preprocessing - - save_output_as_bam - files: - - path: results/csv/markduplicates.csv - md5sum: 8679570b8db1937ee574fec36b25d7bc - - path: results/csv/markduplicates_no_table.csv - md5sum: 145154b6037e90448273fbe8e8dec5d3 - - path: results/csv/recalibrated.csv - md5sum: 3ddc20eb105fdcc483945afd7d7d238c - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.bam.bai - md5sum: 3add495a02547bd9e6882935d2f3e1f7 - - path: results/preprocessing/markduplicates/test/test.md.bam - md5sum: ebb72f603f016ce37964259c61625360 - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.bam.bai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 -- name: Run save_output_as_bam with gatk4 spark - command: nextflow run main.nf -profile test,use_gatk_spark --save_output_as_bam - tags: - - preprocessing - - save_output_as_bam - files: - - path: results/csv/markduplicates.csv - md5sum: 8679570b8db1937ee574fec36b25d7bc - - path: results/csv/markduplicates_no_table.csv - md5sum: 145154b6037e90448273fbe8e8dec5d3 - - path: results/csv/recalibrated.csv - md5sum: 3ddc20eb105fdcc483945afd7d7d238c - - path: 
results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.bam - md5sum: 8bfd111af60ca7e63b233e59b0fb570b - - path: results/preprocessing/markduplicates/test/test.md.bam.bai - md5sum: fe8bc8655aff0d1a8093680390d98fab - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2 - - path: results/preprocessing/recalibrated/test/test.recal.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.bam.bai - # binary changes md5sums on reruns. - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - # The text-based output-file test.md.cram.metrics only contains comments and no metrics, which is a bit odd. - # The file test.md.cram.metrics contains a timestamp which means that it cannot be tested using the md5sum. - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 9269e959d5eec5273307693c6274f53e - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: deb2df4f3ca19f441f0a22f04dea8723 - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: 96acf3439477a1448b7e1b43c020c7d0 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: c259a9fd73f576626c3a29841c2dc019 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: cf85ba4692f016af70db2a594d9effdf - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 9269e959d5eec5273307693c6274f53e - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: deb2df4f3ca19f441f0a22f04dea8723 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 96acf3439477a1448b7e1b43c020c7d0 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: c259a9fd73f576626c3a29841c2dc019 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: 
cf85ba4692f016af70db2a594d9effdf - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: b573b2e930c5f68e7e4910faf9e51e44 - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a75731dca9b42d87d9997ea44aaf82fc diff --git a/tests/skip_markduplicates.yml b/tests/skip_markduplicates.yml deleted file mode 100644 index 87bc3e0fcf..0000000000 --- a/tests/skip_markduplicates.yml +++ /dev/null @@ -1,258 +0,0 @@ -- name: Run default pipeline with skipping MarkDuplicates - command: nextflow run main.nf -profile test,skip_markduplicates - tags: - - preprocessing - - skip_markduplicates - files: - - path: results/csv/mapped.csv - md5sum: ae97b7394ab53a6b60921ab06c713cd9 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 8de213f4c00fac61a1102633760493df - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 - - path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false - - path: results/preprocessing/mapped/test/test.sorted.bam - should_exist: false -- name: Run skip markduplicates bam from step markduplicates - command: nextflow run main.nf -profile test,markduplicates_bam,skip_markduplicates - tags: - - bam - - preprocessing - - skip_markduplicates - files: - - path: results/csv/recalibrated.csv - md5sum: 1888a924bc70bd80165a96ad641e22d6 
- - path: results/multiqc - - path: results/preprocessing/converted/test/test.converted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/converted/test/test.converted.cram.crai - # binary changes md5sums on reruns.py - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 35d89a3811aa31711fc9815b6b80e6ec - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: bdb8f185c35dd1eec7ce2f69bce57972 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 32ea70ef1b99def3dc900b4afd513a40 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: d034a60ae5c0768d67b9ba6442bd2212 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b3716e5cd1744610e69c29bd4ffad259 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: bdb8f185c35dd1eec7ce2f69bce57972 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 32ea70ef1b99def3dc900b4afd513a40 - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: d034a60ae5c0768d67b9ba6442bd2212 - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: b3716e5cd1744610e69c29bd4ffad259 - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 9de0a2738ab150e2e3fd857b0f42efc4 - - path: results/csv/markduplicates.csv - should_exist: false - - 
path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false - - path: results/preprocessing/mapped/test/test.sorted.bam - should_exist: false -- name: Run skip markduplicates cram from step markduplicates - command: nextflow run main.nf -profile test,markduplicates_cram,skip_markduplicates - tags: - - cram - - preprocessing - - skip_markduplicates - files: - - path: results/csv/recalibrated.csv - md5sum: 1888a924bc70bd80165a96ad641e22d6 - - path: results/multiqc - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 35d89a3811aa31711fc9815b6b80e6ec - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - md5sum: adac39d7845f64f6b35b766751393b06 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: bdb8f185c35dd1eec7ce2f69bce57972 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 32ea70ef1b99def3dc900b4afd513a40 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: d034a60ae5c0768d67b9ba6442bd2212 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b3716e5cd1744610e69c29bd4ffad259 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: bdb8f185c35dd1eec7ce2f69bce57972 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 32ea70ef1b99def3dc900b4afd513a40 - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: d034a60ae5c0768d67b9ba6442bd2212 - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: 
b3716e5cd1744610e69c29bd4ffad259 - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 42b536ada66f772bb4dfd741098b2783 - - path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.sorted.cram - should_exist: false - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - should_exist: false -- name: Run default pipeline with skipping MarkDuplicates with save_mapped - command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped - tags: - - preprocessing - - skip_markduplicates - files: - - path: results/csv/mapped.csv - md5sum: ae97b7394ab53a6b60921ab06c713cd9 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 8de213f4c00fac61a1102633760493df - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 - - path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false -- name: Run default pipeline with skipping MarkDuplicates with save_mapped & save_output_as_bam - command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped --save_output_as_bam - tags: - - preprocessing - - skip_markduplicates - files: - - path: results/csv/mapped.csv - md5sum: 7f21bf40d3fbc248ee2ea3fdf0f7cdb2 - - path: results/csv/recalibrated.csv - 
md5sum: 3ddc20eb105fdcc483945afd7d7d238c - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.bam.bai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 8de213f4c00fac61a1102633760493df - - path: results/preprocessing/recalibrated/test/test.recal.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.bam.bai - # binary changes md5sums on reruns. - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 - 
- path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false diff --git a/tests/skip_qc.yml b/tests/skip_qc.yml deleted file mode 100644 index 6cb6ee5eff..0000000000 --- a/tests/skip_qc.yml +++ /dev/null @@ -1,65 +0,0 @@ -- name: Run default pipeline with skipping all QC steps - command: nextflow run main.nf -profile test --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools - tags: - - skip_qc - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/multiqc - should_exist: false - - path: results/reports/fastqc - should_exist: false - - path: results/reports/markduplicates - should_exist: false - - path: results/reports/mosdepth - should_exist: false - - path: results/reports/samtools - should_exist: false - -- name: Run spark pipeline with skipping all QC steps - command: nextflow run main.nf -profile test,use_gatk_spark --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools - tags: - - skip_qc - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/preprocessing/markduplicates/test/test.md.cram - md5sum: cf0ebfe288ee7164cd2fcd028bda1887 - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - md5sum: 2136b37eca3e806c77c39ee823231679 - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2 - - path: results/preprocessing/recalibrated/test/test.recal.cram - md5sum: 1c168110f006bacba2ed8783bb5a3dee - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - md5sum: 1c91543c9f75bc354274b1a1da745ee3 - - path: results/multiqc - should_exist: false - - path: results/reports/fastqc - should_exist: false - - path: results/reports/markduplicates - should_exist: false - - path: results/reports/mosdepth - should_exist: false - - path: results/reports/samtools - should_exist: false diff --git a/tests/test_default.yml b/tests/test_default.yml index 861d6e8c86..3552b93ce0 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -71,3 +71,365 @@ should_exist: false - path: results/preprocessing/mapped/ should_exist: false +- name: Run save_mapped + command: nextflow run main.nf -profile test --save_mapped + tags: + - default + - preprocessing + - save_mapped + 
- strelka + - variant_calling + files: + - path: results/csv/mapped.csv + md5sum: ae97b7394ab53a6b60921ab06c713cd9 + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + # conda changes md5sums for test. 
+ - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: dd87f507da7de20d5318841af312493b + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + - path: results/strelka + should_exist: false +- name: Run save_output_as_bam + command: nextflow run main.nf -profile test --save_output_as_bam + tags: + - default + - preprocessing + - save_output_as_bam + - strelka + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: 8679570b8db1937ee574fec36b25d7bc + - path: results/csv/markduplicates_no_table.csv + md5sum: 145154b6037e90448273fbe8e8dec5d3 + - path: results/csv/recalibrated.csv + md5sum: 3ddc20eb105fdcc483945afd7d7d238c + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.bam.bai + md5sum: 3add495a02547bd9e6882935d2f3e1f7 + - path: results/preprocessing/markduplicates/test/test.md.bam + md5sum: ebb72f603f016ce37964259c61625360 + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.bam.bai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + # conda changes md5sums for test. 
+ - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: dd87f507da7de20d5318841af312493b + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + - path: results/strelka + should_exist: false +- name: Run default pipeline with skipping Markduplicates + command: nextflow run main.nf -profile test,skip_markduplicates + tags: + - default + - preprocessing + - skip_markduplicates + files: + - path: results/csv/mapped.csv + md5sum: ae97b7394ab53a6b60921ab06c713cd9 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 8de213f4c00fac61a1102633760493df + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + - path: results/reports/samtools/test/test.sorted.cram.stats + md5sum: 59d921ed3970e19145fbae75966de3e3 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false + - path: results/preprocessing/mapped/test/test.sorted.bam + should_exist: false +- name: Run default pipeline with skipping Markduplicates with save_mapped + command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped + tags: + - default + - preprocessing + - save_mapped + - skip_markduplicates + files: + - path: results/csv/mapped.csv + md5sum: 
ae97b7394ab53a6b60921ab06c713cd9 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 8de213f4c00fac61a1102633760493df + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + - path: 
results/reports/samtools/test/test.sorted.cram.stats + md5sum: 59d921ed3970e19145fbae75966de3e3 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false +- name: Run default pipeline with skipping Markduplicates with save_mapped & save_output_as_bam + command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped --save_output_as_bam + tags: + - default + - preprocessing + - save_mapped + - save_output_as_bam + - skip_markduplicates + files: + - path: results/csv/mapped.csv + md5sum: 7f21bf40d3fbc248ee2ea3fdf0f7cdb2 + - path: results/csv/recalibrated.csv + md5sum: 3ddc20eb105fdcc483945afd7d7d238c + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.bam.bai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 8de213f4c00fac61a1102633760493df + - path: results/preprocessing/recalibrated/test/test.recal.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.bam.bai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + - path: results/reports/samtools/test/test.sorted.cram.stats + md5sum: 59d921ed3970e19145fbae75966de3e3 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false +- name: Run default pipeline with skipping all QC steps + command: nextflow run main.nf -profile test --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools + tags: + - default + - preprocessing + - skip_qc + - strelka + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: 
results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/multiqc + should_exist: false + - path: results/reports/fastqc + should_exist: false + - path: results/reports/markduplicates + should_exist: false + - path: results/reports/mosdepth + should_exist: false + - path: results/reports/samtools + should_exist: false + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + # conda changes md5sums for test. + - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: dd87f507da7de20d5318841af312493b + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + - path: results/strelka + should_exist: false diff --git a/tests/split_fastq.yml b/tests/test_fastp.yml similarity index 51% rename from tests/split_fastq.yml rename to tests/test_fastp.yml index 49b17b39d4..36453b4e60 100644 --- a/tests/split_fastq.yml +++ b/tests/test_fastp.yml @@ -1,6 +1,64 @@ +- name: Run trimming pipeline + command: nextflow run main.nf -profile test,trimming --save_trimmed + tags: + - fastp + - preprocessing + - trimming + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/fastp/test/test-test_L1_1.fastp.fastq.gz + md5sum: 325acd143b6fcbf92ca9e34f97d87158 + - path: results/preprocessing/fastp/test/test-test_L1_2.fastp.fastq.gz + md5sum: af73322b9742bce0dd7f767c5c676c0e + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 766a4cd88d3d085c19d8e31540040ecd + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastp/test + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8304 930 80 523308 3813 0 0 0.375148", "1.0 930 930"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 626d5c03a6192283a302bde72415c693 + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: 4a82e0e19a549edae27fc0cc71f6546f + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: 51e054d8b3b4ef745c94fbda7a2ef2f3 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 80738b824b3a7d00bdad2d8e26ac3ccc + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: a5ad8f917979f62eacfff1461529dbaa + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 626d5c03a6192283a302bde72415c693 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 4a82e0e19a549edae27fc0cc71f6546f + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 51e054d8b3b4ef745c94fbda7a2ef2f3 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 80738b824b3a7d00bdad2d8e26ac3ccc + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: a5ad8f917979f62eacfff1461529dbaa + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 7382e028335a2b057cb54ea160c4be7b + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 5adeae7e9ce068009e88bacba549096e - name: Run split fastq module command: nextflow run main.nf -profile test,split_fastq tags: + - fastp + - preprocessing - split_fastq files: - path: results/csv/markduplicates.csv diff --git a/tests/test_gatk4_spark.yml b/tests/test_gatk4_spark.yml index 53dc42cca5..a23bae3d8b 100644 --- a/tests/test_gatk4_spark.yml +++ b/tests/test_gatk4_spark.yml @@ -49,3 +49,88 @@ md5sum: a75731dca9b42d87d9997ea44aaf82fc - path: 
results/preprocessing/mapped/ should_exist: false +- name: Run default pipeline with gatk4_spark & skipping all QC steps + command: nextflow run main.nf -profile test,use_gatk_spark --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools + tags: + - gatk4_spark + - preprocessing + - skip_qc + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/preprocessing/markduplicates/test/test.md.cram + md5sum: cf0ebfe288ee7164cd2fcd028bda1887 + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + md5sum: 2136b37eca3e806c77c39ee823231679 + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2 + - path: results/preprocessing/recalibrated/test/test.recal.cram + md5sum: 1c168110f006bacba2ed8783bb5a3dee + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + md5sum: 1c91543c9f75bc354274b1a1da745ee3 + - path: results/multiqc + should_exist: false + - path: results/reports/fastqc + should_exist: false + - path: results/reports/markduplicates + should_exist: false + - path: results/reports/mosdepth + should_exist: false + - path: results/reports/samtools + should_exist: false +- name: Run save_output_as_bam with gatk4 spark + command: nextflow run main.nf -profile test,use_gatk_spark --save_output_as_bam + tags: + - gatk4_spark + - preprocessing + - save_output_as_bam + files: + - path: results/csv/markduplicates.csv + md5sum: 8679570b8db1937ee574fec36b25d7bc + - path: results/csv/markduplicates_no_table.csv + md5sum: 145154b6037e90448273fbe8e8dec5d3 + - path: results/csv/recalibrated.csv + md5sum: 3ddc20eb105fdcc483945afd7d7d238c + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.bam + md5sum: 
8bfd111af60ca7e63b233e59b0fb570b + - path: results/preprocessing/markduplicates/test/test.md.bam.bai + md5sum: fe8bc8655aff0d1a8093680390d98fab + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2 + - path: results/preprocessing/recalibrated/test/test.recal.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.bam.bai + # binary changes md5sums on reruns. + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + # The text-based output-file test.md.cram.metrics only contains comments and no metrics, which is a bit odd. + # The file test.md.cram.metrics contains a timestamp which means that it cannot be tested using the md5sum. + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 9269e959d5eec5273307693c6274f53e + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: deb2df4f3ca19f441f0a22f04dea8723 + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: 96acf3439477a1448b7e1b43c020c7d0 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: c259a9fd73f576626c3a29841c2dc019 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: cf85ba4692f016af70db2a594d9effdf + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 9269e959d5eec5273307693c6274f53e + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: deb2df4f3ca19f441f0a22f04dea8723 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 96acf3439477a1448b7e1b43c020c7d0 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: c259a9fd73f576626c3a29841c2dc019 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: cf85ba4692f016af70db2a594d9effdf + - path: results/reports/samtools/test/test.md.cram.stats + 
md5sum: b573b2e930c5f68e7e4910faf9e51e44 + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: a75731dca9b42d87d9997ea44aaf82fc diff --git a/tests/test_markduplicates_from_bam.yml b/tests/test_markduplicates_from_bam.yml index f9a18fe635..7b07d22bef 100644 --- a/tests/test_markduplicates_from_bam.yml +++ b/tests/test_markduplicates_from_bam.yml @@ -52,3 +52,56 @@ md5sum: a0ecedb6be28639e276d38e4ac18935b - path: results/reports/samtools/test/test.recal.cram.stats md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 +- name: Run skip markduplicates bam from step markduplicates + command: nextflow run main.nf -profile test,markduplicates_bam,skip_markduplicates + tags: + - input_bam + - markduplicates + - preprocessing + - skip_markduplicates + files: + - path: results/csv/recalibrated.csv + md5sum: 1888a924bc70bd80165a96ad641e22d6 + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns.py + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: bdb8f185c35dd1eec7ce2f69bce57972 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 32ea70ef1b99def3dc900b4afd513a40 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: d034a60ae5c0768d67b9ba6442bd2212 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b3716e5cd1744610e69c29bd4ffad259 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: bdb8f185c35dd1eec7ce2f69bce57972 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 32ea70ef1b99def3dc900b4afd513a40 + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: d034a60ae5c0768d67b9ba6442bd2212 + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: b3716e5cd1744610e69c29bd4ffad259 + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 71af990f7acad8bf24d37e88c4adcded + - path: results/reports/samtools/test/test.sorted.cram.stats + md5sum: 9de0a2738ab150e2e3fd857b0f42efc4 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false + - path: results/preprocessing/mapped/test/test.sorted.bam + should_exist: false diff --git a/tests/test_markduplicates_from_cram.yml b/tests/test_markduplicates_from_cram.yml index 25b3a450bc..0152929fe3 100644 --- a/tests/test_markduplicates_from_cram.yml +++ b/tests/test_markduplicates_from_cram.yml @@ -50,3 +50,52 @@ md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 - path: results/preprocessing/mapped/ should_exist: false 
+- name: Run skip markduplicates cram from step markduplicates + command: nextflow run main.nf -profile test,markduplicates_cram,skip_markduplicates + tags: + - input_cram + - markduplicates + - preprocessing + - skip_markduplicates + files: + - path: results/csv/recalibrated.csv + md5sum: 1888a924bc70bd80165a96ad641e22d6 + - path: results/multiqc + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 35d89a3811aa31711fc9815b6b80e6ec + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + md5sum: adac39d7845f64f6b35b766751393b06 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: bdb8f185c35dd1eec7ce2f69bce57972 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 32ea70ef1b99def3dc900b4afd513a40 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: d034a60ae5c0768d67b9ba6442bd2212 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b3716e5cd1744610e69c29bd4ffad259 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: bdb8f185c35dd1eec7ce2f69bce57972 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 6fd2e5c5c938bf69cdb2811f9e3afef8 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 32ea70ef1b99def3dc900b4afd513a40 + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: d034a60ae5c0768d67b9ba6442bd2212 + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: b3716e5cd1744610e69c29bd4ffad259 + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 71af990f7acad8bf24d37e88c4adcded + - path: results/reports/samtools/test/test.sorted.cram.stats 
+ md5sum: 42b536ada66f772bb4dfd741098b2783 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.sorted.cram + should_exist: false + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + should_exist: false diff --git a/tests/test_trimming.yml b/tests/test_trimming.yml deleted file mode 100644 index fd7ae1c79f..0000000000 --- a/tests/test_trimming.yml +++ /dev/null @@ -1,55 +0,0 @@ -- name: Run trimming pipeline - command: nextflow run main.nf -profile test,trimming --save_trimmed - tags: - - trimming - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/fastp/test/test-test_L1_1.fastp.fastq.gz - md5sum: 325acd143b6fcbf92ca9e34f97d87158 - - path: results/preprocessing/fastp/test/test-test_L1_2.fastp.fastq.gz - md5sum: af73322b9742bce0dd7f767c5c676c0e - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 766a4cd88d3d085c19d8e31540040ecd - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastp/test - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8304 930 80 523308 3813 0 0 0.375148", "1.0 930 930"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 626d5c03a6192283a302bde72415c693 - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: 4a82e0e19a549edae27fc0cc71f6546f - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: 51e054d8b3b4ef745c94fbda7a2ef2f3 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 80738b824b3a7d00bdad2d8e26ac3ccc - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: a5ad8f917979f62eacfff1461529dbaa - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 626d5c03a6192283a302bde72415c693 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 4a82e0e19a549edae27fc0cc71f6546f - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 51e054d8b3b4ef745c94fbda7a2ef2f3 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 80738b824b3a7d00bdad2d8e26ac3ccc - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: a5ad8f917979f62eacfff1461529dbaa - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 7382e028335a2b057cb54ea160c4be7b - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5adeae7e9ce068009e88bacba549096e From bfc72f6adc765d3e4a1a2c9c92567f816ab5c3db Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 14:52:23 +0100 Subject: [PATCH 25/49] target > intervals --- tests/targeted.yml | 58 ----------- tests/test_intervals.yml | 169 ++++++++++++++++++++++++++++++++ tests/test_intervals_create.yml | 51 ---------- tests/test_no_intervals.yml | 59 ----------- 4 files changed, 169 insertions(+), 168 deletions(-) delete mode 100644 
tests/targeted.yml create mode 100644 tests/test_intervals.yml delete mode 100644 tests/test_intervals_create.yml delete mode 100644 tests/test_no_intervals.yml diff --git a/tests/targeted.yml b/tests/targeted.yml deleted file mode 100644 index e2d19b765b..0000000000 --- a/tests/targeted.yml +++ /dev/null @@ -1,58 +0,0 @@ -- name: Run default pipeline with target bed - command: nextflow run main.nf -profile test,targeted - tags: - - preprocessing - - targeted - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: c990e4f1b7dbd5a3a623882a54ae2bf2 - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 5a0679057c530e5945c9c5a3a17312dc - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: dcc9ab2bf3248903e02d8da87e678977 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz - md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi - md5sum: b0ab630c3241fbd7581b7a38d944ff8b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 91e0d531f1bab64711ecefe52bfc8255 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 0b3162def977123809598639f7698121 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: a8455eb2947de529abfa62b303986e0f - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz - md5sum: d9fa560ff78ae106cfee9db2c90801b5 - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi - md5sum: 4816eeb9af254ca40177b08cf11b98d2 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 91e0d531f1bab64711ecefe52bfc8255 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 diff --git 
a/tests/test_intervals.yml b/tests/test_intervals.yml new file mode 100644 index 0000000000..a960611d6a --- /dev/null +++ b/tests/test_intervals.yml @@ -0,0 +1,169 @@ +- name: Run default pipeline with target bed + command: nextflow run main.nf -profile test,targeted + tags: + - intervals + - preprocessing + - targeted + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: c990e4f1b7dbd5a3a623882a54ae2bf2 + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 5a0679057c530e5945c9c5a3a17312dc + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: dcc9ab2bf3248903e02d8da87e678977 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz + md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi + md5sum: b0ab630c3241fbd7581b7a38d944ff8b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 91e0d531f1bab64711ecefe52bfc8255 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 0b3162def977123809598639f7698121 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: a8455eb2947de529abfa62b303986e0f + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz + md5sum: d9fa560ff78ae106cfee9db2c90801b5 + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi + md5sum: 4816eeb9af254ca40177b08cf11b98d2 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 91e0d531f1bab64711ecefe52bfc8255 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 +- name: Run intervals false 
pipeline + command: nextflow run main.nf -profile test --intervals false --save_reference + tags: + - intervals + - intervals_false + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reference/intervals/chr22_1-40001.bed + md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 + - path: results/reference/intervals/chr22_1-40001.bed.gz + md5sum: d3341fa28986c40b24fcc10a079dbb80 + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: 
results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 +- name: Run default pipeline without intervals + command: nextflow run main.nf -profile test,no_intervals + tags: + - intervals + - no_intervals + - preprocessing + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reference/intervals + should_exist: false + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 diff --git a/tests/test_intervals_create.yml b/tests/test_intervals_create.yml deleted file mode 100644 index 09d951034f..0000000000 --- a/tests/test_intervals_create.yml +++ /dev/null @@ -1,51 +0,0 @@ -- name: Run intervals false pipeline - command: nextflow run main.nf -profile test --intervals false --save_reference - tags: - - intervals - - intervals_false - - preprocessing - files: - - path: 
results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reference/intervals/chr22_1-40001.bed - md5sum: 87a15eb9c2ff20ccd5cd8735a28708f7 - - path: results/reference/intervals/chr22_1-40001.bed.gz - md5sum: d3341fa28986c40b24fcc10a079dbb80 - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - 
- path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 diff --git a/tests/test_no_intervals.yml b/tests/test_no_intervals.yml deleted file mode 100644 index 5284709b51..0000000000 --- a/tests/test_no_intervals.yml +++ /dev/null @@ -1,59 +0,0 @@ -- name: Run default pipeline without intervals - command: nextflow run main.nf -profile test,no_intervals - tags: - - intervals - - no_intervals - - preprocessing - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reference/intervals - should_exist: false - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 From 0cc8facdb51550442abdd77cd9fb82954386a5f8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 14:53:52 +0100 Subject: [PATCH 26/49] pair > default --- tests/pair.yml | 89 ------------------------------------------ tests/test_default.yml | 89 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 89 deletions(-) delete mode 100644 tests/pair.yml diff --git 
a/tests/pair.yml b/tests/pair.yml deleted file mode 100644 index 20cddcc05b..0000000000 --- a/tests/pair.yml +++ /dev/null @@ -1,89 +0,0 @@ -- name: Run default pipeline for tumor normal pair - command: nextflow run main.nf -profile test,pair - tags: - - preprocessing - - tumor_normal_pair - files: - - path: results/csv/markduplicates.csv - md5sum: e8e587ac25253ff7ab8f1cc66d410c98 - - path: results/csv/markduplicates_no_table.csv - md5sum: 617574c9b607e5daaf4ad56d48982247 - - path: results/csv/recalibrated.csv - md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test2/test2.recal.table - md5sum: 0626cd4337eab79b38b5bc5c95e0c003 - - path: results/preprocessing/recalibrated/test2/test2.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/fastqc/test2-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/markduplicates/test2/test2.md.cram.metrics - contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt - md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 - - path: results/reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt - md5sum: 38ff8b38c33b9231f047fea8ea830aae - - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt - md5sum: 8b991358768cade225470a07cd34f573 - - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz - md5sum: 
5d67bc6ea9f077abb4fdac3b087c6387 - - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz.csi - md5sum: d5f1c9389ecf52ba839e834780a94549 - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt - md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt - md5sum: 38ff8b38c33b9231f047fea8ea830aae - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt - md5sum: 8b991358768cade225470a07cd34f573 - - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz - md5sum: 5d67bc6ea9f077abb4fdac3b087c6387 - - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi - md5sum: d5f1c9389ecf52ba839e834780a94549 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 - - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 - - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 2907543ab51cabd97318b15cf035c867 - - path: results/preprocessing/mapped/ - should_exist: false diff --git a/tests/test_default.yml b/tests/test_default.yml index 3552b93ce0..e16b82c027 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -433,3 +433,92 @@ - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi - path: results/strelka should_exist: false +- name: Run default pipeline for tumor normal pair + command: nextflow run main.nf -profile test,pair + tags: + - preprocessing + - tumor_normal_pair + files: + - path: results/csv/markduplicates.csv + md5sum: e8e587ac25253ff7ab8f1cc66d410c98 + - path: results/csv/markduplicates_no_table.csv + md5sum: 617574c9b607e5daaf4ad56d48982247 + - path: results/csv/recalibrated.csv + md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab + - path: results/multiqc + - path: 
results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test2/test2.recal.table + md5sum: 0626cd4337eab79b38b5bc5c95e0c003 + - path: results/preprocessing/recalibrated/test2/test2.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/fastqc/test2-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/markduplicates/test2/test2.md.cram.metrics + contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt + md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 + - path: results/reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt + md5sum: 38ff8b38c33b9231f047fea8ea830aae + - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt + md5sum: 8b991358768cade225470a07cd34f573 + - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz + md5sum: 
5d67bc6ea9f077abb4fdac3b087c6387 + - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz.csi + md5sum: d5f1c9389ecf52ba839e834780a94549 + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt + md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt + md5sum: 38ff8b38c33b9231f047fea8ea830aae + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt + md5sum: 8b991358768cade225470a07cd34f573 + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz + md5sum: 5d67bc6ea9f077abb4fdac3b087c6387 + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi + md5sum: d5f1c9389ecf52ba839e834780a94549 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 + - path: results/reports/samtools/test2/test2.md.cram.stats + md5sum: 60152dbf1e109d4c407c151204388109 + - path: results/reports/samtools/test2/test2.recal.cram.stats + md5sum: 2907543ab51cabd97318b15cf035c867 + - path: results/preprocessing/mapped/ + should_exist: false From 20ba4bdea494ed6e372ac284556429e7d5de152c Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:03:47 +0100 Subject: [PATCH 27/49] cnvkit --- conf/modules/cnvkit.config | 50 ++++++++++++++ conf/modules/modules.config | 34 ---------- nextflow.config | 1 + tests/config/pytest_tags.yml | 11 ++++ tests/test_cnvkit.yml | 123 +++++++++++++++++++++++++++++++++++ tests/tools.yml | 123 ----------------------------------- 6 files changed, 185 insertions(+), 157 deletions(-) create mode 100644 conf/modules/cnvkit.config create mode 100644 tests/test_cnvkit.yml diff --git a/conf/modules/cnvkit.config b/conf/modules/cnvkit.config new file mode 100644 index 0000000000..93b06518b5 --- /dev/null +++ b/conf/modules/cnvkit.config @@ -0,0 +1,50 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// CNVKIT + +process { + +// PREPARE REFERENCE CNVKIT + withName: 'CNVKIT_ANTITARGET' { + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/cnvkit" }, + pattern: "*{bed}" + ] + } + + withName: 'CNVKIT_REFERENCE' { + ext.prefix = "cnvkit" + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + enabled: params.save_reference, + mode: params.publish_dir_mode, + path: { "${params.outdir}/reference/cnvkit" }, + pattern: "*{cnn}" + ] + } + + // CNVKIT + withName: 'CNVKIT_BATCH' { + ext.args = { params.wes ? 
"--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } + ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, + pattern: "*{bed,cnn,cnr,cns,pdf,png}" + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index ae0c693b1c..bc59914fe5 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -28,29 +28,6 @@ process { ] } -// PREPARE REFERENCE CNVKIT - - withName: 'CNVKIT_ANTITARGET' { - ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/cnvkit" }, - pattern: "*{bed}" - ] - } - - withName: 'CNVKIT_REFERENCE' { - ext.prefix = "cnvkit" - ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } - publishDir = [ - enabled: params.save_reference, - mode: params.publish_dir_mode, - path: { "${params.outdir}/reference/cnvkit" }, - pattern: "*{cnn}" - ] - } - // QC withName: 'FASTQC' { @@ -117,17 +94,6 @@ process { process{ - // CNVKIT - withName: 'CNVKIT_BATCH' { - ext.args = { params.wes ? 
"--method hybrid --diagram --scatter" : "--method wgs --diagram --scatter" } - ext.when = { params.tools && params.tools.split(',').contains('cnvkit') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/cnvkit/${meta.id}/" }, - pattern: "*{bed,cnn,cnr,cns,pdf,png}" - ] - } - // DEEPVARIANT withName: 'MERGE_DEEPVARIANT_.*' { ext.prefix = {"${meta.id}.deepvariant"} diff --git a/nextflow.config b/nextflow.config index 78b9625c59..0de5ef0bba 100644 --- a/nextflow.config +++ b/nextflow.config @@ -296,6 +296,7 @@ includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/markduplicates.config' includeConfig 'conf/modules/prepare_recalibration.config' includeConfig 'conf/modules/recalibrate.config' +includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/annotate.config' // Function to ensure that resource requirements don't go beyond diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 70208f0ba2..1abde8b005 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -99,6 +99,17 @@ gatk4_spark: - subworkflows/local/bam_baserecalibrator_spark/main.nf - subworkflows/local/bam_markduplicates_spark/main.nf +# variant calling + +## cnvkit +cnvkit: + - conf/modules/cnvkit.config + - modules/nf-core/cnvkit/antitarget/main.nf + - modules/nf-core/cnvkit/batch/main.nf + - modules/nf-core/cnvkit/reference/main.nf + - subworkflows/local/bam_variant_calling_cnvkit/main.nf + - subworkflows/local/prepare_reference_cnvkit/main.nf + # annotate ## merge diff --git a/tests/test_cnvkit.yml b/tests/test_cnvkit.yml new file mode 100644 index 0000000000..c7935938d1 --- /dev/null +++ b/tests/test_cnvkit.yml @@ -0,0 +1,123 @@ +- name: Run variant calling on somatic samples with cnvkit + command: nextflow run main.nf -profile test,tools_somatic --tools cnvkit + tags: + - cnvkit + - copy_number_calling + - somatic + - variant_calling + files: + - path: results/multiqc + - path: 
results/variant_calling/cnvkit/sample3/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: results/variant_calling/cnvkit/sample3/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: results/variant_calling/cnvkit/sample3/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-diagram.pdf + # binary changes md5sums on reruns. + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-scatter.png + md5sum: c96b97b9ce948daf3437ccecfd67a4a7 + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: fe1248aa91fad7769303bb4c031d55ca + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.bintest.cns + md5sum: e55667a75d7d8eaec2efd3b611f15379 + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.call.cns + md5sum: 9968e02ef2f11ed22e2789c053f7159c + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cnr + md5sum: 3f0fe46574d0f7137f779e7ac1c2362d + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cns + md5sum: 0b2b81d391965488b8634a2b802b69cd + - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: 79aae7e9c135fb8c65f8fbda12610faf + - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/reference.cnn + md5sum: 59ec306bb820684b1f6f277d67cb2d92 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: fe1248aa91fad7769303bb4c031d55ca + - 
path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: 79aae7e9c135fb8c65f8fbda12610faf + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-diagram.pdf + # binary changes md5sums on reruns. + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-scatter.png + md5sum: acd87dfb61db5910afaea34053aed561 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: e9a260b81a34d8bc75330435d2a5e8da + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.call.cns + md5sum: c7d7fd87b27e103f73988ae307450e82 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cnr + md5sum: 26c506866198d46610c71fa6c0f9e381 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cns + md5sum: 83510b274c24a2671a962477a1ceb436 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f + - path: results/cnvkit + should_exist: false +- name: Run variant calling on tumor_only sample with cnvkit + command: nextflow run main.nf -profile test,tools_tumoronly --tools cnvkit + tags: + - cnvkit + - copy_number_calling + - tumor_only + - variant_calling + files: + - path: results/multiqc + - path: results/variant_calling/cnvkit/sample2/cnvkit.reference.antitarget-tmp.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: results/variant_calling/cnvkit/sample2/cnvkit.reference.target-tmp.bed + md5sum: 657b25dbda8516624efa8cb2cf3716ca + - path: 
results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted-diagram.pdf + # binary changes md5sums on reruns. + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted-scatter.png + md5sum: 7f2d2380309d5e19c8942131890d0e39 + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: f6adc75a0a86b7a921eca1b79a394cb0 + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.call.cns + md5sum: f7caeca04aba28b125ce26b511f42afb + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.cnr + md5sum: d9bdb71ce807051369577ee7f807a40c + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.cns + md5sum: 2b56aac606ba6183d018b30ca58afcec + - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 + - path: results/cnvkit + should_exist: false +- name: Run variant calling on germline sample with cnvkit + command: nextflow run main.nf -profile test,tools_germline --tools cnvkit + tags: + - cnvkit + - copy_number_calling + - germline + - variant_calling + files: + - path: results/multiqc + - path: results/variant_calling/cnvkit/sample1/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: results/variant_calling/cnvkit/sample1/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: results/variant_calling/cnvkit/sample1/reference.cnn + md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-diagram.pdf + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-scatter.png + md5sum: c96b97b9ce948daf3437ccecfd67a4a7 + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: fe1248aa91fad7769303bb4c031d55ca + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.bintest.cns + md5sum: e55667a75d7d8eaec2efd3b611f15379 + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.call.cns + md5sum: 9968e02ef2f11ed22e2789c053f7159c + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cnr + md5sum: 3f0fe46574d0f7137f779e7ac1c2362d + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cns + md5sum: 0b2b81d391965488b8634a2b802b69cd + - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: 79aae7e9c135fb8c65f8fbda12610faf + - path: results/cnvkit + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index fa81e2c270..7a85b72628 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,126 +1,3 @@ -- name: Run variant calling on somatic samples with cnvkit - command: nextflow run main.nf -profile test,tools_somatic --tools cnvkit - tags: - - cnvkit - - somatic - - variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/variant_calling/cnvkit/sample3/multi_intervals.antitarget.bed - md5sum: 3d4d20f9f23b39970865d29ef239d20b - - path: results/variant_calling/cnvkit/sample3/multi_intervals.target.bed - md5sum: 86d30493bb2e619a93f4ebc2923d29f3 - - path: results/variant_calling/cnvkit/sample3/reference.cnn - md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-diagram.pdf - # binary changes md5sums on reruns. 
- - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-scatter.png - md5sum: c96b97b9ce948daf3437ccecfd67a4a7 - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: fe1248aa91fad7769303bb4c031d55ca - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.bintest.cns - md5sum: e55667a75d7d8eaec2efd3b611f15379 - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.call.cns - md5sum: 9968e02ef2f11ed22e2789c053f7159c - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cnr - md5sum: 3f0fe46574d0f7137f779e7ac1c2362d - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.cns - md5sum: 0b2b81d391965488b8634a2b802b69cd - - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: 79aae7e9c135fb8c65f8fbda12610faf - - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.antitarget.bed - md5sum: 3d4d20f9f23b39970865d29ef239d20b - - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.target.bed - md5sum: 86d30493bb2e619a93f4ebc2923d29f3 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/reference.cnn - md5sum: 59ec306bb820684b1f6f277d67cb2d92 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: fe1248aa91fad7769303bb4c031d55ca - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: 79aae7e9c135fb8c65f8fbda12610faf - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-diagram.pdf - # binary changes md5sums on reruns. 
- - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-scatter.png - md5sum: acd87dfb61db5910afaea34053aed561 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: 067115082c4af4b64d58c0dc3a3642e4 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.bintest.cns - md5sum: e9a260b81a34d8bc75330435d2a5e8da - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.call.cns - md5sum: c7d7fd87b27e103f73988ae307450e82 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cnr - md5sum: 26c506866198d46610c71fa6c0f9e381 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cns - md5sum: 83510b274c24a2671a962477a1ceb436 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: b4a49faf170e436ec32dcc21ccc3ce8f - - path: results/cnvkit - should_exist: false -- name: Run variant calling on tumor_only sample with cnvkit - command: nextflow run main.nf -profile test,tools_tumoronly --tools cnvkit - tags: - - cnvkit - - tumor_only - - variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/variant_calling/cnvkit/sample2/cnvkit.reference.antitarget-tmp.bed - md5sum: 3d4d20f9f23b39970865d29ef239d20b - - path: results/variant_calling/cnvkit/sample2/cnvkit.reference.target-tmp.bed - md5sum: 657b25dbda8516624efa8cb2cf3716ca - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted-diagram.pdf - # binary changes md5sums on reruns. 
- - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted-scatter.png - md5sum: 7f2d2380309d5e19c8942131890d0e39 - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: 067115082c4af4b64d58c0dc3a3642e4 - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.bintest.cns - md5sum: f6adc75a0a86b7a921eca1b79a394cb0 - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.call.cns - md5sum: f7caeca04aba28b125ce26b511f42afb - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.cnr - md5sum: d9bdb71ce807051369577ee7f807a40c - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.cns - md5sum: 2b56aac606ba6183d018b30ca58afcec - - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: e6d0190c1c37ce6e41f76ca5b24ccca3 - - path: results/cnvkit - should_exist: false -- name: Run variant calling on germline sample with cnvkit - command: nextflow run main.nf -profile test,tools_germline --tools cnvkit - tags: - - cnvkit - - germline - - variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/variant_calling/cnvkit/sample1/multi_intervals.antitarget.bed - md5sum: 3d4d20f9f23b39970865d29ef239d20b - - path: results/variant_calling/cnvkit/sample1/multi_intervals.target.bed - md5sum: 86d30493bb2e619a93f4ebc2923d29f3 - - path: results/variant_calling/cnvkit/sample1/reference.cnn - md5sum: a09ee4be5dda1cf0f68073bdb3aad8ec - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-diagram.pdf - # binary changes md5sums on reruns. 
- - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-scatter.png - md5sum: c96b97b9ce948daf3437ccecfd67a4a7 - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: fe1248aa91fad7769303bb4c031d55ca - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.bintest.cns - md5sum: e55667a75d7d8eaec2efd3b611f15379 - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.call.cns - md5sum: 9968e02ef2f11ed22e2789c053f7159c - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cnr - md5sum: 3f0fe46574d0f7137f779e7ac1c2362d - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.cns - md5sum: 0b2b81d391965488b8634a2b802b69cd - - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: 79aae7e9c135fb8c65f8fbda12610faf - - path: results/cnvkit - should_exist: false - name: Run variant calling on somatic samples with controlfreec command: nextflow run main.nf -profile test,tools_somatic --tools controlfreec tags: From 10c74f1f0eacbb8f32ed0f54011d5c376511731e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:14:21 +0100 Subject: [PATCH 28/49] controlfreec --- conf/modules/controlfreec.config | 184 +++++++++++++++++++++++++++++++ conf/modules/modules.config | 167 ---------------------------- nextflow.config | 9 ++ tests/config/pytest_tags.yml | 11 ++ tests/test_controlfreec.yml | 150 +++++++++++++++++++++++++ tests/tools.yml | 150 ------------------------- 6 files changed, 354 insertions(+), 317 deletions(-) create mode 100644 conf/modules/controlfreec.config create mode 100644 tests/test_controlfreec.yml diff --git a/conf/modules/controlfreec.config b/conf/modules/controlfreec.config new file mode 100644 index 0000000000..449d2f1e14 --- /dev/null +++ b/conf/modules/controlfreec.config @@ -0,0 +1,184 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// CONTROLFREEC + +process { + + withName: 'ASSESS_SIGNIFICANCE' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*{.p.value.txt}" + ] + } + + withName: 'CAT_MPILEUP' { + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, + pattern: "*{mpileup.gz}", + ] + } + + withName: 'FREEC_.*' { + ext.when = { params.tools && params.tools.split(',').contains('controlfreec') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*{BedGraph,cpn,txt,_CNVs}" + ] + } + + withName: 'FREEC2BED' { + ext.args = { "${params.cf_ploidy}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*bed" + ] + } + + withName: 'FREEC2CIRCOS' { + ext.args = { "${params.cf_ploidy}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*circos.txt" + ] + } + + withName: 'MAKEGRAPH' { + ext.args = { "${params.cf_ploidy}" } + publishDir = [ + mode: params.publish_dir_mode, + path: 
{ "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, + pattern: "*png" + ] + } + + withName: 'SAMTOOLS_MPILEUP' { + ext.when = { params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, + pattern: "*mpileup.gz", + saveAs: { meta.num_intervals > 1 ? null : it } + ] + } + +// TUMOR_ONLY_VARIANT_CALLING + withName: 'FREEC_TUMORONLY' { + ext.args = {[ + "sample":[ + inputformat: 'pileup', + mateorientation: 'FR' + ], + "general" :[ + bedgraphoutput: "TRUE", + breakpointthreshold: params.wes ? "1.2" : "0.8", //Values taken from Freec example configs + breakpointtype: params.wes ? "4" : "2", // Values taken from Freec example configs + coefficientofvariation: params.cf_coeff, + contamination: params.cf_contamination ?: "", + contaminationadjustment: params.cf_contamination_adjustment ? "TRUE" : "", + forcegccontentnormalization: params.wes ? "1" : "0", + minimalsubclonepresence: params.wes ? "30" : "20", + noisydata: params.wes ? "TRUE" : "FALSE", + ploidy: params.cf_ploidy, + printNA: params.wes ? "FALSE" : "TRUE", + readcountthreshold: params.wes ? "50" : "10", + sex: meta.sex, + //uniquematch: not set + window: params.cf_window ?: "" + ], + "BAF":[ + minimalcoverageperposition: params.cf_mincov ?: "", + minimalqualityperposition: params.cf_minqual ?: "", + //"shiftinquality": (optional)not set + ] + ] + } + } + +// PAIR_VARIANT_CALLING + if (params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup'))) { + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.normal.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.normal.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.tumor.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.tumor.mpileup.gz" } + } + } + + withName: 'FREEC_SOMATIC' { + ext.args = {[ + "sample":[ + inputformat: 'pileup', + mateorientation: 'FR' + ], + "control":[ + inputformat: "pileup", + mateorientation: "FR" + ], + "general" :[ + bedgraphoutput: "TRUE", + breakpointthreshold: params.wes ? "1.2" : "0.8", //Values taken from Freec example configs + breakpointtype: params.wes ? "4" : "2", // Values taken from Freec example configs + coefficientofvariation: params.cf_coeff, + contamination: params.cf_contamination ?: "", + contaminationadjustment: params.cf_contamination_adjustment ? "TRUE" : "", + forcegccontentnormalization: params.wes ? 
"1" : "0", + minimalsubclonepresence: params.wes ? "30" : "20", + noisydata: params.wes ? "TRUE" : "FALSE", + ploidy: params.cf_ploidy, + printNA: params.wes ? "FALSE" : "TRUE", + readcountthreshold: params.wes ? "50" : "10", + sex: meta.sex, + //uniquematch: not set + window: params.cf_window ?: "" + ], + "BAF":[ + minimalcoverageperposition: params.cf_mincov ?: "", + minimalqualityperposition: params.cf_minqual ?: "", + //"shiftinquality": (optional)not set + ] + ] + } + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index bc59914fe5..71d8efa8f7 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -345,102 +345,6 @@ process{ // TUMOR_VARIANT_CALLING - // CONTROLFREEC - withName: 'ASSESS_SIGNIFICANCE' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, - pattern: "*{.p.value.txt}" - ] - } - - withName: 'CAT_MPILEUP' { - publishDir = [ - enabled: true, - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, - pattern: "*{mpileup.gz}", - ] - } - - withName: 'FREEC_.*' { - ext.when = { params.tools && params.tools.split(',').contains('controlfreec') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, - pattern: "*{BedGraph,cpn,txt,_CNVs}" - ] - } - - withName: 'FREEC_TUMORONLY' { - ext.args = {[ - "sample":[ - inputformat: 'pileup', - mateorientation: 'FR' - ], - "general" :[ - bedgraphoutput: "TRUE", - breakpointthreshold: params.wes ? "1.2" : "0.8", //Values taken from Freec example configs - breakpointtype: params.wes ? "4" : "2", // Values taken from Freec example configs - coefficientofvariation: params.cf_coeff, - contamination: params.cf_contamination ?: "", - contaminationadjustment: params.cf_contamination_adjustment ? "TRUE" : "", - forcegccontentnormalization: params.wes ? 
"1" : "0", - minimalsubclonepresence: params.wes ? "30" : "20", - noisydata: params.wes ? "TRUE" : "FALSE", - ploidy: params.cf_ploidy, - printNA: params.wes ? "FALSE" : "TRUE", - readcountthreshold: params.wes ? "50" : "10", - sex: meta.sex, - //uniquematch: not set - window: params.cf_window ?: "" - ], - "BAF":[ - minimalcoverageperposition: params.cf_mincov ?: "", - minimalqualityperposition: params.cf_minqual ?: "", - //"shiftinquality": (optional)not set - ] - ] - } - } - - withName: 'FREEC2BED' { - ext.args = { "${params.cf_ploidy}" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, - pattern: "*bed" - ] - } - - withName: 'FREEC2CIRCOS' { - ext.args = { "${params.cf_ploidy}" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, - pattern: "*circos.txt" - ] - } - - withName: 'MAKEGRAPH' { - ext.args = { "${params.cf_ploidy}" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/controlfreec/${meta.id}/" }, - pattern: "*png" - ] - } - - withName: 'SAMTOOLS_MPILEUP' { - ext.when = { params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, - pattern: "*mpileup.gz", - saveAs: { meta.num_intervals > 1 ? null : it } - ] - } - // MANTA withName: 'MERGE_MANTA_TUMOR' { ext.prefix = {"${meta.id}.manta.tumor_sv"} @@ -567,77 +471,6 @@ process{ } - // CONTROLFREEC - if (params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup'))) { - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.normal.mpileup.gz" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.normal.mpileup.gz" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.tumor.mpileup.gz" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.tumor.mpileup.gz" } - } - } - - withName: 'FREEC_SOMATIC' { - ext.args = {[ - "sample":[ - inputformat: 'pileup', - mateorientation: 'FR' - ], - "control":[ - inputformat: "pileup", - mateorientation: "FR" - ], - "general" :[ - bedgraphoutput: "TRUE", - breakpointthreshold: params.wes ? "1.2" : "0.8", //Values taken from Freec example configs - breakpointtype: params.wes ? "4" : "2", // Values taken from Freec example configs - coefficientofvariation: params.cf_coeff, - contamination: params.cf_contamination ?: "", - contaminationadjustment: params.cf_contamination_adjustment ? "TRUE" : "", - forcegccontentnormalization: params.wes ? 
"1" : "0", - minimalsubclonepresence: params.wes ? "30" : "20", - noisydata: params.wes ? "TRUE" : "FALSE", - ploidy: params.cf_ploidy, - printNA: params.wes ? "FALSE" : "TRUE", - readcountthreshold: params.wes ? "50" : "10", - sex: meta.sex, - //uniquematch: not set - window: params.cf_window ?: "" - ], - "BAF":[ - minimalcoverageperposition: params.cf_mincov ?: "", - minimalqualityperposition: params.cf_minqual ?: "", - //"shiftinquality": (optional)not set - ] - ] - } - } - // FREEBAYES if (params.tools && params.tools.split(',').contains('freebayes')) { withName: '.*:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { diff --git a/nextflow.config b/nextflow.config index 0de5ef0bba..e22c238411 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,8 +287,12 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/modules.config' // Load more modules specific config for DSL2 module specific options + +// prepare reference includeConfig 'conf/modules/prepare_intervals.config' includeConfig 'conf/modules/prepare_genome.config' + +// preprocessing includeConfig 'conf/modules/alignment_to_fastq.config' includeConfig 'conf/modules/umi.config' includeConfig 'conf/modules/trimming.config' @@ -296,7 +300,12 @@ includeConfig 'conf/modules/aligner.config' includeConfig 'conf/modules/markduplicates.config' includeConfig 'conf/modules/prepare_recalibration.config' includeConfig 'conf/modules/recalibrate.config' + +// variant calling includeConfig 'conf/modules/cnvkit.config' +includeConfig 'conf/modules/controlfreec.config' + +//annotate includeConfig 'conf/modules/annotate.config' // Function to ensure that resource requirements don't go beyond diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 1abde8b005..89121b1a6e 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -110,6 +110,17 @@ cnvkit: - subworkflows/local/bam_variant_calling_cnvkit/main.nf - 
subworkflows/local/prepare_reference_cnvkit/main.nf +## controlfreec +controlfreec: + - conf/modules/controlfreec.config + - modules/nf-core/controlfreec/assesssignificance/main.nf + - modules/nf-core/controlfreec/freec/main.nf + - modules/nf-core/controlfreec/freec2bed/main.nf + - modules/nf-core/controlfreec/freec2circos/main.nf + - modules/nf-core/controlfreec/makegraph/main.nf + - subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf + - subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf + # annotate ## merge diff --git a/tests/test_controlfreec.yml b/tests/test_controlfreec.yml new file mode 100644 index 0000000000..5f3ea7f452 --- /dev/null +++ b/tests/test_controlfreec.yml @@ -0,0 +1,150 @@ +- name: Run variant calling on somatic samples with controlfreec + command: nextflow run main.nf -profile test,tools_somatic --tools controlfreec + tags: + - controlfreec + - somatic + - variant_calling + - copy_number_calling + files: + - path: results/multiqc + - path: results/untar/chromosomes/chr21.fasta + md5sum: 69bd44ef67566a76d6cbb8aa4a25ae35 + - path: results/variant_calling/controlfreec/sample4_vs_sample3/config.txt + contains: + [ + "BedGraphOutput = TRUE", + "minExpectedGC = 0", + "maxThreads = 2", + "noisyData = TRUE", + "readCountThreshold = 1", + "sex = XX", + "window = 10", + ] + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.bed + md5sum: 833920178e4f40a296d8eab029caf086 + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.circos.txt + md5sum: 92ce5ce97b27a7214dfa9c2cb20cf854 + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz_control.cpn + md5sum: 508a003da85b186d9a60d867ef7cdf15 + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.p.value.txt + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_BAF.txt + # binary changes md5sums on reruns. + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_CNVs + # binary changes md5sums on reruns. + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_info.txt + md5sum: 271271719c576d9218bdc859850e54ee + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_ratio.BedGraph + md5sum: 5d3321af93678f16878d59e01d3a87d3 + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_ratio.txt + # binary changes md5sums on reruns. + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_sample.cpn + md5sum: befe1706c61464635a76c7323a6bd2a2 + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.png + # binary changes md5sums on reruns. + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.log2.png + # binary changes md5sums on reruns. + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.png + # binary changes md5sums on reruns. + - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz + # binary changes md5sums on reruns. 
+ - path: results/cnvkit + should_exist: false +- name: Run variant calling on somatic samples with controlfreec without intervals + command: nextflow run main.nf -profile test,tools_somatic --tools controlfreec --no_intervals -stub-run + tags: + - controlfreec + - no_intervals + - somatic + - variant_calling + - copy_number_calling + files: + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/untar/chromosomes + - path: results/variant_calling/controlfreec/sample4_vs_sample3/GC_profile.sample4_vs_sample3.cpn + md5sum: d41d8cd98f00b204e9800998ecf8427e # This is the md5sum of an empty file. Are all these files suppose to be empty? + - path: results/variant_calling/controlfreec/sample4_vs_sample3/config.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.circos.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.p.value.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_CNVs + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_info.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: 
results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.BedGraph + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.log2.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_sample.cpn + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz + # binary changes md5sums on reruns. + - path: results/controlfreec + should_exist: false + - path: results/mpileup + should_exist: false +- name: Run variant calling on tumor_only sample with controlfreec + command: nextflow run main.nf -profile test,tools_tumoronly --tools controlfreec -stub-run + tags: + - controlfreec + - tumor_only + - variant_calling + - copy_number_calling + files: + - path: results/multiqc + - path: results/variant_calling/controlfreec/sample2/GC_profile.sample2.cpn + md5sum: d41d8cd98f00b204e9800998ecf8427e # This is the md5sum of an empty file. Are all these files suppose to be empty? 
+ - path: results/variant_calling/controlfreec/sample2/config.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2.bed + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2.circos.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2.p.value.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_BAF.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_BAF.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_CNVs + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_info.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_ratio.BedGraph + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_ratio.log2.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_ratio.png + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_ratio.txt + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/controlfreec/sample2/sample2_sample.cpn + md5sum: d41d8cd98f00b204e9800998ecf8427e + - path: results/variant_calling/mpileup/sample2/sample2.tumor.mpileup.gz + # binary changes md5sums on reruns. 
+ - path: results/controlfreec + should_exist: false + - path: results/mpileup + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index 7a85b72628..5baffe05f9 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,153 +1,3 @@ -- name: Run variant calling on somatic samples with controlfreec - command: nextflow run main.nf -profile test,tools_somatic --tools controlfreec - tags: - - controlfreec - - somatic - - variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/untar/chromosomes/chr21.fasta - md5sum: 69bd44ef67566a76d6cbb8aa4a25ae35 - - path: results/variant_calling/controlfreec/sample4_vs_sample3/config.txt - contains: - [ - "BedGraphOutput = TRUE", - "minExpectedGC = 0", - "maxThreads = 2", - "noisyData = TRUE", - "readCountThreshold = 1", - "sex = XX", - "window = 10", - ] - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.bed - md5sum: 833920178e4f40a296d8eab029caf086 - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.circos.txt - md5sum: 92ce5ce97b27a7214dfa9c2cb20cf854 - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz_control.cpn - md5sum: 508a003da85b186d9a60d867ef7cdf15 - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.p.value.txt - # binary changes md5sums on reruns. - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_BAF.txt - # binary changes md5sums on reruns. - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_CNVs - # binary changes md5sums on reruns. 
- - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_info.txt - md5sum: 271271719c576d9218bdc859850e54ee - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_ratio.BedGraph - md5sum: 5d3321af93678f16878d59e01d3a87d3 - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_ratio.txt - # binary changes md5sums on reruns. - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz_sample.cpn - md5sum: befe1706c61464635a76c7323a6bd2a2 - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.png - # binary changes md5sums on reruns. - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.log2.png - # binary changes md5sums on reruns. - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.png - # binary changes md5sums on reruns. - - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz - # binary changes md5sums on reruns. 
- - path: results/cnvkit - should_exist: false -- name: Run variant calling on somatic samples with controlfreec without intervals - command: nextflow run main.nf -profile test,tools_somatic --tools controlfreec --no_intervals -stub-run - tags: - - controlfreec - - no_intervals - - somatic - - variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/untar/chromosomes - - path: results/variant_calling/controlfreec/sample4_vs_sample3/GC_profile.sample4_vs_sample3.cpn - md5sum: d41d8cd98f00b204e9800998ecf8427e # This is the md5sum of an empty file. Are all these files suppose to be empty? - - path: results/variant_calling/controlfreec/sample4_vs_sample3/config.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.circos.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3.p.value.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.png - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_BAF.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_CNVs - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_info.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: 
results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.BedGraph - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.log2.png - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.png - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_ratio.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample4_vs_sample3/sample4_vs_sample3_sample.cpn - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.normal.mpileup.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mpileup/sample4_vs_sample3/sample4_vs_sample3.tumor.mpileup.gz - # binary changes md5sums on reruns. - - path: results/controlfreec - should_exist: false - - path: results/mpileup - should_exist: false -- name: Run variant calling on tumor_only sample with controlfreec - command: nextflow run main.nf -profile test,tools_tumoronly --tools controlfreec -stub-run - tags: - - controlfreec - - tumor_only - - variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/variant_calling/controlfreec/sample2/GC_profile.sample2.cpn - md5sum: d41d8cd98f00b204e9800998ecf8427e # This is the md5sum of an empty file. Are all these files suppose to be empty? 
- - path: results/variant_calling/controlfreec/sample2/config.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2.bed - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2.circos.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2.p.value.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_BAF.png - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_BAF.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_CNVs - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_info.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_ratio.BedGraph - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_ratio.log2.png - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_ratio.png - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_ratio.txt - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/controlfreec/sample2/sample2_sample.cpn - md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: results/variant_calling/mpileup/sample2/sample2.tumor.mpileup.gz - # binary changes md5sums on reruns. 
- - path: results/controlfreec - should_exist: false - - path: results/mpileup - should_exist: false - name: Run variant calling on germline sample with deepvariant command: nextflow run main.nf -profile test,tools_germline --tools deepvariant tags: From 12fc3b7ebe92263cbae1b621e52de8f470a1c121 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:20:28 +0100 Subject: [PATCH 29/49] more cnvkit --- tests/only_paired_VC.yml | 113 --------------------------------------- tests/test_cnvkit.yml | 35 ++++++++++++ 2 files changed, 35 insertions(+), 113 deletions(-) delete mode 100644 tests/only_paired_VC.yml diff --git a/tests/only_paired_VC.yml b/tests/only_paired_VC.yml deleted file mode 100644 index b051b3e2b7..0000000000 --- a/tests/only_paired_VC.yml +++ /dev/null @@ -1,113 +0,0 @@ -- name: Run variant calling on somatic samples with cnvkit and skip variant calling on matched normal - command: nextflow run main.nf -profile test,tools_somatic --tools cnvkit --only_paired_variant_calling - tags: - - cnvkit - - somatic - - only_paired_variant_calling - - copy_number_calling - files: - - path: results/multiqc - - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.antitarget.bed - md5sum: 3d4d20f9f23b39970865d29ef239d20b - - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.target.bed - md5sum: 86d30493bb2e619a93f4ebc2923d29f3 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/reference.cnn - md5sum: 59ec306bb820684b1f6f277d67cb2d92 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: fe1248aa91fad7769303bb4c031d55ca - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: 79aae7e9c135fb8c65f8fbda12610faf - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-diagram.pdf - # binary changes md5sums on reruns. 
- - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-scatter.png - md5sum: acd87dfb61db5910afaea34053aed561 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn - md5sum: 067115082c4af4b64d58c0dc3a3642e4 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.bintest.cns - md5sum: e9a260b81a34d8bc75330435d2a5e8da - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.call.cns - md5sum: c7d7fd87b27e103f73988ae307450e82 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cnr - md5sum: 26c506866198d46610c71fa6c0f9e381 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cns - md5sum: 83510b274c24a2671a962477a1ceb436 - - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.targetcoverage.cnn - md5sum: b4a49faf170e436ec32dcc21ccc3ce8f -- name: Skip variant calling on matched normal - command: nextflow run main.nf -profile test,variantcalling_channels --tools strelka --only_paired_variant_calling - tags: - - somatic - - strelka - - variantcalling_channel - files: - - path: results/multiqc - - path: results/reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt - md5sum: fa17527ede69d65762bd78fe86fbe50a - - path: results/reports/bcftools/strelka/sample2/sample2.strelka.variants.bcftools_stats.txt - md5sum: 93bf8f09d155e69418fff988e76cbc1d - - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 - - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 - - path: 
results/reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary - md5sum: 2048a5de0201a6052c988a0189979a5f - - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count - md5sum: c5b7a8eda2526d899098439ae4c06a49 - - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual - md5sum: f01534408c3a87893b523de5f39deb0b - - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.FILTER.summary - md5sum: fa3112841a4575d104916027c8851b30 - - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.count - md5sum: d7f54d09d38af01a574a4930af21cfc9 - - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.qual - contains: - [ - "19 453 47848 0.00946748 11 50 0.22", - "56 456 47875 0.0095248 8 25 0.32", - "72 458 47880 0.00956558 6 20 0.3", - "314 463 47899 0.00966617 1 1 1", - ] - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary - md5sum: 3441628cd6550ed459ca1c3db989ceea - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary - md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count - md5sum: fc7af1f534890c4ad3025588b3af62ae - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: c949f848859f138731898aac64a73eaf - - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample2/sample2.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample2/sample2.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample2/sample2.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample2/sample2.strelka.variants.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/variant_calling/sample3/strelka/sample3.strelka.variants.vcf.gz - should_exist: false - - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi - should_exist: false - - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz - should_exist: false - - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi - should_exist: false diff --git a/tests/test_cnvkit.yml b/tests/test_cnvkit.yml index c7935938d1..b32223906c 100644 --- a/tests/test_cnvkit.yml +++ b/tests/test_cnvkit.yml @@ -121,3 +121,38 @@ md5sum: 79aae7e9c135fb8c65f8fbda12610faf - path: results/cnvkit should_exist: false +- name: Run variant calling on somatic samples with cnvkit and skip variant calling on matched normal + command: nextflow run main.nf -profile test,tools_somatic --tools cnvkit --only_paired_variant_calling + tags: + - cnvkit + - somatic + - only_paired_variant_calling + - copy_number_calling + files: + - path: results/multiqc + - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.antitarget.bed + md5sum: 3d4d20f9f23b39970865d29ef239d20b + - path: results/variant_calling/cnvkit/sample4_vs_sample3/multi_intervals.target.bed + md5sum: 86d30493bb2e619a93f4ebc2923d29f3 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/reference.cnn + md5sum: 59ec306bb820684b1f6f277d67cb2d92 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: fe1248aa91fad7769303bb4c031d55ca + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: 79aae7e9c135fb8c65f8fbda12610faf + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-diagram.pdf + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-scatter.png + md5sum: acd87dfb61db5910afaea34053aed561 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn + md5sum: 067115082c4af4b64d58c0dc3a3642e4 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.bintest.cns + md5sum: e9a260b81a34d8bc75330435d2a5e8da + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.call.cns + md5sum: c7d7fd87b27e103f73988ae307450e82 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cnr + md5sum: 26c506866198d46610c71fa6c0f9e381 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.cns + md5sum: 83510b274c24a2671a962477a1ceb436 + - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.targetcoverage.cnn + md5sum: b4a49faf170e436ec32dcc21ccc3ce8f From 22987389f71c77c5a09f6cd1524093989c9ac83a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:20:48 +0100 Subject: [PATCH 30/49] deepvariant --- conf/modules/deepvariant.config | 51 ++++++++++++++++++++++++++++ conf/modules/modules.config | 31 ----------------- nextflow.config | 1 + tests/config/pytest_tags.yml | 8 +++++ tests/test_deepvariant.yml | 59 +++++++++++++++++++++++++++++++++ tests/tools.yml | 59 --------------------------------- 6 files changed, 119 insertions(+), 90 deletions(-) create mode 100644 conf/modules/deepvariant.config create mode 100644 tests/test_deepvariant.yml diff --git a/conf/modules/deepvariant.config b/conf/modules/deepvariant.config new file mode 100644 index 0000000000..f91e792447 --- /dev/null +++ b/conf/modules/deepvariant.config @@ -0,0 +1,51 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 
per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// DEEPVARIANT + +process { + + withName: 'DEEPVARIANT' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepvariant" : "${meta.id}.deepvariant.${intervals.simpleName}" } + ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } + ext.when = { params.tools && params.tools.split(',').contains('deepvariant') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*vcf.gz", + saveAs: { meta.num_intervals > 1 ? 
null : "deepvariant/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_DEEPVARIANT_.*' { + ext.prefix = {"${meta.id}.deepvariant"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/deepvariant/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_DEEPVARIANT_GVCF' { + ext.prefix = {"${meta.id}.deepvariant.g"} + } + + withName : 'TABIX_VC_DEEPVARIANT_.*' { + ext.prefix = {"${meta.id}.deepvariant"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/deepvariant/${meta.id}/" }, + pattern: "*tbi" + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 71d8efa8f7..3ac2c75484 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -94,37 +94,6 @@ process { process{ - // DEEPVARIANT - withName: 'MERGE_DEEPVARIANT_.*' { - ext.prefix = {"${meta.id}.deepvariant"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/deepvariant/${meta.id}/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - withName: 'MERGE_DEEPVARIANT_GVCF' { - ext.prefix = {"${meta.id}.deepvariant.g"} - } - withName: 'DEEPVARIANT' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.deepvariant" : "${meta.id}.deepvariant.${intervals.simpleName}" } - ext.args = { params.wes ? "--model_type WES" : "--model_type WGS" } - ext.when = { params.tools && params.tools.split(',').contains('deepvariant') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*vcf.gz", - saveAs: { meta.num_intervals > 1 ? 
null : "deepvariant/${meta.id}/${it}" } - ] - } - withName : 'TABIX_VC_DEEPVARIANT_.*' { - ext.prefix = {"${meta.id}.deepvariant"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/deepvariant/${meta.id}/" }, - pattern: "*tbi" - ] - } // FREEBAYES withName: 'MERGE_FREEBAYES' { diff --git a/nextflow.config b/nextflow.config index e22c238411..d68a395338 100644 --- a/nextflow.config +++ b/nextflow.config @@ -304,6 +304,7 @@ includeConfig 'conf/modules/recalibrate.config' // variant calling includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' +includeConfig 'conf/modules/deepvariant.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 89121b1a6e..32431fd5a8 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -121,6 +121,14 @@ controlfreec: - subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf - subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf +## deepvariant +deepvariant: + - conf/modules/deepvariant.config + - modules/nf-core/deepvariant/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/tabix/tabix/main.nf + - subworkflows/local/bam_variant_calling_deepvariant/main.nf + # annotate ## merge diff --git a/tests/test_deepvariant.yml b/tests/test_deepvariant.yml new file mode 100644 index 0000000000..5cc462a74a --- /dev/null +++ b/tests/test_deepvariant.yml @@ -0,0 +1,59 @@ +- name: Run variant calling on germline sample with deepvariant + command: nextflow run main.nf -profile test,tools_germline --tools deepvariant + tags: + - deepvariant + - germline + - variant_calling + files: + - path: results/multiqc + - path: results/reports/bcftools/deepvariant/sample1/sample1.deepvariant.bcftools_stats.txt + md5sum: 433aa535e9ce72acddca7be36b12238b + - path: 
results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.FILTER.summary + md5sum: f97d30123c7e20c704e9a8a73170661e + - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.count + md5sum: 1f546d259ddc493736507b35ee520dc4 + - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.qual + md5sum: 437d6a802b10f05fa1601166eb6b7d48 + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/deepvariant + should_exist: false +- name: Run variant calling on germline sample with deepvariant without intervals + command: nextflow run main.nf -profile test,tools_germline --tools deepvariant --no_intervals + tags: + - deepvariant + - germline + - no_intervals + - variant_calling + files: + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/reports/bcftools/deepvariant/sample1/sample1.deepvariant.bcftools_stats.txt + md5sum: 0d8602284347cb5acf1c264a301f5093 + - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.FILTER.summary + md5sum: cffb486232d067db80616c3129bf1322 + - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.count + md5sum: bbd541f33c490df06fee5feb2f6e2c7f + - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.qual + md5sum: 037063e030edec5fb4e784b1f6c8954f + - path: 
results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/deepvariant + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index 5baffe05f9..7dd8c6025f 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,62 +1,3 @@ -- name: Run variant calling on germline sample with deepvariant - command: nextflow run main.nf -profile test,tools_germline --tools deepvariant - tags: - - deepvariant - - germline - - variant_calling - files: - - path: results/multiqc - - path: results/reports/bcftools/deepvariant/sample1/sample1.deepvariant.bcftools_stats.txt - md5sum: 433aa535e9ce72acddca7be36b12238b - - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.FILTER.summary - md5sum: f97d30123c7e20c704e9a8a73170661e - - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.count - md5sum: 1f546d259ddc493736507b35ee520dc4 - - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.qual - md5sum: 437d6a802b10f05fa1601166eb6b7d48 - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/deepvariant - should_exist: false -- name: Run variant calling on germline sample with deepvariant without intervals - command: nextflow run main.nf -profile test,tools_germline --tools deepvariant --no_intervals - tags: - - deepvariant - - germline - - no_intervals - - variant_calling - files: - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/reports/bcftools/deepvariant/sample1/sample1.deepvariant.bcftools_stats.txt - md5sum: 0d8602284347cb5acf1c264a301f5093 - - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.FILTER.summary - md5sum: cffb486232d067db80616c3129bf1322 - - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.count - md5sum: bbd541f33c490df06fee5feb2f6e2c7f - - path: results/reports/vcftools/deepvariant/sample1/sample1.deepvariant.TsTv.qual - md5sum: 037063e030edec5fb4e784b1f6c8954f - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.g.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/deepvariant/sample1/sample1.deepvariant.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/deepvariant - should_exist: false - name: Run variant calling on germline sample with freebayes command: nextflow run main.nf -profile test,targeted --tools freebayes tags: From 696796b297793fb8b1b503c7a7764a29511c23a2 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:25:56 +0100 Subject: [PATCH 31/49] freebayes --- conf/modules/freebayes.config | 54 +++++ conf/modules/modules.config | 39 ---- nextflow.config | 1 + tests/config/pytest_tags.yml | 9 + tests/test_freebayes.yml | 413 +++++++++++++++++++++++++++++++++ tests/tools.yml | 417 ---------------------------------- 6 files changed, 477 insertions(+), 456 deletions(-) create mode 100644 conf/modules/freebayes.config create mode 100644 tests/test_freebayes.yml diff --git a/conf/modules/freebayes.config b/conf/modules/freebayes.config new file mode 100644 index 0000000000..9890735588 --- /dev/null +++ b/conf/modules/freebayes.config @@ -0,0 +1,54 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// FREEBAYES + +process { + + withName: 'MERGE_FREEBAYES' { + ext.prefix = { "${meta.id}.freebayes" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'FREEBAYES' { + ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' + //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('freebayes') } + publishDir = [ + enabled: false + ] + } + + withName: 'BCFTOOLS_SORT' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.freebayes" : "${vcf.minus("vcf")}.sort" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*vcf.gz", + saveAs: { meta.num_intervals > 1 ? null : "freebayes/${meta.id}/${it}" } + ] + } + + withName : 'TABIX_VC_FREEBAYES' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 3ac2c75484..31de4a5e0a 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -94,45 +94,6 @@ process { process{ - - // FREEBAYES - withName: 'MERGE_FREEBAYES' { - ext.prefix = { "${meta.id}.freebayes" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'FREEBAYES' { - //To make sure no naming conflicts ensure with module BCFTOOLS_SORT & the naming being correct in the output folder - ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}" : "${meta.id}.${target_bed.simpleName}" } - ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' - ext.when = { params.tools && params.tools.split(',').contains('freebayes') } - publishDir = [ - enabled: false - ] - } - - withName: 'BCFTOOLS_SORT' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.freebayes" : "${vcf.minus("vcf")}.sort" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*vcf.gz", - saveAs: { meta.num_intervals > 1 ? null : "freebayes/${meta.id}/${it}" } - ] - } - - withName : 'TABIX_VC_FREEBAYES' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/freebayes/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - // HAPLOTYPECALLER withName: 'MERGE_HAPLOTYPECALLER' { ext.prefix = { params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" } diff --git a/nextflow.config b/nextflow.config index d68a395338..8f7c999b8e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -305,6 +305,7 @@ includeConfig 'conf/modules/recalibrate.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' +includeConfig 'conf/modules/freebayes.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 32431fd5a8..1ec15969b1 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -129,6 +129,15 @@ deepvariant: - modules/nf-core/tabix/tabix/main.nf - subworkflows/local/bam_variant_calling_deepvariant/main.nf +## freebayes +freebayes: + - conf/modules/freebayes.config + - modules/nf-core/bcftools/sort/main.nf + - modules/nf-core/freebayes/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/tabix/tabix/main.nf + - 
subworkflows/local/bam_variant_calling_freebayes/main.nf + # annotate ## merge diff --git a/tests/test_freebayes.yml b/tests/test_freebayes.yml new file mode 100644 index 0000000000..611102b745 --- /dev/null +++ b/tests/test_freebayes.yml @@ -0,0 +1,413 @@ +- name: Run variant calling on germline sample with freebayes + command: nextflow run main.nf -profile test,targeted --tools freebayes + tags: + - freebayes + - germline + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/csv/variantcalled.csv + md5sum: 0cc6a67fedb2ef9ce97e463d310f9f30 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: c990e4f1b7dbd5a3a623882a54ae2bf2 + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt + md5sum: 16c7673085520b66bd47f59fbf8a7e0e + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 5a0679057c530e5945c9c5a3a17312dc + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: dcc9ab2bf3248903e02d8da87e678977 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz + md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi + md5sum: b0ab630c3241fbd7581b7a38d944ff8b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 91e0d531f1bab64711ecefe52bfc8255 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 0b3162def977123809598639f7698121 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: a8455eb2947de529abfa62b303986e0f + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz + md5sum: d9fa560ff78ae106cfee9db2c90801b5 + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi + md5sum: 4816eeb9af254ca40177b08cf11b98d2 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 91e0d531f1bab64711ecefe52bfc8255 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - 
path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 + - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary + md5sum: 43d53e36cbb1091f915b2499e545b41e + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count + md5sum: 650f3dc78c5aaaecfe8ffa3d499e812f + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual + contains: ["0 0 0 -nan 82 29 2.82759", "1.1 196 35 5.6 57 24 2.375", "3.16 205 38 5.39474 49 20 2.45"] + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/freebayes + should_exist: false +- name: Run variant calling on germline sample with freebayes without intervals + command: nextflow run main.nf -profile test --tools freebayes --no_intervals + tags: + - freebayes + - germline + - no_intervals + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/csv/variantcalled.csv + md5sum: 0cc6a67fedb2ef9ce97e463d310f9f30 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt + md5sum: f66375a00d692d67df46c129147008b2 + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 + - path: 
results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary + md5sum: 76c5919541536c12b5c8a6094d6d78d5 + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count + md5sum: 0a0464beef110bc0f3c5a35d022b528e + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual + contains: ["0 0 0 -nan 325 86 3.77907", "27.5873 359 84 4.27381 39 13 3", "40.5188 368 87 4.22989 30 10 3"] + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/freebayes + should_exist: false +- name: Run variant calling on somatic sample with freebayes + command: nextflow run main.nf -profile test,pair,targeted --tools freebayes + tags: + - freebayes + - somatic + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: e8e587ac25253ff7ab8f1cc66d410c98 + - path: results/csv/markduplicates_no_table.csv + md5sum: 617574c9b607e5daaf4ad56d48982247 + - path: results/csv/recalibrated.csv + md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab + - path: results/csv/variantcalled.csv + md5sum: b31f56256a1cfa839a2ea7f7ba6c1c45 + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: c990e4f1b7dbd5a3a623882a54ae2bf2 + - path: results/preprocessing/recal_table/test2/test2.recal.table + md5sum: 00d6877d68d622d81e4d633c4e340e7e + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test2/test2.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt + md5sum: 16c7673085520b66bd47f59fbf8a7e0e + - path: results/reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.bcftools_stats.txt + md5sum: 001709e5eeca385908e31ba370f53381 + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/markduplicates/test2/test2.md.cram.metrics + contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 5a0679057c530e5945c9c5a3a17312dc + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: dcc9ab2bf3248903e02d8da87e678977 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz + md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi + md5sum: b0ab630c3241fbd7581b7a38d944ff8b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 91e0d531f1bab64711ecefe52bfc8255 + - path: 
results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 0b3162def977123809598639f7698121 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: a8455eb2947de529abfa62b303986e0f + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz + md5sum: d9fa560ff78ae106cfee9db2c90801b5 + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi + md5sum: 4816eeb9af254ca40177b08cf11b98d2 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 91e0d531f1bab64711ecefe52bfc8255 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt + md5sum: f25166c3a0051bb4d8c11a210278de6c + - path: results/reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt + md5sum: 3211135329e4077bd9bf0ba488e14371 + - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt + md5sum: ce0eb6d33c6d0dc720fbc6d1811abef8 + - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz + md5sum: 55c160e8f3c8c7761524646426611f6b + - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz.csi + md5sum: 4205a09ede17cdbdaad45e3553f73105 + - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz + md5sum: 1dd426a45f967a9f37dcddcaea29a582 + - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt + md5sum: a1ef7e662ce993da4668e804952014ce + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt + md5sum: 3211135329e4077bd9bf0ba488e14371 + - path: 
results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt + md5sum: 70ad653c0c98baeeaf5085f1209a7bdb + - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz + md5sum: 250a9f15a7d3f102435fa98adccf48a3 + - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi + md5sum: 8072f447199c60f24b01eede8b557333 + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz + md5sum: 1dd426a45f967a9f37dcddcaea29a582 + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi + md5sum: c6d1ac97ef4dfe43731c8368d8391cab + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 + - path: results/reports/samtools/test2/test2.md.cram.stats + md5sum: 60152dbf1e109d4c407c151204388109 + - path: results/reports/samtools/test2/test2.recal.cram.stats + md5sum: 53a0b34b67fdf90ea68fac8ea744b576 + - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary + md5sum: 43d53e36cbb1091f915b2499e545b41e + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count + md5sum: 650f3dc78c5aaaecfe8ffa3d499e812f + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual + contains: ["0 0 0 -nan 82 29 2.82759", "1.1 196 35 5.6 57 24 2.375", "3.16 205 38 5.39474 49 20 2.45"] + - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.FILTER.summary + md5sum: ac61b178a3e15efb861b34f16d80fb67 + - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.count + md5sum: d7c0e0afd59f87dc4685487c0d497575 + - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.qual + # the text-based file test2_vs_test.freebayes.TsTv.qual seemingly changes content on reruns! + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/freebayes + should_exist: false +- name: Run variant calling on somatic sample with freebayes without intervals + command: nextflow run main.nf -profile test,pair,targeted --tools freebayes --no_intervals + tags: + - freebayes + - somatic + - no_intervals + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: e8e587ac25253ff7ab8f1cc66d410c98 + - path: results/csv/markduplicates_no_table.csv + md5sum: 617574c9b607e5daaf4ad56d48982247 + - path: results/csv/recalibrated.csv + md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab + - path: results/csv/variantcalled.csv + md5sum: b31f56256a1cfa839a2ea7f7ba6c1c45 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recal_table/test2/test2.recal.table + md5sum: 0626cd4337eab79b38b5bc5c95e0c003 + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test2/test2.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt + md5sum: f66375a00d692d67df46c129147008b2 + - path: results/reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.bcftools_stats.txt + md5sum: 41d9835772fa1326769c2dd94749cbea + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/markduplicates/test2/test2.md.cram.metrics + contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 5a0679057c530e5945c9c5a3a17312dc + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: 0010c2396a3173c7cf4983abe2eb6a4c + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz + md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 + - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi + md5sum: b0ab630c3241fbd7581b7a38d944ff8b + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 5a0679057c530e5945c9c5a3a17312dc + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0010c2396a3173c7cf4983abe2eb6a4c + - path: 
results/reports/mosdepth/test/test.recal.per-base.bed.gz + md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 + - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi + md5sum: b0ab630c3241fbd7581b7a38d944ff8b + - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt + md5sum: f25166c3a0051bb4d8c11a210278de6c + - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt + md5sum: d5e4084de2ea2a0a7b60b2d71c804d4b + - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz + md5sum: 55c160e8f3c8c7761524646426611f6b + - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz.csi + md5sum: 4205a09ede17cdbdaad45e3553f73105 + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt + md5sum: f25166c3a0051bb4d8c11a210278de6c + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt + md5sum: d5e4084de2ea2a0a7b60b2d71c804d4b + - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz + md5sum: 55c160e8f3c8c7761524646426611f6b + - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi + md5sum: 4205a09ede17cdbdaad45e3553f73105 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 + - path: results/reports/samtools/test2/test2.md.cram.stats + md5sum: 60152dbf1e109d4c407c151204388109 + - path: results/reports/samtools/test2/test2.recal.cram.stats + md5sum: 029b903797dc228e56d6ab74e677fa21 + - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary + md5sum: 76c5919541536c12b5c8a6094d6d78d5 + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count + md5sum: 0a0464beef110bc0f3c5a35d022b528e + - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual + contains: ["0 0 0 -nan 325 86 3.77907", "68.8262 377 88 4.28409 20 10 2", "321.335 393 96 4.09375 4 2 2"] + - path: 
results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.FILTER.summary + md5sum: 59e9d340b588537655020147e6d93e29 + - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.count + md5sum: 61ad00bb8cd47592b0671fabe26cf8cd + - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.qual + # the text-based file test2_vs_test.freebayes.TsTv.qual seemingly changes content on reruns! + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/freebayes + should_exist: false +# - name: Run variant calling on tumor_only sample with freebayes +# command: nextflow run main.nf -profile test,tools_tumoronly --tools freebayes +# tags: +# - freebayes +# - tumor_only +# - variant_calling +# files: +# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz +# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi +# - name: Run variant calling on tumor_only sample with freebayes without intervals +# command: nextflow run main.nf -profile test,tools_tumoronly --tools freebayes --no_intervals +# tags: +# - freebayes +# - no_intervals +# - tumor_only +# - variant_calling +# files: +# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz +# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi diff --git a/tests/tools.yml b/tests/tools.yml index 7dd8c6025f..d43d000fbd 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,420 +1,3 @@ -- name: Run variant calling on germline sample with 
freebayes - command: nextflow run main.nf -profile test,targeted --tools freebayes - tags: - - freebayes - - germline - - variant_calling - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/csv/variantcalled.csv - md5sum: 0cc6a67fedb2ef9ce97e463d310f9f30 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: c990e4f1b7dbd5a3a623882a54ae2bf2 - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: 16c7673085520b66bd47f59fbf8a7e0e - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 5a0679057c530e5945c9c5a3a17312dc - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: dcc9ab2bf3248903e02d8da87e678977 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz - md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi - md5sum: b0ab630c3241fbd7581b7a38d944ff8b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 91e0d531f1bab64711ecefe52bfc8255 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 0b3162def977123809598639f7698121 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: a8455eb2947de529abfa62b303986e0f - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz - md5sum: d9fa560ff78ae106cfee9db2c90801b5 - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi - md5sum: 4816eeb9af254ca40177b08cf11b98d2 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 91e0d531f1bab64711ecefe52bfc8255 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - 
path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 - - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary - md5sum: 43d53e36cbb1091f915b2499e545b41e - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count - md5sum: 650f3dc78c5aaaecfe8ffa3d499e812f - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual - contains: ["0 0 0 -nan 82 29 2.82759", "1.1 196 35 5.6 57 24 2.375", "3.16 205 38 5.39474 49 20 2.45"] - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/freebayes - should_exist: false -- name: Run variant calling on germline sample with freebayes without intervals - command: nextflow run main.nf -profile test --tools freebayes --no_intervals - tags: - - freebayes - - germline - - no_intervals - - variant_calling - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/csv/variantcalled.csv - md5sum: 0cc6a67fedb2ef9ce97e463d310f9f30 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. 
- - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: f66375a00d692d67df46c129147008b2 - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 - - path: 
results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary - md5sum: 76c5919541536c12b5c8a6094d6d78d5 - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count - md5sum: 0a0464beef110bc0f3c5a35d022b528e - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual - contains: ["0 0 0 -nan 325 86 3.77907", "27.5873 359 84 4.27381 39 13 3", "40.5188 368 87 4.22989 30 10 3"] - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/freebayes - should_exist: false -- name: Run variant calling on somatic sample with freebayes - command: nextflow run main.nf -profile test,pair,targeted --tools freebayes - tags: - - freebayes - - somatic - - variant_calling - files: - - path: results/csv/markduplicates.csv - md5sum: e8e587ac25253ff7ab8f1cc66d410c98 - - path: results/csv/markduplicates_no_table.csv - md5sum: 617574c9b607e5daaf4ad56d48982247 - - path: results/csv/recalibrated.csv - md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab - - path: results/csv/variantcalled.csv - md5sum: b31f56256a1cfa839a2ea7f7ba6c1c45 - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai - # binary changes md5sums on reruns. 
- - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: c990e4f1b7dbd5a3a623882a54ae2bf2 - - path: results/preprocessing/recal_table/test2/test2.recal.table - md5sum: 00d6877d68d622d81e4d633c4e340e7e - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test2/test2.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: 16c7673085520b66bd47f59fbf8a7e0e - - path: results/reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.bcftools_stats.txt - md5sum: 001709e5eeca385908e31ba370f53381 - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/markduplicates/test2/test2.md.cram.metrics - contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 5a0679057c530e5945c9c5a3a17312dc - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: dcc9ab2bf3248903e02d8da87e678977 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz - md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi - md5sum: b0ab630c3241fbd7581b7a38d944ff8b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 91e0d531f1bab64711ecefe52bfc8255 - - path: 
results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 0b3162def977123809598639f7698121 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 835fdc6fa52cc33e6fb76c0c20a8a6c3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: a8455eb2947de529abfa62b303986e0f - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz - md5sum: d9fa560ff78ae106cfee9db2c90801b5 - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi - md5sum: 4816eeb9af254ca40177b08cf11b98d2 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 91e0d531f1bab64711ecefe52bfc8255 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt - md5sum: f25166c3a0051bb4d8c11a210278de6c - - path: results/reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt - md5sum: 3211135329e4077bd9bf0ba488e14371 - - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt - md5sum: ce0eb6d33c6d0dc720fbc6d1811abef8 - - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz - md5sum: 55c160e8f3c8c7761524646426611f6b - - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz.csi - md5sum: 4205a09ede17cdbdaad45e3553f73105 - - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz - md5sum: 1dd426a45f967a9f37dcddcaea29a582 - - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt - md5sum: a1ef7e662ce993da4668e804952014ce - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt - md5sum: 3211135329e4077bd9bf0ba488e14371 - - path: 
results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt - md5sum: 70ad653c0c98baeeaf5085f1209a7bdb - - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz - md5sum: 250a9f15a7d3f102435fa98adccf48a3 - - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi - md5sum: 8072f447199c60f24b01eede8b557333 - - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz - md5sum: 1dd426a45f967a9f37dcddcaea29a582 - - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi - md5sum: c6d1ac97ef4dfe43731c8368d8391cab - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 - - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 - - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 53a0b34b67fdf90ea68fac8ea744b576 - - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary - md5sum: 43d53e36cbb1091f915b2499e545b41e - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count - md5sum: 650f3dc78c5aaaecfe8ffa3d499e812f - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual - contains: ["0 0 0 -nan 82 29 2.82759", "1.1 196 35 5.6 57 24 2.375", "3.16 205 38 5.39474 49 20 2.45"] - - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.FILTER.summary - md5sum: ac61b178a3e15efb861b34f16d80fb67 - - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.count - md5sum: d7c0e0afd59f87dc4685487c0d497575 - - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.qual - # the text-based file test2_vs_test.freebayes.TsTv.qual seemingly changes content on reruns! - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/freebayes - should_exist: false - -- name: Run variant calling on somatic sample with freebayes without intervals - command: nextflow run main.nf -profile test,pair,targeted --tools freebayes --no_intervals - tags: - - freebayes - - somatic - - no_intervals - - variant_calling - files: - - path: results/csv/markduplicates.csv - md5sum: e8e587ac25253ff7ab8f1cc66d410c98 - - path: results/csv/markduplicates_no_table.csv - md5sum: 617574c9b607e5daaf4ad56d48982247 - - path: results/csv/recalibrated.csv - md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab - - path: results/csv/variantcalled.csv - md5sum: b31f56256a1cfa839a2ea7f7ba6c1c45 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai - # binary changes md5sums on reruns. 
- - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recal_table/test2/test2.recal.table - md5sum: 0626cd4337eab79b38b5bc5c95e0c003 - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test2/test2.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: f66375a00d692d67df46c129147008b2 - - path: results/reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.bcftools_stats.txt - md5sum: 41d9835772fa1326769c2dd94749cbea - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/markduplicates/test2/test2.md.cram.metrics - contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 5a0679057c530e5945c9c5a3a17312dc - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: 0010c2396a3173c7cf4983abe2eb6a4c - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz - md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 - - path: results/reports/mosdepth/test/test.md.per-base.bed.gz.csi - md5sum: b0ab630c3241fbd7581b7a38d944ff8b - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 5a0679057c530e5945c9c5a3a17312dc - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0010c2396a3173c7cf4983abe2eb6a4c - - path: 
results/reports/mosdepth/test/test.recal.per-base.bed.gz - md5sum: 5724f1c6b6a0e63e25ec8a0f38edfda6 - - path: results/reports/mosdepth/test/test.recal.per-base.bed.gz.csi - md5sum: b0ab630c3241fbd7581b7a38d944ff8b - - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt - md5sum: f25166c3a0051bb4d8c11a210278de6c - - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt - md5sum: d5e4084de2ea2a0a7b60b2d71c804d4b - - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz - md5sum: 55c160e8f3c8c7761524646426611f6b - - path: results/reports/mosdepth/test2/test2.md.per-base.bed.gz.csi - md5sum: 4205a09ede17cdbdaad45e3553f73105 - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt - md5sum: f25166c3a0051bb4d8c11a210278de6c - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt - md5sum: d5e4084de2ea2a0a7b60b2d71c804d4b - - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz - md5sum: 55c160e8f3c8c7761524646426611f6b - - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi - md5sum: 4205a09ede17cdbdaad45e3553f73105 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 - - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 - - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 029b903797dc228e56d6ab74e677fa21 - - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary - md5sum: 76c5919541536c12b5c8a6094d6d78d5 - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count - md5sum: 0a0464beef110bc0f3c5a35d022b528e - - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.qual - contains: ["0 0 0 -nan 325 86 3.77907", "68.8262 377 88 4.28409 20 10 2", "321.335 393 96 4.09375 4 2 2"] - - path: 
results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.FILTER.summary - md5sum: 59e9d340b588537655020147e6d93e29 - - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.count - md5sum: 61ad00bb8cd47592b0671fabe26cf8cd - - path: results/reports/vcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.TsTv.qual - # the text-based file test2_vs_test.freebayes.TsTv.qual seemingly changes content on reruns! - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test/test.freebayes.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/freebayes/test2_vs_test/test2_vs_test.freebayes.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/freebayes - should_exist: false - -# - name: Run variant calling on tumor_only sample with freebayes -# command: nextflow run main.nf -profile test,tools_tumoronly --tools freebayes -# tags: -# - freebayes -# - tumor_only -# - variant_calling -# files: -# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz -# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi - -# - name: Run variant calling on tumor_only sample with freebayes without intervals -# command: nextflow run main.nf -profile test,tools_tumoronly --tools freebayes --no_intervals -# tags: -# - freebayes -# - no_intervals -# - tumor_only -# - variant_calling -# files: -# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz -# - path: results/variant_calling/freebayes/sample2/sample2.freebayes.vcf.gz.tbi - - name: Run variant calling on germline sample with haplotypecaller command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller --step 
variant_calling tags: From e13662579655b648b574f5a7cf4aaf6fa992cb18 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:32:29 +0100 Subject: [PATCH 32/49] haplotypecaller --- conf/modules/haplotypecaller.config | 120 ++++++++++++++++++++++++++++ conf/modules/modules.config | 104 ------------------------ nextflow.config | 1 + tests/config/pytest_tags.yml | 17 ++++ tests/test_haplotypecaller.yml | 107 +++++++++++++++++++++++++ tests/tools.yml | 107 ------------------------- 6 files changed, 245 insertions(+), 211 deletions(-) create mode 100644 conf/modules/haplotypecaller.config create mode 100644 tests/test_haplotypecaller.yml diff --git a/conf/modules/haplotypecaller.config b/conf/modules/haplotypecaller.config new file mode 100644 index 0000000000..53c3a7e5b8 --- /dev/null +++ b/conf/modules/haplotypecaller.config @@ -0,0 +1,120 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// HAPLOTYPECALLER + +process { + + withName: 'GATK4_HAPLOTYPECALLER' { + ext.args = { params.joint_germline ? "-ERC GVCF" : "" } + ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" ) : ( params.joint_germline ? 
"${meta.id}.haplotypecaller.${intervals.simpleName}.g" :"${meta.id}.haplotypecaller.${intervals.simpleName}" ) } + ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') } + publishDir = [ + enabled: !params.joint_germline, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "haplotypecaller/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_HAPLOTYPECALLER' { + ext.prefix = { params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'CNNSCOREVARIANTS' { + publishDir = [ + // Otherwise it gets published + enabled: false + ] + } + + withName: 'FILTERVARIANTTRANCHES' { + ext.prefix = {"${meta.id}.haplotypecaller"} + ext.args = { "--info-key CNN_1D" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/"}, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'GATK4_GENOMICSDBIMPORT' { + ext.prefix = { meta.num_intervals > 1 ? "${meta.intervals_name}.joint" : "joint" } + ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') && params.joint_germline && !params.no_intervals} + } + + withName: 'GATK4_GENOTYPEGVCFS' { + ext.prefix = { meta.num_intervals > 1 ? meta.intervals_name : "joint_germline" } + } + + if (params.tools && params.tools.contains('haplotypecaller') && params.joint_germline){ + withName: 'NFCORE_SAREK:SAREK:GERMLINE_VARIANT_CALLING:RUN_HAPLOTYPECALLER:JOINT_GERMLINE:BCFTOOLS_SORT' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.joint.sort" : "${vcf.minus("vcf.gz")}.sort" } + publishDir = [ + enabled: false + ] + } + } + + withName: 'MERGE_GENOTYPEGVCFS' { + ext.prefix = "joint_germline" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'VARIANTRECALIBRATOR_INDEL' { + ext.prefix = { "${meta.id}_INDEL" } + ext.args = "-an QD -an MQRankSum -an ReadPosRankSum -an FS -an SOR -an DP -mode INDEL" + publishDir = [ + enabled: false + ] + } + + withName: 'VARIANTRECALIBRATOR_SNP' { + ext.prefix = { "${meta.id}_SNP" } + ext.args = "-an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR -mode SNP" + publishDir = [ + enabled: false + ] + } + + withName: 'GATK4_APPLYVQSR_SNP' { + ext.prefix = { "${meta.id}_SNP" } + ext.args = '--truth-sensitivity-filter-level 99.9 -mode SNP' + } + + withName: 'GATK4_APPLYVQSR_INDEL' { + ext.prefix = { "${meta.id}_INDEL" } + ext.args = '--truth-sensitivity-filter-level 99.9 -mode INDEL' + } + + withName: 'MERGE_VQSR' { + ext.prefix = "joint_germline_recalibrated" + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/"}, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 31de4a5e0a..52be21c6e3 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -94,110 +94,6 @@ process { process{ - // HAPLOTYPECALLER - withName: 'MERGE_HAPLOTYPECALLER' { - ext.prefix = { params.joint_germline ? 
"${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'GATK4_HAPLOTYPECALLER' { - ext.args = { params.joint_germline ? "-ERC GVCF" : "" } - ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" ) : ( params.joint_germline ? "${meta.id}.haplotypecaller.${intervals.simpleName}.g" :"${meta.id}.haplotypecaller.${intervals.simpleName}" ) } - ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') } - publishDir = [ - enabled: !params.joint_germline, - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/"}, - pattern: "*{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? null : "haplotypecaller/${meta.id}/${it}" } - ] - } - - withName: 'CNNSCOREVARIANTS' { - publishDir = [ - // Otherwise it gets published - enabled: false - ] - } - - withName: 'FILTERVARIANTTRANCHES' { - ext.prefix = {"${meta.id}.haplotypecaller"} - ext.args = { "--info-key CNN_1D" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/"}, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - - withName: 'GATK4_GENOMICSDBIMPORT' { - ext.prefix = { meta.num_intervals > 1 ? "${meta.intervals_name}.joint" : "joint" } - ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') && params.joint_germline && !params.no_intervals} - } - - withName: 'GATK4_GENOTYPEGVCFS' { - ext.prefix = { meta.num_intervals > 1 ? 
meta.intervals_name : "joint_germline" } - } - - if (params.tools && params.tools.contains('haplotypecaller') && params.joint_germline){ - withName: 'NFCORE_SAREK:SAREK:GERMLINE_VARIANT_CALLING:RUN_HAPLOTYPECALLER:JOINT_GERMLINE:BCFTOOLS_SORT' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.joint.sort" : "${vcf.minus("vcf.gz")}.sort" } - publishDir = [ - enabled: false - ] - } - } - - withName: 'MERGE_GENOTYPEGVCFS' { - ext.prefix = "joint_germline" - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - - withName: 'VARIANTRECALIBRATOR_INDEL' { - ext.prefix = { "${meta.id}_INDEL" } - ext.args = "-an QD -an MQRankSum -an ReadPosRankSum -an FS -an SOR -an DP -mode INDEL" - publishDir = [ - enabled: false - ] - } - - withName: 'VARIANTRECALIBRATOR_SNP' { - ext.prefix = { "${meta.id}_SNP" } - ext.args = "-an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR -mode SNP" - publishDir = [ - enabled: false - ] - } - - withName: 'GATK4_APPLYVQSR_SNP' { - ext.prefix = { "${meta.id}_SNP" } - ext.args = '--truth-sensitivity-filter-level 99.9 -mode SNP' - } - - withName: 'GATK4_APPLYVQSR_INDEL' { - ext.prefix = { "${meta.id}_INDEL" } - ext.args = '--truth-sensitivity-filter-level 99.9 -mode INDEL' - } - - withName: 'MERGE_VQSR' { - ext.prefix = "joint_germline_recalibrated" - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/haplotypecaller/joint_variant_calling/"}, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - // MANTA withName: 'MERGE_MANTA.*' { publishDir = [ diff --git a/nextflow.config b/nextflow.config index 8f7c999b8e..61185fe2ec 100644 --- a/nextflow.config +++ b/nextflow.config @@ -306,6 +306,7 @@ includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' includeConfig 'conf/modules/freebayes.config' +includeConfig 'conf/modules/haplotypecaller.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 1ec15969b1..920045e600 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -138,6 +138,23 @@ freebayes: - modules/nf-core/tabix/tabix/main.nf - subworkflows/local/bam_variant_calling_freebayes/main.nf +## haplotypecaller +haplotypecaller: + - conf/modules/haplotypecaller.config + - modules/nf-core/bcftools/sort/main.nf + - modules/nf-core/gatk4/applyvqsr/main.nf + - modules/nf-core/gatk4/cnnscorevariants/main.nf + - modules/nf-core/gatk4/filtervarianttranches/main.nf + - modules/nf-core/gatk4/genomicsdbimport/main.nf + - modules/nf-core/gatk4/genotypegvcfs/main.nf + - modules/nf-core/gatk4/haplotypecaller/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/gatk4/variantrecalibrator/main.nf + - modules/nf-core/tabix/tabix/main.nf + - subworkflows/local/bam_joint_calling_germline_gatk/main.nf + - subworkflows/local/bam_variant_calling_haplotypecaller/main.nf + - subworkflows/local/vcf_variant_filtering_gatk/main.nf + # annotate ## merge diff --git a/tests/test_haplotypecaller.yml b/tests/test_haplotypecaller.yml new file mode 100644 index 0000000000..0d13eb3650 --- /dev/null +++ b/tests/test_haplotypecaller.yml @@ -0,0 +1,107 @@ +- name: Run variant calling on germline sample with haplotypecaller + command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv 
--tools haplotypecaller --step variant_calling + tags: + - germline + - haplotypecaller + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: d7d86e82902a4f57876b2414a4f812a4 + - path: results/multiqc + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/haplotypecaller/test/test.haplotypecaller.filtered.bcftools_stats.txt + md5sum: f9cc3809106a42601eafd3b09900750a + - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.FILTER.summary + md5sum: 4e2ceea7f3ff998004691fd71192d9ee + - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.count + md5sum: b77c120ee5cc0423267200c67d60c663 + - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.qual + md5sum: 1e34357e5848c318f8c2c7d3b041d229 + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi + # binary changes md5sums on reruns. 
+ - path: results/haplotypecaller + should_exist: false +- name: Run variant calling on germline sample with haplotypecaller without intervals + command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller --step variant_calling --no_intervals + tags: + - germline + - haplotypecaller + - no_intervals + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: d7d86e82902a4f57876b2414a4f812a4 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/preprocessing/converted/test/test.converted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/converted/test/test.converted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/haplotypecaller/test/test.haplotypecaller.filtered.bcftools_stats.txt + md5sum: f9cc3809106a42601eafd3b09900750a + - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.FILTER.summary + md5sum: 4e2ceea7f3ff998004691fd71192d9ee + - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.count + md5sum: b77c120ee5cc0423267200c67d60c663 + - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.qual + md5sum: 1e34357e5848c318f8c2c7d3b041d229 + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/haplotypecaller + should_exist: false +- name: Run joint germline variant calling with haplotypecaller + command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --joint_germline true --step variant_calling + tags: + - germline + - haplotypecaller + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: d2dffdbd2b4f1f26a06637592d24dab3 + - path: results/multiqc + - path: results/preprocessing/recalibrated/test/test.recal.cram + should_exist: false + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + should_exist: false + - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt + md5sum: f91f185319e00108870911e0ce1a7ed6 + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary + md5sum: 2a4eb7abfb2e64e45d53fdda17530b7f + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count + md5sum: 949fa16c755189c23a37f0ea8ecd1b26 + - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual + md5sum: 3b3a249fbc26c80ce0acbe91147291ea + - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz.tbi + # binary changes md5sums on reruns. 
+ - path: results/haplotypecaller + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index d43d000fbd..300aee623d 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,110 +1,3 @@ -- name: Run variant calling on germline sample with haplotypecaller - command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller --step variant_calling - tags: - - germline - - haplotypecaller - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: d7d86e82902a4f57876b2414a4f812a4 - - path: results/multiqc - - path: results/preprocessing/converted/test/test.converted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/converted/test/test.converted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - should_exist: false - - path: results/reports/bcftools/haplotypecaller/test/test.haplotypecaller.filtered.bcftools_stats.txt - md5sum: f9cc3809106a42601eafd3b09900750a - - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.FILTER.summary - md5sum: 4e2ceea7f3ff998004691fd71192d9ee - - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.count - md5sum: b77c120ee5cc0423267200c67d60c663 - - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.qual - md5sum: 1e34357e5848c318f8c2c7d3b041d229 - - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/haplotypecaller - should_exist: false -- name: Run variant calling on germline sample with haplotypecaller without intervals - command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_single_bam.csv --tools haplotypecaller --step variant_calling --no_intervals - tags: - - germline - - haplotypecaller - - no_intervals - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: d7d86e82902a4f57876b2414a4f812a4 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/preprocessing/converted/test/test.converted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/converted/test/test.converted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - should_exist: false - - path: results/reports/bcftools/haplotypecaller/test/test.haplotypecaller.filtered.bcftools_stats.txt - md5sum: f9cc3809106a42601eafd3b09900750a - - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.FILTER.summary - md5sum: 4e2ceea7f3ff998004691fd71192d9ee - - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.count - md5sum: b77c120ee5cc0423267200c67d60c663 - - path: results/reports/vcftools/haplotypecaller/test/test.haplotypecaller.filtered.TsTv.qual - md5sum: 1e34357e5848c318f8c2c7d3b041d229 - - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.filtered.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/haplotypecaller/test/test.haplotypecaller.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/haplotypecaller - should_exist: false -- name: Run joint germline variant calling with haplotypecaller - command: nextflow run main.nf -profile test,targeted --input ./tests/csv/3.0/mapped_joint_bam.csv --tools haplotypecaller --joint_germline true --step variant_calling - tags: - - germline - - haplotypecaller - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: d2dffdbd2b4f1f26a06637592d24dab3 - - path: results/multiqc - - path: results/preprocessing/recalibrated/test/test.recal.cram - should_exist: false - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - should_exist: false - - path: results/reports/bcftools/haplotypecaller/joint_variant_calling/joint_germline.bcftools_stats.txt - md5sum: f91f185319e00108870911e0ce1a7ed6 - - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.FILTER.summary - md5sum: 2a4eb7abfb2e64e45d53fdda17530b7f - - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.count - md5sum: 949fa16c755189c23a37f0ea8ecd1b26 - - path: results/reports/vcftools/haplotypecaller/joint_variant_calling/joint_germline.TsTv.qual - md5sum: 3b3a249fbc26c80ce0acbe91147291ea - - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/haplotypecaller/joint_variant_calling/joint_germline.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/haplotypecaller - should_exist: false - name: Run variant calling on germline sample with manta command: nextflow run main.nf -profile test,tools_germline --tools manta tags: From a922a395c09f942eab82cf20aab185d113773969 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:48:21 +0100 Subject: [PATCH 33/49] manta --- conf/modules/manta.config | 57 +++++++++ conf/modules/modules.config | 38 ------ nextflow.config | 1 + tests/config/pytest_tags.yml | 11 ++ tests/test_manta.yml | 213 +++++++++++++++++++++++++++++++++ tests/tools.yml | 223 +---------------------------------- 6 files changed, 288 insertions(+), 255 deletions(-) create mode 100644 conf/modules/manta.config create mode 100644 tests/test_manta.yml diff --git a/conf/modules/manta.config b/conf/modules/manta.config new file mode 100644 index 0000000000..944025af07 --- /dev/null +++ b/conf/modules/manta.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MANTA + +process { + + withName: 'MANTA.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.manta" : "${meta.id}.manta.${target_bed.simpleName}" } + ext.args = { params.wes ? 
"--exome" : "" } + ext.when = { params.tools && params.tools.split(',').contains('manta') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? null : "manta/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_MANTA.*' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/manta/${meta.id}/" }, + pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_MANTA_DIPLOID' { + ext.prefix = {"${meta.id}.manta.diploid_sv"} + } + + withName: 'MERGE_MANTA_SMALL_INDELS' { + ext.prefix = {"${meta.id}.manta.candidate_small_indels"} + } + + withName: 'MERGE_MANTA_SV' { + ext.prefix = {"${meta.id}.manta.candidate_sv"} + } + + withName: 'MERGE_MANTA_TUMOR' { + ext.prefix = {"${meta.id}.manta.tumor_sv"} + } + + withName: 'MERGE_MANTA_SOMATIC' { + ext.prefix = {"${meta.id}.manta.somatic_sv"} + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 52be21c6e3..de2ca963dd 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -94,34 +94,6 @@ process { process{ - // MANTA - withName: 'MERGE_MANTA.*' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/manta/${meta.id}/" }, - pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}" - ] - } - withName: 'MERGE_MANTA_DIPLOID' { - ext.prefix = {"${meta.id}.manta.diploid_sv"} - } - withName: 'MERGE_MANTA_SMALL_INDELS' { - ext.prefix = {"${meta.id}.manta.candidate_small_indels"} - } - withName: 'MERGE_MANTA_SV' { - ext.prefix = {"${meta.id}.manta.candidate_sv"} - } - withName: 'MANTA.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.manta" : "${meta.id}.manta.${target_bed.simpleName}" } - ext.args = { params.wes ? 
"--exome" : "" } - ext.when = { params.tools && params.tools.split(',').contains('manta') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{diploid_sv,tumor_sv,somatic_sv}.{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? null : "manta/${meta.id}/${it}" } - ] - } // STRELKA withName: 'MERGE_STRELKA.*' { @@ -171,11 +143,6 @@ process{ // TUMOR_VARIANT_CALLING - // MANTA - withName: 'MERGE_MANTA_TUMOR' { - ext.prefix = {"${meta.id}.manta.tumor_sv"} - } - // MUTECT2 withName: 'CALCULATECONTAMINATION' { ext.prefix = { "${meta.id}.mutect2" } @@ -311,11 +278,6 @@ process{ } } - // MANTA - withName: 'MERGE_MANTA_SOMATIC' { - ext.prefix = {"${meta.id}.manta.somatic_sv"} - } - // MUTECT2 if (params.tools && params.tools.split(',').contains('mutect2')) { withName: 'MUTECT2_PAIRED' { diff --git a/nextflow.config b/nextflow.config index 61185fe2ec..1d4e5c2b20 100644 --- a/nextflow.config +++ b/nextflow.config @@ -307,6 +307,7 @@ includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' includeConfig 'conf/modules/freebayes.config' includeConfig 'conf/modules/haplotypecaller.config' +includeConfig 'conf/modules/manta.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 920045e600..93b02fa71b 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -155,6 +155,17 @@ haplotypecaller: - subworkflows/local/bam_variant_calling_haplotypecaller/main.nf - subworkflows/local/vcf_variant_filtering_gatk/main.nf +## manta +manta: + - conf/modules/manta.config + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/manta/germline/main.nf + - modules/nf-core/manta/somatic/main.nf + - modules/nf-core/manta/tumoronly/main.nf + - subworkflows/local/bam_variant_calling_germline_manta/main.nf + - subworkflows/local/bam_variant_calling_somatic_manta/main.nf + - 
subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf + # annotate ## merge diff --git a/tests/test_manta.yml b/tests/test_manta.yml new file mode 100644 index 0000000000..e8a85e1312 --- /dev/null +++ b/tests/test_manta.yml @@ -0,0 +1,213 @@ +- name: Run variant calling on germline sample with manta + command: nextflow run main.nf -profile test,tools_germline --tools manta + tags: + - germline + - manta + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: 981280af86f69190fdf0639030a80249 + - path: results/multiqc + - path: results/reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt + md5sum: 2afce09f0b53a47acde7f8767fd6fcef + - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi + # binary changes md5sums on reruns. 
+ - path: results/manta + should_exist: false +- name: Run variant calling on germline sample with manta without intervals + command: nextflow run main.nf -profile test,tools_germline --tools manta --no_intervals + tags: + - germline + - manta + - no_intervals + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: 981280af86f69190fdf0639030a80249 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt + md5sum: 2afce09f0b53a47acde7f8767fd6fcef + - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/manta + should_exist: false +- name: Run variant calling on tumor_only sample with manta + command: nextflow run main.nf -profile test,tools_tumoronly --tools manta + tags: + - manta + - tumor_only + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: f1051fe647abf202e6332f9a1789c05d + - path: results/multiqc + - path: results/reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt + md5sum: ea7f369ab8edaccc5bf45347bda15dfa + - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/manta + should_exist: false +- name: Run variant calling on tumor_only sample with manta without intervals + command: nextflow run main.nf -profile test,tools_tumoronly --tools manta --no_intervals + tags: + - manta + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: f1051fe647abf202e6332f9a1789c05d + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt + md5sum: ea7f369ab8edaccc5bf45347bda15dfa + - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/manta + should_exist: false +- name: Run variant calling on somatic sample with manta + command: nextflow run main.nf -profile test,tools_somatic --tools manta + tags: + - manta + - somatic + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: 3a8861808601994f89d5c55ce5c95dae + - path: results/multiqc + - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: baa05f77160e6cd985050790334c91e8 + - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt + md5sum: 56c39772dc25e451d8209f608bb16e37 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: 
results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/manta + should_exist: false +- name: Run variant calling on somatic sample with manta without intervals + command: nextflow run main.nf -profile test,tools_somatic --tools manta --no_intervals + tags: + - manta + - no_intervals + - somatic + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: 3a8861808601994f89d5c55ce5c95dae + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + - path: 
results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: baa05f77160e6cd985050790334c91e8 + - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt + md5sum: 56c39772dc25e451d8209f608bb16e37 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/manta + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index 300aee623d..98ff2829e8 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,216 +1,3 @@ -- name: Run variant calling on germline sample with manta - command: nextflow run main.nf -profile test,tools_germline --tools manta - tags: - - germline - - manta - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: 981280af86f69190fdf0639030a80249 - - path: results/multiqc - - path: results/reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt - md5sum: 2afce09f0b53a47acde7f8767fd6fcef - - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/manta - should_exist: false -- name: Run variant calling on germline sample with manta without intervals - command: nextflow run main.nf -profile test,tools_germline --tools manta --no_intervals - tags: - - germline - - manta - - no_intervals - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: 981280af86f69190fdf0639030a80249 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt - md5sum: 2afce09f0b53a47acde7f8767fd6fcef - - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/manta/sample1/sample1.manta.diploid_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/manta - should_exist: false -- name: Run variant calling on tumor_only sample with manta - command: nextflow run main.nf -profile test,tools_tumoronly --tools manta - tags: - - manta - - tumor_only - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: f1051fe647abf202e6332f9a1789c05d - - path: results/multiqc - - path: results/reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt - md5sum: ea7f369ab8edaccc5bf45347bda15dfa - - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/manta - should_exist: false -- name: Run variant calling on tumor_only sample with manta without intervals - command: nextflow run main.nf -profile test,tools_tumoronly --tools manta --no_intervals - tags: - - manta - - no_intervals - - tumor_only - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: f1051fe647abf202e6332f9a1789c05d - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt - md5sum: ea7f369ab8edaccc5bf45347bda15dfa - - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/manta/sample2/sample2.manta.tumor_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/manta - should_exist: false -- name: Run variant calling on somatic sample with manta - command: nextflow run main.nf -profile test,tools_somatic --tools manta - tags: - - manta - - somatic - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: 3a8861808601994f89d5c55ce5c95dae - - path: results/multiqc - - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: 
results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/manta - should_exist: false -- name: Run variant calling on somatic sample with manta without intervals - command: nextflow run main.nf -profile test,tools_somatic --tools manta --no_intervals - tags: - - manta - - no_intervals - - somatic - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: 3a8861808601994f89d5c55ce5c95dae - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b - - path: 
results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/manta - should_exist: false - name: Run variant calling on tumor_only sample to test mpileup command: nextflow run main.nf -profile test,tools_tumoronly --tools mpileup tags: @@ -607,11 +394,12 @@ # binary changes md5sums on reruns. - path: results/strelka should_exist: false -- name: Run variant calling on somatic sample with strelka & manta (StrelkaBP) +- name: Run variant calling on somatic sample with strelka & manta (Strelka BP) command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta tags: - somatic - - strelkabp + - strelka + - strelka_bp - variant_calling files: - path: results/csv/variantcalled.csv @@ -697,12 +485,13 @@ should_exist: false - path: results/strelka should_exist: false -- name: Run variant calling on somatic sample with & manta (StrelkaBP) without intervals +- name: Run variant calling on somatic sample with & manta (Strelka BP) without intervals command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta --no_intervals tags: - no_intervals - somatic - - strelkabp + - strelka + - strelka_bp - variant_calling files: - path: results/csv/variantcalled.csv From 7f56a748c750e4df274cb2efc5ba3a2d701be285 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 15:57:07 +0100 Subject: [PATCH 34/49] mpileup + controlfreec --- conf/modules/controlfreec.config | 53 ------------------------ conf/modules/mpileup.config | 71 ++++++++++++++++++++++++++++++++ nextflow.config | 1 + tests/config/pytest_tags.yml | 11 +++++ tests/test_mpileup.yml | 58 
++++++++++++++++++++++++++ tests/tools.yml | 58 -------------------------- 6 files changed, 141 insertions(+), 111 deletions(-) create mode 100644 conf/modules/mpileup.config create mode 100644 tests/test_mpileup.yml diff --git a/conf/modules/controlfreec.config b/conf/modules/controlfreec.config index 449d2f1e14..3945756957 100644 --- a/conf/modules/controlfreec.config +++ b/conf/modules/controlfreec.config @@ -23,15 +23,6 @@ process { ] } - withName: 'CAT_MPILEUP' { - publishDir = [ - enabled: true, - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, - pattern: "*{mpileup.gz}", - ] - } - withName: 'FREEC_.*' { ext.when = { params.tools && params.tools.split(',').contains('controlfreec') } publishDir = [ @@ -68,16 +59,6 @@ process { ] } - withName: 'SAMTOOLS_MPILEUP' { - ext.when = { params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup')) } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, - pattern: "*mpileup.gz", - saveAs: { meta.num_intervals > 1 ? null : it } - ] - } - // TUMOR_ONLY_VARIANT_CALLING withName: 'FREEC_TUMORONLY' { ext.args = {[ @@ -112,40 +93,6 @@ process { } // PAIR_VARIANT_CALLING - if (params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup'))) { - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.normal.mpileup.gz" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.normal.mpileup.gz" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.tumor.mpileup.gz" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { - ext.prefix = { "${meta.id}.tumor.mpileup.gz" } - } - } - withName: 'FREEC_SOMATIC' { ext.args = {[ "sample":[ diff --git a/conf/modules/mpileup.config b/conf/modules/mpileup.config new file mode 100644 index 0000000000..8c48d9b359 --- /dev/null +++ b/conf/modules/mpileup.config @@ -0,0 +1,71 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. 
+---------------------------------------------------------------------------------------- +*/ + +// MPILEUP + +process { + + withName: 'CAT_MPILEUP' { + publishDir = [ + enabled: true, + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, + pattern: "*{mpileup.gz}", + ] + } + + withName: 'SAMTOOLS_MPILEUP' { + ext.when = { params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup')) } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mpileup/${meta.id}/" }, + pattern: "*mpileup.gz", + saveAs: { meta.num_intervals > 1 ? null : it } + ] + } + +// PAIR_VARIANT_CALLING + if (params.tools && (params.tools.split(',').contains('controlfreec') || params.tools.split(',').contains('mpileup'))) { + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.normal.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.normal" : "${meta.id}_${intervals.simpleName}.normal" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_NORMAL:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.normal.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:MPILEUP_TUMOR:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.tumor.mpileup.gz" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:SAMTOOLS_MPILEUP' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.tumor" : "${meta.id}_${intervals.simpleName}.tumor" } + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_TUMOR_ONLY_ALL:BAM_VARIANT_CALLING_MPILEUP:CAT_MPILEUP' { + ext.prefix = { "${meta.id}.tumor.mpileup.gz" } + } + } +} diff --git a/nextflow.config b/nextflow.config index 1d4e5c2b20..f9feebb9cf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -308,6 +308,7 @@ includeConfig 'conf/modules/deepvariant.config' includeConfig 'conf/modules/freebayes.config' includeConfig 'conf/modules/haplotypecaller.config' includeConfig 'conf/modules/manta.config' +includeConfig 'conf/modules/mpileup.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 93b02fa71b..e873f8523f 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -113,13 +113,17 @@ cnvkit: ## controlfreec controlfreec: - conf/modules/controlfreec.config + - conf/modules/mpileup.config + - modules/nf-core/cat/cat/main.nf - modules/nf-core/controlfreec/assesssignificance/main.nf - modules/nf-core/controlfreec/freec/main.nf - modules/nf-core/controlfreec/freec2bed/main.nf - modules/nf-core/controlfreec/freec2circos/main.nf - modules/nf-core/controlfreec/makegraph/main.nf + - modules/nf-core/samtools/mpileup/main.nf - subworkflows/local/bam_variant_calling_somatic_controlfreec/main.nf - subworkflows/local/bam_variant_calling_tumor_only_controlfreec/main.nf + - subworkflows/local/bam_variant_calling_mpileup/main.nf ## deepvariant deepvariant: @@ -166,6 +170,13 @@ manta: - 
subworkflows/local/bam_variant_calling_somatic_manta/main.nf - subworkflows/local/bam_variant_calling_tumor_only_manta/main.nf +## mpileup +mpileup: + - conf/modules/mpileup.config + - modules/nf-core/cat/cat/main.nf + - modules/nf-core/samtools/mpileup/main.nf + - subworkflows/local/bam_variant_calling_mpileup/main.nf + # annotate ## merge diff --git a/tests/test_mpileup.yml b/tests/test_mpileup.yml new file mode 100644 index 0000000000..3ace5ecf32 --- /dev/null +++ b/tests/test_mpileup.yml @@ -0,0 +1,58 @@ +- name: Run variant calling on tumor_only sample to test mpileup + command: nextflow run main.nf -profile test,tools_tumoronly --tools mpileup + tags: + - tumor_only + - mpileup + files: + - path: results/multiqc + - path: results/variant_calling/mpileup/sample2/sample2.tumor.mpileup.gz + # binary changes md5sums on reruns. + - path: results/mpileup + should_exist: false +- name: Run variant calling on tumor_only sample to test mpileup without intervals + command: nextflow run main.nf -profile test,tools_tumoronly --tools mpileup --no_intervals + tags: + - tumor_only + - mpileup + - no_intervals + files: + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/variant_calling/mpileup/sample2/sample2.tumor.mpileup.gz + # binary changes md5sums on reruns. + - path: results/mpileup + should_exist: false +- name: Run variant calling on germline sample to test mpileup + command: nextflow run main.nf -profile test,tools_germline --tools mpileup + tags: + - germline + - mpileup + files: + - path: results/multiqc + - path: results/variant_calling/mpileup/sample1/sample1.normal.mpileup.gz + # binary changes md5sums on reruns. 
+ - path: results/mpileup + should_exist: false +- name: Run variant calling on germline sample to test mpileup without intervals + command: nextflow run main.nf -profile test,tools_germline --tools mpileup --no_intervals + tags: + - germline + - mpileup + - no_intervals + files: + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/variant_calling/mpileup/sample1/sample1.normal.mpileup.gz + # binary changes md5sums on reruns. + - path: results/mpileup + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index 98ff2829e8..07d3325562 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,61 +1,3 @@ -- name: Run variant calling on tumor_only sample to test mpileup - command: nextflow run main.nf -profile test,tools_tumoronly --tools mpileup - tags: - - tumor_only - - mpileup - files: - - path: results/multiqc - - path: results/variant_calling/mpileup/sample2/sample2.tumor.mpileup.gz - # binary changes md5sums on reruns. - - path: results/mpileup - should_exist: false -- name: Run variant calling on tumor_only sample to test mpileup without intervals - command: nextflow run main.nf -profile test,tools_tumoronly --tools mpileup --no_intervals - tags: - - tumor_only - - mpileup - - no_intervals - files: - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/variant_calling/mpileup/sample2/sample2.tumor.mpileup.gz - # binary changes md5sums on reruns. 
- - path: results/mpileup - should_exist: false -- name: Run variant calling on germline sample to test mpileup - command: nextflow run main.nf -profile test,tools_germline --tools mpileup - tags: - - germline - - mpileup - files: - - path: results/multiqc - - path: results/variant_calling/mpileup/sample1/sample1.normal.mpileup.gz - # binary changes md5sums on reruns. - - path: results/mpileup - should_exist: false -- name: Run variant calling on germline sample to test mpileup without intervals - command: nextflow run main.nf -profile test,tools_germline --tools mpileup --no_intervals - tags: - - germline - - mpileup - - no_intervals - files: - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/variant_calling/mpileup/sample1/sample1.normal.mpileup.gz - # binary changes md5sums on reruns. 
- - path: results/mpileup - should_exist: false - name: Run variant calling on tumor only sample with mutect2 command: nextflow run main.nf -profile test,tools_tumoronly --tools mutect2 tags: From ec13c41c083cb2b6a4c583079002d119e8dc7b3e Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 16:06:51 +0100 Subject: [PATCH 35/49] mutect2 --- conf/modules/freebayes.config | 14 ++++ conf/modules/modules.config | 123 ---------------------------------- conf/modules/mutect2.config | 123 ++++++++++++++++++++++++++++++++++ nextflow.config | 1 + tests/config/pytest_tags.yml | 14 ++++ tests/test_mutect2.yml | 87 ++++++++++++++++++++++++ tests/tools.yml | 87 ------------------------ 7 files changed, 239 insertions(+), 210 deletions(-) create mode 100644 conf/modules/mutect2.config create mode 100644 tests/test_mutect2.yml diff --git a/conf/modules/freebayes.config b/conf/modules/freebayes.config index 9890735588..a36193ec09 100644 --- a/conf/modules/freebayes.config +++ b/conf/modules/freebayes.config @@ -51,4 +51,18 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } + + // PAIR_VARIANT_CALLING + if (params.tools && params.tools.split(',').contains('freebayes')) { + withName: '.*:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { + ext.args = "--pooled-continuous \ + --pooled-discrete \ + --genotype-qualities \ + --report-genotype-likelihood-max \ + --allele-balance-priors-off \ + --min-alternate-fraction 0.03 \ + --min-repeat-entropy 1 \ + --min-alternate-count 2 " + } + } } diff --git a/conf/modules/modules.config b/conf/modules/modules.config index de2ca963dd..8b03da3da8 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -94,7 +94,6 @@ process { process{ - // STRELKA withName: 'MERGE_STRELKA.*' { publishDir = [ @@ -141,105 +140,6 @@ process{ ] } -// TUMOR_VARIANT_CALLING - - // MUTECT2 - withName: 'CALCULATECONTAMINATION' { - ext.prefix = { "${meta.id}.mutect2" } - ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } - ] - } - - withName: 'MERGE_MUTECT2.*' { - ext.prefix = { "${meta.id}.mutect2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - - withName: 'FILTERMUTECTCALLS.*' { - ext.prefix = {"${meta.id}.mutect2.filtered"} - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } - ] - } - - withName: 'GATHERPILEUPSUMMARIES' { - ext.prefix = { "${meta.id}.mutect2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: 'GATHERPILEUPSUMMARIES_.*' { - ext.prefix = { "${meta.id}.mutect2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.tumor_id}_vs_${meta.normal_id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'GETPILEUPSUMMARIES.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*.table", - saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } - ] - } - - withName: 'GETPILEUPSUMMARIES_.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*.table", - saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.tumor_id}_vs_${meta.normal_id}/${it}" } - ] - } - - withName: 'LEARNREADORIENTATIONMODEL' { - ext.prefix = { "${meta.id}.mutect2.artifactprior" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'MERGEMUTECTSTATS' { - ext.prefix = { "${meta.id}.mutect2" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'GATK4_MUTECT2' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } - ext.when = { params.tools && params.tools.split(',').contains('mutect2') } - ext.args = { params.ignore_soft_clipped_bases ? 
"--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{vcf.gz,vcf.gz.tbi,stats}", - saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } - ] - } - // PAIR_VARIANT_CALLING // ASCAT @@ -264,29 +164,6 @@ process{ } - // FREEBAYES - if (params.tools && params.tools.split(',').contains('freebayes')) { - withName: '.*:PAIR_VARIANT_CALLING:RUN_FREEBAYES_SOMATIC:FREEBAYES' { - ext.args = "--pooled-continuous \ - --pooled-discrete \ - --genotype-qualities \ - --report-genotype-likelihood-max \ - --allele-balance-priors-off \ - --min-alternate-fraction 0.03 \ - --min-repeat-entropy 1 \ - --min-alternate-count 2 " - } - } - - // MUTECT2 - if (params.tools && params.tools.split(',').contains('mutect2')) { - withName: 'MUTECT2_PAIRED' { - ext.args = { params.ignore_soft_clipped_bases ? - "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" : - "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" } - } - } - // MSISENSORPRO withName: 'MSISENSORPRO_MSI_SOMATIC' { publishDir = [ diff --git a/conf/modules/mutect2.config b/conf/modules/mutect2.config new file mode 100644 index 0000000000..63572f7931 --- /dev/null +++ b/conf/modules/mutect2.config @@ -0,0 +1,123 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). 
+ ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MUTECT2 + +process { + +// TUMOR_ONLY_VARIANT_CALLING + withName: 'CALCULATECONTAMINATION' { + ext.prefix = { "${meta.id}.mutect2" } + ext.args = { "-tumor-segmentation ${meta.id}.mutect2.segmentation.table" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } + ] + } + + withName: 'MERGE_MUTECT2.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'FILTERMUTECTCALLS.*' { + ext.prefix = {"${meta.id}.mutect2.filtered"} + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : "mutect2/${meta.id}/${filename}" } + ] + } + + withName: 'GATHERPILEUPSUMMARIES' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATHERPILEUPSUMMARIES_.*' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.tumor_id}_vs_${meta.normal_id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GETPILEUPSUMMARIES.*' { + ext.prefix = { meta.num_intervals <= 1 ? 
"${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*.table", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.id}/${it}" } + ] + } + + withName: 'GETPILEUPSUMMARIES_.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*.table", + saveAs: { meta.num_intervals > 1 ? null : "mutect2/${meta.tumor_id}_vs_${meta.normal_id}/${it}" } + ] + } + + withName: 'LEARNREADORIENTATIONMODEL' { + ext.prefix = { "${meta.id}.mutect2.artifactprior" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MERGEMUTECTSTATS' { + ext.prefix = { "${meta.id}.mutect2" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/mutect2/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GATK4_MUTECT2' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.mutect2" : "${meta.id}.mutect2.${intervals.simpleName}" } + ext.when = { params.tools && params.tools.split(',').contains('mutect2') } + ext.args = { params.ignore_soft_clipped_bases ? "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" : "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi,stats}", + saveAs: { meta.num_intervals > 1 ? 
null : "mutect2/${meta.id}/${it}" } + ] + } + +// PAIR_VARIANT_CALLING + if (params.tools && params.tools.split(',').contains('mutect2')) { + withName: 'MUTECT2_PAIRED' { + ext.args = { params.ignore_soft_clipped_bases ? + "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" : + "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.patient}_${meta.normal_id}" } + } + } +} diff --git a/nextflow.config b/nextflow.config index f9feebb9cf..015f558afb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -309,6 +309,7 @@ includeConfig 'conf/modules/freebayes.config' includeConfig 'conf/modules/haplotypecaller.config' includeConfig 'conf/modules/manta.config' includeConfig 'conf/modules/mpileup.config' +includeConfig 'conf/modules/mutect2.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index e873f8523f..a9131e191d 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -177,6 +177,20 @@ mpileup: - modules/nf-core/samtools/mpileup/main.nf - subworkflows/local/bam_variant_calling_mpileup/main.nf +## mutect2 +mutect2: + - conf/modules/mutect2.config + - modules/nf-core/gatk4/calculatecontamination/main.nf + - modules/nf-core/gatk4/filtermutectcalls/main.nf + - modules/nf-core/gatk4/gatherpileupsummaries/main.nf + - modules/nf-core/gatk4/getpileupsummaries/main.nf + - modules/nf-core/gatk4/learnreadorientationmodel/main.nf + - modules/nf-core/gatk4/mergemutectstats/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/gatk4/mutect2/main.nf + - subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf + - subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf + # annotate ## merge diff --git a/tests/test_mutect2.yml b/tests/test_mutect2.yml new file mode 100644 index 0000000000..7fc84938f6 --- /dev/null +++ b/tests/test_mutect2.yml @@ -0,0 
+1,87 @@ +- name: Run variant calling on tumor only sample with mutect2 + command: nextflow run main.nf -profile test,tools_tumoronly --tools mutect2 + tags: + - mutect2 + - tumor_only + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: d57c1beba9005e9790a573bd93398b72 + - path: results/multiqc + - path: results/reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt + md5sum: b0277a18599f8aa01e4b1b42ff0257b1 + - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary + md5sum: ef9bd9a2f41d8872ba25e5616e4c2a5e + - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count + md5sum: fe3ff1f0c2ead72f037552727438e00a + - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual + md5sum: 13cc608c3cdc83a12ec53ef78b5aa888 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.contamination.table + md5sum: 46c708c943b453da89a3da08acfdb2a7 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.filteringStats.tsv + md5sum: 9a8439d0bb5875f1e673cf592af85ffb + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileupsummaries.table + md5sum: 9afe42339f590937166edcf4746c22ec + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.segmentation.table + md5sum: f4643d9319bde4efbfbe516d6fb13052 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats + md5sum: 3cc40a35727af6c5223fb45678f3f172 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/mutect2 + should_exist: false +- name: Run variant calling on tumor only sample with mutect2 without intervals + command: nextflow run main.nf -profile test,tools_tumoronly --tools mutect2 --no_intervals + tags: + - mutect2 + - no_intervals + - tumor_only + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: d57c1beba9005e9790a573bd93398b72 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt + md5sum: a449e85411b3b295685f05915de6098d + - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary + md5sum: 5a833fd50e6efb26d1df2336eb0caf5e + - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count + md5sum: f5295a61da80f12babae74fe4e104aad + - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual + md5sum: 13cc608c3cdc83a12ec53ef78b5aa888 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.contamination.table + md5sum: 46c708c943b453da89a3da08acfdb2a7 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.filteringStats.tsv + md5sum: e4eac0c602dd25aa61a6dc26a2b61844 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileupsummaries.table + md5sum: fe35b6bc041f2df8bd1f23420af3ddf9 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.segmentation.table + md5sum: f4643d9319bde4efbfbe516d6fb13052 + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats + md5sum: 55ed641e16089afb33cdbc478e202d3d + - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/mutect2 + should_exist: false diff --git a/tests/tools.yml b/tests/tools.yml index 07d3325562..65b9cc3a74 100644 --- a/tests/tools.yml +++ b/tests/tools.yml @@ -1,90 +1,3 @@ -- name: Run variant calling on tumor only sample with mutect2 - command: nextflow run main.nf -profile test,tools_tumoronly --tools mutect2 - tags: - - mutect2 - - tumor_only - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: d57c1beba9005e9790a573bd93398b72 - - path: results/multiqc - - path: results/reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt - md5sum: b0277a18599f8aa01e4b1b42ff0257b1 - - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary - md5sum: ef9bd9a2f41d8872ba25e5616e4c2a5e - - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count - md5sum: fe3ff1f0c2ead72f037552727438e00a - - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual - md5sum: 13cc608c3cdc83a12ec53ef78b5aa888 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz 
- # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.contamination.table - md5sum: 46c708c943b453da89a3da08acfdb2a7 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.filteringStats.tsv - md5sum: 9a8439d0bb5875f1e673cf592af85ffb - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileupsummaries.table - md5sum: 9afe42339f590937166edcf4746c22ec - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.segmentation.table - md5sum: f4643d9319bde4efbfbe516d6fb13052 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats - md5sum: 3cc40a35727af6c5223fb45678f3f172 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/mutect2 - should_exist: false -- name: Run variant calling on tumor only sample with mutect2 without intervals - command: nextflow run main.nf -profile test,tools_tumoronly --tools mutect2 --no_intervals - tags: - - mutect2 - - no_intervals - - tumor_only - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: d57c1beba9005e9790a573bd93398b72 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt - md5sum: a449e85411b3b295685f05915de6098d - - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary - md5sum: 5a833fd50e6efb26d1df2336eb0caf5e - - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count - md5sum: f5295a61da80f12babae74fe4e104aad - - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual - md5sum: 13cc608c3cdc83a12ec53ef78b5aa888 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.contamination.table - md5sum: 46c708c943b453da89a3da08acfdb2a7 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.filteringStats.tsv - md5sum: e4eac0c602dd25aa61a6dc26a2b61844 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.filtered.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileupsummaries.table - md5sum: fe35b6bc041f2df8bd1f23420af3ddf9 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.segmentation.table - md5sum: f4643d9319bde4efbfbe516d6fb13052 - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.stats - md5sum: 55ed641e16089afb33cdbc478e202d3d - - path: results/variant_calling/mutect2/sample2/sample2.mutect2.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/mutect2 - should_exist: false - name: Run variant calling on somatic sample with msisensor-pro command: nextflow run main.nf -profile test,tools_somatic --tools msisensorpro tags: From cfc634f272d87cb20a807c360a89f64f178e1933 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 16:39:09 +0100 Subject: [PATCH 36/49] msisensorpro + strelka + strelka_bp + tiddit --- conf/modules/ascat.config | 36 ++ conf/modules/modules.config | 117 +----- conf/modules/msisensorpro.config | 25 ++ conf/modules/strelka.config | 53 +++ conf/modules/tiddit.config | 58 +++ nextflow.config | 5 + tests/config/pytest_tags.yml | 34 ++ tests/test_msisensorpro.yml | 14 + tests/{tools.yml => test_strelka.yml} | 369 ++++-------------- tests/test_strelka_bp.yml | 187 +++++++++ tests/test_tiddit.yml | 98 +++++ ...s_manually.yml => test_tools_manually.yml} | 2 - 12 files changed, 584 insertions(+), 414 deletions(-) create mode 100644 conf/modules/ascat.config create mode 100644 conf/modules/msisensorpro.config create mode 100644 conf/modules/strelka.config create mode 100644 conf/modules/tiddit.config create mode 100644 tests/test_msisensorpro.yml rename tests/{tools.yml => test_strelka.yml} (51%) create mode 100644 tests/test_strelka_bp.yml create mode 100644 tests/test_tiddit.yml rename tests/{tools_manually.yml => test_tools_manually.yml} (99%) diff --git 
a/conf/modules/ascat.config b/conf/modules/ascat.config new file mode 100644 index 0000000000..9ee66663a0 --- /dev/null +++ b/conf/modules/ascat.config @@ -0,0 +1,36 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// ASCAT + +process { + + withName: 'ASCAT' { + ext.args = {[ + "gender": meta.sex, + "genomeVersion": params.ascat_genome, + "purity": params.ascat_purity, + "ploidy": params.ascat_ploidy, + "minCounts": params.ascat_min_counts, + "chrom_names": meta.sex == 'XX' ? 
"c(1:22, 'X')" : "c(1:22, 'X', 'Y')", + "min_base_qual": params.ascat_min_base_qual, + "min_map_qual": params.ascat_min_map_qual + ]} + ext.when = { params.tools && params.tools.split(',').contains('ascat') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/ascat/${meta.id}/" }, + pattern: "*{png,cnvs.txt,metrics.txt,purityploidy.txt,segments.txt,LogR.txt,BAF.txt}" + ] + } +} diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 8b03da3da8..0c36169370 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -12,7 +12,6 @@ */ process { - publishDir = [ mode: params.publish_dir_mode, path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, @@ -29,7 +28,6 @@ process { } // QC - withName: 'FASTQC' { ext.args = '--quiet' ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('fastqc')) } @@ -88,121 +86,8 @@ process { ] } } -} - -// VARIANT CALLING - -process{ - - // STRELKA - withName: 'MERGE_STRELKA.*' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - withName: 'MERGE_STRELKA' { - ext.prefix = {"${meta.id}.strelka.variants"} - } - withName: 'MERGE_STRELKA_GENOME' { - ext.prefix = {"${meta.id}.strelka.genome"} - } - withName: 'STRELKA_.*' { - ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.simpleName}" } - ext.args = { params.wes ? "--exome" : "" } - ext.when = { params.tools && params.tools.split(',').contains('strelka') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}", - saveAs: { meta.num_intervals > 1 ? 
null : "strelka/${meta.id}/${it}" } - ] - } - - // TIDDIT - withName: 'TIDDIT_SV' { - ext.when = { params.tools && params.tools.split(',').contains('tiddit') } - ext.args = { bwa_index ? "" : "--skip_assembly" } - ext.prefix = { "${meta.id}.tiddit" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, - pattern: "*tab", - ] - } - withName : 'TABIX_BGZIP_TIDDIT_SV' { - ext.prefix = { "${meta.id}.tiddit" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, - pattern: "*{vcf.gz,vcf.gz.tbi}" - ] - } - -// PAIR_VARIANT_CALLING - - // ASCAT - withName: 'ASCAT' { - - ext.args = {[ - "gender": meta.sex, - "genomeVersion": params.ascat_genome, - "purity": params.ascat_purity, - "ploidy": params.ascat_ploidy, - "minCounts": params.ascat_min_counts, - "chrom_names": meta.sex == 'XX' ? "c(1:22, 'X')" : "c(1:22, 'X', 'Y')", - "min_base_qual": params.ascat_min_base_qual, - "min_map_qual": params.ascat_min_map_qual - ]} - ext.when = { params.tools && params.tools.split(',').contains('ascat') } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/ascat/${meta.id}/" }, - pattern: "*{png,cnvs.txt,metrics.txt,purityploidy.txt,segments.txt,LogR.txt,BAF.txt}" - ] - - } - - // MSISENSORPRO - withName: 'MSISENSORPRO_MSI_SOMATIC' { - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" }, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - // STRELKA - withName: 'MERGE_STRELKA_INDELS' { - ext.prefix = {"${meta.id}.strelka.somatic_indels"} - } - withName: 'MERGE_STRELKA_SNVS' { - ext.prefix = {"${meta.id}.strelka.somatic_snvs"} - } - - // TIDDIT - if (params.tools && params.tools.split(',').contains('tiddit')) { - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:TIDDIT_NORMAL:TABIX_BGZIP_TIDDIT_SV' { - ext.prefix = {"${meta.id}.tiddit.normal"} - } - - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:TIDDIT_TUMOR:TABIX_BGZIP_TIDDIT_SV' { - ext.prefix = {"${meta.id}.tiddit.tumor"} - } - - // SVDB - withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:SVDB_MERGE' { - ext.prefix = { "${meta.id}.tiddit" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, - pattern: "*vcf.gz" - ] - } - } - // VCF QC + // VCF withName: 'BCFTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } ext.prefix = { "${vcf.baseName.minus(".vcf")}" } diff --git a/conf/modules/msisensorpro.config b/conf/modules/msisensorpro.config new file mode 100644 index 0000000000..574068e6b4 --- /dev/null +++ b/conf/modules/msisensorpro.config @@ -0,0 +1,25 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// MSISENSORPRO + +process { + + withName: 'MSISENSORPRO_MSI_SOMATIC' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/msisensorpro/${meta.id}/" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/strelka.config b/conf/modules/strelka.config new file mode 100644 index 0000000000..e4fb6663db --- /dev/null +++ b/conf/modules/strelka.config @@ -0,0 +1,53 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// STRELKA + +process { + + withName: 'STRELKA_.*' { + ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}.strelka" : "${meta.id}.strelka.${target_bed.simpleName}" } + ext.args = { params.wes ? "--exome" : "" } + ext.when = { params.tools && params.tools.split(',').contains('strelka') } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}", + saveAs: { meta.num_intervals > 1 ? 
null : "strelka/${meta.id}/${it}" } + ] + } + + withName: 'MERGE_STRELKA.*' { + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/strelka/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + withName: 'MERGE_STRELKA' { + ext.prefix = {"${meta.id}.strelka.variants"} + } + + withName: 'MERGE_STRELKA_GENOME' { + ext.prefix = {"${meta.id}.strelka.genome"} + } + + // PAIR_VARIANT_CALLING + withName: 'MERGE_STRELKA_INDELS' { + ext.prefix = {"${meta.id}.strelka.somatic_indels"} + } + withName: 'MERGE_STRELKA_SNVS' { + ext.prefix = {"${meta.id}.strelka.somatic_snvs"} + } +} diff --git a/conf/modules/tiddit.config b/conf/modules/tiddit.config new file mode 100644 index 0000000000..4e8c2962ca --- /dev/null +++ b/conf/modules/tiddit.config @@ -0,0 +1,58 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// TIDDIT + +process { + + withName: 'TIDDIT_SV' { + ext.when = { params.tools && params.tools.split(',').contains('tiddit') } + ext.args = { bwa_index ? 
"" : "--skip_assembly" } + ext.prefix = { "${meta.id}.tiddit" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, + pattern: "*tab", + ] + } + + withName : 'TABIX_BGZIP_TIDDIT_SV' { + ext.prefix = { "${meta.id}.tiddit" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, + pattern: "*{vcf.gz,vcf.gz.tbi}" + ] + } + + // PAIR_VARIANT_CALLING + if (params.tools && params.tools.split(',').contains('tiddit')) { + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:TIDDIT_NORMAL:TABIX_BGZIP_TIDDIT_SV' { + ext.prefix = {"${meta.id}.tiddit.normal"} + } + + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:TIDDIT_TUMOR:TABIX_BGZIP_TIDDIT_SV' { + ext.prefix = {"${meta.id}.tiddit.tumor"} + } + + // SVDB + withName: 'NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_SOMATIC_ALL:BAM_VARIANT_CALLING_SOMATIC_TIDDIT:SVDB_MERGE' { + ext.prefix = { "${meta.id}.tiddit" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/tiddit/${meta.id}/" }, + pattern: "*vcf.gz" + ] + } + } +} diff --git a/nextflow.config b/nextflow.config index 015f558afb..dd886f8262 100644 --- a/nextflow.config +++ b/nextflow.config @@ -286,6 +286,7 @@ manifest { // Load modules.config for DSL2 module specific options includeConfig 'conf/modules/modules.config' + // Load more modules specific config for DSL2 module specific options // prepare reference @@ -302,6 +303,7 @@ includeConfig 'conf/modules/prepare_recalibration.config' includeConfig 'conf/modules/recalibrate.config' // variant calling +includeConfig 'conf/modules/ascat.config' includeConfig 'conf/modules/cnvkit.config' includeConfig 'conf/modules/controlfreec.config' includeConfig 'conf/modules/deepvariant.config' @@ -309,7 +311,10 @@ includeConfig 'conf/modules/freebayes.config' includeConfig 
'conf/modules/haplotypecaller.config' includeConfig 'conf/modules/manta.config' includeConfig 'conf/modules/mpileup.config' +includeConfig 'conf/modules/msisensorpro.config' includeConfig 'conf/modules/mutect2.config' +includeConfig 'conf/modules/strelka.config' +includeConfig 'conf/modules/tiddit.config' //annotate includeConfig 'conf/modules/annotate.config' diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index a9131e191d..2d7519af04 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -177,6 +177,12 @@ mpileup: - modules/nf-core/samtools/mpileup/main.nf - subworkflows/local/bam_variant_calling_mpileup/main.nf +## msisensorpro +msisensorpro: + - conf/modules/msisensorpro.config + - modules/nf-core/msisensorpro/scan/main.nf + - modules/nf-core/msisensorpro/msi_somatic/main.nf + ## mutect2 mutect2: - conf/modules/mutect2.config @@ -191,6 +197,34 @@ mutect2: - subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf - subworkflows/local/bam_variant_calling_tumor_only_mutect2/main.nf +## strelka +strelka: + - conf/modules/strelka.config + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/strelka/germline/main.nf + - modules/nf-core/strelka/somatic/main.nf + - subworkflows/local/bam_variant_calling_single_strelka/main.nf + - subworkflows/local/bam_variant_calling_somatic_strelka/main.nf + +## strelka_bp +strelka_bp: + - conf/modules/manta.config + - conf/modules/strelka.config + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/manta/somatic/main.nf + - modules/nf-core/strelka/somatic/main.nf + - subworkflows/local/bam_variant_calling_somatic_manta/main.nf + - subworkflows/local/bam_variant_calling_somatic_strelka/main.nf + +## tiddit +tiddit: + - conf/modules/tiddit.config + - modules/nf-core/svdb/merge/main.nf.nf + - modules/nf-core/tabix/bgziptabix/main.nf + - modules/nf-core/tiddit/sv/main.nf + - subworkflows/local/bam_variant_calling_single_tiddit/main.nf + - 
subworkflows/local/bam_variant_calling_somatic_tiddit/main.nf + # annotate ## merge diff --git a/tests/test_msisensorpro.yml b/tests/test_msisensorpro.yml new file mode 100644 index 0000000000..6d2de98243 --- /dev/null +++ b/tests/test_msisensorpro.yml @@ -0,0 +1,14 @@ +- name: Run variant calling on somatic sample with msisensor-pro + command: nextflow run main.nf -profile test,tools_somatic --tools msisensorpro + tags: + - msisensorpro + - somatic + - variant_calling + files: + - path: results/multiqc + - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3 + - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_dis + - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_germline + - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_somatic + - path: results/msisensorpro + should_exist: false diff --git a/tests/tools.yml b/tests/test_strelka.yml similarity index 51% rename from tests/tools.yml rename to tests/test_strelka.yml index 65b9cc3a74..bb24ad1fb0 100644 --- a/tests/tools.yml +++ b/tests/test_strelka.yml @@ -1,16 +1,80 @@ -- name: Run variant calling on somatic sample with msisensor-pro - command: nextflow run main.nf -profile test,tools_somatic --tools msisensorpro +- name: Skip variant calling on matched normal + command: nextflow run main.nf -profile test,variantcalling_channels --tools strelka --only_paired_variant_calling tags: - - msisensorpro - somatic - - variant_calling + - strelka + - variantcalling_channel files: - path: results/multiqc - - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3 - - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_dis - - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_germline - - path: results/variant_calling/msisensorpro/sample4_vs_sample3/sample4_vs_sample3_somatic - - path: results/msisensorpro + - path: 
results/reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt + md5sum: fa17527ede69d65762bd78fe86fbe50a + - path: results/reports/bcftools/strelka/sample2/sample2.strelka.variants.bcftools_stats.txt + md5sum: 93bf8f09d155e69418fff988e76cbc1d + - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt + md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt + md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 + - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary + md5sum: 2048a5de0201a6052c988a0189979a5f + - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count + md5sum: c5b7a8eda2526d899098439ae4c06a49 + - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual + md5sum: f01534408c3a87893b523de5f39deb0b + - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.FILTER.summary + md5sum: fa3112841a4575d104916027c8851b30 + - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.count + md5sum: d7f54d09d38af01a574a4930af21cfc9 + - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.qual + contains: + [ + "19 453 47848 0.00946748 11 50 0.22", + "56 456 47875 0.0095248 8 25 0.32", + "72 458 47880 0.00956558 6 20 0.3", + "314 463 47899 0.00966617 1 1 1", + ] + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary + md5sum: 3441628cd6550ed459ca1c3db989ceea + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: 
results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary + md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count + md5sum: fc7af1f534890c4ad3025588b3af62ae + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual + md5sum: c949f848859f138731898aac64a73eaf + - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample1/sample1.strelka.variants.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample2/sample2.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample2/sample2.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample2/sample2.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample2/sample2.strelka.variants.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/sample3/strelka/sample3.strelka.variants.vcf.gz + should_exist: false + - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi + should_exist: false + - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz + should_exist: false + - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi should_exist: false - name: Run variant calling on germline sample with strelka command: nextflow run main.nf -profile test,tools_germline --tools strelka @@ -249,290 +313,3 @@ # binary changes md5sums on reruns. - path: results/strelka should_exist: false -- name: Run variant calling on somatic sample with strelka & manta (Strelka BP) - command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta - tags: - - somatic - - strelka - - strelka_bp - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: eff248896ca462b76c79749403e44f48 - - path: results/multiqc - - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 - - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt - md5sum: af8f05fd8a09e96c4c8850b6ef44729e - - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 - - path: 
results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary - md5sum: 2048a5de0201a6052c988a0189979a5f - - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count - md5sum: c5b7a8eda2526d899098439ae4c06a49 - - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual - md5sum: f01534408c3a87893b523de5f39deb0b - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary - md5sum: 
3441628cd6550ed459ca1c3db989ceea - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary - md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count - md5sum: fc7af1f534890c4ad3025588b3af62ae - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: c949f848859f138731898aac64a73eaf - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi - md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 - - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/manta - should_exist: false - - path: results/strelka - should_exist: false -- name: Run variant calling on somatic sample with & manta (Strelka BP) without intervals - command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta --no_intervals - tags: - - no_intervals - - somatic - - strelka - - strelka_bp - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: eff248896ca462b76c79749403e44f48 - - path: results/multiqc - - path: results/no_intervals.bed - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/no_intervals.bed.gz.tbi - md5sum: f3dac01ea66b95fe477446fde2d31489 - - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 - - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 
56c39772dc25e451d8209f608bb16e37 - - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt - md5sum: 572bc880d2bf64a00c8f86567c72f575 - - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 - - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 21b5d06e7e9f34a05b3d7554240f7669 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary - md5sum: 2b7be6ff481fddc655210b836587810d - - path: 
results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count - md5sum: 1481854d2a765f5641856ecf95ca4097 - - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual - md5sum: fd8e449a715922e24fe4ea9c89870432 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary - md5sum: 3441628cd6550ed459ca1c3db989ceea - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary - md5sum: 7a81b11aa29fec73d5bc872b7b58f8aa - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count - md5sum: a922c51ca3b2ea7cdcfa09e9c8c55d52 - - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: 75af7d1b1e5a2f1bdfe5b530e37a6cbb - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi - # binary changes md5sums on reruns. 
- - path: results/manta - should_exist: false - - path: results/strelka - should_exist: false -- name: Run variant calling on somatic sample with tiddit - command: nextflow run main.nf -profile test,tools_somatic --tools tiddit - tags: - - tiddit - - somatic - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: 76b499e35c128d67b0606ea561bf70e0 - - path: results/multiqc - - path: results/reports/bcftools/tiddit/sample3/sample3.tiddit.bcftools_stats.txt - md5sum: 47af91809c214102f723a7c58e9b8e72 - - path: results/reports/bcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.bcftools_stats.txt - md5sum: c071ebe8222acd2c76fc019838e8c230 - - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.TsTv.count - md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - - path: results/reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/tiddit/sample3/sample3.tiddit.ploidies.tab - md5sum: d65f8aa5bb136f77c23264640481f047 - - path: results/variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz - md5sum: 1aeb97037b73251bb79e48b355026c9d - - path: results/variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz - md5sum: 5ba8e30a9d831c05e31300b7d2c578ef - - path: 
results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.ploidies.tab - md5sum: d65f8aa5bb136f77c23264640481f047 - - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz - md5sum: 302cb980260b313573cf53067fd11120 - - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.vcf.gz - md5sum: f69f1fb9c246ac3bbc9270a2ea979557 - - path: results/tiddit - should_exist: false -- name: Run variant calling on germline sample with tiddit - command: nextflow run main.nf -profile test,tools_germline --tools tiddit - tags: - - tiddit - - germline - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: cd458ebee25e1fb1258d2f390e54c736 - - path: results/multiqc - - path: results/reports/bcftools/tiddit/sample1/sample1.tiddit.bcftools_stats.txt - md5sum: 6bde7b353b927f8e99f5d0f3b592e67b - - path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/tiddit/sample1/sample1.tiddit.ploidies.tab - md5sum: d65f8aa5bb136f77c23264640481f047 - - path: results/variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz - md5sum: 72cd7a6dc9dece29fdcb0a19f69e1cb5 - - path: results/variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/tiddit - should_exist: false -- name: Run variant calling on tumor_only 
sample with tiddit - command: nextflow run main.nf -profile test,tools_tumoronly --tools tiddit - tags: - - tiddit - - tumor_only - - variant_calling - files: - - path: results/csv/variantcalled.csv - md5sum: 15076bb78912fe51006e83934c376fc2 - - path: results/multiqc - - path: results/reports/bcftools/tiddit/sample2/sample2.tiddit.bcftools_stats.txt - md5sum: f2e3a2944f28287286f526d069d473e4 - - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.FILTER.summary - md5sum: 1ce42d34e4ae919afb519efc99146423 - - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/tiddit/sample2/sample2.tiddit.ploidies.tab - md5sum: 0f01874e20df10ecc7418d4537c7aa82 - - path: results/variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz - md5sum: d75c2688964d79a87d882a8ac9a9a93b - - path: results/variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 - - path: results/tiddit - should_exist: false diff --git a/tests/test_strelka_bp.yml b/tests/test_strelka_bp.yml new file mode 100644 index 0000000000..823928dd6c --- /dev/null +++ b/tests/test_strelka_bp.yml @@ -0,0 +1,187 @@ +- name: Run variant calling on somatic sample with strelka & manta (Strelka BP) + command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta + tags: + - somatic + - strelka_bp + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: eff248896ca462b76c79749403e44f48 + - path: results/multiqc + - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: baa05f77160e6cd985050790334c91e8 + - path: 
results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt + md5sum: 56c39772dc25e451d8209f608bb16e37 + - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt + md5sum: af8f05fd8a09e96c4c8850b6ef44729e + - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt + md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt + md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary 
+ md5sum: 2048a5de0201a6052c988a0189979a5f + - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count + md5sum: c5b7a8eda2526d899098439ae4c06a49 + - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual + md5sum: f01534408c3a87893b523de5f39deb0b + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary + md5sum: 3441628cd6550ed459ca1c3db989ceea + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary + md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count + md5sum: fc7af1f534890c4ad3025588b3af62ae + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual + md5sum: c949f848859f138731898aac64a73eaf + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi + md5sum: e7ca7e9fe76ce12198fd54ec9a64fad4 + - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi + # binary changes md5sums on reruns. 
+ - path: results/manta + should_exist: false + - path: results/strelka + should_exist: false +- name: Run variant calling on somatic sample with & manta (Strelka BP) without intervals + command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta --no_intervals + tags: + - no_intervals + - somatic + - strelka_bp + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: eff248896ca462b76c79749403e44f48 + - path: results/multiqc + - path: results/no_intervals.bed + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/no_intervals.bed.gz.tbi + md5sum: f3dac01ea66b95fe477446fde2d31489 + - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt + md5sum: baa05f77160e6cd985050790334c91e8 + - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt + md5sum: 56c39772dc25e451d8209f608bb16e37 + - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt + md5sum: 572bc880d2bf64a00c8f86567c72f575 + - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt + md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt + md5sum: 21b5d06e7e9f34a05b3d7554240f7669 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual + md5sum: 
bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary + md5sum: 2b7be6ff481fddc655210b836587810d + - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count + md5sum: 1481854d2a765f5641856ecf95ca4097 + - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual + md5sum: fd8e449a715922e24fe4ea9c89870432 + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary + md5sum: 3441628cd6550ed459ca1c3db989ceea + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary + md5sum: 7a81b11aa29fec73d5bc872b7b58f8aa + - path: 
results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count + md5sum: a922c51ca3b2ea7cdcfa09e9c8c55d52 + - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual + md5sum: 75af7d1b1e5a2f1bdfe5b530e37a6cbb + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample3/sample3.strelka.variants.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.vcf.gz.tbi + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/manta + should_exist: false + - path: results/strelka + should_exist: false diff --git a/tests/test_tiddit.yml b/tests/test_tiddit.yml new file mode 100644 index 0000000000..a7c8fa560f --- /dev/null +++ b/tests/test_tiddit.yml @@ -0,0 +1,98 @@ +- name: Run variant calling on somatic sample with tiddit + command: nextflow run main.nf -profile test,tools_somatic --tools tiddit + tags: + - tiddit + - somatic + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: 76b499e35c128d67b0606ea561bf70e0 + - path: results/multiqc + - path: results/reports/bcftools/tiddit/sample3/sample3.tiddit.bcftools_stats.txt + md5sum: 47af91809c214102f723a7c58e9b8e72 + - path: results/reports/bcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.bcftools_stats.txt + md5sum: c071ebe8222acd2c76fc019838e8c230 + - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.TsTv.count + md5sum: 8dcfdbcaac118df1d5ad407dd2af699f + - path: results/reports/vcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: 
results/variant_calling/tiddit/sample3/sample3.tiddit.ploidies.tab + md5sum: d65f8aa5bb136f77c23264640481f047 + - path: results/variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz + md5sum: 1aeb97037b73251bb79e48b355026c9d + - path: results/variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz + md5sum: 5ba8e30a9d831c05e31300b7d2c578ef + - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.ploidies.tab + md5sum: d65f8aa5bb136f77c23264640481f047 + - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz + md5sum: 302cb980260b313573cf53067fd11120 + - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.vcf.gz + md5sum: f69f1fb9c246ac3bbc9270a2ea979557 + - path: results/tiddit + should_exist: false +- name: Run variant calling on germline sample with tiddit + command: nextflow run main.nf -profile test,tools_germline --tools tiddit + tags: + - tiddit + - germline + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: cd458ebee25e1fb1258d2f390e54c736 + - path: results/multiqc + - path: results/reports/bcftools/tiddit/sample1/sample1.tiddit.bcftools_stats.txt + md5sum: 6bde7b353b927f8e99f5d0f3b592e67b + - path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: 
results/reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/tiddit/sample1/sample1.tiddit.ploidies.tab + md5sum: d65f8aa5bb136f77c23264640481f047 + - path: results/variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz + md5sum: 72cd7a6dc9dece29fdcb0a19f69e1cb5 + - path: results/variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/tiddit + should_exist: false +- name: Run variant calling on tumor_only sample with tiddit + command: nextflow run main.nf -profile test,tools_tumoronly --tools tiddit + tags: + - tiddit + - tumor_only + - variant_calling + files: + - path: results/csv/variantcalled.csv + md5sum: 15076bb78912fe51006e83934c376fc2 + - path: results/multiqc + - path: results/reports/bcftools/tiddit/sample2/sample2.tiddit.bcftools_stats.txt + md5sum: f2e3a2944f28287286f526d069d473e4 + - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.FILTER.summary + md5sum: 1ce42d34e4ae919afb519efc99146423 + - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/tiddit/sample2/sample2.tiddit.ploidies.tab + md5sum: 0f01874e20df10ecc7418d4537c7aa82 + - path: results/variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz + md5sum: d75c2688964d79a87d882a8ac9a9a93b + - path: results/variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz.tbi + md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + - path: results/tiddit + should_exist: false diff --git a/tests/tools_manually.yml b/tests/test_tools_manually.yml similarity index 99% rename from tests/tools_manually.yml rename to tests/test_tools_manually.yml index 833b1f8629..a751d7b706 100644 --- a/tests/tools_manually.yml +++ b/tests/test_tools_manually.yml @@ -34,7 +34,6 @@ 
md5sum: 4ac774bf5f1157e77426fd82f5a03013 - path: results/variant_calling/ascat/sample4_vs_sample3/sample4_vs_sample3.tumour_tumourLogR.txt md5sum: c6ef56244b8ac53d2c949437918523b2 - - name: Run variant calling on somatic sample with mutect2 without intervals command: nextflow run main.nf -profile test,tools_somatic --tools mutect2 --no_intervals tags: @@ -87,7 +86,6 @@ md5sum: 17d2091015d04cbd4a26b7a67dc659e6 - path: results/variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi # binary changes md5sums on reruns. - - name: Run variant calling on somatic sample with mutect2 command: nextflow run main.nf -profile test,tools_somatic --tools mutect2 tags: From 8ca37fac7597dd5d202c9d76d0be2ef378eb7236 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 17:01:06 +0100 Subject: [PATCH 37/49] test with singularity and conda as well --- .github/workflows/pytest-workflow.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index e5549f1ce2..0857401604 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -34,10 +34,7 @@ jobs: fail-fast: false matrix: tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] - # Only docker for now for faster testing while feature in dev - profile: ["docker"] - # TODO: Need to uncomment that and add exclude for some combination with conda (ie annotation) - # profile: ["docker", "singularity", "conda"] + profile: ["docker", "singularity", "conda"] env: NXF_ANSI_LOG: false steps: From 1694983d436d0cbc85f808d7c8c7846f0a500c48 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 17:30:18 +0100 Subject: [PATCH 38/49] update GHA workflows --- .github/workflows/ci.yml | 11 +++++------ .github/workflows/pytest-workflow.yml | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml 
index 0de1750e41..c58128e68a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,15 +2,12 @@ name: nf-core CI # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: push: - branches: - - dev + branches: [dev] pull_request: release: types: [published] -env: - NXF_ANSI_LOG: false - +# Cancel if a newer run is started concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true @@ -34,9 +31,11 @@ jobs: exclude: - NXF_VER: "latest-everything" profile: "conda" + env: + NXF_ANSI_LOG: false steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v2 diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index 0857401604..d73e2aa552 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -18,16 +18,14 @@ jobs: tags: ${{ steps.filter.outputs.changes }} steps: - uses: actions/checkout@v3 - - uses: dorny/paths-filter@v2 id: filter with: filters: "tests/config/pytest_tags.yml" test: - runs-on: ubuntu-20.04 - name: ${{ matrix.tags }} ${{ matrix.profile }} + runs-on: ubuntu-latest needs: changes if: needs.changes.outputs.tags != '[]' strategy: @@ -35,6 +33,12 @@ jobs: matrix: tags: ["${{ fromJson(needs.changes.outputs.tags) }}"] profile: ["docker", "singularity", "conda"] + NXF_VER: + - "21.10.3" + - "latest-everything" + exclude: + - NXF_VER: "latest-everything" + profile: "conda" env: NXF_ANSI_LOG: false steps: @@ -64,11 +68,9 @@ jobs: ${{ runner.os }}-nextflow- - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Set up Singularity if: matrix.profile == 'singularity' @@ -88,9 +90,7 @@ jobs: if: 
matrix.profile == 'conda' run: conda clean -a - # Test the module - name: Run pytest-workflow - # only use one thread for pytest-workflow to avoid race condition on conda cache. uses: Wandalen/wretry.action@v1.0.11 with: command: TMPDIR=~ PROFILE=${{ matrix.profile }} pytest --tag ${{ matrix.tags }} --symlink --kwdof --git-aware --color=yes From 09241c57d392529f9fed2a1c2f1a634629a369c5 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 19:28:59 +0100 Subject: [PATCH 39/49] remove tags --- tests/test_annotation_merge.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_annotation_merge.yml b/tests/test_annotation_merge.yml index a10b50c112..9924d41735 100644 --- a/tests/test_annotation_merge.yml +++ b/tests/test_annotation_merge.yml @@ -3,8 +3,6 @@ tags: - annotation - merge - - snpeff - - vep files: - path: results/annotation/test/test_snpEff_VEP.ann.vcf.gz # binary changes md5sums on reruns. @@ -45,8 +43,6 @@ tags: - annotation - merge - - snpeff - - vep files: - path: results/annotation/test/test_VEP.ann.vcf.gz # binary changes md5sums on reruns. 
From 7faac67df83b34ed6b2d521a63f9cbea2015fcd2 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 19:32:17 +0100 Subject: [PATCH 40/49] remove more conda tests --- .github/workflows/pytest-workflow.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index d73e2aa552..de9c1f82b2 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -37,8 +37,12 @@ jobs: - "21.10.3" - "latest-everything" exclude: - - NXF_VER: "latest-everything" - profile: "conda" + - profile: "conda" + NXF_VER: "latest-everything" + - profile: "conda" + tags: deepvariant + - profile: "conda" + tags: haplotypecaller env: NXF_ANSI_LOG: false steps: From 5c6b9958e0637111c1a1c9df1513de3a3625de1b Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 19:35:24 +0100 Subject: [PATCH 41/49] remove more tags --- tests/test_default.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_default.yml b/tests/test_default.yml index e16b82c027..3ed362d769 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -3,7 +3,6 @@ tags: - default - preprocessing - - strelka - variant_calling files: - path: results/csv/markduplicates.csv @@ -77,7 +76,6 @@ - default - preprocessing - save_mapped - - strelka - variant_calling files: - path: results/csv/mapped.csv @@ -153,7 +151,6 @@ - default - preprocessing - save_output_as_bam - - strelka - variant_calling files: - path: results/csv/markduplicates.csv @@ -387,7 +384,6 @@ - default - preprocessing - skip_qc - - strelka - variant_calling files: - path: results/csv/markduplicates.csv @@ -436,6 +432,7 @@ - name: Run default pipeline for tumor normal pair command: nextflow run main.nf -profile test,pair tags: + - default - preprocessing - tumor_normal_pair files: From e8c20a1821af19ba30760d168a57ece1195775a9 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 
20:04:01 +0100 Subject: [PATCH 42/49] update default --- .github/workflows/ci.yml | 5 +++++ tests/test_default.yml | 10 ---------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c58128e68a..0b93906351 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,11 @@ jobs: - "latest-everything" test: - "default" + - "save_mapped" + - "save_output_as_bam" + - "skip_markduplicates" + - "skip_qc" + - "tumor_normal_pair" profile: ["docker", "singularity", "conda"] exclude: - NXF_VER: "latest-everything" diff --git a/tests/test_default.yml b/tests/test_default.yml index 3ed362d769..b61faab8e6 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -73,7 +73,6 @@ - name: Run save_mapped command: nextflow run main.nf -profile test --save_mapped tags: - - default - preprocessing - save_mapped - variant_calling @@ -148,7 +147,6 @@ - name: Run save_output_as_bam command: nextflow run main.nf -profile test --save_output_as_bam tags: - - default - preprocessing - save_output_as_bam - variant_calling @@ -217,7 +215,6 @@ - name: Run default pipeline with skipping Markduplicates command: nextflow run main.nf -profile test,skip_markduplicates tags: - - default - preprocessing - skip_markduplicates files: @@ -272,10 +269,8 @@ - name: Run default pipeline with skipping Markduplicates with save_mapped command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped tags: - - default - preprocessing - save_mapped - - skip_markduplicates files: - path: results/csv/mapped.csv md5sum: ae97b7394ab53a6b60921ab06c713cd9 @@ -326,11 +321,8 @@ - name: Run default pipeline with skipping Markduplicates with save_mapped & save_output_as_bam command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped --save_output_as_bam tags: - - default - preprocessing - - save_mapped - save_output_as_bam - - skip_markduplicates files: - path: results/csv/mapped.csv md5sum: 
7f21bf40d3fbc248ee2ea3fdf0f7cdb2 @@ -381,7 +373,6 @@ - name: Run default pipeline with skipping all QC steps command: nextflow run main.nf -profile test --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools tags: - - default - preprocessing - skip_qc - variant_calling @@ -432,7 +423,6 @@ - name: Run default pipeline for tumor normal pair command: nextflow run main.nf -profile test,pair tags: - - default - preprocessing - tumor_normal_pair files: From 9184570c4f58b1177b3787c4a60c65cbd3795a12 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 20:17:02 +0100 Subject: [PATCH 43/49] less default tests --- .github/workflows/ci.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b93906351..d780f48500 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,16 @@ jobs: exclude: - NXF_VER: "latest-everything" profile: "conda" + - NXF_VER: "latest-everything" + test: "save_mapped" + - NXF_VER: "latest-everything" + test: "save_output_as_bam" + - NXF_VER: "latest-everything" + test: "skip_markduplicates" + - NXF_VER: "latest-everything" + test: "skip_qc" + - NXF_VER: "latest-everything" + test: "tumor_normal_pair" env: NXF_ANSI_LOG: false steps: From 819d284866cc9ff2a59d2764110bdcfd3b7b11bb Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 21:20:11 +0100 Subject: [PATCH 44/49] less default tests --- .github/workflows/ci.yml | 15 -- tests/config/pytest_tags.yml | 10 + tests/test_default.yml | 439 ------------------------------- tests/test_default_extended.yml | 446 ++++++++++++++++++++++++++++++++ 4 files changed, 456 insertions(+), 454 deletions(-) create mode 100644 tests/test_default_extended.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d780f48500..c58128e68a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,25 +27,10 @@ jobs: - "latest-everything" test: - "default" - - "save_mapped" 
- - "save_output_as_bam" - - "skip_markduplicates" - - "skip_qc" - - "tumor_normal_pair" profile: ["docker", "singularity", "conda"] exclude: - NXF_VER: "latest-everything" profile: "conda" - - NXF_VER: "latest-everything" - test: "save_mapped" - - NXF_VER: "latest-everything" - test: "save_output_as_bam" - - NXF_VER: "latest-everything" - test: "skip_markduplicates" - - NXF_VER: "latest-everything" - test: "skip_qc" - - NXF_VER: "latest-everything" - test: "tumor_normal_pair" env: NXF_ANSI_LOG: false steps: diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index 2d7519af04..465c216c48 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -1,3 +1,13 @@ +# default_extended + +default_extended: + - conf/modules/** + - modules/** + - subworkflows/** + - workflows/** + - nextflow.config + - main.nf + # preprocessing ## alignment_to_fastq diff --git a/tests/test_default.yml b/tests/test_default.yml index b61faab8e6..5d808516ee 100644 --- a/tests/test_default.yml +++ b/tests/test_default.yml @@ -70,442 +70,3 @@ should_exist: false - path: results/preprocessing/mapped/ should_exist: false -- name: Run save_mapped - command: nextflow run main.nf -profile test --save_mapped - tags: - - preprocessing - - save_mapped - - variant_calling - files: - - path: results/csv/mapped.csv - md5sum: ae97b7394ab53a6b60921ab06c713cd9 - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. 
- - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 - - path: 
results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - # conda changes md5sums for test. - - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary - md5sum: dd87f507da7de20d5318841af312493b - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi - - path: results/strelka - should_exist: false -- name: Run save_output_as_bam - command: nextflow run main.nf -profile test --save_output_as_bam - tags: - - preprocessing - - save_output_as_bam - - variant_calling - files: - - path: results/csv/markduplicates.csv - md5sum: 8679570b8db1937ee574fec36b25d7bc - - path: results/csv/markduplicates_no_table.csv - md5sum: 145154b6037e90448273fbe8e8dec5d3 - - path: results/csv/recalibrated.csv - md5sum: 3ddc20eb105fdcc483945afd7d7d238c - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.bam.bai - md5sum: 3add495a02547bd9e6882935d2f3e1f7 - - path: results/preprocessing/markduplicates/test/test.md.bam - md5sum: ebb72f603f016ce37964259c61625360 - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.bam.bai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 - - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - # conda changes md5sums for test. 
- - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary - md5sum: dd87f507da7de20d5318841af312493b - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi - - path: results/strelka - should_exist: false -- name: Run default pipeline with skipping Markduplicates - command: nextflow run main.nf -profile test,skip_markduplicates - tags: - - preprocessing - - skip_markduplicates - files: - - path: results/csv/mapped.csv - md5sum: ae97b7394ab53a6b60921ab06c713cd9 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 8de213f4c00fac61a1102633760493df - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 - - path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false - - path: results/preprocessing/mapped/test/test.sorted.bam - should_exist: false -- name: Run default pipeline with skipping Markduplicates with save_mapped - command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped - tags: - - preprocessing - - save_mapped - files: - - path: results/csv/mapped.csv - md5sum: ae97b7394ab53a6b60921ab06c713cd9 - - path: 
results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 8de213f4c00fac61a1102633760493df - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/reports/fastqc/test-test_L1 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 - - path: results/reports/samtools/test/test.sorted.cram.stats - 
md5sum: 59d921ed3970e19145fbae75966de3e3 - - path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false -- name: Run default pipeline with skipping Markduplicates with save_mapped & save_output_as_bam - command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped --save_output_as_bam - tags: - - preprocessing - - save_output_as_bam - files: - - path: results/csv/mapped.csv - md5sum: 7f21bf40d3fbc248ee2ea3fdf0f7cdb2 - - path: results/csv/recalibrated.csv - md5sum: 3ddc20eb105fdcc483945afd7d7d238c - - path: results/multiqc - - path: results/preprocessing/mapped/test/test.sorted.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/mapped/test/test.sorted.bam.bai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 8de213f4c00fac61a1102633760493df - - path: results/preprocessing/recalibrated/test/test.recal.bam - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.bam.bai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt - md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt - md5sum: 71e938314bba70f978271b2b56cb7ad3 - - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt - md5sum: 0786d56af0672eaef44b29f4ed5c12cb - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz - md5sum: b8719430e56a58f734884bb422a7b8fa - - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi - md5sum: e3235323f455414ee3acc7144e659caf - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 - - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 - - path: results/csv/markduplicates.csv - should_exist: false - - path: results/csv/markduplicates_no_table.csv - should_exist: false - - path: results/preprocessing/mapped/test/test.bam - should_exist: false -- name: Run default pipeline with skipping all QC steps - command: nextflow run main.nf -profile test --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools - tags: - - preprocessing - - skip_qc - - variant_calling - files: - - path: results/csv/markduplicates.csv - md5sum: 0d6120bb99e92f6810343270711ca53e - - path: results/csv/markduplicates_no_table.csv - 
md5sum: 2a2d3d4842befd4def39156463859ee3 - - path: results/csv/recalibrated.csv - md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/multiqc - should_exist: false - - path: results/reports/fastqc - should_exist: false - - path: results/reports/markduplicates - should_exist: false - - path: results/reports/mosdepth - should_exist: false - - path: results/reports/samtools - should_exist: false - - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - # conda changes md5sums for test. - - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary - md5sum: dd87f507da7de20d5318841af312493b - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count - md5sum: fa27f678965b7cba6a92efcd039f802a - - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi - # binary changes md5sums on reruns. - - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz - # binary changes md5sums on reruns. 
- - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi - - path: results/strelka - should_exist: false -- name: Run default pipeline for tumor normal pair - command: nextflow run main.nf -profile test,pair - tags: - - preprocessing - - tumor_normal_pair - files: - - path: results/csv/markduplicates.csv - md5sum: e8e587ac25253ff7ab8f1cc66d410c98 - - path: results/csv/markduplicates_no_table.csv - md5sum: 617574c9b607e5daaf4ad56d48982247 - - path: results/csv/recalibrated.csv - md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab - - path: results/multiqc - - path: results/preprocessing/markduplicates/test/test.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test/test.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test/test.recal.table - md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - - path: results/preprocessing/recalibrated/test/test.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test/test.recal.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai - # binary changes md5sums on reruns. - - path: results/preprocessing/recal_table/test2/test2.recal.table - md5sum: 0626cd4337eab79b38b5bc5c95e0c003 - - path: results/preprocessing/recalibrated/test2/test2.recal.cram - # binary changes md5sums on reruns. - - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai - # binary changes md5sums on reruns. 
- - path: results/reports/fastqc/test-test_L1 - - path: results/reports/fastqc/test2-test_L1 - - path: results/reports/markduplicates/test/test.md.cram.metrics - contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] - - path: results/reports/markduplicates/test2/test2.md.cram.metrics - contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] - - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.md.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt - md5sum: 76fa71922a3f748e507c2364c531dfcb - - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt - md5sum: abc5df85e302b79985627888870882da - - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt - md5sum: d536456436eb275159b8c6af83213d80 - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz - md5sum: 38fe39894abe62e38f8ac214cba64f2b - - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi - md5sum: b1c2a861f64e20a94108a6de3b76c582 - - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt - md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 - - path: results/reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt - md5sum: 38ff8b38c33b9231f047fea8ea830aae - - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt - md5sum: 8b991358768cade225470a07cd34f573 - - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz - md5sum: 
5d67bc6ea9f077abb4fdac3b087c6387 - - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz.csi - md5sum: d5f1c9389ecf52ba839e834780a94549 - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt - md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt - md5sum: 38ff8b38c33b9231f047fea8ea830aae - - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt - md5sum: 8b991358768cade225470a07cd34f573 - - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz - md5sum: 5d67bc6ea9f077abb4fdac3b087c6387 - - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi - md5sum: d5f1c9389ecf52ba839e834780a94549 - - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c - - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 - - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 - - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 2907543ab51cabd97318b15cf035c867 - - path: results/preprocessing/mapped/ - should_exist: false diff --git a/tests/test_default_extended.yml b/tests/test_default_extended.yml new file mode 100644 index 0000000000..44ef8237e4 --- /dev/null +++ b/tests/test_default_extended.yml @@ -0,0 +1,446 @@ +- name: Run save_mapped + command: nextflow run main.nf -profile test --save_mapped + tags: + - default_extended + - preprocessing + - save_mapped + - variant_calling + files: + - path: results/csv/mapped.csv + md5sum: ae97b7394ab53a6b60921ab06c713cd9 + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: 
results/preprocessing/mapped/test/test.sorted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: 
results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + # conda changes md5sums for test. + - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: dd87f507da7de20d5318841af312493b + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + - path: results/strelka + should_exist: false +- name: Run save_output_as_bam + command: nextflow run main.nf -profile test --save_output_as_bam + tags: + - default_extended + - preprocessing + - save_output_as_bam + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: 8679570b8db1937ee574fec36b25d7bc + - path: results/csv/markduplicates_no_table.csv + md5sum: 145154b6037e90448273fbe8e8dec5d3 + - path: results/csv/recalibrated.csv + md5sum: 3ddc20eb105fdcc483945afd7d7d238c + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.bam.bai + md5sum: 3add495a02547bd9e6882935d2f3e1f7 + - path: results/preprocessing/markduplicates/test/test.md.bam + md5sum: ebb72f603f016ce37964259c61625360 + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.bam.bai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + # conda changes md5sums for test. 
+ - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: dd87f507da7de20d5318841af312493b + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + - path: results/strelka + should_exist: false +- name: Run default pipeline with skipping Markduplicates + command: nextflow run main.nf -profile test,skip_markduplicates + tags: + - default_extended + - preprocessing + - skip_markduplicates + files: + - path: results/csv/mapped.csv + md5sum: ae97b7394ab53a6b60921ab06c713cd9 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 8de213f4c00fac61a1102633760493df + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + - path: results/reports/samtools/test/test.sorted.cram.stats + md5sum: 59d921ed3970e19145fbae75966de3e3 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false + - path: results/preprocessing/mapped/test/test.sorted.bam + should_exist: false +- name: Run default pipeline with skipping Markduplicates with save_mapped + command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped + tags: + - default_extended + - preprocessing + - save_mapped + files: + - path: results/csv/mapped.csv + md5sum: 
ae97b7394ab53a6b60921ab06c713cd9 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 8de213f4c00fac61a1102633760493df + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + - path: 
results/reports/samtools/test/test.sorted.cram.stats + md5sum: 59d921ed3970e19145fbae75966de3e3 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false +- name: Run default pipeline with skipping Markduplicates with save_mapped & save_output_as_bam + command: nextflow run main.nf -profile test,skip_markduplicates --save_mapped --save_output_as_bam + tags: + - default_extended + - preprocessing + - save_output_as_bam + files: + - path: results/csv/mapped.csv + md5sum: 7f21bf40d3fbc248ee2ea3fdf0f7cdb2 + - path: results/csv/recalibrated.csv + md5sum: 3ddc20eb105fdcc483945afd7d7d238c + - path: results/multiqc + - path: results/preprocessing/mapped/test/test.sorted.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/mapped/test/test.sorted.bam.bai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 8de213f4c00fac61a1102633760493df + - path: results/preprocessing/recalibrated/test/test.recal.bam + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.bam.bai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/mosdepth/test/test.sorted.mosdepth.global.dist.txt + md5sum: 48aa760b024d7c0ddd5a66f89a1cb544 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.region.dist.txt + md5sum: 71e938314bba70f978271b2b56cb7ad3 + - path: results/reports/mosdepth/test/test.sorted.mosdepth.summary.txt + md5sum: 0786d56af0672eaef44b29f4ed5c12cb + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz + md5sum: b8719430e56a58f734884bb422a7b8fa + - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi + md5sum: e3235323f455414ee3acc7144e659caf + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + - path: results/reports/samtools/test/test.sorted.cram.stats + md5sum: 59d921ed3970e19145fbae75966de3e3 + - path: results/csv/markduplicates.csv + should_exist: false + - path: results/csv/markduplicates_no_table.csv + should_exist: false + - path: results/preprocessing/mapped/test/test.bam + should_exist: false +- name: Run default pipeline with skipping all QC steps + command: nextflow run main.nf -profile test --skip_tools fastqc,markduplicates_report,mosdepth,multiqc,samtools + tags: + - default_extended + - preprocessing + - skip_qc + - variant_calling + files: + - path: results/csv/markduplicates.csv + md5sum: 0d6120bb99e92f6810343270711ca53e + - path: 
results/csv/markduplicates_no_table.csv + md5sum: 2a2d3d4842befd4def39156463859ee3 + - path: results/csv/recalibrated.csv + md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/multiqc + should_exist: false + - path: results/reports/fastqc + should_exist: false + - path: results/reports/markduplicates + should_exist: false + - path: results/reports/mosdepth + should_exist: false + - path: results/reports/samtools + should_exist: false + - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt + # conda changes md5sums for test. + - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary + md5sum: dd87f507da7de20d5318841af312493b + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count + md5sum: fa27f678965b7cba6a92efcd039f802a + - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual + md5sum: bc68ae4e688e9fb772b457069e604883 + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz + # binary changes md5sums on reruns. 
+ - path: results/variant_calling/strelka/test/test.strelka.variants.vcf.gz.tbi + - path: results/strelka + should_exist: false +- name: Run default pipeline for tumor normal pair + command: nextflow run main.nf -profile test,pair + tags: + - default_extended + - preprocessing + - tumor_normal_pair + files: + - path: results/csv/markduplicates.csv + md5sum: e8e587ac25253ff7ab8f1cc66d410c98 + - path: results/csv/markduplicates_no_table.csv + md5sum: 617574c9b607e5daaf4ad56d48982247 + - path: results/csv/recalibrated.csv + md5sum: 008dff17e2a0d96ef9c1cae12fcab6ab + - path: results/multiqc + - path: results/preprocessing/markduplicates/test/test.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test/test.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test/test.recal.table + md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe + - path: results/preprocessing/recalibrated/test/test.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test/test.recal.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/markduplicates/test2/test2.md.cram.crai + # binary changes md5sums on reruns. + - path: results/preprocessing/recal_table/test2/test2.recal.table + md5sum: 0626cd4337eab79b38b5bc5c95e0c003 + - path: results/preprocessing/recalibrated/test2/test2.recal.cram + # binary changes md5sums on reruns. + - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai + # binary changes md5sums on reruns. 
+ - path: results/reports/fastqc/test-test_L1 + - path: results/reports/fastqc/test2-test_L1 + - path: results/reports/markduplicates/test/test.md.cram.metrics + contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] + - path: results/reports/markduplicates/test2/test2.md.cram.metrics + contains: ["test2 10103 880 35 523579 4837 2 0 0.408076 193306", "1.0 1 876 876", "100.0 80.515303 0 0"] + - path: results/reports/mosdepth/test/test.md.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.md.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.md.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.md.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.md.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test/test.recal.mosdepth.global.dist.txt + md5sum: 76fa71922a3f748e507c2364c531dfcb + - path: results/reports/mosdepth/test/test.recal.mosdepth.region.dist.txt + md5sum: abc5df85e302b79985627888870882da + - path: results/reports/mosdepth/test/test.recal.mosdepth.summary.txt + md5sum: d536456436eb275159b8c6af83213d80 + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz + md5sum: 38fe39894abe62e38f8ac214cba64f2b + - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi + md5sum: b1c2a861f64e20a94108a6de3b76c582 + - path: results/reports/mosdepth/test2/test2.md.mosdepth.global.dist.txt + md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 + - path: results/reports/mosdepth/test2/test2.md.mosdepth.region.dist.txt + md5sum: 38ff8b38c33b9231f047fea8ea830aae + - path: results/reports/mosdepth/test2/test2.md.mosdepth.summary.txt + md5sum: 8b991358768cade225470a07cd34f573 + - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz + md5sum: 
5d67bc6ea9f077abb4fdac3b087c6387 + - path: results/reports/mosdepth/test2/test2.md.regions.bed.gz.csi + md5sum: d5f1c9389ecf52ba839e834780a94549 + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.global.dist.txt + md5sum: 2020cf6dfc7ddca020c921dd9f0549b7 + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.region.dist.txt + md5sum: 38ff8b38c33b9231f047fea8ea830aae + - path: results/reports/mosdepth/test2/test2.recal.mosdepth.summary.txt + md5sum: 8b991358768cade225470a07cd34f573 + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz + md5sum: 5d67bc6ea9f077abb4fdac3b087c6387 + - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi + md5sum: d5f1c9389ecf52ba839e834780a94549 + - path: results/reports/samtools/test/test.md.cram.stats + md5sum: 52411f93d9b5382342b40a351c2e313c + - path: results/reports/samtools/test/test.recal.cram.stats + md5sum: 811d834ff8849e69e7c5925dc2a34257 + - path: results/reports/samtools/test2/test2.md.cram.stats + md5sum: 60152dbf1e109d4c407c151204388109 + - path: results/reports/samtools/test2/test2.recal.cram.stats + md5sum: 2907543ab51cabd97318b15cf035c867 + - path: results/preprocessing/mapped/ + should_exist: false From f4953699f5f27b25142a52f77d7212127908754f Mon Sep 17 00:00:00 2001 From: maxulysse Date: Thu, 3 Nov 2022 22:18:19 +0100 Subject: [PATCH 45/49] exclude conda from annotation tests --- .github/workflows/pytest-workflow.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index de9c1f82b2..3e322633c0 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -43,6 +43,12 @@ jobs: tags: deepvariant - profile: "conda" tags: haplotypecaller + - profile: "conda" + tags: merge + - profile: "conda" + tags: snpeff + - profile: "conda" + tags: vep env: NXF_ANSI_LOG: false steps: From c31691f2e353e6fb75a6a533cd6824fd6a5d6f82 Mon Sep 17 00:00:00 2001 From: maxulysse 
Date: Fri, 4 Nov 2022 10:40:43 +0100 Subject: [PATCH 46/49] exclude more tests + remove md5sum for some conda tests --- .github/workflows/pytest-workflow.yml | 4 ++ tests/test_aligner_bwamem.yml | 8 +-- tests/test_aligner_bwamem2.yml | 8 +-- tests/test_aligner_dragmap.yml | 8 +-- tests/test_alignment_to_fastq.yml | 4 +- tests/test_cnvkit.yml | 10 ++-- tests/test_default_extended.yml | 16 +++--- tests/test_fastp.yml | 20 +++---- tests/test_freebayes.yml | 36 ++++++------- tests/test_gatk4_spark.yml | 8 +-- tests/test_intervals.yml | 12 ++--- tests/test_manta.yml | 20 +++---- tests/test_markduplicates_from_bam.yml | 8 +-- tests/test_markduplicates_from_cram.yml | 8 +-- tests/test_mutect2.yml | 8 +-- tests/test_prepare_recalibration_from_bam.yml | 6 +-- .../test_prepare_recalibration_from_cram.yml | 6 +-- tests/test_recalibrate_from_bam.yml | 4 +- tests/test_recalibrate_from_cram.yml | 4 +- tests/test_strelka.yml | 52 +++++++++---------- tests/test_strelka_bp.yml | 48 ++++++++--------- tests/test_tiddit.yml | 30 +++++------ 22 files changed, 166 insertions(+), 162 deletions(-) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index 3e322633c0..ad93edfc35 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -39,6 +39,8 @@ jobs: exclude: - profile: "conda" NXF_VER: "latest-everything" + - profile: "conda" + tags: umi - profile: "conda" tags: deepvariant - profile: "conda" @@ -49,6 +51,8 @@ jobs: tags: snpeff - profile: "conda" tags: vep + - profile: "singularity" + tags: merge env: NXF_ANSI_LOG: false steps: diff --git a/tests/test_aligner_bwamem.yml b/tests/test_aligner_bwamem.yml index feaacf5435..49f6c145a9 100644 --- a/tests/test_aligner_bwamem.yml +++ b/tests/test_aligner_bwamem.yml @@ -33,7 +33,7 @@ - path: results/reference/bwa/genome.sa md5sum: e7cff62b919448a3a3d0fe4aaf427594 - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi - md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 + 
# conda changes md5sums for test. - path: results/reference/dict/genome.dict md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 - path: results/reference/fai/genome.fasta.fai @@ -45,7 +45,7 @@ - path: results/reference/intervals/genome.bed md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi - md5sum: 1bb7ab8f22eb798efd796439d3b29b7a + # conda changes md5sums for test. - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] @@ -70,6 +70,6 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b1c2a861f64e20a94108a6de3b76c582 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 + # conda changes md5sums for test. diff --git a/tests/test_aligner_bwamem2.yml b/tests/test_aligner_bwamem2.yml index c93dab0339..355d89a369 100644 --- a/tests/test_aligner_bwamem2.yml +++ b/tests/test_aligner_bwamem2.yml @@ -33,7 +33,7 @@ - path: results/reference/bwamem2/genome.fasta.pac md5sum: 8569fbdb2c98c6fb16dfa73d8eacb070 - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi - md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 + # conda changes md5sums for test. - path: results/reference/dict/genome.dict md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 - path: results/reference/fai/genome.fasta.fai @@ -45,7 +45,7 @@ - path: results/reference/intervals/genome.bed md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi - md5sum: 1bb7ab8f22eb798efd796439d3b29b7a + # conda changes md5sums for test. 
- path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] @@ -70,6 +70,6 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b1c2a861f64e20a94108a6de3b76c582 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 + # conda changes md5sums for test. diff --git a/tests/test_aligner_dragmap.yml b/tests/test_aligner_dragmap.yml index 9950a62dcd..6650f8aef1 100644 --- a/tests/test_aligner_dragmap.yml +++ b/tests/test_aligner_dragmap.yml @@ -23,7 +23,7 @@ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai # binary changing on reruns - path: results/reference/dbsnp/dbsnp_146.hg38.vcf.gz.tbi - md5sum: 628232d0c870f2dbf73c3e81aff7b4b4 + # conda changes md5sums for test. - path: results/reference/dict/genome.dict md5sum: 2433fe2ba31257337bf4c4bd4cb8da15 - path: results/reference/dragmap/hash_table.cfg @@ -60,7 +60,7 @@ - path: results/reference/intervals/genome.bed md5sum: a87dc7d20ebca626f65cc16ff6c97a3e - path: results/reference/known_indels/mills_and_1000G.indels.vcf.gz.tbi - md5sum: 1bb7ab8f22eb798efd796439d3b29b7a + # conda changes md5sums for test. - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["LB0 13607 543 161 518779 6410 0 0 0.436262"] @@ -85,6 +85,6 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: d5f1c9389ecf52ba839e834780a94549 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 9f64e86202f2291b33c0e8c3e7981193 + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a63f247fb403d8679c8f96f3a128aba0 + # conda changes md5sums for test. 
diff --git a/tests/test_alignment_to_fastq.yml b/tests/test_alignment_to_fastq.yml index af5be3b592..54d8b25b24 100644 --- a/tests/test_alignment_to_fastq.yml +++ b/tests/test_alignment_to_fastq.yml @@ -45,8 +45,8 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: d0713716f63ac573f4a3385733e9a537 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: d6634d51a009e3da43b2349e424b2d9f + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 91915547ff9e654c106a42e5edfd9405 + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false diff --git a/tests/test_cnvkit.yml b/tests/test_cnvkit.yml index b32223906c..930d2cae94 100644 --- a/tests/test_cnvkit.yml +++ b/tests/test_cnvkit.yml @@ -16,7 +16,7 @@ - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-diagram.pdf # binary changes md5sums on reruns. - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted-scatter.png - md5sum: c96b97b9ce948daf3437ccecfd67a4a7 + # conda changes md5sums for test. - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn md5sum: fe1248aa91fad7769303bb4c031d55ca - path: results/variant_calling/cnvkit/sample3/test.paired_end.recalibrated.sorted.bintest.cns @@ -42,7 +42,7 @@ - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-diagram.pdf # binary changes md5sums on reruns. - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-scatter.png - md5sum: acd87dfb61db5910afaea34053aed561 + # conda changes md5sums for test. 
- path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn md5sum: 067115082c4af4b64d58c0dc3a3642e4 - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.bintest.cns @@ -73,7 +73,7 @@ - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted-diagram.pdf # binary changes md5sums on reruns. - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted-scatter.png - md5sum: 7f2d2380309d5e19c8942131890d0e39 + # conda changes md5sums for test. - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn md5sum: 067115082c4af4b64d58c0dc3a3642e4 - path: results/variant_calling/cnvkit/sample2/test2.paired_end.recalibrated.sorted.bintest.cns @@ -106,7 +106,7 @@ - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-diagram.pdf # binary changes md5sums on reruns. - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted-scatter.png - md5sum: c96b97b9ce948daf3437ccecfd67a4a7 + # conda changes md5sums for test. - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.antitargetcoverage.cnn md5sum: fe1248aa91fad7769303bb4c031d55ca - path: results/variant_calling/cnvkit/sample1/test.paired_end.recalibrated.sorted.bintest.cns @@ -143,7 +143,7 @@ - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-diagram.pdf # binary changes md5sums on reruns. - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted-scatter.png - md5sum: acd87dfb61db5910afaea34053aed561 + # conda changes md5sums for test. 
- path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.antitargetcoverage.cnn md5sum: 067115082c4af4b64d58c0dc3a3642e4 - path: results/variant_calling/cnvkit/sample4_vs_sample3/test2.paired_end.recalibrated.sorted.bintest.cns diff --git a/tests/test_default_extended.yml b/tests/test_default_extended.yml index 44ef8237e4..67df5f7b11 100644 --- a/tests/test_default_extended.yml +++ b/tests/test_default_extended.yml @@ -53,9 +53,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b1c2a861f64e20a94108a6de3b76c582 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt # conda changes md5sums for test. - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary @@ -89,9 +89,9 @@ md5sum: 3ddc20eb105fdcc483945afd7d7d238c - path: results/multiqc - path: results/preprocessing/markduplicates/test/test.md.bam.bai - md5sum: 3add495a02547bd9e6882935d2f3e1f7 + # conda changes md5sums for test. - path: results/preprocessing/markduplicates/test/test.md.bam - md5sum: ebb72f603f016ce37964259c61625360 + # conda changes md5sums for test. - path: results/preprocessing/recal_table/test/test.recal.table md5sum: 4ac774bf5f1157e77426fd82f5ac0fbe - path: results/preprocessing/recalibrated/test/test.recal.bam @@ -122,9 +122,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b1c2a861f64e20a94108a6de3b76c582 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. 
- path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt # conda changes md5sums for test. - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary @@ -435,9 +435,9 @@ - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi md5sum: d5f1c9389ecf52ba839e834780a94549 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 + # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.md.cram.stats md5sum: 60152dbf1e109d4c407c151204388109 - path: results/reports/samtools/test2/test2.recal.cram.stats diff --git a/tests/test_fastp.yml b/tests/test_fastp.yml index 36453b4e60..985ef5cc7d 100644 --- a/tests/test_fastp.yml +++ b/tests/test_fastp.yml @@ -13,9 +13,9 @@ md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - path: results/multiqc - path: results/preprocessing/fastp/test/test-test_L1_1.fastp.fastq.gz - md5sum: 325acd143b6fcbf92ca9e34f97d87158 + # conda changes md5sums for test. - path: results/preprocessing/fastp/test/test-test_L1_2.fastp.fastq.gz - md5sum: af73322b9742bce0dd7f767c5c676c0e + # conda changes md5sums for test. - path: results/preprocessing/markduplicates/test/test.md.cram # binary changes md5sums on reruns. - path: results/preprocessing/markduplicates/test/test.md.cram.crai @@ -51,9 +51,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: a5ad8f917979f62eacfff1461529dbaa - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 7382e028335a2b057cb54ea160c4be7b + # conda changes md5sums for test. 
- path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5adeae7e9ce068009e88bacba549096e + # conda changes md5sums for test. - name: Run split fastq module command: nextflow run main.nf -profile test,split_fastq tags: @@ -68,13 +68,13 @@ - path: results/csv/recalibrated.csv md5sum: 2d29d9e53894dcce96a1b5beb6ef3312 - path: results/preprocessing/fastp/test/0001.test-test_L1_1.fastp.fastq.gz - md5sum: dc56d2161f02a4a9f79fe2c4a646aadc + # conda changes md5sums for test. - path: results/preprocessing/fastp/test/0001.test-test_L1_2.fastp.fastq.gz - md5sum: 3ed01c8e26fcc25859322e4f9a07a217 + # conda changes md5sums for test. - path: results/preprocessing/fastp/test/0002.test-test_L1_1.fastp.fastq.gz - md5sum: 16c3b700adf22dbac28ffb6b63a9615b + # conda changes md5sums for test. - path: results/preprocessing/fastp/test/0002.test-test_L1_2.fastp.fastq.gz - md5sum: 7efff2539c8cb03e0cc5eaab442870e5 + # conda changes md5sums for test. - path: results/preprocessing/markduplicates/test/test.md.cram # binary changes md5sums on reruns. - path: results/preprocessing/markduplicates/test/test.md.cram.crai @@ -110,6 +110,6 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: 1b65ac38cd96783394e0cf9b7bdd5bc2 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 44c873922c844b85c0920d22c98b08cf + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ad46c4c701943dfc67604ed34c1c69e + # conda changes md5sums for test. diff --git a/tests/test_freebayes.yml b/tests/test_freebayes.yml index 611102b745..245f81250f 100644 --- a/tests/test_freebayes.yml +++ b/tests/test_freebayes.yml @@ -25,7 +25,7 @@ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai # binary changes md5sums on reruns. - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: 16c7673085520b66bd47f59fbf8a7e0e + # conda changes md5sums for test. 
- path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] @@ -58,9 +58,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: c6d1ac97ef4dfe43731c8368d8391cab - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 + # conda changes md5sums for test. - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary md5sum: 43d53e36cbb1091f915b2499e545b41e - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count @@ -107,7 +107,7 @@ - path: results/preprocessing/recalibrated/test/test.recal.cram.crai # binary changes md5sums on reruns. - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: f66375a00d692d67df46c129147008b2 + # conda changes md5sums for test. - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] @@ -132,9 +132,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b1c2a861f64e20a94108a6de3b76c582 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 + # conda changes md5sums for test. - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary md5sum: 76c5919541536c12b5c8a6094d6d78d5 - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count @@ -184,9 +184,9 @@ - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai # binary changes md5sums on reruns. 
- path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: 16c7673085520b66bd47f59fbf8a7e0e + # conda changes md5sums for test. - path: results/reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.bcftools_stats.txt - md5sum: 001709e5eeca385908e31ba370f53381 + # conda changes md5sums for test. - path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] @@ -249,13 +249,13 @@ - path: results/reports/mosdepth/test2/test2.recal.regions.bed.gz.csi md5sum: c6d1ac97ef4dfe43731c8368d8391cab - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 + # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 + # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 53a0b34b67fdf90ea68fac8ea744b576 + # conda changes md5sums for test. - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary md5sum: 43d53e36cbb1091f915b2499e545b41e - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count @@ -322,9 +322,9 @@ - path: results/preprocessing/recalibrated/test2/test2.recal.cram.crai # binary changes md5sums on reruns. - path: results/reports/bcftools/freebayes/test/test.freebayes.bcftools_stats.txt - md5sum: f66375a00d692d67df46c129147008b2 + # conda changes md5sums for test. - path: results/reports/bcftools/freebayes/test2_vs_test/test2_vs_test.freebayes.bcftools_stats.txt - md5sum: 41d9835772fa1326769c2dd94749cbea + # conda changes md5sums for test. 
- path: results/reports/fastqc/test-test_L1 - path: results/reports/markduplicates/test/test.md.cram.metrics contains: ["test 8547 767 84 523391 3882 0 0 0.385081", "1.0 767 767"] @@ -363,13 +363,13 @@ - path: results/reports/mosdepth/test2/test2.recal.per-base.bed.gz.csi md5sum: 4205a09ede17cdbdaad45e3553f73105 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 + # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 + # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 029b903797dc228e56d6ab74e677fa21 + # conda changes md5sums for test. - path: results/reports/vcftools/freebayes/test/test.freebayes.FILTER.summary md5sum: 76c5919541536c12b5c8a6094d6d78d5 - path: results/reports/vcftools/freebayes/test/test.freebayes.TsTv.count diff --git a/tests/test_gatk4_spark.yml b/tests/test_gatk4_spark.yml index a23bae3d8b..07b8611aaf 100644 --- a/tests/test_gatk4_spark.yml +++ b/tests/test_gatk4_spark.yml @@ -44,9 +44,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz md5sum: c259a9fd73f576626c3a29841c2dc019 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: b573b2e930c5f68e7e4910faf9e51e44 + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a75731dca9b42d87d9997ea44aaf82fc + # conda changes md5sums for test. 
- path: results/preprocessing/mapped/ should_exist: false - name: Run default pipeline with gatk4_spark & skipping all QC steps @@ -131,6 +131,6 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: cf85ba4692f016af70db2a594d9effdf - path: results/reports/samtools/test/test.md.cram.stats - md5sum: b573b2e930c5f68e7e4910faf9e51e44 + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a75731dca9b42d87d9997ea44aaf82fc + # conda changes md5sums for test. diff --git a/tests/test_intervals.yml b/tests/test_intervals.yml index a960611d6a..457e46938e 100644 --- a/tests/test_intervals.yml +++ b/tests/test_intervals.yml @@ -54,9 +54,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: c6d1ac97ef4dfe43731c8368d8391cab - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 5ed26e702d5c2238a1f3cdc5e5b44f98 + # conda changes md5sums for test. - name: Run intervals false pipeline command: nextflow run main.nf -profile test --intervals false --save_reference tags: @@ -105,9 +105,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz md5sum: 38fe39894abe62e38f8ac214cba64f2b - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 811d834ff8849e69e7c5925dc2a34257 + # conda changes md5sums for test. - name: Run default pipeline without intervals command: nextflow run main.nf -profile test,no_intervals tags: @@ -164,6 +164,6 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b1c2a861f64e20a94108a6de3b76c582 - path: results/reports/samtools/test/test.md.cram.stats - md5sum: 52411f93d9b5382342b40a351c2e313c + # conda changes md5sums for test. 
- path: results/reports/samtools/test/test.recal.cram.stats - md5sum: d80ad11b8e308a3d5b1fb6f8e13cef38 + # conda changes md5sums for test. diff --git a/tests/test_manta.yml b/tests/test_manta.yml index e8a85e1312..64957769db 100644 --- a/tests/test_manta.yml +++ b/tests/test_manta.yml @@ -9,7 +9,7 @@ md5sum: 981280af86f69190fdf0639030a80249 - path: results/multiqc - path: results/reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt - md5sum: 2afce09f0b53a47acde7f8767fd6fcef + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count @@ -40,7 +40,7 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/manta/sample1/sample1.manta.diploid_sv.bcftools_stats.txt - md5sum: 2afce09f0b53a47acde7f8767fd6fcef + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample1/sample1.manta.diploid_sv.TsTv.count @@ -64,7 +64,7 @@ md5sum: f1051fe647abf202e6332f9a1789c05d - path: results/multiqc - path: results/reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt - md5sum: ea7f369ab8edaccc5bf45347bda15dfa + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count @@ -95,7 +95,7 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/manta/sample2/sample2.manta.tumor_sv.bcftools_stats.txt - md5sum: ea7f369ab8edaccc5bf45347bda15dfa + # conda changes md5sums for test. 
- path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample2/sample2.manta.tumor_sv.TsTv.count @@ -119,11 +119,11 @@ md5sum: 3a8861808601994f89d5c55ce5c95dae - path: results/multiqc - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count @@ -174,11 +174,11 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 + # conda changes md5sums for test. 
- path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count diff --git a/tests/test_markduplicates_from_bam.yml b/tests/test_markduplicates_from_bam.yml index 7b07d22bef..d10b1f9354 100644 --- a/tests/test_markduplicates_from_bam.yml +++ b/tests/test_markduplicates_from_bam.yml @@ -49,9 +49,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi # binary changes md5sums on reruns. - path: results/reports/samtools/test/test.md.cram.stats - md5sum: a0ecedb6be28639e276d38e4ac18935b + # conda changes md5sums for test. - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 + # conda changes md5sums for test. - name: Run skip markduplicates bam from step markduplicates command: nextflow run main.nf -profile test,markduplicates_bam,skip_markduplicates tags: @@ -94,9 +94,9 @@ - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi md5sum: b3716e5cd1744610e69c29bd4ffad259 - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded + # conda changes md5sums for test. - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 9de0a2738ab150e2e3fd857b0f42efc4 + # conda changes md5sums for test. - path: results/csv/markduplicates.csv should_exist: false - path: results/csv/markduplicates_no_table.csv diff --git a/tests/test_markduplicates_from_cram.yml b/tests/test_markduplicates_from_cram.yml index 0152929fe3..1accab1b5e 100644 --- a/tests/test_markduplicates_from_cram.yml +++ b/tests/test_markduplicates_from_cram.yml @@ -45,9 +45,9 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi # binary changes md5sums on reruns. - path: results/reports/samtools/test/test.md.cram.stats - md5sum: a0ecedb6be28639e276d38e4ac18935b + # conda changes md5sums for test. 
- path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 69d2bab6fdbe21ec5c598dc404a3df00 + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false - name: Run skip markduplicates cram from step markduplicates @@ -88,9 +88,9 @@ - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi md5sum: b3716e5cd1744610e69c29bd4ffad259 - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded + # conda changes md5sums for test. - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 42b536ada66f772bb4dfd741098b2783 + # conda changes md5sums for test. - path: results/csv/markduplicates.csv should_exist: false - path: results/csv/markduplicates_no_table.csv diff --git a/tests/test_mutect2.yml b/tests/test_mutect2.yml index 7fc84938f6..22340a34c5 100644 --- a/tests/test_mutect2.yml +++ b/tests/test_mutect2.yml @@ -9,13 +9,13 @@ md5sum: d57c1beba9005e9790a573bd93398b72 - path: results/multiqc - path: results/reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt - md5sum: b0277a18599f8aa01e4b1b42ff0257b1 + # conda changes md5sums for test. - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary md5sum: ef9bd9a2f41d8872ba25e5616e4c2a5e - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count md5sum: fe3ff1f0c2ead72f037552727438e00a - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual - md5sum: 13cc608c3cdc83a12ec53ef78b5aa888 + # conda changes md5sums for test. - path: results/variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz # binary changes md5sums on reruns. 
- path: results/variant_calling/mutect2/sample2/sample2.mutect2.contamination.table @@ -56,13 +56,13 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/mutect2/sample2/sample2.mutect2.filtered.bcftools_stats.txt - md5sum: a449e85411b3b295685f05915de6098d + # conda changes md5sums for test. - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.FILTER.summary md5sum: 5a833fd50e6efb26d1df2336eb0caf5e - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.count md5sum: f5295a61da80f12babae74fe4e104aad - path: results/reports/vcftools/mutect2/sample2/sample2.mutect2.filtered.TsTv.qual - md5sum: 13cc608c3cdc83a12ec53ef78b5aa888 + # conda changes md5sums for test. - path: results/variant_calling/mutect2/sample2/sample2.mutect2.artifactprior.tar.gz # binary changes md5sums on reruns. - path: results/variant_calling/mutect2/sample2/sample2.mutect2.contamination.table diff --git a/tests/test_prepare_recalibration_from_bam.yml b/tests/test_prepare_recalibration_from_bam.yml index 0124c63e07..6f12f71581 100644 --- a/tests/test_prepare_recalibration_from_bam.yml +++ b/tests/test_prepare_recalibration_from_bam.yml @@ -31,7 +31,7 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b3716e5cd1744610e69c29bd4ffad259 - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false - path: results/preprocessing/markduplicates/ @@ -51,13 +51,13 @@ - path: results/preprocessing/converted/test/test.converted.cram.crai # binary changes md5sums on reruns. - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count md5sum: ee7dafc8d941b8502a04a63dc3126fff - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - md5sum: 9fe11e894f7567eb96b43c48593741a7 + # conda changes md5sums for test. - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi diff --git a/tests/test_prepare_recalibration_from_cram.yml b/tests/test_prepare_recalibration_from_cram.yml index 234eeb7d06..22be04acf8 100644 --- a/tests/test_prepare_recalibration_from_cram.yml +++ b/tests/test_prepare_recalibration_from_cram.yml @@ -25,7 +25,7 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b3716e5cd1744610e69c29bd4ffad259 - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 71af990f7acad8bf24d37e88c4adcded + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false - path: results/preprocessing/markduplicates/ @@ -41,13 +41,13 @@ md5sum: 4d0effd3d8dc2b814230a189e7ca9dba - path: results/multiqc - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count md5sum: ee7dafc8d941b8502a04a63dc3126fff - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.qual - md5sum: a7e5db212d1e4d05029e93ff6e6b843a + # conda changes md5sums for test. - path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz # binary changes md5sums on reruns. 
- path: results/variant_calling/strelka/test/test.strelka.genome.vcf.gz.tbi diff --git a/tests/test_recalibrate_from_bam.yml b/tests/test_recalibrate_from_bam.yml index 468675abae..960757f841 100644 --- a/tests/test_recalibrate_from_bam.yml +++ b/tests/test_recalibrate_from_bam.yml @@ -27,7 +27,7 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b3716e5cd1744610e69c29bd4ffad259 - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a0841c41460d0d4bdc2a1a777ee7e7c2 + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false - path: results/preprocessing/markduplicates/ @@ -49,7 +49,7 @@ - path: results/preprocessing/converted/test/test.converted.cram.crai # binary changes md5sums on reruns. - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count diff --git a/tests/test_recalibrate_from_cram.yml b/tests/test_recalibrate_from_cram.yml index 5b5ce93a4c..8387dea5e7 100644 --- a/tests/test_recalibrate_from_cram.yml +++ b/tests/test_recalibrate_from_cram.yml @@ -23,7 +23,7 @@ - path: results/reports/mosdepth/test/test.recal.regions.bed.gz.csi md5sum: b3716e5cd1744610e69c29bd4ffad259 - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: a0841c41460d0d4bdc2a1a777ee7e7c2 + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false - path: results/preprocessing/markduplicates/ @@ -41,7 +41,7 @@ md5sum: 4d0effd3d8dc2b814230a189e7ca9dba - path: results/multiqc - path: results/reports/bcftools/strelka/test/test.strelka.variants.bcftools_stats.txt - md5sum: 0741b5dd5e6c5428bd5d4f1b7a923dd5 + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/test/test.strelka.variants.FILTER.summary md5sum: 39ff2cc8eb7495a14a6b76e0ab627027 - path: results/reports/vcftools/strelka/test/test.strelka.variants.TsTv.count diff --git a/tests/test_strelka.yml b/tests/test_strelka.yml index bb24ad1fb0..d6c0f4551a 100644 --- a/tests/test_strelka.yml +++ b/tests/test_strelka.yml @@ -7,19 +7,19 @@ files: - path: results/multiqc - path: results/reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt - md5sum: fa17527ede69d65762bd78fe86fbe50a + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample2/sample2.strelka.variants.bcftools_stats.txt - md5sum: 93bf8f09d155e69418fff988e76cbc1d + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary md5sum: 2048a5de0201a6052c988a0189979a5f - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count md5sum: c5b7a8eda2526d899098439ae4c06a49 - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual - md5sum: f01534408c3a87893b523de5f39deb0b + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.FILTER.summary md5sum: fa3112841a4575d104916027c8851b30 - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.count @@ -37,13 +37,13 @@ - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count md5sum: fc7af1f534890c4ad3025588b3af62ae - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: c949f848859f138731898aac64a73eaf + # conda changes md5sums for test. - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi @@ -87,13 +87,13 @@ md5sum: cd8a47dfc3e44c395e9f693770ccc6c9 - path: results/multiqc - path: results/reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt - md5sum: fa17527ede69d65762bd78fe86fbe50a + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary md5sum: 2048a5de0201a6052c988a0189979a5f - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count md5sum: c5b7a8eda2526d899098439ae4c06a49 - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual - md5sum: f01534408c3a87893b523de5f39deb0b + # conda changes md5sums for test. 
- path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi @@ -122,13 +122,13 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/strelka/sample1/sample1.strelka.variants.bcftools_stats.txt - md5sum: 90bd53c11222b5e07b4b37cbefa0e2d6 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.FILTER.summary md5sum: 2b7be6ff481fddc655210b836587810d - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.count md5sum: 1481854d2a765f5641856ecf95ca4097 - path: results/reports/vcftools/strelka/sample1/sample1.strelka.variants.TsTv.qual - md5sum: fd8e449a715922e24fe4ea9c89870432 + # conda changes md5sums for test. - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/strelka/sample1/sample1.strelka.genome.vcf.gz.tbi @@ -150,7 +150,7 @@ md5sum: 8d2a5e0ad12781c99e773b828e478d35 - path: results/multiqc - path: results/reports/bcftools/strelka/sample2/sample2.strelka.variants.bcftools_stats.txt - md5sum: 93bf8f09d155e69418fff988e76cbc1d + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.FILTER.summary md5sum: fa3112841a4575d104916027c8851b30 - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.count @@ -185,13 +185,13 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/strelka/sample2/sample2.strelka.variants.bcftools_stats.txt - md5sum: 9c3427ef0a28c43cc10537974f4bd9c4 + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.FILTER.summary md5sum: d1dcce19d82ced016724ace746e95d01 - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.count md5sum: 9de35bbe9ebe45166b6bd195717f733a - path: results/reports/vcftools/strelka/sample2/sample2.strelka.variants.TsTv.qual - md5sum: d2908c6821e5f45146a382625e4704c9 + # conda changes md5sums for test. - path: results/variant_calling/strelka/sample2/sample2.strelka.genome.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/strelka/sample2/sample2.strelka.genome.vcf.gz.tbi @@ -213,29 +213,29 @@ md5sum: 31ccee9472fed8bd15798724c62aee15 - path: results/multiqc - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt - md5sum: af8f05fd8a09e96c4c8850b6ef44729e + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary md5sum: 2048a5de0201a6052c988a0189979a5f - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count md5sum: c5b7a8eda2526d899098439ae4c06a49 - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual - md5sum: f01534408c3a87893b523de5f39deb0b + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary md5sum: 3441628cd6550ed459ca1c3db989ceea - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count md5sum: fc7af1f534890c4ad3025588b3af62ae - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: c949f848859f138731898aac64a73eaf + # conda changes md5sums for test. - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi @@ -272,29 +272,29 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt - md5sum: 572bc880d2bf64a00c8f86567c72f575 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 21b5d06e7e9f34a05b3d7554240f7669 + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary md5sum: 2b7be6ff481fddc655210b836587810d - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count md5sum: 1481854d2a765f5641856ecf95ca4097 - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual - md5sum: fd8e449a715922e24fe4ea9c89870432 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary md5sum: 3441628cd6550ed459ca1c3db989ceea - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary md5sum: 7a81b11aa29fec73d5bc872b7b58f8aa - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count md5sum: a922c51ca3b2ea7cdcfa09e9c8c55d52 - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: 75af7d1b1e5a2f1bdfe5b530e37a6cbb + # conda changes md5sums for test. - path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz # binary changes md5sums on reruns. 
- path: results/variant_calling/strelka/sample3/sample3.strelka.genome.vcf.gz.tbi diff --git a/tests/test_strelka_bp.yml b/tests/test_strelka_bp.yml index 823928dd6c..da8a6a33a2 100644 --- a/tests/test_strelka_bp.yml +++ b/tests/test_strelka_bp.yml @@ -9,53 +9,53 @@ md5sum: eff248896ca462b76c79749403e44f48 - path: results/multiqc - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt - md5sum: af8f05fd8a09e96c4c8850b6ef44729e + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 7cc6f2412d6b3d8e84ec0daece3d5443 + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count md5sum: fa27f678965b7cba6a92efcd039f802a - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. 
- path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count md5sum: fa27f678965b7cba6a92efcd039f802a - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary md5sum: 2048a5de0201a6052c988a0189979a5f - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count md5sum: c5b7a8eda2526d899098439ae4c06a49 - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual - md5sum: f01534408c3a87893b523de5f39deb0b + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary md5sum: 3441628cd6550ed459ca1c3db989ceea - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary md5sum: 4fc17fa5625b4d1dcc5d791b1eb22d85 - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count md5sum: fc7af1f534890c4ad3025588b3af62ae - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: c949f848859f138731898aac64a73eaf + # conda changes md5sums for test. - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi @@ -106,53 +106,53 @@ - path: results/no_intervals.bed.gz.tbi md5sum: f3dac01ea66b95fe477446fde2d31489 - path: results/reports/bcftools/manta/sample3/sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: 5a5ce4cf9c5f48801851d40772d3ba4b + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.bcftools_stats.txt - md5sum: baa05f77160e6cd985050790334c91e8 + # conda changes md5sums for test. - path: results/reports/bcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.bcftools_stats.txt - md5sum: 56c39772dc25e451d8209f608bb16e37 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample3/sample3.strelka.variants.bcftools_stats.txt - md5sum: 572bc880d2bf64a00c8f86567c72f575 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.bcftools_stats.txt - md5sum: 6cf7a55bcb53b400ce7df7e72c892574 + # conda changes md5sums for test. - path: results/reports/bcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.bcftools_stats.txt - md5sum: 21b5d06e7e9f34a05b3d7554240f7669 + # conda changes md5sums for test. 
- path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.count md5sum: fa27f678965b7cba6a92efcd039f802a - path: results/reports/vcftools/manta/sample3/sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.count md5sum: fa27f678965b7cba6a92efcd039f802a - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.diploid_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/manta/sample4_vs_sample3/sample4_vs_sample3.manta.somatic_sv.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.FILTER.summary md5sum: 2b7be6ff481fddc655210b836587810d - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.count md5sum: 1481854d2a765f5641856ecf95ca4097 - path: results/reports/vcftools/strelka/sample3/sample3.strelka.variants.TsTv.qual - md5sum: fd8e449a715922e24fe4ea9c89870432 + # conda changes md5sums for test. 
- path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.FILTER.summary md5sum: 3441628cd6550ed459ca1c3db989ceea - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.count md5sum: 8dcfdbcaac118df1d5ad407dd2af699f - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_indels.TsTv.qual - md5sum: bc68ae4e688e9fb772b457069e604883 + # conda changes md5sums for test. - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.FILTER.summary md5sum: 7a81b11aa29fec73d5bc872b7b58f8aa - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.count md5sum: a922c51ca3b2ea7cdcfa09e9c8c55d52 - path: results/reports/vcftools/strelka/sample4_vs_sample3/sample4_vs_sample3.strelka.somatic_snvs.TsTv.qual - md5sum: 75af7d1b1e5a2f1bdfe5b530e37a6cbb + # conda changes md5sums for test. - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/manta/sample3/sample3.manta.diploid_sv.vcf.gz.tbi diff --git a/tests/test_tiddit.yml b/tests/test_tiddit.yml index a7c8fa560f..c13518a21c 100644 --- a/tests/test_tiddit.yml +++ b/tests/test_tiddit.yml @@ -9,9 +9,9 @@ md5sum: 76b499e35c128d67b0606ea561bf70e0 - path: results/multiqc - path: results/reports/bcftools/tiddit/sample3/sample3.tiddit.bcftools_stats.txt - md5sum: 47af91809c214102f723a7c58e9b8e72 + # conda changes md5sums for test. - path: results/reports/bcftools/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.bcftools_stats.txt - md5sum: c071ebe8222acd2c76fc019838e8c230 + # conda changes md5sums for test. 
- path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/tiddit/sample3/sample3.tiddit.TsTv.count @@ -27,19 +27,19 @@ - path: results/variant_calling/tiddit/sample3/sample3.tiddit.ploidies.tab md5sum: d65f8aa5bb136f77c23264640481f047 - path: results/variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz - md5sum: 1aeb97037b73251bb79e48b355026c9d + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample3/sample3.tiddit.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz - md5sum: 5ba8e30a9d831c05e31300b7d2c578ef + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.normal.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.ploidies.tab - md5sum: d65f8aa5bb136f77c23264640481f047 + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz - md5sum: 302cb980260b313573cf53067fd11120 + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.vcf.gz md5sum: f69f1fb9c246ac3bbc9270a2ea979557 - path: results/tiddit @@ -55,7 +55,7 @@ md5sum: cd458ebee25e1fb1258d2f390e54c736 - path: results/multiqc - path: results/reports/bcftools/tiddit/sample1/sample1.tiddit.bcftools_stats.txt - md5sum: 6bde7b353b927f8e99f5d0f3b592e67b + # conda changes md5sums for test. 
- path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/tiddit/sample1/sample1.tiddit.TsTv.count @@ -65,9 +65,9 @@ - path: results/variant_calling/tiddit/sample1/sample1.tiddit.ploidies.tab md5sum: d65f8aa5bb136f77c23264640481f047 - path: results/variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz - md5sum: 72cd7a6dc9dece29fdcb0a19f69e1cb5 + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample1/sample1.tiddit.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + # conda changes md5sums for test. - path: results/tiddit should_exist: false - name: Run variant calling on tumor_only sample with tiddit @@ -81,7 +81,7 @@ md5sum: 15076bb78912fe51006e83934c376fc2 - path: results/multiqc - path: results/reports/bcftools/tiddit/sample2/sample2.tiddit.bcftools_stats.txt - md5sum: f2e3a2944f28287286f526d069d473e4 + # conda changes md5sums for test. - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.FILTER.summary md5sum: 1ce42d34e4ae919afb519efc99146423 - path: results/reports/vcftools/tiddit/sample2/sample2.tiddit.TsTv.count @@ -91,8 +91,8 @@ - path: results/variant_calling/tiddit/sample2/sample2.tiddit.ploidies.tab md5sum: 0f01874e20df10ecc7418d4537c7aa82 - path: results/variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz - md5sum: d75c2688964d79a87d882a8ac9a9a93b + # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample2/sample2.tiddit.vcf.gz.tbi - md5sum: 4cb176febbc8c26d717a6c6e67b9c905 + # conda changes md5sums for test. 
- path: results/tiddit should_exist: false From 973bbcb87b35eb48db4c131c7f7dd83bc9c2cd4a Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 4 Nov 2022 12:20:12 +0100 Subject: [PATCH 47/49] more conda md5sum removed --- tests/test_default_extended.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/test_default_extended.yml b/tests/test_default_extended.yml index 67df5f7b11..86386bfb12 100644 --- a/tests/test_default_extended.yml +++ b/tests/test_default_extended.yml @@ -186,9 +186,9 @@ - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi md5sum: e3235323f455414ee3acc7144e659caf - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + # conda changes md5sums for test. - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 + # conda changes md5sums for test. - path: results/csv/markduplicates.csv should_exist: false - path: results/csv/markduplicates_no_table.csv @@ -241,9 +241,9 @@ - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi md5sum: e3235323f455414ee3acc7144e659caf - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + # conda changes md5sums for test. - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 + # conda changes md5sums for test. - path: results/csv/markduplicates.csv should_exist: false - path: results/csv/markduplicates_no_table.csv @@ -294,9 +294,9 @@ - path: results/reports/mosdepth/test/test.sorted.regions.bed.gz.csi md5sum: e3235323f455414ee3acc7144e659caf - path: results/reports/samtools/test/test.recal.cram.stats - md5sum: 55ddd2d98552a1483bf2a965f19d8da4 + # conda changes md5sums for test. - path: results/reports/samtools/test/test.sorted.cram.stats - md5sum: 59d921ed3970e19145fbae75966de3e3 + # conda changes md5sums for test. 
- path: results/csv/markduplicates.csv should_exist: false - path: results/csv/markduplicates_no_table.csv @@ -439,8 +439,8 @@ - path: results/reports/samtools/test/test.recal.cram.stats # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.md.cram.stats - md5sum: 60152dbf1e109d4c407c151204388109 + # conda changes md5sums for test. - path: results/reports/samtools/test2/test2.recal.cram.stats - md5sum: 2907543ab51cabd97318b15cf035c867 + # conda changes md5sums for test. - path: results/preprocessing/mapped/ should_exist: false From 8af8cc465ce9e1e02b7ffdbab421272f05e8e1d8 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 4 Nov 2022 14:28:24 +0100 Subject: [PATCH 48/49] comments --- tests/test_strelka_bp.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_strelka_bp.yml b/tests/test_strelka_bp.yml index da8a6a33a2..d81907b5eb 100644 --- a/tests/test_strelka_bp.yml +++ b/tests/test_strelka_bp.yml @@ -1,4 +1,4 @@ -- name: Run variant calling on somatic sample with strelka & manta (Strelka BP) +- name: Run variant calling on somatic sample with Strelka BP command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta tags: - somatic @@ -88,7 +88,7 @@ should_exist: false - path: results/strelka should_exist: false -- name: Run variant calling on somatic sample with & manta (Strelka BP) without intervals +- name: Run variant calling on somatic sample with Strelka BP without intervals command: nextflow run main.nf -profile test,tools_somatic --tools strelka,manta --no_intervals tags: - no_intervals From 93f4db22e1c9578770c2ad48e6fb6ba28c35b337 Mon Sep 17 00:00:00 2001 From: maxulysse Date: Fri, 4 Nov 2022 17:42:34 +0100 Subject: [PATCH 49/49] more conda md5sum removed + remove duplicate tests --- tests/test_gatk4_spark.yml | 4 ++-- tests/test_recalibrate_from_bam.yml | 1 - tests/test_recalibrate_from_cram.yml | 1 - tests/test_tiddit.yml | 2 +- 4 files changed, 3 insertions(+), 5 deletions(-) diff 
--git a/tests/test_gatk4_spark.yml b/tests/test_gatk4_spark.yml index 07b8611aaf..6b8e3a2f83 100644 --- a/tests/test_gatk4_spark.yml +++ b/tests/test_gatk4_spark.yml @@ -97,9 +97,9 @@ md5sum: 3ddc20eb105fdcc483945afd7d7d238c - path: results/multiqc - path: results/preprocessing/markduplicates/test/test.md.bam - md5sum: 8bfd111af60ca7e63b233e59b0fb570b + # conda changes md5sums for test. - path: results/preprocessing/markduplicates/test/test.md.bam.bai - md5sum: fe8bc8655aff0d1a8093680390d98fab + # conda changes md5sums for test. - path: results/preprocessing/recal_table/test/test.recal.table md5sum: 5b6e5078b4a90f6cb982fa0f0df616c2 - path: results/preprocessing/recalibrated/test/test.recal.bam diff --git a/tests/test_recalibrate_from_bam.yml b/tests/test_recalibrate_from_bam.yml index 960757f841..a4e92a8792 100644 --- a/tests/test_recalibrate_from_bam.yml +++ b/tests/test_recalibrate_from_bam.yml @@ -39,7 +39,6 @@ - recalibrate - preprocessing - variant_calling - - strelka files: - path: results/csv/variantcalled.csv md5sum: 4d0effd3d8dc2b814230a189e7ca9dba diff --git a/tests/test_recalibrate_from_cram.yml b/tests/test_recalibrate_from_cram.yml index 8387dea5e7..a30908cdca 100644 --- a/tests/test_recalibrate_from_cram.yml +++ b/tests/test_recalibrate_from_cram.yml @@ -35,7 +35,6 @@ - recalibrate - preprocessing - variant_calling - - strelka files: - path: results/csv/variantcalled.csv md5sum: 4d0effd3d8dc2b814230a189e7ca9dba diff --git a/tests/test_tiddit.yml b/tests/test_tiddit.yml index c13518a21c..2c7a51d0d3 100644 --- a/tests/test_tiddit.yml +++ b/tests/test_tiddit.yml @@ -41,7 +41,7 @@ - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit.tumor.vcf.gz.tbi # conda changes md5sums for test. - path: results/variant_calling/tiddit/sample4_vs_sample3/sample4_vs_sample3.tiddit_sv_merge.vcf.gz - md5sum: f69f1fb9c246ac3bbc9270a2ea979557 + # conda changes md5sums for test. 
- path: results/tiddit should_exist: false - name: Run variant calling on germline sample with tiddit