diff --git a/CHANGELOG.md b/CHANGELOG.md index 12dccac45d..edec6afd8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#1113](https://github.com/nf-core/sarek/pull/1113) - Adding CNVkit genemetrics module - [#1193](https://github.com/nf-core/sarek/pull/1193) - Adding support for Sentieon's DnaScope for germline variant-calling including joint-germline +- [#1252](https://github.com/nf-core/sarek/pull/1252) - Added NGSCheckMate tool for checking that samples come from the same individual - [#1271](https://github.com/nf-core/sarek/pull/1271) - Back to dev - [#1290](https://github.com/nf-core/sarek/pull/1290) - Add nf-test for whole pipeline. diff --git a/conf/igenomes.config b/conf/igenomes.config index 0e68dfee19..b020ddee5f 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -35,6 +35,7 @@ params { known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.tbi" known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_wChr.bed" snpeff_db = 87 snpeff_genome = 'GRCh37' vep_cache_version = 110 @@ -68,6 +69,7 @@ params { known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" known_indels_vqsr = '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz 
--resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" snpeff_db = 105 @@ -79,6 +81,7 @@ params { 'Ensembl.GRCh37' { bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + ngscheckmate_bed = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/NGSCheckMate/SNP_GRCh37_hg19_woChr.bed" readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" snpeff_db = 87 snpeff_genome = 'GRCh37' @@ -89,6 +92,7 @@ params { 'NCBI.GRCh38' { bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + ngscheckmate_bed ="${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/NGSCheckMate/SNP_GRCh38_hg38_wChr.bed" snpeff_db = 105 snpeff_genome = 'GRCh38' vep_cache_version = 110 diff --git a/conf/modules/ngscheckmate.config b/conf/modules/ngscheckmate.config new file mode 100644 index 0000000000..89fa045ed4 --- /dev/null +++ b/conf/modules/ngscheckmate.config @@ -0,0 +1,26 @@ +process { + withName: ".*BAM_NGSCHECKMATE:BCFTOOLS_MPILEUP" { + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/ngscheckmate/vcfs" }, + pattern: "*{vcf.gz}" + ] + ext.prefix = { "${meta.id}.ngscheckmate" } + ext.when = { params.tools && params.tools.split(',').contains('ngscheckmate') } + 
ext.args2 = '--no-version --ploidy 1 -c' + ext.args3 = '--no-version' + } + + withName: ".*BAM_NGSCHECKMATE:NGSCHECKMATE_NCM" { + ext.args = '-V' + + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/reports/ngscheckmate/" }, + pattern: "*" + ] + + } + +} diff --git a/conf/test.config b/conf/test.config index 6ca8124443..4b9c3c73af 100644 --- a/conf/test.config +++ b/conf/test.config @@ -39,6 +39,7 @@ params { bcftools_annotations = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz" bcftools_annotations_index = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi" bcftools_header_lines = "${projectDir}/tests/config/bcfann_test_header.txt" + ngscheckmate_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/chr21/germlineresources/SNP_GRCh38_hg38_wChr.bed" // default params split_fastq = 0 // no FASTQ splitting diff --git a/conf/test/cache.config b/conf/test/cache.config index 9f51f72354..95e6fb40f7 100644 --- a/conf/test/cache.config +++ b/conf/test/cache.config @@ -48,6 +48,7 @@ params { vep_cache_version = 110 vep_genome = 'WBcel235' vep_species = 'caenorhabditis_elegans' + ngscheckmate_bed = params.test_data['homo_sapiens']['genome']['ngscheckmate_bed'] // default params split_fastq = 0 // no FASTQ splitting diff --git a/conf/test_full.config b/conf/test_full.config index 8377d35102..8acaae89d8 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,7 +18,7 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/HCC1395_WXS_somatic_full_test.csv' // Other params - tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep' + tools = 'strelka,mutect2,freebayes,ascat,manta,cnvkit,tiddit,controlfreec,vep,ngscheckmate' split_fastq = 20000000 intervals = 
's3://ngi-igenomes/test-data/sarek/S07604624_Padded_Agilent_SureSelectXT_allexons_V6_UTR.bed' wes = true diff --git a/docs/output.md b/docs/output.md index fd614ee112..769ee71fbd 100644 --- a/docs/output.md +++ b/docs/output.md @@ -61,6 +61,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [FastQC](#fastqc) - [FastP](#fastp) - [Mosdepth](#mosdepth) + - [NGSCheckMate](#ngscheckmate) - [GATK MarkDuplicates reports](#gatk-markduplicates-reports) - [Sentieon Dedup reports](#sentieon-dedup-reports) - [samtools stats](#samtools-stats) @@ -985,6 +986,25 @@ Plots will show: - CSI index for per-base depth for targeted data, per-window (500bp) depth of WGS +#### NGSCheckMate + +[NGSCheckMate](https://github.com/parklab/NGSCheckMate) is a tool for determining whether samples come from the same genetic individual, using a set of commonly heterozygous SNPs. This enables the detection of sample mislabelling events. The output includes a text file indicating whether samples have matched or not according to the algorithm, as well as a dendrogram visualising these results. + +<details markdown="1">
+<summary>Output files for all samples</summary> + +**Output directory: `{outdir}/reports/ngscheckmate/`** + +- `ngscheckmate_all.txt` + - Tab delimited text file listing all the comparisons made, whether they were considered as a match, with the correlation and a normalised depth. +- `ngscheckmate_matched.txt` + - Tab delimited text file listing only the comparisons that were considered to match, with the correlation and a normalised depth. +- `ngscheckmate_output_corr_matrix.txt` + - Tab delimited text file containing a matrix of all correlations for all comparisons made. +- `vcfs/<sample>.ngscheckmate.vcf.gz` + - Set of vcf files for each sample. Contains calls for the set of SNP positions used to calculate sample relatedness. +</details>
+ #### GATK MarkDuplicates reports More information in the [GATK MarkDuplicates section](#gatk-markduplicates) diff --git a/main.nf b/main.nf index 99ea7db553..bc69e55d1b 100644 --- a/main.nf +++ b/main.nf @@ -52,6 +52,7 @@ params.known_indels = WorkflowMain.getGenomeAttribute(params, 'known_in params.known_indels_tbi = WorkflowMain.getGenomeAttribute(params, 'known_indels_tbi') params.known_indels_vqsr = WorkflowMain.getGenomeAttribute(params, 'known_indels_vqsr') params.mappability = WorkflowMain.getGenomeAttribute(params, 'mappability') +params.ngscheckmate_bed = WorkflowMain.getGenomeAttribute(params, 'ngscheckmate_bed') params.pon = WorkflowMain.getGenomeAttribute(params, 'pon') params.pon_tbi = WorkflowMain.getGenomeAttribute(params, 'pon_tbi') params.snpeff_db = WorkflowMain.getGenomeAttribute(params, 'snpeff_db') diff --git a/modules.json b/modules.json index 1b5a25a36b..1ba2f99ca0 100644 --- a/modules.json +++ b/modules.json @@ -24,7 +24,7 @@ "bcftools/mpileup": { "branch": "master", "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["modules"] + "installed_by": ["bam_ngscheckmate", "modules"] }, "bcftools/sort": { "branch": "master", @@ -334,6 +334,11 @@ "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", "installed_by": ["modules"] }, + "ngscheckmate/ncm": { + "branch": "master", + "git_sha": "32d6725f584ebf460de39b7c1c53a29d5384d697", + "installed_by": ["bam_ngscheckmate"] + }, "samblaster": { "branch": "master", "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", @@ -483,6 +488,11 @@ }, "subworkflows": { "nf-core": { + "bam_ngscheckmate": { + "branch": "master", + "git_sha": "32d6725f584ebf460de39b7c1c53a29d5384d697", + "installed_by": ["subworkflows"] + }, "vcf_annotate_ensemblvep": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", diff --git a/modules/nf-core/ngscheckmate/ncm/main.nf b/modules/nf-core/ngscheckmate/ncm/main.nf new file mode 100644 index 0000000000..28fab8f096 --- /dev/null +++ 
b/modules/nf-core/ngscheckmate/ncm/main.nf @@ -0,0 +1,64 @@ +process NGSCHECKMATE_NCM { + label 'process_low' + + conda "bioconda::ngscheckmate=1.0.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ngscheckmate:1.0.1--py27pl5321r40hdfd78af_1': + 'biocontainers/ngscheckmate:1.0.1--py27pl5321r40hdfd78af_1' }" + + input: + tuple val(meta) , path(files) + tuple val(meta2), path(snp_bed) + tuple val(meta3), path(fasta) + + output: + tuple val(meta), path("*_corr_matrix.txt"), emit: corr_matrix + tuple val(meta), path("*_matched.txt") , emit: matched + tuple val(meta), path("*_all.txt") , emit: all + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + tuple val(meta), path("*.vcf") , emit: vcf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "$meta.id" + def unzip = files.any { it.toString().endsWith(".vcf.gz") } + """ + if $unzip + then + for VCFGZ in *.vcf.gz; do + gunzip -cdf \$VCFGZ > \$( basename \$VCFGZ .gz ); + done + fi + + NCM_REF="./"${fasta} ncm.py -d . -bed ${snp_bed} -O . 
-N ${prefix} $args + + if $unzip + then + rm -f *.vcf # clean up decompressed vcfs + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "$meta.id" + """ + touch ${prefix}_output_corr_matrix.txt + touch ${prefix}_matched.txt + touch ${prefix}_all.txt + touch ${prefix}.pdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ngscheckmate: \$(ncm.py --help | sed "7!d;s/ *Ensuring Sample Identity v//g") + END_VERSIONS + """ + +} diff --git a/modules/nf-core/ngscheckmate/ncm/meta.yml b/modules/nf-core/ngscheckmate/ncm/meta.yml new file mode 100644 index 0000000000..024f031485 --- /dev/null +++ b/modules/nf-core/ngscheckmate/ncm/meta.yml @@ -0,0 +1,77 @@ +name: ngscheckmate_ncm +description: Determining whether sequencing data comes from the same individual by using SNP matching. Designed for humans on vcf or bam files. +keywords: + - ngscheckmate + - matching + - snp +tools: + - ngscheckmate: + description: NGSCheckMate is a software package for identifying next generation sequencing (NGS) data files from the same individual, including matching between DNA and RNA. + homepage: https://github.com/parklab/NGSCheckMate + documentation: https://github.com/parklab/NGSCheckMate + tool_dev_url: https://github.com/parklab/NGSCheckMate + doi: "10.1093/nar/gkx193" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - files: + type: file + description: VCF or BAM files for each sample, in a merged channel (possibly gzipped). BAM files require an index too. + pattern: "*.{vcf,vcf.gz,bam,bai}" + - meta2: + type: map + description: | + Groovy Map containing SNP information + e.g. 
[ id:'test' ] + - snp_bed: + type: file + description: BED file containing the SNPs to analyse + pattern: "*.{bed}" + - meta3: + type: map + description: | + Groovy Map containing reference fasta information + e.g. [ id:'test' ] + - fasta: + type: file + description: fasta file for the genome, only used in the bam mode + pattern: "*.{fa,fasta}" + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + + - pdf: + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + + - corr_matrix: + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + + - matched: + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + + - all: + type: file + description: A txt file containing all the sample comparisons, whether they match or not + pattern: "*all.txt" + + - vcf: + type: file + description: If run in bam mode, vcf files for each sample giving the SNP calls used + pattern: "*.vcf" + +authors: + - "@sppearce" diff --git a/nextflow.config b/nextflow.config index be4804094a..6cfe5993b7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -375,6 +375,9 @@ includeConfig 'conf/modules/recalibrate.config' includeConfig 'conf/modules/trimming.config' includeConfig 'conf/modules/umi.config' +//ngscheckmate +includeConfig 'conf/modules/ngscheckmate.config' + // variant calling includeConfig 'conf/modules/ascat.config' includeConfig 'conf/modules/cnvkit.config' @@ -393,7 +396,6 @@ includeConfig 'conf/modules/sentieon_haplotyper.config' includeConfig 'conf/modules/sentieon_haplotyper_joint_germline.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' - includeConfig 'conf/modules/post_variant_calling.config' //annotate diff --git a/nextflow_schema.json b/nextflow_schema.json index 13b48a211c..c3ff2d759e 
100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -100,7 +100,8 @@ "fa_icon": "fas fa-toolbox", "description": "Tools to use for duplicate marking, variant calling and/or for annotation.", "help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.", - "pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(? 
[[id: "ngscheckmate"], bed]} + + ch_fasta = fasta.map{fasta -> [[id: "genome"], fasta]} + + BAM_NGSCHECKMATE ( ch_cram.map{meta, cram, crai -> [meta, cram]}, ch_ngscheckmate_bed, ch_fasta) + ch_versions = ch_versions.mix(BAM_NGSCHECKMATE.out.versions) // keep every versions.yml: this channel mixes BCFTOOLS_MPILEUP and NGSCHECKMATE_NCM, so first() would drop one tool + + emit: + corr_matrix = BAM_NGSCHECKMATE.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = BAM_NGSCHECKMATE.out.matched // channel: [ meta, matched ] + all = BAM_NGSCHECKMATE.out.all // channel: [ meta, all ] + vcf = BAM_NGSCHECKMATE.out.vcf // channel: [ meta, vcf ] + pdf = BAM_NGSCHECKMATE.out.pdf // channel: [ meta, pdf ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/nf-core/bam_ngscheckmate/main.nf b/subworkflows/nf-core/bam_ngscheckmate/main.nf new file mode 100644 index 0000000000..2da41fede3 --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/main.nf @@ -0,0 +1,49 @@ +include { BCFTOOLS_MPILEUP } from '../../../modules/nf-core/bcftools/mpileup/main' +include { NGSCHECKMATE_NCM } from '../../../modules/nf-core/ngscheckmate/ncm/main' + +workflow BAM_NGSCHECKMATE { + + take: + ch_input // channel: [ val(meta1), bam/cram ] + ch_snp_bed // channel: [ val(meta2), bed ] + ch_fasta // channel: [ val(meta3), fasta ] + + main: + + ch_versions = Channel.empty() + + ch_input_bed = ch_input.combine(ch_snp_bed.collect()) + // do something to combine the metas? + .map{ input_meta, input_file, bed_meta, bed_file -> + [input_meta, input_file, bed_file] + } + + BCFTOOLS_MPILEUP (ch_input_bed, ch_fasta, false) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions) + + BCFTOOLS_MPILEUP + .out + .vcf + .map{meta, vcf -> vcf} // discard individual metas + .collect() // group into one channel + .map{files -> [files]} // make the channel into [vcf1, vcf2, ...] 
+ .set {ch_collected_vcfs} + + ch_snp_bed + .map{meta, bed -> meta} // use the snp_bed file meta as the meta for the merged channel + .combine(ch_collected_vcfs) // add the vcf files after the meta, now looks like [meta, [vcf1, vcf2, ... ] ] + .set {ch_vcfs} + + NGSCHECKMATE_NCM (ch_vcfs, ch_snp_bed, ch_fasta) + ch_versions = ch_versions.mix(NGSCHECKMATE_NCM.out.versions) + + emit: + corr_matrix = NGSCHECKMATE_NCM.out.corr_matrix // channel: [ meta, corr_matrix ] + matched = NGSCHECKMATE_NCM.out.matched // channel: [ meta, matched ] + all = NGSCHECKMATE_NCM.out.all // channel: [ meta, all ] + vcf = BCFTOOLS_MPILEUP.out.vcf // channel: [ meta, vcf ] + pdf = NGSCHECKMATE_NCM.out.pdf // channel: [ meta, pdf ] + versions = ch_versions // channel: [ versions.yml ] + +} + diff --git a/subworkflows/nf-core/bam_ngscheckmate/meta.yml b/subworkflows/nf-core/bam_ngscheckmate/meta.yml new file mode 100644 index 0000000000..a3a1ab6e4a --- /dev/null +++ b/subworkflows/nf-core/bam_ngscheckmate/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_ngscheckmate" +description: Take a set of bam files and run NGSCheckMate to determine whether samples match with each other, using a set of SNPs. +keywords: + - ngscheckmate + - qc + - bam + - snp +components: + - bcftools/mpileup + - ngscheckmate/ncm +input: + - meta1: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bam: + type: file + description: BAM files for each sample + pattern: "*.{bam}" + - meta2: + type: map + description: | + Groovy Map containing bed file information + e.g. [ id:'sarscov2' ] + - snp_bed: + type: file + description: BED file containing the SNPs to analyse. NGSCheckMate provides some default ones for hg19/hg38. + pattern: "*.{bed}" + - meta3: + type: map + description: | + Groovy Map containing reference genome meta information + e.g. 
[ id:'sarscov2' ] + - fasta: + type: file + description: fasta file for the genome + pattern: "*.{fasta}" + +output: + - pdf: + type: file + description: A pdf containing a dendrogram showing how the samples match up + pattern: "*.{pdf}" + - corr_matrix: + type: file + description: A text file containing the correlation matrix between each sample + pattern: "*corr_matrix.txt" + - matched: + type: file + description: A txt file containing only the samples that match with each other + pattern: "*matched.txt" + - all: + type: file + description: A txt file containing all the sample comparisons, whether they match or not + pattern: "*all.txt" + - vcf: + type: file + description: vcf files for each sample giving the SNP calls + pattern: "*.vcf" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@SPPearce" diff --git a/tests/config/pytesttags.yml b/tests/config/pytesttags.yml index 2c93fed587..8a550ee057 100644 --- a/tests/config/pytesttags.yml +++ b/tests/config/pytesttags.yml @@ -598,3 +598,14 @@ concatenate_vcfs: - subworkflows/local/vcf_concatenate_germline/main.nf - tests/csv/3.0/mapped_joint_bam.csv - tests/test_concat_germline_vcfs.yml + +# sampleqc + +## ngscheckmate +ngscheckmate: + - conf/modules/ngscheckmate.config + - modules/nf-core/bcftools/mpileup/main.nf + - modules/nf-core/ngscheckmate/ncm/main.nf + - subworkflows/local/cram_sampleqc/main.nf + - subworkflows/nf-core/bam_ngscheckmate/main.nf + - tests/test_ngscheckmate.yml diff --git a/tests/test_ngscheckmate.yml b/tests/test_ngscheckmate.yml new file mode 100644 index 0000000000..8923ddc9ec --- /dev/null +++ b/tests/test_ngscheckmate.yml @@ -0,0 +1,15 @@ +- name: Check ngscheckmate is working + command: nextflow run main.nf -profile test_cache,tools --tools ngscheckmate --outdir results + tags: + - ngscheckmate + - tools + files: + - path: results/multiqc + - path: results/reports/ngscheckmate/ngscheckmate_all.txt + - path: 
results/reports/ngscheckmate/ngscheckmate_matched.txt + - path: results/reports/ngscheckmate/ngscheckmate_output_corr_matrix.txt + - path: results/reports/ngscheckmate/ngscheckmate.pdf + - path: results/reports/ngscheckmate/vcfs/sample1.ngscheckmate.vcf.gz + - path: results/reports/ngscheckmate/vcfs/sample2.ngscheckmate.vcf.gz + - path: results/reports/ngscheckmate/vcfs/sample3.ngscheckmate.vcf.gz + - path: results/reports/ngscheckmate/vcfs/sample4.ngscheckmate.vcf.gz diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 3f76eb40b6..be673dc249 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -48,6 +48,7 @@ def checkPathParamList = [ params.known_snps_tbi, params.mappability, params.multiqc_config, + params.ngscheckmate_bed, params.pon, params.pon_tbi, params.sentieon_dnascope_model, @@ -104,6 +105,7 @@ ascat_genome = params.ascat_genome ?: Channel.empty() dbsnp_vqsr = params.dbsnp_vqsr ? Channel.value(params.dbsnp_vqsr) : Channel.empty() known_indels_vqsr = params.known_indels_vqsr ? Channel.value(params.known_indels_vqsr) : Channel.empty() known_snps_vqsr = params.known_snps_vqsr ? Channel.value(params.known_snps_vqsr) : Channel.empty() +ngscheckmate_bed = params.ngscheckmate_bed ? 
Channel.value(params.ngscheckmate_bed) : Channel.empty() snpeff_db = params.snpeff_db ?: Channel.empty() vep_cache_version = params.vep_cache_version ?: Channel.empty() vep_genome = params.vep_genome ?: Channel.empty() @@ -214,6 +216,9 @@ include { POST_VARIANTCALLING } from '../subworkflows/lo // QC on VCF files include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' +// Sample QC on CRAM files +include { CRAM_SAMPLEQC } from '../subworkflows/local/cram_sampleqc/main' + // Annotation include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main' @@ -854,6 +859,8 @@ workflow SAREK { if (params.step == 'annotate') cram_variant_calling = Channel.empty() + CRAM_SAMPLEQC(cram_variant_calling, ngscheckmate_bed, fasta) + // // Logic to separate germline samples, tumor samples with no matched normal, and combine tumor-normal pairs //