diff --git a/CHANGELOG.md b/CHANGELOG.md index 55291bfa..d2f69d89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - GATK's ShiftFasta to generate all the files required for mitochondrial analysis - Feature to calculate CADD scores for indels +- HmtNote to annotate mitochondria +- MT del script to detect mitochondrial deltions ## v1.0.0 - [2023-03-31] diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config index 32e9ed0c..1a2993f5 100644 --- a/conf/modules/align_and_call_MT.config +++ b/conf/modules/align_and_call_MT.config @@ -52,6 +52,15 @@ process { ext.prefix = { "${meta.id}_sorted" } } + withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:MT_DELETION' { + ext.args = '-s --insert-size 16000' + publishDir = [ + path: { "${params.outdir}/mt_sv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MUTECT2_MT' { ext.args = '--mitochondria-mode TRUE' } diff --git a/docs/output.md b/docs/output.md index 5c890d9c..5c15aa71 100644 --- a/docs/output.md +++ b/docs/output.md @@ -47,10 +47,12 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [VEP](#vep-1) - [Mitochondrial analysis](#mitochondrial-analysis) - [Alignment and variant calling](#alignment-and-variant-calling) + - [MT deletion script](#mt-deletion-script) - [Annotation:](#annotation-) - [HaploGrep2](#haplogrep2) - [vcfanno](#vcfanno-1) - [VEP](#vep-2) + - [HmtNote](#hmtnote) - [Rank variants and filtering](#rank-variants-and-filtering) - [GENMOD](#genmod) - [Pipeline information](#pipeline-information) @@ -360,6 +362,10 @@ Mitochondrial analysis is run by default, to turn it off set `--skip_mt_analysis The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sensitivity to low AF and separate alignments using opposite genome breakpoints to allow for the tracing of lineages of rare mitochondrial variants. +##### MT deletion script + +[MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools. + #### Annotation: ##### HaploGrep2 @@ -384,6 +390,10 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files [CADD](https://cadd.gs.washington.edu/) is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. In nf-core/raredisease, SNVs can be annotated with precomputed CADD scores using vcfanno. However, for small indels they will be calculated on the fly by CADD. The output files are not published in the output folder by default, and is passed to VEP for further annotation. +##### Hmtnote + +[HmtNote](https://github.com/robertopreste/HmtNote) annotates vcf containing human mitochondrial variants with HmtVar. It will run offline by default wiht a database within the container. + ##### VEP [VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) determines the effect of your variants on genes, transcripts, and protein sequence, as well as regulatory regions. diff --git a/modules/local/mt_deletion_script.nf b/modules/local/mt_deletion_script.nf new file mode 100644 index 00000000..df586191 --- /dev/null +++ b/modules/local/mt_deletion_script.nf @@ -0,0 +1,48 @@ +process MT_DELETION { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.txt'), emit: mt_del_result + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + + """ + samtools stats --threads ${task.cpus} $args ${reference} ${input} | \\ + grep -E ^IS | \\ + awk 'BEGIN {sum=0} (\$2>=1200 && \$2<=15000) {sum=sum+\$3} (\$2<1200 || \$2>15000) {sum_norm=sum_norm+\$3} END \\ + {print "intermediate discordant ", sum, "normal ", sum_norm, "ratio ppk", sum*1000/(sum_norm+sum)}' 1> ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_mt_del.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf index 1dbe2911..65a05162 100644 --- a/subworkflows/local/analyse_MT.nf +++ b/subworkflows/local/analyse_MT.nf @@ -102,6 +102,7 @@ workflow ANALYSE_MT { tbi = MERGE_ANNOTATE_MT.out.tbi // channel: [ val(meta), path(tbi) ] stats = ALIGN_AND_CALL_MT.out.stats // channel: [ val(meta), path(stats) ] filt_stats = ALIGN_AND_CALL_MT.out.filt_stats // channel: [ val(meta), path(tsv) ] + mt_del_result = ALIGN_AND_CALL_MT.out.mt_del_result // channel: [ val(meta), path(txt) ] stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.stats // channel: [ val(meta), path(stats) ] filt_stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats // channel: [ val(meta), path(tsv) ] haplog = MERGE_ANNOTATE_MT.out.haplog // channel: [ val(meta), path(txt) ] diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf index dc629d84..a913c1ee 100644 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ b/subworkflows/local/mitochondria/align_and_call_MT.nf @@ -13,6 +13,7 @@ include { HAPLOCHECK as HAPLOCHECK_MT } fr include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main' include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' +include { MT_DELETION } from '../../../modules/local/mt_deletion_script' workflow ALIGN_AND_CALL_MT { take: @@ -49,6 +50,8 @@ workflow ALIGN_AND_CALL_MT { ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true) ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals) + MT_DELETION(ch_sort_index_bam, ch_fasta) + GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[]) HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf) @@ -69,16 +72,18 @@ workflow ALIGN_AND_CALL_MT { ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first()) + ch_versions = ch_versions.mix(MT_DELETION.out.versions.first()) ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first()) ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first()) emit: - vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ] - tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ] - stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ] - txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ] - html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ] + tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ] + stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ] + filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ] + txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ] + html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ] + mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ] + versions = ch_versions // channel: [ path(versions.yml) ] }