Skip to content

Commit

Permalink
Merge pull request #349 from nf-core/add_deletion_script
Browse files Browse the repository at this point in the history
feat added MT deletion script
  • Loading branch information
Lucpen authored May 30, 2023
2 parents ced4ee8 + 203d9f8 commit b9bac75
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- GATK's ShiftFasta to generate all the files required for mitochondrial analysis
- Feature to calculate CADD scores for indels
- HmtNote to annotate mitochondria
- MT del script to detect mitochondrial deltions

## v1.0.0 - [2023-03-31]

Expand Down
9 changes: 9 additions & 0 deletions conf/modules/align_and_call_MT.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ process {
ext.prefix = { "${meta.id}_sorted" }
}

withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:MT_DELETION' {
ext.args = '-s --insert-size 16000'
publishDir = [
path: { "${params.outdir}/mt_sv" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MUTECT2_MT' {
ext.args = '--mitochondria-mode TRUE'
}
Expand Down
10 changes: 10 additions & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,12 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [VEP](#vep-1)
- [Mitochondrial analysis](#mitochondrial-analysis)
- [Alignment and variant calling](#alignment-and-variant-calling)
- [MT deletion script](#mt-deletion-script)
- [Annotation:](#annotation-)
- [HaploGrep2](#haplogrep2)
- [vcfanno](#vcfanno-1)
- [VEP](#vep-2)
- [HmtNote](#hmtnote)
- [Rank variants and filtering](#rank-variants-and-filtering)
- [GENMOD](#genmod)
- [Pipeline information](#pipeline-information)
Expand Down Expand Up @@ -360,6 +362,10 @@ Mitochondrial analysis is run by default, to turn it off set `--skip_mt_analysis

The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sensitivity to low AF and separate alignments using opposite genome breakpoints to allow for the tracing of lineages of rare mitochondrial variants.

##### MT deletion script

[MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools.

#### Annotation:

##### HaploGrep2
Expand All @@ -384,6 +390,10 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files

[CADD](https://cadd.gs.washington.edu/) is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome. In nf-core/raredisease, SNVs can be annotated with precomputed CADD scores using vcfanno. However, for small indels they will be calculated on the fly by CADD. The output files are not published in the output folder by default, and is passed to VEP for further annotation.

##### Hmtnote

[HmtNote](https://github.com/robertopreste/HmtNote) annotates vcf containing human mitochondrial variants with HmtVar. It will run offline by default wiht a database within the container.

##### VEP

[VEP](https://www.ensembl.org/info/docs/tools/vep/index.html) determines the effect of your variants on genes, transcripts, and protein sequence, as well as regulatory regions.
Expand Down
48 changes: 48 additions & 0 deletions modules/local/mt_deletion_script.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
process MT_DELETION {
tag "$meta.id"
label 'process_single'

conda "bioconda::samtools=1.16.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }"

input:
tuple val(meta), path(input), path(input_index)
tuple val(meta2), path(fasta)

output:
tuple val(meta), path('*.txt'), emit: mt_del_result
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def reference = fasta ? "--reference ${fasta}" : ""

"""
samtools stats --threads ${task.cpus} $args ${reference} ${input} | \\
grep -E ^IS | \\
awk 'BEGIN {sum=0} (\$2>=1200 && \$2<=15000) {sum=sum+\$3} (\$2<1200 || \$2>15000) {sum_norm=sum_norm+\$3} END \\
{print "intermediate discordant ", sum, "normal ", sum_norm, "ratio ppk", sum*1000/(sum_norm+sum)}' 1> ${prefix}.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}_mt_del.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
END_VERSIONS
"""
}
1 change: 1 addition & 0 deletions subworkflows/local/analyse_MT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ workflow ANALYSE_MT {
tbi = MERGE_ANNOTATE_MT.out.tbi // channel: [ val(meta), path(tbi) ]
stats = ALIGN_AND_CALL_MT.out.stats // channel: [ val(meta), path(stats) ]
filt_stats = ALIGN_AND_CALL_MT.out.filt_stats // channel: [ val(meta), path(tsv) ]
mt_del_result = ALIGN_AND_CALL_MT.out.mt_del_result // channel: [ val(meta), path(txt) ]
stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.stats // channel: [ val(meta), path(stats) ]
filt_stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats // channel: [ val(meta), path(tsv) ]
haplog = MERGE_ANNOTATE_MT.out.haplog // channel: [ val(meta), path(txt) ]
Expand Down
19 changes: 12 additions & 7 deletions subworkflows/local/mitochondria/align_and_call_MT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ include { HAPLOCHECK as HAPLOCHECK_MT } fr
include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main'
include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main'
include { MT_DELETION } from '../../../modules/local/mt_deletion_script'

workflow ALIGN_AND_CALL_MT {
take:
Expand Down Expand Up @@ -49,6 +50,8 @@ workflow ALIGN_AND_CALL_MT {
ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true)
ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals)

MT_DELETION(ch_sort_index_bam, ch_fasta)

GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[])

HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf)
Expand All @@ -69,16 +72,18 @@ workflow ALIGN_AND_CALL_MT {
ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first())
ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first())
ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first())
ch_versions = ch_versions.mix(MT_DELETION.out.versions.first())
ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first())
ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first())

emit:
vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ]
tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ]
stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ]
filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ]
html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ]
versions = ch_versions // channel: [ path(versions.yml) ]
vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ]
tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ]
stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ]
filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ]
html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ]
mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ]
versions = ch_versions // channel: [ path(versions.yml) ]
}

0 comments on commit b9bac75

Please sign in to comment.