diff --git a/assets/schema_input.json b/assets/schema_input.json index 555f2c0a..191f1394 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -10,7 +10,7 @@ "sample": { "type": "string", "exists": true, - "meta": ["id"], + "meta": ["id", "sample"], "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index e7314f2a..17a6252b 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -7,7 +7,7 @@ include { ALIGN_SENTIEON } from './alignment/align_sentieon' include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' include { ALIGN_MT } from './alignment/align_MT' include { ALIGN_MT as ALIGN_MT_SHIFT } from './alignment/align_MT' -include { CONVERT_MT_BAM_TO_FASTQ } from './convert_mt_bam_to_fastq' +include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq' workflow ALIGN { take: diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf index 5a6b65b5..8f449e7b 100644 --- a/subworkflows/local/annotate_mt_snvs.nf +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -36,12 +36,12 @@ workflow ANNOTATE_MT_SNVS { // Pick input for vep ch_mt_vcf - .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) - .branch { it -> - merged: it[2].equals("null") + .join(ANNOTATE_CADD.out.vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null` + .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null] + merged: it[2].equals(null) return [it[0], it[1]] - cadd: !(it[2].equals("null")) - return [it[2], it[3]] + cadd: !(it[2].equals(null)) + return [it[0], it[2]] } .set { ch_for_mix } ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd) diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index d0846e6e..28f4e8db 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -50,8 +50,6 @@ workflow CALL_STRUCTURAL_VARIANTS { CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) -// CALL_SV_MT_SHIFT (ch_mtshift_bam_bai, ch_mtshift_fasta) - //merge if (params.skip_cnv_calling) { tiddit_vcf diff --git a/subworkflows/local/convert_mt_bam_to_fastq.nf b/subworkflows/local/convert_mt_bam_to_fastq.nf deleted file mode 100644 index ca19ea82..00000000 --- a/subworkflows/local/convert_mt_bam_to_fastq.nf +++ /dev/null @@ -1,36 +0,0 @@ -// -// Prepare bam files for MT allignment -// - -include { GATK4_PRINTREADS as GATK4_PRINTREADS_MT } from '../../modules/nf-core/gatk4/printreads/main' -include { GATK4_REVERTSAM as GATK4_REVERTSAM_MT } from '../../modules/nf-core/gatk4/revertsam/main' -include { GATK4_SAMTOFASTQ as GATK4_SAMTOFASTQ_MT } from '../../modules/nf-core/gatk4/samtofastq/main' - -workflow CONVERT_MT_BAM_TO_FASTQ { - take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - - main: - ch_versions = Channel.empty() - - // Outputs bam containing only MT - GATK4_PRINTREADS_MT ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_genome_dict ) - - // Removes alignment information - GATK4_REVERTSAM_MT ( GATK4_PRINTREADS_MT.out.bam ) - - // Outputs fastq files - GATK4_SAMTOFASTQ_MT ( GATK4_REVERTSAM_MT.out.bam ) - - ch_versions = ch_versions.mix(GATK4_PRINTREADS_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_REVERTSAM_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_SAMTOFASTQ_MT.out.versions.first()) - - emit: - fastq = GATK4_SAMTOFASTQ_MT.out.fastq // channel: [ val(meta), [ path(fastq) ] ] - bam = GATK4_REVERTSAM_MT.out.bam // channel: [ val(meta), path(bam) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf deleted file mode 100644 index d857b11a..00000000 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ /dev/null @@ -1,96 +0,0 @@ -// -// Align and call MT -// - -include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' -include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' -include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' -include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' -include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main' -include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main' -include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main' -include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { MT_DELETION } from '../../../modules/local/mt_deletion_script' -include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main' - -workflow ALIGN_AND_CALL_MT { - take: - ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] - ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] - ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_intervals // channel: [mandatory] [ path(interval_list) ] - - main: - ch_versions = Channel.empty() - - BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) - - SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai ) - - Channel.empty() - .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] }) - .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) - .set {ch_bam_ubam} - - GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict) - - PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam) - - PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai) - - SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam) - - SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) - ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true) - ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals) - - EKLIPSE_MT(ch_sort_index_bam,[]) - - MT_DELETION(ch_sort_index_bam, ch_fasta) - - GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[]) - - HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf) - - // Filter Mutect2 calls - ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true) - ch_to_filt = ch_mutect_out.map { - meta, vcf, tbi, stats -> - return [meta, vcf, tbi, stats, [], [], [], []] - } - - GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict) - - ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first()) - ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first()) - ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first()) - ch_versions = ch_versions.mix(EKLIPSE_MT.out.versions.first()) - ch_versions = ch_versions.mix(MT_DELETION.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first()) - ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first()) - - emit: - vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ] - tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ] - stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ] - eklipse_del = EKLIPSE_MT.out.deletions // channel: [ val(meta), path(csv) ] - eklipse_genes = EKLIPSE_MT.out.genes // channel: [ val(meta), path(csv) ] - eklipse_circos = EKLIPSE_MT.out.circos // channel: [ val(meta), path(png) ] - txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ] - html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ] - mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf deleted file mode 100644 index 43edd93b..00000000 --- a/subworkflows/local/mitochondria/merge_annotate_MT.nf +++ /dev/null @@ -1,176 +0,0 @@ -// -// Merge and annotate MT -// - -include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' -include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main' -include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main' -include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main' -include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main' -include { ANNOTATE_CADD } from '../annotation/annotate_cadd' -include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main' -include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main' - -workflow MERGE_ANNOTATE_MT { - take: - ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] - ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] - val_vep_genome // string: [mandatory] GRCh37 or GRCh38 - val_vep_cache_version // string: [mandatory] 107 - ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - - main: - ch_versions = Channel.empty() - - ch_vcfs = ch_vcf1 - .join(ch_vcf2, remainder: true) - .map{ meta, vcf1, vcf2 -> - [meta, [vcf1, vcf2]] - } - GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict) - - // Filtering Variants - GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf - .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_filt_vcf } - GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict) - - // Spliting multiallelic calls - GATK4_VARIANTFILTRATION_MT.out.vcf - .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_in_split } - SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta) - TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf) - - // Removing duplicates and merging if there is more than one sample - SPLIT_MULTIALLELICS_MT.out.vcf - .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_in_remdup } - REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta) - TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf) - - REMOVE_DUPLICATES_MT.out.vcf - .collect{it[1]} - .ifEmpty([]) - .toList() - .set { file_list_vcf } - - TABIX_TABIX_MT2.out.tbi - .collect{it[1]} - .ifEmpty([]) - .toList() - .set { file_list_tbi } - - ch_case_info - .combine(file_list_vcf) - .combine(file_list_tbi) - .set { ch_rem_dup_vcf_tbi } - - ch_rem_dup_vcf_tbi.branch { - meta, vcf, tbi -> - single: vcf.size() == 1 - return [meta, vcf] - multiple: vcf.size() > 1 - return [meta, vcf, tbi] - }.set { ch_case_vcf } - - BCFTOOLS_MERGE_MT( ch_case_vcf.multiple, - ch_genome_fasta, - ch_genome_fai, - [] - ) - - BCFTOOLS_MERGE_MT.out.merged_variants - .mix(ch_case_vcf.single) - .set { ch_annotation_in } - - TABIX_TABIX_MERGE(ch_annotation_in) - - // Annotating with CADD - ANNOTATE_CADD ( - ch_annotation_in, - TABIX_TABIX_MERGE.out.tbi, - ch_cadd_header, - ch_cadd_resources - ) - - // Pick input for vep - ch_annotation_in - .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) - .branch { it -> - merged: it[2].equals("null") - return [it[0], it[1]] - cadd: !(it[2].equals("null")) - return [it[2], it[3]] - } - .set { ch_for_mix } - ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd) - - // Annotating with ensembl Vep - ENSEMBLVEP_MT( - ch_vep_in, - ch_genome_fasta, - val_vep_genome, - "homo_sapiens", - val_vep_cache_version, - ch_vep_cache, - [] - ) - - // Running vcfanno - TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz) - ENSEMBLVEP_MT.out.vcf_gz - .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} - .set { ch_in_vcfanno } - - VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources) - - // HMTNOTE ANNOTATE - HMTNOTE_ANNOTATE(VCFANNO_MT.out.vcf) - HMTNOTE_ANNOTATE.out.vcf.map{meta, vcf -> - return [meta, WorkflowRaredisease.replaceSpacesInInfoColumn(vcf, vcf.parent.toString(), vcf.baseName)] - } - .set { ch_hmtnote_reformatted } - ZIP_TABIX_HMTNOTE(ch_hmtnote_reformatted) - - // Prepare output - ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] } - ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] } - - // Running haplogrep2 - HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz") - - ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) - ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) - ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) - ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) - ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions) - ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) - ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions) - ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions) - - emit: - haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] - vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] - tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] - report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf index 81738085..cb14c9b7 100644 --- a/subworkflows/local/variant_calling/postprocess_MT_calls.nf +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -1,5 +1,5 @@ // -// Merge and annotate MT +// Merge and normalize MT variants // include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' @@ -25,7 +25,7 @@ workflow POSTPROCESS_MT_CALLS { main: ch_versions = Channel.empty() - // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT + // LIFTOVER SHIFTED VCF TO REFERENCE MT POSITIONS PICARD_LIFTOVERVCF ( ch_mtshift_vcf, ch_genome_dictionary,