From 199fc7289d2780471397098578c0c51329378b8d Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 22 May 2019 15:37:49 +0200 Subject: [PATCH 01/11] add tiddit to container --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 891322f26e..023b057811 100644 --- a/environment.yml +++ b/environment.yml @@ -26,5 +26,6 @@ dependencies: - samtools=1.9 - snpeff=4.3.1t - strelka=2.9.10 + - tiddit=2.6.0 - vcfanno=0.3.1 - vcftools=0.1.16 From a385337162428ef7e0180b6430a9f3fd9a30951e Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 22 May 2019 15:37:58 +0200 Subject: [PATCH 02/11] add process for TIDDIT --- main.nf | 57 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 44cbb28c91..a7e1506074 100644 --- a/main.nf +++ b/main.nf @@ -50,12 +50,14 @@ def helpMessage() { Available: Mapping, Recalibrate, VariantCalling, Annotate Default: Mapping --targetBED Target BED file for targeted or whole exome sequencing - --tools Specify tools to use for variant calling + --tools Specify tools to use for variant calling: Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller - Manta, mpileup, MuTect2, Strelka + Manta, mpileup, MuTect2, Strelka, TIDDIT + or for annotation: + snpEff, VEP Default: HaplotypeCaller, Manta, Strelka --annotateTools Specify from which tools Sarek will annotate VCF, only for step annotate - Available: HaplotypeCaller, Manta, MuTect2, Strelka + Available: HaplotypeCaller, Manta, MuTect2, Strelka, TIDDIT References If not specified in the configuration file or you wish to overwrite any of the references. 
--acLoci acLoci file @@ -792,7 +794,7 @@ bamRecal = bamRecal.dump(tag:'BAM') // Manta will be run in Germline mode, or in Tumor mode depending on status // HaplotypeCaller and Strelka will be run for Normal and Tumor samples -(bamMantaSingle, bamStrelkaSingle, bamRecalAllTemp, bamRecalAll) = bamRecal.into(4) +(bamMantaSingle, bamStrelkaSingle, bamTIDDIT, bamRecalAll, bamRecalAllTemp) = bamRecal.into(5) // To speed Variant Callers up we are chopping the reference into smaller pieces // Do variant calling by this intervals, and re-merge the VCFs @@ -975,6 +977,40 @@ process MantaSingle { vcfMantaSingle = vcfMantaSingle.dump(tag:'Single Manta') +// STEP TIDDIT + +process TIDDIT { + tag {idSample} + + publishDir "${params.outdir}/VariantCalling/${idSample}/TIDDIT", mode: params.publishDirMode + + publishDir params.outdir, mode: params.publishDirMode, + saveAs: { + if (it == "TIDDIT_${idSample}.vcf") "VariantCalling/${idSample}/TIDDIT/${it}" + else "Reports/${idSample}/TIDDIT/${it}" + } + + input: + set idPatient, idSample, file(bam), file(bai) from bamTIDDIT + set file(genomeFile), file(genomeIndex) from Channel.value([ + referenceMap.genomeFile, + referenceMap.genomeIndex + ]) + + output: + set val("TIDDIT"), idPatient, idSample, file("*.vcf") into vcfTIDDIT + set file("*."), file("*.") into tidditOut + + when: 'tiddit' in tools + + script: + """ + tiddit --sv -o TIDDIT_${idSample}.vcf --bam ${bam} --ref ${genomeFile} + """ +} + +vcfTIDDIT = vcfTIDDIT.dump(tag:'TIDDIT') + /* ================================================================================ SOMATIC VARIANT CALLING @@ -1104,7 +1140,7 @@ process ConcatVCF { // we have this funny *_* pattern to avoid copying the raw calls to publishdir set variantCaller, idPatient, idSample, file("*_*.vcf.gz"), file("*_*.vcf.gz.tbi") into vcfConcatenated - when: ('haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools) + when: 'haplotypecaller' in tools || 'mutect2' in tools || 'freebayes' in tools script: 
if (variantCaller == 'HaplotypeCallerGVCF') outputFile = "HaplotypeCaller_${idSample}.g.vcf" @@ -1191,7 +1227,7 @@ process Manta { options = params.targetBED ? "--exome --callRegions call_targets.bed.gz" : "" """ ${beforeScript} - configManta.py \ + configManta.py \ --normalBam ${bamNormal} \ --tumorBam ${bamTumor} \ --reference ${genomeFile} \ @@ -1391,7 +1427,7 @@ process Mpileup { output: set idPatient, idSample, file("${intervalBed.baseName}_${idSample}.pileup.gz") into mpileupMerge - when: ('controlfreec' in tools || 'mpileup' in tools) + when: 'controlfreec' in tools || 'mpileup' in tools script: """ @@ -1417,7 +1453,7 @@ process MergeMpileup { output: set idPatient, idSample, file("${idSample}.pileup.gz") into mpileupOut - when: ('controlfreec' in tools || 'mpileup' in tools) + when: 'controlfreec' in tools || 'mpileup' in tools script: """ @@ -1581,6 +1617,10 @@ vcfKeep = Channel.empty().mix( vcfStrelkaBPSNVS.map { variantcaller, idPatient, idSample, vcf, tbi -> [variantcaller, idSample, vcf[1]] + }, + vcfTIDDIT.map { + variantcaller, idPatient, idSample, vcf, tbi -> + [variantcaller, idSample, vcf] }) (vcfBCFtools, vcfVCFtools, vcfAnnotation) = vcfKeep.into(3) @@ -2192,6 +2232,7 @@ def defineToolList() { 'mutect2', 'snpeff', 'strelka', + 'tiddit', 'vep' ] } From 1fc66547b35a3e4896abfa9c6b590b97549f1186 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Wed, 22 May 2019 16:14:03 +0200 Subject: [PATCH 03/11] update TIDDIT to 2.7.1 --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 023b057811..2ef20b6310 100644 --- a/environment.yml +++ b/environment.yml @@ -26,6 +26,6 @@ dependencies: - samtools=1.9 - snpeff=4.3.1t - strelka=2.9.10 - - tiddit=2.6.0 + - tiddit=2.7.1 - vcfanno=0.3.1 - vcftools=0.1.16 From 05d155d213cb318e69c12495086a9ab2763a9849 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 2 Aug 2019 10:16:37 +0200 Subject: [PATCH 04/11] fix merge issues --- main.nf | 16 
+++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/main.nf b/main.nf index 8856fdf63f..07ab375805 100644 --- a/main.nf +++ b/main.nf @@ -49,31 +49,21 @@ def helpMessage() { --step Specify starting step Available: Mapping, Recalibrate, VariantCalling, Annotate Default: Mapping -<<<<<<< HEAD - --targetBED Target BED file for targeted or whole exome sequencing --tools Specify tools to use for variant calling: Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller Manta, mpileup, MuTect2, Strelka, TIDDIT - or for annotation: - snpEff, VEP - Default: HaplotypeCaller, Manta, Strelka - --annotateTools Specify from which tools Sarek will annotate VCF, only for step annotate - Available: HaplotypeCaller, Manta, MuTect2, Strelka, TIDDIT -======= - --tools Specify tools to use for variant calling, and annotation - Available: ASCAT, ControlFREEC, FreeBayes, HaplotypeCaller - Manta, mpileup, MuTect2, Strelka, snpEff, VEP, merge + and/or for annotation: + snpEff, VEP, merge Default: None --skip Specify which QC tools to skip when running Sarek Available: bamQC, BCFtools, FastQC, MultiQC, samtools, vcftools, versions Default: None --annotateTools Specify from which tools Sarek will look for VCF files to annotate, only for step annotate - Available: HaplotypeCaller, Manta, MuTect2, Strelka + Available: HaplotypeCaller, Manta, MuTect2, Strelka, TIDDIT Default: None --annotation_cache Enable the use of cache for annotation, to be used with --snpEff_cache and/or --vep_cache --snpEff_cache Specity the path to snpEff cache, to be used with --annotation_cache --vep_cache Specity the path to VEP cache, to be used with --annotation_cache ->>>>>>> upstream/dev References If not specified in the configuration file or you wish to overwrite any of the references. 
--acLoci acLoci file From 9f464064d1935ae0aef7da30b8a1931a30a3ead3 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Fri, 2 Aug 2019 10:20:13 +0200 Subject: [PATCH 05/11] feat: add tests for TIDDIT --- scripts/run_tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 43661935ed..46d85d17f6 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -88,7 +88,7 @@ fi if [[ ALL,SOMATIC =~ $TEST ]] then - OPTIONS="--tools FreeBayes,HaplotypeCaller,Manta,Strelka,Mutect2" + OPTIONS="--tools FreeBayes,HaplotypeCaller,Manta,Strelka,TIDDIT,Mutect2" if [[ $OFFLINE == false ]] then run_sarek ${OPTIONS} @@ -100,7 +100,7 @@ fi if [[ ALL,TARGETED =~ $TEST ]] then - OPTIONS="--tools FreeBayes,HaplotypeCaller,Manta,Strelka,Mutect2" + OPTIONS="--tools FreeBayes,HaplotypeCaller,Manta,Strelka,TIDDIT,Mutect2" if [[ $OFFLINE == false ]] then run_sarek ${OPTIONS} --targetBED https://github.com/nf-core/test-datasets/raw/sarek/testdata/target.bed @@ -135,7 +135,7 @@ fi if [[ MULTIPLE =~ $TEST ]] then - OPTIONS="--tools FreeBayes,HaplotypeCaller,Manta,Strelka,Mutect2,snpEff,VEP,merge" + OPTIONS="--tools FreeBayes,HaplotypeCaller,Manta,Strelka,TIDDIT,Mutect2,snpEff,VEP,merge" if [[ $OFFLINE == false ]] then run_sarek ${OPTIONS} --sample https://github.com/nf-core/test-datasets/raw/sarek/testdata/tsv/tiny-multiple-https.tsv From 22ca372c6a7208553954b77d11ef3c45ab8647c9 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Mon, 5 Aug 2019 17:06:42 +0200 Subject: [PATCH 06/11] feat: update CHANGELOG --- CHANGELOG.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 910f48ef5b..a5cf4380c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,13 +12,13 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) ### `Added` - [#2](https://github.com/nf-core/sarek/pull/2) - Create `nf-core/sarek` `environment.yml` file -- 
[#2](https://github.com/nf-core/sarek/pull/2), [#3](https://github.com/nf-core/sarek/pull/3), [#4](https://github.com/nf-core/sarek/pull/4), [#5](https://github.com/nf-core/sarek/pull/5), [#7](https://github.com/nf-core/sarek/pull/7), [#9](https://github.com/nf-core/sarek/pull/9), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12) - Add CI for `nf-core/sarek` +- [#2](https://github.com/nf-core/sarek/pull/2), [#3](https://github.com/nf-core/sarek/pull/3), [#4](https://github.com/nf-core/sarek/pull/4), [#5](https://github.com/nf-core/sarek/pull/5), [#7](https://github.com/nf-core/sarek/pull/7), [#9](https://github.com/nf-core/sarek/pull/9), [#10](https://github.com/nf-core/sarek/pull/10), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12) - Add CI for `nf-core/sarek` - [#3](https://github.com/nf-core/sarek/pull/3) - Add preprocessing to `nf-core/sarek` - [#4](https://github.com/nf-core/sarek/pull/4) - Add variant calling to `nf-core/sarek` with `HaplotypeCaller`, and single mode `Manta` and `Strelka` - [#5](https://github.com/nf-core/sarek/pull/5) - Add variant calling to `nf-core/sarek` with `Manta`, `Strelka`, `Strelka Best Practices`, `MuTecT2`, `FreeBayes`, `ASCAT`, `ControlFREEC` - [#6](https://github.com/nf-core/sarek/pull/6) - Add default containers for annotation to `nf-core/sarek` -- [#7](https://github.com/nf-core/sarek/pull/7) - Add annotation - [#7](https://github.com/nf-core/sarek/pull/7) - Add MultiQC +- [#7](https://github.com/nf-core/sarek/pull/7) - Add annotation - [#7](https://github.com/nf-core/sarek/pull/7) - Add social preview image in `png` and `svg` format - [#7](https://github.com/nf-core/sarek/pull/7), [#8](https://github.com/nf-core/sarek/pull/8), [#11](https://github.com/nf-core/sarek/pull/11) - Add helper script `run_tests.sh` to run different tests - [#7](https://github.com/nf-core/sarek/pull/7), [#8](https://github.com/nf-core/sarek/pull/8), 
[#9](https://github.com/nf-core/sarek/pull/9) - Add automatic build of specific containers for annotation for `GRCh37`, `GRCh38` and `GRCm38` using `CircleCI` @@ -26,23 +26,24 @@ Initial release of `nf-core/sarek`, created with the [nf-core](http://nf-co.re/) - [#7](https://github.com/nf-core/sarek/pull/7), [#9](https://github.com/nf-core/sarek/pull/9), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12) - Add helper script `download_image.sh` to download containers for testing - [#8](https://github.com/nf-core/sarek/pull/8) - Add test configation for easier testing - [#9](https://github.com/nf-core/sarek/pull/9), [#11](https://github.com/nf-core/sarek/pull/11) - Add scripts for `ASCAT` +- [#10](https://github.com/nf-core/sarek/pull/10) - Add `TIDDIT` to detect structural variants - [#11](https://github.com/nf-core/sarek/pull/11) - Add automatic build of specific containers for annotation for `CanFam3.1` using `CircleCI` - [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12) - Add posters and abstracts -- [#12](https://github.com/nf-core/sarek/pull/12) - Use `label` for processes configation -- [#12](https://github.com/nf-core/sarek/pull/12) - Add helper scripts `filter_locifile.py` and `selectROI.py` - [#12](https://github.com/nf-core/sarek/pull/12) - Add helper script `make_snapshot.sh` to make an archive for usage on a secure cluster +- [#12](https://github.com/nf-core/sarek/pull/12) - Add helper scripts `filter_locifile.py` and `selectROI.py` +- [#12](https://github.com/nf-core/sarek/pull/12) - Use `label` for processes configation - [#13](https://github.com/nf-core/sarek/pull/13) - Add Citation documentation - [#13](https://github.com/nf-core/sarek/pull/13) - Add `BamQC` process - [#13](https://github.com/nf-core/sarek/pull/13) - Add `CompressVCFsnpEff` and `CompressVCFvep` processes - [#18](https://github.com/nf-core/sarek/pull/18) - Add `--no-reports` option for tests + 
add snpEff,VEP,merge to MULTIPLE test -- [#18](https://github.com/nf-core/sarek/pull/18) - Add possibility to download other genome for `sareksnpeff` and `sarekvep` containers -- [#18](https://github.com/nf-core/sarek/pull/18) - Add params `--skip` to skip specified QC tools - [#18](https://github.com/nf-core/sarek/pull/18) - Add logo to MultiQC report +- [#18](https://github.com/nf-core/sarek/pull/18) - Add params `--skip` to skip specified QC tools +- [#18](https://github.com/nf-core/sarek/pull/18) - Add possibility to download other genome for `sareksnpeff` and `sarekvep` containers - [#20](https://github.com/nf-core/sarek/pull/20) - Add `markdownlint` config file ### `Changed` -- [#1](https://github.com/nf-core/sarek/pull/1), [#2](https://github.com/nf-core/sarek/pull/2), [#3](https://github.com/nf-core/sarek/pull/3), [#4](https://github.com/nf-core/sarek/pull/4), [#5](https://github.com/nf-core/sarek/pull/5), [#6](https://github.com/nf-core/sarek/pull/6), [#7](https://github.com/nf-core/sarek/pull/7), [#8](https://github.com/nf-core/sarek/pull/8), [#9](https://github.com/nf-core/sarek/pull/9), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12), [#18](https://github.com/nf-core/sarek/pull/18), [#20](https://github.com/nf-core/sarek/pull/20) - Update docs +- [#1](https://github.com/nf-core/sarek/pull/1), [#2](https://github.com/nf-core/sarek/pull/2), [#3](https://github.com/nf-core/sarek/pull/3), [#4](https://github.com/nf-core/sarek/pull/4), [#5](https://github.com/nf-core/sarek/pull/5), [#6](https://github.com/nf-core/sarek/pull/6), [#7](https://github.com/nf-core/sarek/pull/7), [#8](https://github.com/nf-core/sarek/pull/8), [#9](https://github.com/nf-core/sarek/pull/9), [#10](https://github.com/nf-core/sarek/pull/10), [#11](https://github.com/nf-core/sarek/pull/11), [#12](https://github.com/nf-core/sarek/pull/12), [#18](https://github.com/nf-core/sarek/pull/18), [#20](https://github.com/nf-core/sarek/pull/20) - 
Update docs - [#4](https://github.com/nf-core/sarek/pull/4) - Update `cancerit-allelecount` from `2.1.2` to `4.0.2` - [#4](https://github.com/nf-core/sarek/pull/4) - Update `gatk4` from `4.1.1.0` to `4.1.2.0` - [#7](https://github.com/nf-core/sarek/pull/7) - `--sampleDir` is now deprecated, use `--sample` instead From 3da452dbd6c8b14a224024e241ce446571a0b298 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 6 Aug 2019 11:49:18 +0200 Subject: [PATCH 07/11] feat: Add documentation for TIDDIT --- docs/containers.md | 2 ++ docs/output.md | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/docs/containers.md b/docs/containers.md index fd5de52105..20d7ee30e0 100644 --- a/docs/containers.md +++ b/docs/containers.md @@ -32,6 +32,7 @@ For annotation, the main container can be used, but the cache has to be download - Contain **[samtools][samtools-link]** 1.9 - Contain **[snpEff][snpeff-link]** 4.3.1t - Contain **[Strelka2][strelka-link]** 2.9.10 +- Contain **[TIDDIT][tiddit-link]** 2.7.1 - Contain **[VCFanno][vcfanno-link]** 0.3.1 - Contain **[VCFtools][vcftools-link]** 0.1.16 - Contain **[VEP][vep-link]** 96.0 @@ -111,6 +112,7 @@ The `environment.yml` file can easilly be modified if particular versions of too [sareksnpeff-docker-badge]: https://img.shields.io/docker/automated/nfcore/sareksnpeff.svg [sareksnpeff-docker-link]: https://hub.docker.com/r/nfcore/sareksnpeff [strelka-link]: https://github.com/Illumina/strelka +[tiddit-link]: https://github.com/SciLifeLab/TIDDIT [vcfanno-link]: https://github.com/brentp/vcfanno [vcftools-link]: https://vcftools.github.io/index.html [vep-link]: https://github.com/Ensembl/ensembl-vep diff --git a/docs/output.md b/docs/output.md index 105010936c..5e26d8cee0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -24,6 +24,7 @@ The pipeline processes data using the following steps: * [`Strelka2`](#Strelka2) * Structural variants * [`Manta`](#Manta) + * [`TIDDIT`](#TIDDIT) * Sample 
heterogeneity, ploidy and CNVs * `alleleCounter` * [`ConvertAlleleCounts`](#ConvertAlleleCounts) @@ -48,6 +49,7 @@ The pipeline processes data using the following steps: * [`MultiQC`](#MultiQC) ## Preprocessing + Sarek preprocesses raw FastQ files or unmapped BAM files, based on [GATK best practices](https://software.broadinstitute.org/gatk/best-practices/). BAM files with Recalibration tables can also be used as an input to start with the recalibration of said BAM files, for more information see [TSV files output information](#TSV-files) @@ -79,6 +81,7 @@ For all samples: * BAM file and index ### TSV files + The TSV files are autogenerated and can be used by Sarek for further processing and/or variant calling. For further reading and documentation see the [input documentation](https://github.com/nf-core/sarek/blob/master/docs/input.md). @@ -89,14 +92,16 @@ For all samples: * `duplicateMarked.tsv` and `recalibrated.tsv` * TSV files to start Sarek from `recalibration` or `variantcalling` steps. * `duplicateMarked_[SAMPLE].tsv` and `recalibrated_[SAMPLE].tsv` - * TSV files to start Sarek from `recalibration` or `variantcalling` steps for a specific sample. + * TSV files to start Sarek from `recalibration` or `variantcalling` steps for a specific sample. ## Variant Calling + All the results regarding variant-calling are collected in this directory. Recalibrated BAM files can also be used as an input to start the Variant Calling, for more information see [TSV files output information](#TSV-files) ### FreeBayes + [FreeBayes](https://github.com/ekg/freebayes) is a Bayesian genetic variant detector designed to find small polymorphisms, specifically SNPs, indels, MNPs, and complex events smaller than the length of a short-read sequencing alignment.. For further reading and documentation see the [FreeBayes manual](https://github.com/ekg/freebayes/blob/master/README.md#user-manual-and-guide). 
@@ -108,6 +113,7 @@ For a Tumor/Normal pair only: * VCF with Tabix index ### HaplotypeCaller + [GATK HaplotypeCaller](https://github.com/broadinstitute/gatk) calls germline SNPs and indels via local re-assembly of haplotypes. Germline calls are provided for all samples, to able comparison of both tumor and normal for possible mixup. @@ -121,6 +127,7 @@ For all samples: * VCF with Tabix index ### GenotypeGVCFs + [GATK GenotypeGVCFs](https://github.com/broadinstitute/gatk) performs joint genotyping on one or more samples pre-called with HaplotypeCaller. Germline calls are provided for all samples, to able comparison of both tumor and normal for possible mixup. @@ -134,6 +141,7 @@ For all samples: * VCF with Tabix index ### MuTect2 + [GATK MuTect2](https://github.com/broadinstitute/gatk) calls somatic SNVs and indels via local assembly of haplotypes. For further reading and documentation see the [MuTect2 manual](https://software.broadinstitute.org/gatk/documentation/tooldocs/4.1.2.0/org_broadinstitute_hellbender_tools_walkers_mutect_Mutect2.php). @@ -144,7 +152,22 @@ For a Tumor/Normal pair only: * `MuTect2_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz` and `MuTect2_[TUMORSAMPLE]_vs_[NORMALSAMPLE].vcf.gz.tbi` * VCF with Tabix index +### TIDDIT + +[TIDDIT](https://github.com/SciLifeLab/TIDDIT)identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions. + +Germline calls are provided for all samples, to able comparison of both tumor and normal for possible mixup. + +For further reading and documentation see the [TIDDIT manual](https://github.com/SciLifeLab/TIDDIT/blob/master/README.md). 
+ +For all samples: +**Output directory: `results/VariantCalling/[SAMPLE]/TIDDIT`** + +* `TIDDIT_[SAMPLE].g.vcf.gz` and `TIDDIT_[SAMPLE].g.vcf.gz.tbi` + * VCF with Tabix index + ### Strelka2 + [Strelka2](https://github.com/Illumina/strelka) is a fast and accurate small variant caller optimized for analysis of germline variation in small cohorts and somatic variation in tumor/normal sample pairs. For further reading and documentation see the [Strelka2 user guide](https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md). @@ -167,15 +190,17 @@ For a Tumor/Normal pair: Using [Strelka Best Practices](https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md#somatic-configuration-example) with the `candidateSmallIndels` from `Manta`: **Output directory: `results/VariantCalling/[TUMOR_vs_NORMAL]/Strelka`** + * `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_indels.vcf.gz` and `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_indels.vcf.gz.tbi` * VCF with Tabix index * `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_snvs.vcf.gz` and `StrelkaBP_[TUMORSAMPLE]_vs_[NORMALSAMPLE]_somatic_snvs.vcf.gz.tbi` * VCF with Tabix index ### Manta + [Manta](https://github.com/Illumina/manta) calls structural variants (SVs) and indels from mapped paired-end sequencing reads. It is optimized for analysis of germline variation in small sets of individuals and somatic variation in tumor/normal sample pairs. -`Manta` provides a candidate list for small indels also that can be fed to `Strelka` following [Strelka Best Practices](https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md#somatic-configuration-example. +`Manta` provides a candidate list for small indels also that can be fed to `Strelka` following [Strelka Best Practices](https://github.com/Illumina/strelka/blob/v2.9.x/docs/userGuide/README.md#somatic-configuration-example). 
For further reading and documentation see the [Manta user guide](https://github.com/Illumina/manta/blob/master/docs/userGuide/README.md). @@ -188,10 +213,12 @@ For all samples: * VCF with Tabix index For Normal sample only: + * `Manta_[NORMALSAMPLE].diploidSV.vcf.gz` and `Manta_[NORMALSAMPLE].diploidSV.vcf.gz.tbi` * VCF with Tabix index For a Tumor sample only: + * `Manta_[TUMORSAMPLE].tumorSV.vcf.gz` and `Manta_[TUMORSAMPLE].tumorSV.vcf.gz.tbi` * VCF with Tabix index @@ -208,6 +235,7 @@ For a Tumor/Normal pair only: * VCF with Tabix index ### ConvertAlleleCounts + [ConvertAlleleCounts](https://github.com/nf-core/sarek/blob/master/bin/convertAlleleCounts.r) is a R-script for converting output from AlleleCount to BAF and LogR values. For a Tumor/Normal pair only: From e79211ce87fa32f5c3309bb858cca6bd0d235149 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 6 Aug 2019 11:51:53 +0200 Subject: [PATCH 08/11] fix: arrange TIDDIT output --- main.nf | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 07ab375805..b8b1dd7102 100644 --- a/main.nf +++ b/main.nf @@ -967,7 +967,7 @@ process MantaSingle { ]) output: - set val("Manta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfMantaSingle + set val("Manta"), idPatient, idSample, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfMantaSingle when: 'manta' in tools @@ -1025,14 +1025,18 @@ process TIDDIT { ]) output: - set val("TIDDIT"), idPatient, idSample, file("*.vcf") into vcfTIDDIT - set file("*."), file("*.") into tidditOut + set val("TIDDIT"), idPatient, idSample, file("*.vcf.gz"), file("*.tbi") into vcfTIDDIT + set file("TIDDIT_${idSample}.ploidy.tab"), file("TIDDIT_${idSample}.signals.tab") into tidditOut when: 'tiddit' in tools script: """ - tiddit --sv -o TIDDIT_${idSample}.vcf --bam ${bam} --ref ${genomeFile} + tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${genomeFile} + + bgzip --threads ${task.cpus} -c TIDDIT_${idSample}.vcf > 
TIDDIT_${idSample}.vcf.gz + + tabix TIDDIT_${idSample}.vcf.gz """ } @@ -1396,7 +1400,7 @@ alleleCounterOut = alleleCountOutNormal.combine(alleleCountOutTumor) alleleCounterOut = alleleCounterOut.map { idPatientNormal, idSampleNormal, alleleCountOutNormal, - idPatientTumor, idSampleTumor, alleleCountOutTumor -> + idPatientTumor, idSampleTumor, alleleCountOutTumor -> [idPatientNormal, idSampleNormal, idSampleTumor, alleleCountOutNormal, alleleCountOutTumor] } @@ -1525,7 +1529,7 @@ mpileupOut = mpileupOutNormal.combine(mpileupOutTumor) mpileupOut = mpileupOut.map { idPatientNormal, idSampleNormal, mpileupOutNormal, - idPatientTumor, idSampleTumor, mpileupOutTumor -> + idPatientTumor, idSampleTumor, mpileupOutTumor -> [idPatientNormal, idSampleNormal, idSampleTumor, mpileupOutNormal, mpileupOutTumor] } @@ -1862,7 +1866,7 @@ process VEP { } input: - set variantCaller, idSample, file(vcf), file(idx) from vcfVep + set variantCaller, idSample, file(vcf), file(idx) from vcfVep file dataDir from Channel.value(params.vep_cache ? file(params.vep_cache) : "null") val cache_version from Channel.value(params.genomes[params.genome].vepCacheVersion) set file(cadd_WG_SNVs), file(cadd_WG_SNVs_tbi), file(cadd_InDels), file(cadd_InDels_tbi) from Channel.value([ @@ -1925,7 +1929,7 @@ process VEPmerge { } input: - set variantCaller, idSample, file(vcf), file(idx) from compressVCFsnpEffOut + set variantCaller, idSample, file(vcf), file(idx) from compressVCFsnpEffOut file dataDir from Channel.value(params.vep_cache ? 
file(params.vep_cache) : "null") val cache_version from Channel.value(params.genomes[params.genome].vepCacheVersion) set file(cadd_WG_SNVs), file(cadd_WG_SNVs_tbi), file(cadd_InDels), file(cadd_InDels_tbi) from Channel.value([ From a64aef875b45ed86cec95bfb081c0c82fd7d5ad2 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 6 Aug 2019 12:44:32 +0200 Subject: [PATCH 09/11] feat: filter out low quality calls from TIDDIT --- main.nf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b8b1dd7102..35d743a470 100644 --- a/main.nf +++ b/main.nf @@ -1026,7 +1026,7 @@ process TIDDIT { output: set val("TIDDIT"), idPatient, idSample, file("*.vcf.gz"), file("*.tbi") into vcfTIDDIT - set file("TIDDIT_${idSample}.ploidy.tab"), file("TIDDIT_${idSample}.signals.tab") into tidditOut + set file("TIDDIT_${idSample}.old.vcf"), file("TIDDIT_${idSample}.ploidy.tab"), file("TIDDIT_${idSample}.signals.tab"), file("TIDDIT_${idSample}.wig"), file("TIDDIT_${idSample}.gc.wig") into tidditOut when: 'tiddit' in tools @@ -1034,6 +1034,10 @@ process TIDDIT { """ tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${genomeFile} + cp TIDDIT_${idSample}.vcf TIDDIT_${idSample}.old.vcf + + grep -E "#|PASS" TIDDIT_${idSample}.vcf > TIDDIT_${idSample}.vcf + bgzip --threads ${task.cpus} -c TIDDIT_${idSample}.vcf > TIDDIT_${idSample}.vcf.gz tabix TIDDIT_${idSample}.vcf.gz From e24688cfa8fe17bda327ecbcd91c0eb16edbc18c Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 6 Aug 2019 12:44:45 +0200 Subject: [PATCH 10/11] feat: complete TIDDIT docs --- docs/output.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/output.md b/docs/output.md index 5e26d8cee0..3d8a44fd48 100644 --- a/docs/output.md +++ b/docs/output.md @@ -157,14 +157,25 @@ For a Tumor/Normal pair only: [TIDDIT](https://github.com/SciLifeLab/TIDDIT)identifies intra and inter-chromosomal translocations, deletions, tandem-duplications and inversions. 
Germline calls are provided for all samples, to able comparison of both tumor and normal for possible mixup. +Low-quality calls are filtered out of the main VCF to simplify downstream processing of variant calls, but Sarek keeps the unfiltered calls in `TIDDIT_[SAMPLE].old.vcf`. For further reading and documentation see the [TIDDIT manual](https://github.com/SciLifeLab/TIDDIT/blob/master/README.md). For all samples: **Output directory: `results/VariantCalling/[SAMPLE]/TIDDIT`** -* `TIDDIT_[SAMPLE].g.vcf.gz` and `TIDDIT_[SAMPLE].g.vcf.gz.tbi` +* `TIDDIT_[SAMPLE].vcf.gz` and `TIDDIT_[SAMPLE].vcf.gz.tbi` * VCF with Tabix index +* `TIDDIT_[SAMPLE].signals.tab` + * tab file describing coverage across the genome, binned per 50 bp +* `TIDDIT_[SAMPLE].ploidy.tab` + * tab file describing the estimated ploidy and coverage across each contig +* `TIDDIT_[SAMPLE].old.vcf` + * VCF including the low quality calls +* `TIDDIT_[SAMPLE].wig` + * wiggle file containing coverage across the genome, binned per 50 bp +* `TIDDIT_[SAMPLE].gc.wig` + * wiggle file containing fraction of GC content, binned per 50 bp ### Strelka2 From a8c02cf65f1794d548bc0901f8e735819640c653 Mon Sep 17 00:00:00 2001 From: MaxUlysse Date: Tue, 6 Aug 2019 15:11:11 +0200 Subject: [PATCH 11/11] fix: syntac error, bad grep --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 35d743a470..5c2f6325d9 100644 --- a/main.nf +++ b/main.nf @@ -1034,9 +1034,9 @@ process TIDDIT { """ tiddit --sv -o TIDDIT_${idSample} --bam ${bam} --ref ${genomeFile} - cp TIDDIT_${idSample}.vcf TIDDIT_${idSample}.old.vcf + mv TIDDIT_${idSample}.vcf TIDDIT_${idSample}.old.vcf - grep -E "#|PASS" TIDDIT_${idSample}.vcf > TIDDIT_${idSample}.vcf + grep -E "#|PASS" TIDDIT_${idSample}.old.vcf > TIDDIT_${idSample}.vcf bgzip --threads ${task.cpus} -c TIDDIT_${idSample}.vcf > TIDDIT_${idSample}.vcf.gz