-
Notifications
You must be signed in to change notification settings - Fork 418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Joint Germline subworkflow haplotypecaller -> Vqsr #595
Changes from 75 commits
19e5e0e
cd8d767
03909c9
c22a64f
028ad1a
57c2dff
ea8dd93
d6cc403
b78ad1c
37aa902
d49a5ac
d574b0c
dd7445f
06c66ad
8686dfb
f9fcb65
6e5e8dd
9a29759
aa110ab
fa41fa0
17924d0
50de0de
7947ce0
3463c56
1c48729
b835c37
d45423f
ed5ccea
3de8f7f
b2fd565
bfaf48d
cb25481
8fd7ff9
4eac972
ced2398
66a7f00
0ea9747
0717286
37e5e77
1db8bc8
ff6b530
aa7127a
5be916a
21182a3
9eebd71
69feebd
20a7e2e
3cbe260
e9fb076
6ea35cc
8d7235d
13f9bd7
e75a7f3
fb7c594
04b3191
9e31397
fc0c251
2e9a724
748947d
7e3a11e
77eb3d6
3fe7927
acb9fce
c793d5f
87f3405
ef8f526
72b4bc8
e5b1703
d6c006e
c94e00b
0dd0976
717e271
da17db9
4b9bd9f
9bae10f
b77a116
468dedb
e919d09
46bc423
f057709
4d50caf
c707f7c
ac0a1f5
a6d6e9a
7f5a916
1fc9e2a
d842fe0
73e84db
9fa1a5c
b0c32c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -19,16 +19,21 @@ params { | |||||
ascat_loci_rt = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/ASCAT/RT_G1000_hg19.zip" | ||||||
bwa = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/BWAIndex/" | ||||||
chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/Chromosomes" | ||||||
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf" | ||||||
dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.idx" | ||||||
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz" | ||||||
dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/dbsnp_138.b37.vcf.gz.tbi" | ||||||
dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_138.b37.vcf.gz' | ||||||
dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.dict" | ||||||
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta" | ||||||
fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Sequence/WholeGenomeFasta/human_g1k_v37_decoy.fasta.fai" | ||||||
germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh37.PASS.AC.AF.only.vcf.gz" | ||||||
germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh37.PASS.AC.AF.only.vcf.gz.tbi" | ||||||
intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/intervals/wgs_calling_regions_Sarek.list" | ||||||
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf" | ||||||
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.idx" | ||||||
known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.gz" | ||||||
known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/1000G_phase1.snps.high_confidence.b37.vcf.idx.gz" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.snps.high_confidence.b37.vcf.gz' | ||||||
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" | ||||||
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz.idx" | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
known_indels_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_phase1.indels.b37.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.b37.vcf.gz' | ||||||
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh37/Annotation/Control-FREEC/out100m2_hg19.gem" | ||||||
snpeff_db = 'GRCh37.87' | ||||||
snpeff_genome = 'GRCh37' | ||||||
|
@@ -50,14 +55,19 @@ params { | |||||
chr_dir = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/Chromosomes" | ||||||
dbsnp = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz" | ||||||
dbsnp_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/dbsnp_146.hg38.vcf.gz.tbi" | ||||||
dbsnp_vqsr = '--resource:dbsnp,known=false,training=true,truth=false,prior=2.0 dbsnp_146.hg38.vcf.gz' | ||||||
dict = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.dict" | ||||||
fasta = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta" | ||||||
fasta_fai = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Sequence/WholeGenomeFasta/Homo_sapiens_assembly38.fasta.fai" | ||||||
germline_resource = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz" | ||||||
germline_resource_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GermlineResource/gnomAD.r2.1.1.GRCh38.PASS.AC.AF.only.vcf.gz.tbi" | ||||||
intervals = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/intervals/wgs_calling_regions.hg38.bed" | ||||||
known_snps = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz" | ||||||
known_snps_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000G_omni2.5.hg38.vcf.gz.tbi" | ||||||
known_snps_vqsr = '--resource:1000G,known=false,training=true,truth=true,prior=10.0 1000G_omni2.5.hg38.vcf.gz' | ||||||
known_indels = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" | ||||||
known_indels_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz.tbi" | ||||||
known_indels_vqsr = '--resource:gatk,known=false,training=true,truth=true,prior=10.0 Homo_sapiens_assembly38.known_indels.vcf.gz --resource:mills,known=false,training=true,truth=true,prior=10.0 Mills_and_1000G_gold_standard.indels.hg38.vcf.gz' | ||||||
mappability = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/Control-FREEC/out100m2_hg38.gem" | ||||||
pon = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz" | ||||||
pon_tbi = "${params.igenomes_base}/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/1000g_pon.hg38.vcf.gz.tbi" | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -606,9 +606,6 @@ process{ | |||||
ext.prefix = { meta.num_intervals <= 1 ? "${meta.id}" : "${meta.id}.${target_bed.simpleName}" } | ||||||
ext.args = '--min-alternate-fraction 0.1 --min-mapping-quality 1' | ||||||
ext.when = { params.tools && params.tools.split(',').contains('freebayes') } | ||||||
publishDir = [ | ||||||
enabled: false | ||||||
] | ||||||
maxulysse marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
|
||||||
withName: 'BCFTOOLS_SORT' { | ||||||
|
@@ -644,17 +641,13 @@ process{ | |||||
ext.prefix = { meta.num_intervals <= 1 ? ( params.joint_germline ? "${meta.id}.haplotypecaller.g" : "${meta.id}.haplotypecaller" ) : ( params.joint_germline ? "${meta.id}.haplotypecaller.${intervals.simpleName}.g" :"${meta.id}.haplotypecaller.${intervals.simpleName}" ) } | ||||||
ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') } | ||||||
publishDir = [ | ||||||
enabled: !params.joint_germline, | ||||||
FriederikeHanssen marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
mode: params.publish_dir_mode, | ||||||
path: { "${params.outdir}/variant_calling/"}, | ||||||
pattern: "*{vcf.gz,vcf.gz.tbi}", | ||||||
saveAs: { meta.num_intervals > 1 ? null : "haplotypecaller/${meta.id}/${it}" } | ||||||
] | ||||||
} | ||||||
withName: 'CNNSCOREVARIANTS' { | ||||||
publishDir = [ | ||||||
enabled: false | ||||||
] | ||||||
} | ||||||
maxulysse marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
withName: 'FILTERVARIANTTRANCHES' { | ||||||
ext.prefix = {"${meta.id}.haplotypecaller"} | ||||||
ext.args = { "--info-key CNN_1D" } | ||||||
|
@@ -665,12 +658,48 @@ process{ | |||||
] | ||||||
} | ||||||
|
||||||
withName: 'GENOTYPEGVCFS' { | ||||||
ext.prefix = {"${meta.id}.haplotypecaller"} | ||||||
ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') && params.joint_germline} | ||||||
withName: 'GATK4_GENOMICSDBIMPORT' { | ||||||
ext.prefix = { meta.num_intervals > 1 ? meta.intervals_name : "joint_interval" } | ||||||
ext.when = { params.tools && params.tools.split(',').contains('haplotypecaller') && params.joint_germline && !params.no_intervals} | ||||||
nickhsmith marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
|
||||||
withName: 'GATK4_GENOTYPEGVCFS' { | ||||||
ext.prefix = { meta.num_intervals > 1 ? meta.intervals_name: "joint_interval" } | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we had There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And no more `when` statement for this one? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. either the merged and unmerged should have the same name, so that regardless of whether there is 1 interval or 100, the output file always has the same name |
||||||
} | ||||||
withName: 'MERGE_GENOTYPEGVCFS' { | ||||||
ext.prefix = "joint_germline" | ||||||
publishDir = [ | ||||||
mode: params.publish_dir_mode, | ||||||
path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/" }, | ||||||
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, | ||||||
pattern: "*{vcf.gz,vcf.gz.tbi}" | ||||||
] | ||||||
} | ||||||
|
||||||
withName: 'VARIANTRECALIBRATOR_INDEL' { | ||||||
ext.prefix = { "${meta.id}_INDEL" } | ||||||
ext.args = "-an QD -an MQRankSum -an ReadPosRankSum -an FS -an SOR -an DP -mode INDEL" | ||||||
} | ||||||
|
||||||
withName: 'VARIANTRECALIBRATOR_SNP' { | ||||||
ext.prefix = { "${meta.id}_SNP" } | ||||||
ext.args = "-an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR -mode SNP" | ||||||
} | ||||||
withName: 'GATK4_APPLYVQSR_SNP'{ | ||||||
ext.prefix = { "${meta.id}_SNP" } | ||||||
ext.args = '--truth-sensitivity-filter-level 99.9 -mode SNP' | ||||||
} | ||||||
|
||||||
withName: 'GATK4_APPLYVQSR_INDEL'{ | ||||||
ext.prefix = { "${meta.id}_INDEL" } | ||||||
ext.args = '--truth-sensitivity-filter-level 99.9 -mode INDEL' | ||||||
} | ||||||
withName: 'MERGE_VQSR' { | ||||||
ext.prefix = "joint_germline_recalibrated" | ||||||
publishDir = [ | ||||||
mode: params.publish_dir_mode, | ||||||
path: { "${params.outdir}/variant_calling/haplotypecaller/${meta.id}/"}, | ||||||
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, | ||||||
pattern: "*{vcf.gz,vcf.gz.tbi}" | ||||||
] | ||||||
} | ||||||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The files are not yet on iGenomes, so I would update all of this together in #662 once they are there.