-
Notifications
You must be signed in to change notification settings - Fork 243
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Support lenient (optimistic) read-only VCF 4.4. (#1683)
Add new `optimistic_vcf_4_4`, which allows reading VCF 4.4 files as if they were VCF 4.3; useful in many cases, possibly catastrophic in others.
- Loading branch information
Showing
7 changed files
with
101 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
##fileformat=VCFv4.4 | ||
##FILTER=<ID=ABFilter,Description="AB 0.75 && DP 40"> | ||
##FILTER=<ID=DPFilter,Description="DP 120 || SB -0.10"> | ||
##FILTER=<ID=FDRtranche0.00to0.10,Description="FDR tranche level at qual 0.06"> | ||
##FILTER=<ID=FDRtranche0.10to1.00,Description="FDR tranche level at qual 0.03"> | ||
##FILTER=<ID=FDRtranche1.00to2.00,Description="FDR tranche level at qual 0.02"> | ||
##FILTER=<ID=FDRtranche2.00to10.00+,Description="FDR tranche level at qual > 0.06"> | ||
##FILTER=<ID=FDRtranche2.00to10.00,Description="FDR tranche level at qual unknown"> | ||
##FILTER=<ID=HARD_TO_VALIDATE,Description="MQ0 = 4 && ((MQ0 / (1.0 * DP)) 0.1)"> | ||
##FILTER=<ID=Indel,Description="Overlaps a user-input mask"> | ||
##FILTER=<ID=LowQual,Description="Low quality"> | ||
##FILTER=<ID=LowQual,Description="QUAL 50.0"> | ||
##FILTER=<ID=ANNOTATION,Description="ANNOTATION != \"NA\" || ANNOTATION <= 0.01"> | ||
##FILTER=<ID=ANNOTATION2,Description="ANNOTATION with quote \" that is unmatched but escaped"> | ||
##FILTER=<ID=SnpCluster,Description="SNPs found in clusters"> | ||
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> | ||
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)"> | ||
##FORMAT=<ID=GL,Number=3,Type=Float,Description="Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"> | ||
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality"> | ||
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> | ||
##INFO=<ID=AC,Number=.,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> | ||
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> | ||
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> | ||
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> | ||
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> | ||
##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction"> | ||
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with two (and only two) segregating haplotypes"> | ||
##INFO=<ID=EscapingQuote,Number=1,Type=Float,Description="This description has an escaped \" quote in it"> | ||
##INFO=<ID=EscapingBackslash,Number=1,Type=Float,Description="This description has an escaped \\ backslash in it"> | ||
##INFO=<ID=EscapingNonQuoteOrBackslash,Number=1,Type=Float,Description="This other value has a \n newline in it"> | ||
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> | ||
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> | ||
##INFO=<ID=OQ,Number=1,Type=Float,Description="The original variant quality score"> | ||
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> | ||
##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias"> | ||
##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-23/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-24/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-5/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-9/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-6/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-19/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-25/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-4/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-14/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-22/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-2/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-3/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-7/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-16/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, 
/humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-1/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-17/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-8/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-10/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-18/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-20/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-11/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-15/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-21/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-12/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam, /humgen/1kg/analysis/bamsForDataProcessingPapers/scriptsToMakeBams/Q-2970@gsa2-1-temp-13/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam] read_buffer_size=null read_filter=[] intervals=[chrX] excludeIntervals=[chrM, chrY] reference_sequence=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta rodBind=[dbsnp,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod, interval,Intervals,chrX] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod hapmap=null hapmap_chip=null out=null err=null outerr=null filterZeroMappingQualityReads=false downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null useOriginalQualities=false 
validation_strictness=SILENT unsafe=null max_reads_at_locus=10000 num_threads=1 interval_merging=ALL read_group_black_list=null genotype_model=JOINT_ESTIMATE base_model=EMPIRICAL heterozygosity=7.8E-4 genotype=false output_all_callable_bases=false standard_min_confidence_threshold_for_calling=30.0 standard_min_confidence_threshold_for_emitting=10.0 trigger_min_confidence_threshold_for_calling=30.0 trigger_min_confidence_threshold_for_emitting=30.0 noSLOD=false assume_single_sample_reads=null platform=null min_base_quality_score=20 min_mapping_quality_score=20 max_mismatches_in_40bp_window=3 use_reads_with_bad_mates=false max_deletion_fraction=0.05 cap_base_quality_by_mapping_quality=false" | ||
##VariantFiltration="analysis_type=VariantFiltration input_file=[] read_buffer_size=null read_filter=[] intervals=null excludeIntervals=[chrM, chrY] reference_sequence=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta rodBind=[variant,VCF,wgs.v9/HiSeq.WGS.cleaned.ug.snpfiltered.vcf, mask,Bed,wgs.v9/HiSeq.WGS.cleaned.indels.10.mask] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null hapmap=null hapmap_chip=null out=wgs.v9/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf err=null outerr=null filterZeroMappingQualityReads=false downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null useOriginalQualities=false validation_strictness=SILENT unsafe=null max_reads_at_locus=2147483647 num_threads=1 interval_merging=ALL read_group_black_list=null filterExpression=[] filterName=[] genotypeFilterExpression=[] genotypeFilterName=[] clusterSize=3 clusterWindowSize=0 maskName=Indel NO_HEADER=false" | ||
##VariantFiltration="analysis_type=VariantFiltration input_file=[] read_buffer_size=null read_filter=[] intervals=null excludeIntervals=[chrM, chrY] reference_sequence=/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta rodBind=[variant,VCF,wgs.v9/HiSeq.WGS.cleaned.ug.vcf] rodToIntervalTrackName=null BTI_merge_rule=UNION DBSNP=null hapmap=null hapmap_chip=null out=wgs.v9/HiSeq.WGS.cleaned.ug.snpfiltered.vcf err=null outerr=null filterZeroMappingQualityReads=false downsampling_type=NONE downsample_to_fraction=null downsample_to_coverage=null useOriginalQualities=false validation_strictness=SILENT unsafe=null max_reads_at_locus=2147483647 num_threads=1 interval_merging=ALL read_group_black_list=null filterExpression=[QUAL < 50.0, MQ0 >= 4 && ((MQ0 / (1.0 * DP)) > 0.1), AB > 0.75 && DP > 40, DP > 120 || SB > -0.10] filterName=[LowQual, HARD_TO_VALIDATE, ABFilter, DPFilter] genotypeFilterExpression=[] genotypeFilterName=[] clusterSize=3 clusterWindowSize=10 maskName=Mask NO_HEADER=false" | ||
##source=VariantOptimizer | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 | ||
chr1 109 . A T 0 FDRtranche2.00to10.00+ AC=1;AF=0.50;AN=2;DP=1019;Dels=0.00;HRun=0;HaplotypeScore=686.65;MQ=19.20;MQ0=288;OQ=2175.54;QD=2.13;SB=-1042.18 GT:AD:DP:GL:GQ 0/1:610,327:308:-316.30,-95.47,-803.03:99 |