broadinstitute · cmnbroad · May 14, 2018 · May 9, 2018 · cmnbroad · May 10, 2018
diff --git a/src/main/python/org/broadinstitute/hellbender/vqsr_cnn/vqsr_cnn/inference.py b/src/main/python/org/broadinstitute/hellbender/vqsr_cnn/vqsr_cnn/inference.py
@@ -103,6 +103,8 @@ def reference_string_to_tensor(reference):
             dna_data[i, defines.DNA_SYMBOLS[b]] = 1.0
         elif b in defines.AMBIGUITY_CODES:
             dna_data[i] = defines.AMBIGUITY_CODES[b]
+        elif b == '\x00':
+            break
         else:
             raise ValueError('Error! Unknown code:', b)
 

diff --git a/...ava/org/broadinstitute/hellbender/tools/walkers/vqsr/CNNScoreVariantsIntegrationTest.java b/...ava/org/broadinstitute/hellbender/tools/walkers/vqsr/CNNScoreVariantsIntegrationTest.java
@@ -99,6 +99,30 @@ public void testSmallBatchInference()throws IOException {
         spec.executeTest("testInference", this);
     }
 
+    /**
+     * Run the 1D model on a VCF whose only variant sits near the edge of its contig.
+     *
+     * There are no assertions on the scores: the test passes as long as the tool
+     * completes without throwing.
+     *
+     * @throws IOException if the temporary output file cannot be created
+     */
+    @Test(groups = {"python"})
+    public void testOnContigEdge() throws IOException{
+        final String edgeVcf = toolsTestDir + "walkers/VQSR/variantNearContigEdge.vcf";
+        // Write to a throwaway file: the output must never land in the checked-in expected-results directory.
+        final java.io.File tempVcf = java.io.File.createTempFile("testOnContigEdge", ".vcf");
+        tempVcf.deleteOnExit();
+
+        final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
+        argsBuilder.addArgument(StandardArgumentDefinitions.VARIANT_LONG_NAME, edgeVcf)
+                .addArgument(StandardArgumentDefinitions.REFERENCE_LONG_NAME, hg19MiniReference)
+                .addArgument("architecture", architecture1D)
+                .addArgument(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false")
+                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempVcf.getPath());
+        runCommandLine(argsBuilder);
+    }
+
     /**
      * Run the 2D Model on a small test VCF.
      */

diff --git a/...test/resources/org/broadinstitute/hellbender/tools/walkers/VQSR/variantNearContigEdge.vcf b/...test/resources/org/broadinstitute/hellbender/tools/walkers/VQSR/variantNearContigEdge.vcf
@@ -0,0 +1,28 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=LowQual,Description="Low quality">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
+##contig=<ID=1,length=16000>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	test
+1	15990	.	C	T	157767.77	.	AC=2;AF=1.00;AN=2;BaseQRankSum=5.566;ClippingRankSum=0.053;DP=4492;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=57.64;MQRankSum=-0.712;QD=30.32;ReadPosRankSum=-1.183;SOR=0.761	GT:AD:DP:GQ:PL	1/1:21,4410:4431:99:157796,12877,0