diff --git a/src/main/java/picard/fingerprint/FingerprintChecker.java b/src/main/java/picard/fingerprint/FingerprintChecker.java index cdeaed0fae..62a7ee247b 100644 --- a/src/main/java/picard/fingerprint/FingerprintChecker.java +++ b/src/main/java/picard/fingerprint/FingerprintChecker.java @@ -535,7 +535,15 @@ public Map fingerprintSamFile(final Path samF log.info(String.format("Reading an indexed file (%s)", samFile.toUri().toString())); } - final SamLocusIterator iterator = new SamLocusIterator(in, this.haplotypes.getIntervalList(), in.hasIndex()); + // fingerprinting allows that headers differ, but SamLocusIterator doesn't allow the dictionary of the query + // interval list to differ from that of the samfile, leading to an exception thrown. + // At this point we already know that the dictionary of the haplotypes is a proper prefix of that of the sam file + // So we just swap out the header in the interval list. + final IntervalList il = this.haplotypes.getIntervalList(); + final IntervalList newIl = new IntervalList(in.getFileHeader()); + newIl.addall(il.getIntervals()); + + final SamLocusIterator iterator = new SamLocusIterator(in, newIl, in.hasIndex()); iterator.setEmitUncoveredLoci(true); iterator.setMappingQualityScoreCutoff(this.minimumMappingQuality); iterator.setQualityScoreCutoff(this.minimumBaseQuality); diff --git a/src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java b/src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java index 582e59dfb4..0d978c03aa 100644 --- a/src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java +++ b/src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java @@ -10,6 +10,7 @@ import picard.PicardException; import picard.cmdline.CommandLineProgramTest; import picard.util.TabbedTextFileWithHeaderParser; +import picard.util.TestNGUtil; import picard.vcf.SamTestUtils; import picard.vcf.VcfTestUtils; @@ -30,7 +31,6 @@ import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; -import java.util.stream.IntStream; import java.util.stream.Stream; /** @@ -40,8 +40,11 @@ public class CrosscheckFingerprintsTest extends CommandLineProgramTest { private final File TEST_DATA_DIR = new File("testdata/picard/fingerprint/"); private final File HAPLOTYPE_MAP = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.txt"); + private final File HAPLOTYPE_MAP_SHORT = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.short_dictionary.txt"); private final File HAPLOTYPE_MAP_FOR_CRAMS = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.txt"); + private final File HAPLOTYPE_MAP_FOR_CRAMS_SHORT = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.short_dictionary.txt"); + private final File NA12891_r1_sam = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.r1.sam"); private final File NA12891_r2_sam = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.r2.sam"); private final File NA12891_r1_sam_shifted_for_cram = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.shifted.for.crams.r1.sam"); @@ -148,7 +151,6 @@ public String getCommandLineProgramName() { return CrosscheckFingerprints.class.getSimpleName(); } - @DataProvider(name = "bamFilesRGs") public Object[][] bamAndCramFilesRGs() { return new Object[][] { {NA12891_r1, NA12891_r2, false, 0, (NA12891_r1_RGs + NA12891_r2_RGs) * (NA12891_r1_RGs + NA12891_r2_RGs )}, @@ -178,10 +180,26 @@ public Object[][] bamAndCramFilesRGs() { }; } - @Test(dataProvider = "bamFilesRGs") - public void testCrossCheckRGs(final File file1, final File file2, final boolean expectAllMatch, final int expectedRetVal, final int expectedNMetrics) throws IOException { + private Object[][] haplotypeMaps(){ + return new Object[][]{ + {HAPLOTYPE_MAP, HAPLOTYPE_MAP_FOR_CRAMS}, + {HAPLOTYPE_MAP_SHORT, HAPLOTYPE_MAP_FOR_CRAMS_SHORT}}; + } + @DataProvider(name = "bamFilesRGs") + public Iterator bamFilesRGsAndHapMaps(){ + return TestNGUtil.interaction(bamAndCramFilesRGs(), haplotypeMaps()); + } - File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics"); + @Test(dataProvider = "bamFilesRGs") + public void testCrossCheckRGs(final File file1, + final File file2, + final boolean expectAllMatch, + final int expectedRetVal, + final int expectedNMetrics, + final File haplotypeMap, + final File haplotypeMapForCram) throws IOException { + + final File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics"); metrics.deleteOnExit(); final List args = new ArrayList<>(Arrays.asList("INPUT=" + file1.getAbsolutePath(), @@ -193,14 +211,15 @@ public void testCrossCheckRGs(final File file1, final File file2, final boolean if (file1.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) { args.add("R=" + referenceForCrams); - args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS); + args.add("HAPLOTYPE_MAP=" + haplotypeMapForCram.getAbsolutePath()); } else { - args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP); + args.add("HAPLOTYPE_MAP=" + haplotypeMap.getAbsolutePath()); } doTest(args.toArray(new String[0]), metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch); } + @DataProvider(name = "cramsWithNoReference") public Object[][] cramsWithNoReference() { return new Object[][]{ @@ -212,7 +231,7 @@ public Object[][] cramsWithNoReference() { @Test(dataProvider = "cramsWithNoReference") public void testCramsWithNoReference(final File file1, final File file2) throws IOException { - File metrics = File.createTempFile("Fingerprinting", "NA12891.RG.crosscheck_metrics"); + final File metrics = File.createTempFile("Fingerprinting", "NA12891.RG.crosscheck_metrics"); metrics.deleteOnExit(); final String[] args = {"INPUT=" + file1.getAbsolutePath(), @@ -225,8 +244,11 @@ public void testCramsWithNoReference(final File file1, final File file2) throws Assert.assertEquals(crossChecker.instanceMain(args), 1); } - @DataProvider(name = "cramBamComparison") - public Object[][] cramBamComparison() { + private Object[][] haplotypeMapsForCram(){ + return new Object[][]{{HAPLOTYPE_MAP_FOR_CRAMS}, {HAPLOTYPE_MAP_FOR_CRAMS_SHORT}}; + } + + private Object[][] cramBamComparison() { return new Object[][]{ {NA12891_r1_shifted_bam, NA12891_r2_shifted_bam, NA12891_r1_cram, NA12891_r2_cram}, {NA12891_r1_shifted_bam, NA12892_r1_shifted_bam, NA12891_r1_cram, NA12892_r1_cram}, @@ -237,25 +259,30 @@ public Object[][] cramBamComparison() { }; } + @DataProvider(name = "cramBamComparison") + public Iterator cramBamComparisonWithHapMap() { + return TestNGUtil.interaction(cramBamComparison(), haplotypeMapsForCram()); + } + @Test(dataProvider = "cramBamComparison") - public void testCramBamComparison(final File bam1, final File bam2, final File cram1, final File cram2) throws IOException { - File metricsBam = File.createTempFile("Fingerprinting.bam.comparison", "crosscheck_metrics"); + public void testCramBamComparison(final File bam1, final File bam2, final File cram1, final File cram2, final File haplotypeMap) throws IOException { + final File metricsBam = File.createTempFile("Fingerprinting.bam.comparison", "crosscheck_metrics"); metricsBam.deleteOnExit(); - File metricsCram = File.createTempFile("Fingerprinting.cram.comparison", "crosscheck_metrics"); + final File metricsCram = File.createTempFile("Fingerprinting.cram.comparison", "crosscheck_metrics"); metricsCram.deleteOnExit(); final List argsBam = new ArrayList<>(Arrays.asList("INPUT=" + bam1.getAbsolutePath(), "INPUT=" + bam2.getAbsolutePath(), "OUTPUT=" + metricsBam.getAbsolutePath(), "LOD_THRESHOLD=" + -2.0, - "HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS) + "HAPLOTYPE_MAP=" + haplotypeMap.getAbsolutePath()) ); final List argsCram = new ArrayList<>(Arrays.asList("INPUT=" + cram1.getAbsolutePath(), "INPUT=" + cram2.getAbsolutePath(), "OUTPUT=" + metricsCram.getAbsolutePath(), "LOD_THRESHOLD=" + -2.0, - "HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS, + "HAPLOTYPE_MAP=" + haplotypeMap.getAbsolutePath(), "R=" + referenceForCrams) ); @@ -416,10 +443,10 @@ public Object[][] bamFilesSMs() { @Test(dataProvider = "bamFilesSMs") public void testCrossCheckSMs(final File file1, final File file2, final int expectedRetVal, final int numberOfSamples) throws IOException { - File metrics = File.createTempFile("Fingerprinting", "NA1291.SM.crosscheck_metrics"); + final File metrics = File.createTempFile("Fingerprinting", "NA1291.SM.crosscheck_metrics"); metrics.deleteOnExit(); - File matrix = File.createTempFile("Fingerprinting", "NA1291.SM.matrix"); + final File matrix = File.createTempFile("Fingerprinting", "NA1291.SM.matrix"); matrix.deleteOnExit(); final String[] args = new String[]{ diff --git a/testdata/picard/fingerprint/Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.short_dictionary.txt b/testdata/picard/fingerprint/Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.short_dictionary.txt new file mode 100755 index 0000000000..be1edc0ea6 --- /dev/null +++ b/testdata/picard/fingerprint/Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.short_dictionary.txt @@ -0,0 +1,10 @@ +@HD VN:1.5 +@SQ SN:1 LN:1001 M5:af506819d4b9357827ce128b6c83a816 UR:file:reference.shifted.for.crams.fasta +@SQ SN:3 LN:2001 M5:bfecd72141fd7c10b33f04958736baef UR:file:reference.shifted.for.crams.fasta +@SQ SN:4 LN:1001 M5:ac7d9a3274b2f1fa8eac3f01b3ff9b43 UR:file:reference.shifted.for.crams.fasta +@SQ SN:5 LN:1001 M5:468698a81368c9d42bd876a2dae5c330 UR:file:reference.shifted.for.crams.fasta +#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS +1 251 rs7555566 A G 0.223794 +3 251 rs17272796 C T 0.623026 +4 251 rs6834736 C G 0.512884 +5 251 rs2862058 A G 0.349127 diff --git a/testdata/picard/fingerprint/Homo_sapiens_assembly19.haplotype_database.subset.short_dictionary.txt b/testdata/picard/fingerprint/Homo_sapiens_assembly19.haplotype_database.subset.short_dictionary.txt new file mode 100755 index 0000000000..38edc57c76 --- /dev/null +++ b/testdata/picard/fingerprint/Homo_sapiens_assembly19.haplotype_database.subset.short_dictionary.txt @@ -0,0 +1,11 @@ +@HD VN:1.4 GO:none SO:coordinate +@SQ SN:1 LN:249250621 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:1b22b98cdeb4a9304cb5d48026a85128 SP:Homo Sapiens +@SQ SN:2 LN:243199373 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:a0d9851da00400dec1098a9255ac712e SP:Homo Sapiens +@SQ SN:3 LN:198022430 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:fdfd811849cc2fadebc929bb925902e5 SP:Homo Sapiens +@SQ SN:4 LN:191154276 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:23dccd106897542ad87d2765d28a19a1 SP:Homo Sapiens +@SQ SN:5 LN:180915260 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:0740173db9ffd264d728f32784845cd7 SP:Homo Sapiens +#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS +1 14804874 rs7555566 A G 0.223794 +3 17077268 rs17272796 C T 0.623026 +4 57194525 rs6834736 C G 0.512884 +5 156355375 rs2862058 A G 0.349127