From 83832eade4773d830b9681f0b4e36fd56c94ea2e Mon Sep 17 00:00:00 2001 From: jamesemery Date: Fri, 7 Sep 2018 14:29:42 -0400 Subject: [PATCH] Switched MarkDuplicatesSpark tile parsing code to use shorts in order to match picard (#5165) --- .../TransientFieldPhysicalLocation.java | 10 +-- .../MarkDuplicatesSparkIntegrationTest.java | 65 ++++++++++++++++++- ...tMarkDuplicatesCommandLineProgramTest.java | 60 +---------------- .../MarkDuplicatesGATKIntegrationTest.java | 2 - 4 files changed, 70 insertions(+), 67 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java index 8b64578c785..9f22c1f0b23 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java @@ -12,8 +12,8 @@ public abstract class TransientFieldPhysicalLocation extends PairedEnds implemen // Information used to detect optical dupes protected short readGroupIndex = -1; protected transient short tile = -1; - protected transient int x = -1; - protected transient int y = -1; + protected transient short x = -1; + protected transient short y = -1; protected transient short libraryId = -1; public TransientFieldPhysicalLocation(int partitionIndex, String name) { @@ -36,14 +36,16 @@ public TransientFieldPhysicalLocation(int partitionIndex, String name) { @Override public int getX() { return this.x; } + // NOTE picard in practice compresses the pixel values to signed shorts for space purposes despite the api using an integer @Override - public void setX(final int x) { this.x = x; } + public void setX(final int x) { this.x = (short)x; } @Override public int getY() { return this.y; } + // NOTE picard in practice compresses the pixel values to signed shorts for space purposes despite the api using an integer @Override - public void setY(final int y) { this.y = y; } + public void setY(final int y) { this.y = (short)y; } @Override public short getLibraryId() { return this.libraryId; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index dc0a996f0cb..c8a5ff2b0e6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -267,11 +267,11 @@ public void testOpticalDuplicatesTiebrokenByPhysicalLocationNotStartPosition(fin int compare = position1.tile - position2.tile; if (compare == 0) { - compare = position1.x - position2.x; + compare = (short)position1.x - (short)position2.x; } if (compare == 0) { - compare = position1.y - position2.y; + compare = (short)position1.y - (short)position2.y; } final boolean isDuplicate = compare < 0; @@ -281,4 +281,65 @@ public void testOpticalDuplicatesTiebrokenByPhysicalLocationNotStartPosition(fin tester.addMatePair(readName2, 1,2, 51, false, false, isDuplicate, isDuplicate, "6S42M28S", "8S68M", true, false, false, false, false, DEFAULT_BASE_QUALITY); tester.runTest(); } + + @DataProvider + public Object[][] readNameData(){ + return new Object[][]{ + {"RUNID:7:1203:2886:82292", "RUNID:7:1205:3886:16834"}, + + {"RUNID:7:1203:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1204:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1205:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1206:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1207:2886:16756", "RUNID:7:1205:3886:16756"}, + + {"RUNID:7:1203:2886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:3886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:4886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:5886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:6886:16756", "RUNID:7:1203:4886:26756"}, + + {"RUNID:7:1203:2886:34756", "RUNID:7:1203:2886:36756"}, + {"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"}, + {"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"}, + {"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"}, + + //Added a test for tiebreaking accounting for the short casting done in picard + {"HK3T5CCXX160204:3:1112:11586:37067", "HK3T5CCXX160204:3:1112:11586:32144"} + }; + } + + @Test(dataProvider = "readNameData") + public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) { + // This tests the readname based tiebreaking code in mark duplicates. Since it's ambiguous which read should be marked + // as duplicate or not if scores match we break ties by evaluating the readname for consistencies sake. + + final ReadNameParser parser = new ReadNameParser(); + + final PhysicalLocationInt position1 = new PhysicalLocationInt(); + final PhysicalLocationInt position2 = new PhysicalLocationInt(); + + parser.addLocationInformation(readName1, position1); + parser.addLocationInformation(readName2, position2); + + final AbstractMarkDuplicatesTester tester = getTester(); + tester.getSamRecordSetBuilder().setReadLength(101); + tester.setExpectedOpticalDuplicate(0); + + int compare = position1.tile - position2.tile; + if (compare == 0) { + compare = (short)position1.x - (short)position2.x; + } + + if (compare == 0) { + compare = (short)position1.y - (short)position2.y; + } + + final boolean isDuplicate = compare < 0; + + tester.addMatePair(readName1, 1,485253, 485253, false, false, !isDuplicate, !isDuplicate, "42M59S", "59S42M", false, true, false, false, false, DEFAULT_BASE_QUALITY); + tester.addMatePair(readName2, 1,485253, 485253, false, false, isDuplicate, isDuplicate, "59S42M", "42M59S", true, false, false, false, false, DEFAULT_BASE_QUALITY); + + tester.runTest(); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java index 29adf54a717..3b961f99e56 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java @@ -156,64 +156,6 @@ public void testOpticalDuplicateFinding() { tester.runTest(); } - @DataProvider - public Object[][] readNameData(){ - return new Object[][]{ - {"RUNID:7:1203:2886:82292", "RUNID:7:1205:3886:16834"}, - - {"RUNID:7:1203:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1204:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1205:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1206:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1207:2886:16756", "RUNID:7:1205:3886:16756"}, - - {"RUNID:7:1203:2886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:3886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:4886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:5886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:6886:16756", "RUNID:7:1203:4886:26756"}, - - {"RUNID:7:1203:2886:34756", "RUNID:7:1203:2886:36756"}, - {"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"}, - {"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"}, - {"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"}, - }; - } - - @Test(dataProvider = "readNameData") - public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) { - // This tests the readname based tiebreaking code in mark duplicates. Since it's ambiguous which read should be marked - // as duplicate or not if scores match we break ties by evaluating the readname for consistencies sake. - - final ReadNameParser parser = new ReadNameParser(); - - final PhysicalLocationInt position1 = new PhysicalLocationInt(); - final PhysicalLocationInt position2 = new PhysicalLocationInt(); - - parser.addLocationInformation(readName1, position1); - parser.addLocationInformation(readName2, position2); - - final AbstractMarkDuplicatesTester tester = getTester(); - tester.getSamRecordSetBuilder().setReadLength(101); - tester.setExpectedOpticalDuplicate(0); - - int compare = position1.tile - position2.tile; - if (compare == 0) { - compare = position1.x - position2.x; - } - - if (compare == 0) { - compare = position1.y - position2.y; - } - - final boolean isDuplicate = compare < 0; - - tester.addMatePair(readName1, 1,485253, 485253, false, false, !isDuplicate, !isDuplicate, "42M59S", "59S42M", false, true, false, false, false, DEFAULT_BASE_QUALITY); - tester.addMatePair(readName2, 1,485253, 485253, false, false, isDuplicate, isDuplicate, "59S42M", "42M59S", true, false, false, false, false, DEFAULT_BASE_QUALITY); - - tester.runTest(); - } - @Test public void testOpticalDuplicatesDifferentReadGroups() { final AbstractMarkDuplicatesTester tester = getTester(); @@ -251,7 +193,7 @@ public void testOpticalDuplicatesTheSameReadGroup() { public void testOpticalDuplicatesAndPCRDuplicatesOrientationDifference() { final AbstractMarkDuplicatesTester tester = getTester(); tester.setExpectedOpticalDuplicate(0); - tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 486253, false, false, true, true, "101M", "101M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // duplicate + tester.addMatePair("RUNID:7:1203:2886:16838", 19, 19, 485253, 486253, false, false, true, true, "101M", "101M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // duplicate tester.addMatePair("RUNID:7:1203:2886:16834", 19, 19, 486253, 485253, false, false, false, false, "101M", "101M", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1"); // Even though these reads are in a duplicate group together, we don't want to mark them as Optical Duplicates because their orientation is flipped (which doesn't matter for PCR duplicates) tester.runTest(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java index 115087cf791..06e8209a5f1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java @@ -14,8 +14,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import picard.sam.util.PhysicalLocationInt; -import picard.sam.util.ReadNameParser; import java.io.File; import java.util.*;