Skip to content

Commit

Permalink
Switched MarkDuplicatesSpark tile parsing code to use shorts in order…
Browse files Browse the repository at this point in the history
… to match picard (#5165)
  • Loading branch information
jamesemery authored Sep 7, 2018
1 parent 813e593 commit 83832ea
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ public abstract class TransientFieldPhysicalLocation extends PairedEnds implemen
// Information used to detect optical dupes
protected short readGroupIndex = -1;
protected transient short tile = -1;
protected transient int x = -1;
protected transient int y = -1;
protected transient short x = -1;
protected transient short y = -1;
protected transient short libraryId = -1;

public TransientFieldPhysicalLocation(int partitionIndex, String name) {
Expand All @@ -36,14 +36,16 @@ public TransientFieldPhysicalLocation(int partitionIndex, String name) {
@Override
public int getX() { return this.x; }

// NOTE picard in practice compresses the pixel values to signed shorts for space purposes despite the api using an integer
@Override
public void setX(final int x) { this.x = x; }
public void setX(final int x) { this.x = (short)x; }

@Override
public int getY() { return this.y; }

// NOTE picard in practice compresses the pixel values to signed shorts for space purposes despite the api using an integer
@Override
public void setY(final int y) { this.y = y; }
public void setY(final int y) { this.y = (short)y; }

@Override
public short getLibraryId() { return this.libraryId; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,11 +267,11 @@ public void testOpticalDuplicatesTiebrokenByPhysicalLocationNotStartPosition(fin

int compare = position1.tile - position2.tile;
if (compare == 0) {
compare = position1.x - position2.x;
compare = (short)position1.x - (short)position2.x;
}

if (compare == 0) {
compare = position1.y - position2.y;
compare = (short)position1.y - (short)position2.y;
}

final boolean isDuplicate = compare < 0;
Expand All @@ -281,4 +281,65 @@ public void testOpticalDuplicatesTiebrokenByPhysicalLocationNotStartPosition(fin
tester.addMatePair(readName2, 1,2, 51, false, false, isDuplicate, isDuplicate, "6S42M28S", "8S68M", true, false, false, false, false, DEFAULT_BASE_QUALITY);
tester.runTest();
}

@DataProvider
public Object[][] readNameData(){
return new Object[][]{
{"RUNID:7:1203:2886:82292", "RUNID:7:1205:3886:16834"},

{"RUNID:7:1203:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1204:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1205:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1206:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1207:2886:16756", "RUNID:7:1205:3886:16756"},

{"RUNID:7:1203:2886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:3886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:4886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:5886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:6886:16756", "RUNID:7:1203:4886:26756"},

{"RUNID:7:1203:2886:34756", "RUNID:7:1203:2886:36756"},
{"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"},
{"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"},
{"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"},

//Added a test for tiebreaking accounting for the short casting done in picard
{"HK3T5CCXX160204:3:1112:11586:37067", "HK3T5CCXX160204:3:1112:11586:32144"}
};
}

@Test(dataProvider = "readNameData")
public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) {
// This tests the readname based tiebreaking code in mark duplicates. Since it's ambiguous which read should be marked
// as duplicate or not if scores match we break ties by evaluating the readname for consistencies sake.

final ReadNameParser parser = new ReadNameParser();

final PhysicalLocationInt position1 = new PhysicalLocationInt();
final PhysicalLocationInt position2 = new PhysicalLocationInt();

parser.addLocationInformation(readName1, position1);
parser.addLocationInformation(readName2, position2);

final AbstractMarkDuplicatesTester tester = getTester();
tester.getSamRecordSetBuilder().setReadLength(101);
tester.setExpectedOpticalDuplicate(0);

int compare = position1.tile - position2.tile;
if (compare == 0) {
compare = (short)position1.x - (short)position2.x;
}

if (compare == 0) {
compare = (short)position1.y - (short)position2.y;
}

final boolean isDuplicate = compare < 0;

tester.addMatePair(readName1, 1,485253, 485253, false, false, !isDuplicate, !isDuplicate, "42M59S", "59S42M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
tester.addMatePair(readName2, 1,485253, 485253, false, false, isDuplicate, isDuplicate, "59S42M", "42M59S", true, false, false, false, false, DEFAULT_BASE_QUALITY);

tester.runTest();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -156,64 +156,6 @@ public void testOpticalDuplicateFinding() {
tester.runTest();
}

@DataProvider
public Object[][] readNameData(){
return new Object[][]{
{"RUNID:7:1203:2886:82292", "RUNID:7:1205:3886:16834"},

{"RUNID:7:1203:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1204:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1205:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1206:2886:16756", "RUNID:7:1205:3886:16756"},
{"RUNID:7:1207:2886:16756", "RUNID:7:1205:3886:16756"},

{"RUNID:7:1203:2886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:3886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:4886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:5886:16756", "RUNID:7:1203:4886:26756"},
{"RUNID:7:1203:6886:16756", "RUNID:7:1203:4886:26756"},

{"RUNID:7:1203:2886:34756", "RUNID:7:1203:2886:36756"},
{"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"},
{"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"},
{"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"},
};
}

@Test(dataProvider = "readNameData")
public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) {
// This tests the readname based tiebreaking code in mark duplicates. Since it's ambiguous which read should be marked
// as duplicate or not if scores match we break ties by evaluating the readname for consistencies sake.

final ReadNameParser parser = new ReadNameParser();

final PhysicalLocationInt position1 = new PhysicalLocationInt();
final PhysicalLocationInt position2 = new PhysicalLocationInt();

parser.addLocationInformation(readName1, position1);
parser.addLocationInformation(readName2, position2);

final AbstractMarkDuplicatesTester tester = getTester();
tester.getSamRecordSetBuilder().setReadLength(101);
tester.setExpectedOpticalDuplicate(0);

int compare = position1.tile - position2.tile;
if (compare == 0) {
compare = position1.x - position2.x;
}

if (compare == 0) {
compare = position1.y - position2.y;
}

final boolean isDuplicate = compare < 0;

tester.addMatePair(readName1, 1,485253, 485253, false, false, !isDuplicate, !isDuplicate, "42M59S", "59S42M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
tester.addMatePair(readName2, 1,485253, 485253, false, false, isDuplicate, isDuplicate, "59S42M", "42M59S", true, false, false, false, false, DEFAULT_BASE_QUALITY);

tester.runTest();
}

@Test
public void testOpticalDuplicatesDifferentReadGroups() {
final AbstractMarkDuplicatesTester tester = getTester();
Expand Down Expand Up @@ -251,7 +193,7 @@ public void testOpticalDuplicatesTheSameReadGroup() {
public void testOpticalDuplicatesAndPCRDuplicatesOrientationDifference() {
final AbstractMarkDuplicatesTester tester = getTester();
tester.setExpectedOpticalDuplicate(0);
tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 486253, false, false, true, true, "101M", "101M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // duplicate
tester.addMatePair("RUNID:7:1203:2886:16838", 19, 19, 485253, 486253, false, false, true, true, "101M", "101M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // duplicate
tester.addMatePair("RUNID:7:1203:2886:16834", 19, 19, 486253, 485253, false, false, false, false, "101M", "101M", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1");
// Even though these reads are in a duplicate group together, we don't want to mark them as Optical Duplicates because their orientation is flipped (which doesn't matter for PCR duplicates)
tester.runTest();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.sam.util.PhysicalLocationInt;
import picard.sam.util.ReadNameParser;

import java.io.File;
import java.util.*;
Expand Down

0 comments on commit 83832ea

Please sign in to comment.