diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java index 7f4a7a0c5d2..710ff436df4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentInterval.java @@ -21,8 +21,6 @@ import java.util.*; -import static org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.GAPPED_ALIGNMENT_BREAK_DEFAULT_SENSITIVITY; - /** * Each assembled contig should have at least one such accompanying structure, or 0 when it is unmapped. */ @@ -308,6 +306,8 @@ public AlignmentInterval(final BwaMemAlignment alignment, final List ref public AlignmentInterval(final SimpleInterval referenceSpan, final int startInAssembledContig, final int endInAssembledContig, final Cigar cigarAlong5to3DirectionOfContig, final boolean forwardStrand, final int mapQual, final int mismatches, final int alignerScore, final ContigAlignmentsModifier.AlnModType modType) { + checkValidArgument(cigarAlong5to3DirectionOfContig, referenceSpan, startInAssembledContig, endInAssembledContig); + this.referenceSpan = referenceSpan; this.startInAssembledContig = startInAssembledContig; this.endInAssembledContig = endInAssembledContig; @@ -321,6 +321,20 @@ public AlignmentInterval(final SimpleInterval referenceSpan, final int startInAs this.alnModType = modType; } + @VisibleForTesting + static final void checkValidArgument(final Cigar cigar, final SimpleInterval referenceSpan, + final int readStart, final int readEnd) { + + final int softClippedBases = SvCigarUtils.checkCigarAndConvertTerminalInsertionToSoftClip(cigar).stream().filter(ce -> ce.getOperator().equals(CigarOperator.S)).mapToInt(CigarElement::getLength).sum(); + final int readLength = cigar.getReadLength() - softClippedBases; + final int referenceLength = cigar.getReferenceLength(); + final boolean validState = referenceLength == referenceSpan.size() && readLength == (readEnd - readStart + 1); + if ( ! validState) { + throw new IllegalArgumentException("Encountering invalid arguments for constructing alignment,\t" + + "cigar: " + cigar.toString() + " ref.span: " + referenceSpan.toString() + " read span: " + readStart + "-" + readEnd); + } + } + public boolean containsGapOfEqualOrLargerSize(final int gapSize) { return cigarAlong5to3DirectionOfContig.getCigarElements().stream() .anyMatch(cigarElement -> diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifier.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifier.java index bcabd033cb6..dfaa23b8c82 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifier.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifier.java @@ -9,6 +9,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.utils.SvCigarUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; +import org.broadinstitute.hellbender.utils.read.CigarUtils; import scala.Tuple2; import scala.Tuple3; @@ -39,17 +40,37 @@ public static AlignmentInterval clipAlignmentInterval(final AlignmentInterval in Utils.validateArg(clipLengthOnRead < input.endInAssembledContig - input.startInAssembledContig + 1, "input alignment to be clipped away: " + input.toPackedString() + "\twith clip length: " + clipLengthOnRead); - final Tuple2 result = computeNewRefSpanAndCigar(input, clipLengthOnRead, clipFrom3PrimeEnd); + final Tuple2 newRefSpanAndCigar = computeNewRefSpanAndCigar(input, clipLengthOnRead, clipFrom3PrimeEnd); + final Tuple2 newContigStartAndEnd = + computeNewReadSpan(input.startInAssembledContig, input.endInAssembledContig, newRefSpanAndCigar._2, + clipLengthOnRead, clipFrom3PrimeEnd); + return new AlignmentInterval(newRefSpanAndCigar._1, newContigStartAndEnd._1, newContigStartAndEnd._2, newRefSpanAndCigar._2, + input.forwardStrand, input.mapQual, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, AlnModType.UNDERGONE_OVERLAP_REMOVAL); + } + + /** + * The new read span can NOT be simply calculated by subtracting the requested {@code clipLengthOnRead}, + * for a reason that can be demonstrated below: + * suppose an alignment has cigar "20S100M10I...", and it is being clipped from the 5'-end with a length of 105. + * If we simply use the 105 to calculate the new start, it would be 21 + 105 = 126, + * but because the whole 100M alignment block would be clipped away, the new start should be 131. + */ + private static Tuple2 computeNewReadSpan(final int originalContigStart, final int originalContigEnd, + final Cigar newCigarAlong5to3DirectionOfContig, + final int clipLengthOnRead, final boolean clipFrom3PrimeEnd) { final int newTigStart, newTigEnd; if (clipFrom3PrimeEnd) { - newTigStart = input.startInAssembledContig; - newTigEnd = input.endInAssembledContig - clipLengthOnRead; + newTigStart = originalContigStart; + newTigEnd = Math.min(originalContigEnd - clipLengthOnRead, + SvCigarUtils.getUnclippedReadLength(newCigarAlong5to3DirectionOfContig) - + CigarUtils.countRightClippedBases(newCigarAlong5to3DirectionOfContig)); } else { - newTigStart = input.startInAssembledContig + clipLengthOnRead; - newTigEnd = input.endInAssembledContig; + newTigStart = Math.max(originalContigStart + clipLengthOnRead, + CigarUtils.countLeftClippedBases(newCigarAlong5to3DirectionOfContig) + 1); + newTigEnd = originalContigEnd; } - return new AlignmentInterval(result._1, newTigStart, newTigEnd, result._2, input.forwardStrand, input.mapQual, - AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, AlnModType.UNDERGONE_OVERLAP_REMOVAL); + + return new Tuple2<>(newTigStart, newTigEnd); } /** @@ -103,7 +124,7 @@ static Tuple2 computeNewRefSpanAndCigar(final AlignmentIn // then deal with ref span refBasesConsumed += ce.getOperator().isAlignment() ? (clipLengthOnRead - readBasesConsumed) - : ce.getLength(); + : 0; break; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java index 4b4fe5e6ce9..0d924485dda 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SimpleSVDiscoveryTestDataProvider.java @@ -441,7 +441,7 @@ public static final class TestDataForSimpleSVs { System.arraycopy(rightRefFlank, 0, contigSeq, 50, 40); AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1 ,50, TextCigarCodec.decode("50M40S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 41 ,100, TextCigarCodec.decode("40S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 41 ,90, TextCigarCodec.decode("40S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001")); @@ -453,7 +453,7 @@ public static final class TestDataForSimpleSVs { System.arraycopy(doubleDup, 0, contigSeq, 40, 10); System.arraycopy(leftRefFlank, 0, contigSeq, 50, 40); region1 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 1 ,50, TextCigarCodec.decode("50M40S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 41 ,100, TextCigarCodec.decode("40S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 41 ,90, TextCigarCodec.decode("40S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict); result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001")); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java index 8217ca4b50c..a145dc0d7a1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignedAssemblyUnitTest.java @@ -61,12 +61,12 @@ private Object[][] createInputsAndExpectedResults_Serialization() { for(int pair=0; pair alignmentIntervalsForSimpleInversion = new ArrayList<>(8); - final SimpleInterval referenceIntervalLeft = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair]+1, alignmentStartsOnRef_0Based[2*pair]+cigars[2*pair].getReferenceLength()+1); + final SimpleInterval referenceIntervalLeft = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair]+1, alignmentStartsOnRef_0Based[2*pair]+cigars[2*pair].getReferenceLength()); final AlignmentInterval alignmentIntervalLeft = new AlignmentInterval(referenceIntervalLeft, alignmentStartsOnTig_0BasedInclusive[2*pair]+1, alignmentEndsOnTig_0BasedExclusive[2*pair], strandedness[2*pair] ? cigars[2*pair] : CigarUtils.invertCigar(cigars[2*pair]), strandedness[2*pair], mapQual[2*pair], mismatches[2*pair], 100, ContigAlignmentsModifier.AlnModType.NONE); alignmentIntervalsForSimpleInversion.add(alignmentIntervalLeft); - final SimpleInterval referenceIntervalRight = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair+1]+1, alignmentStartsOnRef_0Based[2*pair+1]+cigars[2*pair+1].getReferenceLength()+1); + final SimpleInterval referenceIntervalRight = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair+1]+1, alignmentStartsOnRef_0Based[2*pair+1]+cigars[2*pair+1].getReferenceLength()); final AlignmentInterval alignmentIntervalRight = new AlignmentInterval(referenceIntervalRight, alignmentStartsOnTig_0BasedInclusive[2*pair+1]+1, alignmentEndsOnTig_0BasedExclusive[2*pair+1], strandedness[2*pair+1] ? cigars[2*pair+1] : CigarUtils.invertCigar(cigars[2*pair+1]), strandedness[2*pair+1], mapQual[2*pair+1], mismatches[2*pair+1], 100, ContigAlignmentsModifier.AlnModType.NONE); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java index cd9ee2f56fc..fb7d59aeb3c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AlignmentIntervalUnitTest.java @@ -26,12 +26,12 @@ Object[][] testDataForAIOverlaps() { final List data = new ArrayList<>(20); AlignmentInterval ar1 = new AlignmentInterval(new SimpleInterval("1",1,5), 1,5, TextCigarCodec.decode("5M5H"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - AlignmentInterval ar2 = new AlignmentInterval(new SimpleInterval("1",10,16), 5,10, TextCigarCodec.decode("4S6M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + AlignmentInterval ar2 = new AlignmentInterval(new SimpleInterval("1",11,16), 5,10, TextCigarCodec.decode("4S6M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{ar1, ar2, 1, 0}); ar1 = new AlignmentInterval(new SimpleInterval("1",1,5), 1,5, TextCigarCodec.decode("5M5H"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - ar2 = new AlignmentInterval(new SimpleInterval("1",11,16), 6,10, TextCigarCodec.decode("5S5M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ar2 = new AlignmentInterval(new SimpleInterval("1",11,15), 6,10, TextCigarCodec.decode("5S5M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{ar1, ar2, 0, 0}); // overlaps on ref only @@ -40,7 +40,7 @@ Object[][] testDataForAIOverlaps() { data.add(new Object[]{ar1, ar2, 0, 59}); ar1 = new AlignmentInterval(new SimpleInterval("chr1",9170350,9171390), 1,1041, TextCigarCodec.decode("1041M1298H"),false, 60, 4, 1021, ContigAlignmentsModifier.AlnModType.NONE); - ar2 = new AlignmentInterval(new SimpleInterval("chr1",9169370,9170505), 1204,2239, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE); + ar2 = new AlignmentInterval(new SimpleInterval("chr1",9169370,9170505), 1204,2339, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{ar1, ar2, 0, 505-350+1}); // overlaps on read only @@ -72,7 +72,7 @@ Object[][] testDataForAIOverlaps() { // different chr ar1 = new AlignmentInterval(new SimpleInterval("chr1",9170350,9171390), 1,1041, TextCigarCodec.decode("1041M1298H"),false, 60, 4, 1021, ContigAlignmentsModifier.AlnModType.NONE); - ar2 = new AlignmentInterval(new SimpleInterval("chr2",9169370,9170505), 1204,2239, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE); + ar2 = new AlignmentInterval(new SimpleInterval("chr2",9169370,9170505), 1204,2339, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{ar1, ar2, 0, 0}); return data.toArray(new Object[data.size()][]); @@ -432,4 +432,27 @@ public void testContainsGapOfEqualOrLargerSize(final AlignmentInterval alignment final boolean expectedResult) { Assert.assertEquals(alignment.containsGapOfEqualOrLargerSize(gapSize), expectedResult); } + + @DataProvider(name = "forTestCtorArgChecking") + private Object[][] forTestCtorArgChecking() { + final List data = new ArrayList<>(20); + + data.add(new Object[]{TextCigarCodec.decode("1155M1154S"), new SimpleInterval("chr22", 47043976, 47045130), 1, 1155, null}); + data.add(new Object[]{TextCigarCodec.decode("1424M1424S"), new SimpleInterval("chr15", 80355809, 80357232), 1, 1424, null}); + + data.add(new Object[]{TextCigarCodec.decode("1155M1154S"), new SimpleInterval("chr22", 47043976, 47045131), 1, 1155, IllegalArgumentException.class}); + data.add(new Object[]{TextCigarCodec.decode("1424M1424S"), new SimpleInterval("chr15", 80355809, 80357232), 1, 1429, IllegalArgumentException.class}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forTestCtorArgChecking") + @SuppressWarnings("rawtypes") + public void testCtorArgChecking(final Cigar cigar, final SimpleInterval referenceSpan, final int readStart, final int readEnd, + final Class expectedExceptionClass) { + try { + AlignmentInterval.checkValidArgument(cigar, referenceSpan, readStart, readEnd); + } catch (final Exception e) { + Assert.assertEquals(e.getClass(), expectedExceptionClass); + } + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java index 8cf49adfb78..32086613880 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java @@ -43,7 +43,7 @@ private Object[][] createTestData() { intervalOne = new AlignmentInterval(new SimpleInterval("chr2", 1422222, 1422435), 1, 270, TextCigarCodec.decode("75M56I139M"), false, 60, 56, 142, ContigAlignmentsModifier.AlnModType.NONE); - intervalTwo = new AlignmentInterval(new SimpleInterval("chr2_KI270774v1_alt", 105288, 105557), + intervalTwo = new AlignmentInterval(new SimpleInterval("chr2_KI270774v1_alt", 105288, 105555), 1, 270, TextCigarCodec.decode("114M1I27M1I127M"), false, 56, 13, 179, ContigAlignmentsModifier.AlnModType.NONE); contig = new AlignedContig("asm002608:tig00001", "ATGCTGGGGAATTTGTGTGCTCCTTGGGTGGGGACGAGCATGGAAGGCGCGTGGGACTGAAGCCTTGAAGACCCCGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCGCGCTGGACCGACCTCGTGCAGGCGCCGCGCTGGGCCATGGGGAGAGCGAGAGCCTGGTGTGCCCCTCAGGGAC".getBytes(), Arrays.asList(intervalOne, intervalTwo)/*, true*/); @@ -130,7 +130,7 @@ private Object[][] forFilterSecondaryConfigurationsByMappingQualityThreshold() { final List data = new ArrayList<>(20); AlignmentInterval intervalOne = new AlignmentInterval( - new SimpleInterval("chr21", 100000, 100100), + new SimpleInterval("chr21", 100001, 100100), 1, 100, TextCigarCodec.decode("100M220S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval intervalTwo = new AlignmentInterval( @@ -138,8 +138,8 @@ private Object[][] forFilterSecondaryConfigurationsByMappingQualityThreshold() { 99, 122, TextCigarCodec.decode("98S24M78S"), true, 10, 3, 241, ContigAlignmentsModifier.AlnModType.NONE); AlignmentInterval intervalThree = new AlignmentInterval( - new SimpleInterval("chr21", 100121, 100200), - 122, 200, TextCigarCodec.decode("222S78M"), + new SimpleInterval("chr21", 100123, 100200), + 223, 300, TextCigarCodec.decode("222S78M"), true, 60, 0, 78, ContigAlignmentsModifier.AlnModType.NONE); final GoodAndBadMappings rep1 = new GoodAndBadMappings(Arrays.asList(intervalOne, intervalThree), @@ -188,7 +188,7 @@ private Object[][] forTestingNotDiscardForBadMQ() { data.add(new Object[]{outForSingleBadMapping, false}); final AlignmentInterval intervalOne = new AlignmentInterval( - new SimpleInterval("chr21", 100000, 100100), + new SimpleInterval("chr21", 100001, 100100), 1, 100, TextCigarCodec.decode("100M220S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignmentInterval intervalTwo = new AlignmentInterval( @@ -349,7 +349,7 @@ private Object[][] forSpecialCaseGapSplit() { AlignmentInterval gapped; // case one: gapped alignment provides worse coverage noGap = new AlignmentInterval(new SimpleInterval("chr1", 1_000_001, 1_000_950), - 1, 1150, TextCigarCodec.decode("950M50S"), + 1, 950, TextCigarCodec.decode("950M50S"), true, 60, 0, 950, ContigAlignmentsModifier.AlnModType.NONE); gapped = new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_001_200), 101, 1000, TextCigarCodec.decode("100S300M200D600M"), @@ -377,13 +377,13 @@ private Object[][] forSpecialCaseGapSplit() { }); // case three: gapped alignment provides better coverage with a I-gap - gapped = new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_001_850), + gapped = new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_000_850), 101, 1000, TextCigarCodec.decode("100S300M150I450M"), true, 60, 150, 750, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{new Tuple2<>(noGap, gapped), true, new GoodAndBadMappings(Arrays.asList(new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_000_400), 101, 400, TextCigarCodec.decode("100S300M600S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), - new AlignmentInterval(new SimpleInterval("chr1", 1_000_401, 1_001_850), 551, 1000, TextCigarCodec.decode("550S450M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)), + new AlignmentInterval(new SimpleInterval("chr1", 1_000_401, 1_000_850), 551, 1000, TextCigarCodec.decode("550S450M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)), Collections.singletonList(noGap)) }); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifierUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifierUnitTest.java index 411355e0773..bb5e96dbe93 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifierUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/ContigAlignmentsModifierUnitTest.java @@ -58,7 +58,7 @@ public void testGappedAlignmentBreaker_OneInsertion() { public void testGappedAlignmentBreaker_OneDeletion() { final Cigar cigar = TextCigarCodec.decode("2S205M2D269M77S"); final AlignmentInterval alignmentInterval = new AlignmentInterval(new SimpleInterval("1", 100, 575), - 208, 476, cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + 3, 476, cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final List generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval, 1, cigar.getReadLength())).collect(Collectors.toList()); @@ -225,7 +225,7 @@ public void testGappedAlignmentBreaker_NegativeStrand() { final Cigar cigar = TextCigarCodec.decode("10S1044M122I395M75I"); final AlignmentInterval alignmentInterval = new AlignmentInterval( new SimpleInterval("chrUn_JTFH01000557v1_decoy", 21, 1459), 11, - 1646, cigar, false, 60, 200, 100, ContigAlignmentsModifier.AlnModType.NONE); + 1571, cigar, false, 60, 200, 100, ContigAlignmentsModifier.AlnModType.NONE); final List generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval, 1, cigar.getReadLength())).collect(Collectors.toList()); Assert.assertEquals(generatedARList.get(0), new AlignmentInterval( @@ -242,37 +242,71 @@ public void testGappedAlignmentBreaker_NegativeStrand() { public void testGappedAlignmentBreaker_ExpectException() { int exceptionCount = 0; - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10H10D10M"));} catch (final Exception ex) {++exceptionCount;} - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10S10D10M"));} catch (final Exception ex) {++exceptionCount;} - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10M10D10S"));} catch (final Exception ex) {++exceptionCount;} - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10M10D10H"));} catch (final Exception ex) {++exceptionCount;} + try {willThrowOnInvalidCigar(TextCigarCodec.decode("10H10D10M"), 11);} catch (final Exception ex) {++exceptionCount;} + try {willThrowOnInvalidCigar(TextCigarCodec.decode("10S10D10M"), 11);} catch (final Exception ex) {++exceptionCount;} + try {willThrowOnInvalidCigar(TextCigarCodec.decode("10M10D10S"), 1);} catch (final Exception ex) {++exceptionCount;} + try {willThrowOnInvalidCigar(TextCigarCodec.decode("10M10D10H"), 1);} catch (final Exception ex) {++exceptionCount;} - try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10I10S"));} catch (final Exception ex) {++exceptionCount;} - try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10D10S"));} catch (final Exception ex) {++exceptionCount;} + try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10I10S"), 11);} catch (final Exception ex) {++exceptionCount;} + try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10D10S"), 21);} catch (final Exception ex) {++exceptionCount;} - try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10I10M10D10S"));} catch (final Exception ex) {++exceptionCount;} - try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10D10M10I10S"));} catch (final Exception ex) {++exceptionCount;} - - // these 4 are fine now - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10H10I10M"));} catch (final Exception ex) {++exceptionCount;} - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10S10I10M"));} catch (final Exception ex) {++exceptionCount;} - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10M10I10S"));} catch (final Exception ex) {++exceptionCount;} - try {willThrowOnInvalidCigar(TextCigarCodec.decode("10M10I10H"));} catch (final Exception ex) {++exceptionCount;} - - // last two are valid - try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10M10I10M10S"));} catch (final Exception ex) {++exceptionCount;} - try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10M10D10M10S"));} catch (final Exception ex) {++exceptionCount;} + try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10I10M10D10S"), 11);} catch (final Exception ex) {++exceptionCount;} + try{willThrowOnInvalidCigar(TextCigarCodec.decode("10H10D10M10I10S"), 21);} catch (final Exception ex) {++exceptionCount;} Assert.assertEquals(exceptionCount, 8); } - private static Iterable willThrowOnInvalidCigar(final Cigar cigar) throws GATKException { + private static Iterable willThrowOnInvalidCigar(final Cigar cigar, final int readStart) throws GATKException { final AlignmentInterval detailsDoesnotMatter = new AlignmentInterval( - new SimpleInterval("1", 1, 110), 21, 30, cigar, + new SimpleInterval("1", 1, cigar.getReferenceLength()), + readStart, readStart+cigar.getReadLength()-SvCigarUtils.getNumSoftClippingBases(true, cigar.getCigarElements())-SvCigarUtils.getNumSoftClippingBases(false, cigar.getCigarElements()), cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); return ContigAlignmentsModifier.splitGappedAlignment(detailsDoesnotMatter, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); } + + @Test(groups = "sv") + public void testGappedAlignmentBreaker_NoLongerExpectException() { // not testing correctness, just testing the function now accepts these + + // these 4 are fine now + Cigar cigar = TextCigarCodec.decode("10H10I10M"); + AlignmentInterval alignment = new AlignmentInterval(new SimpleInterval("1", 1, 10), + 21, 30, cigar, + true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ContigAlignmentsModifier.splitGappedAlignment(alignment, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); + + cigar = TextCigarCodec.decode("10S10I10M"); + alignment = new AlignmentInterval(new SimpleInterval("1", 1, 10), + 21, 30, cigar, + true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ContigAlignmentsModifier.splitGappedAlignment(alignment, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); + + cigar = TextCigarCodec.decode("10M10I10S"); + alignment = new AlignmentInterval(new SimpleInterval("1", 1, 10), + 1, 10, cigar, + true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ContigAlignmentsModifier.splitGappedAlignment(alignment, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); + + cigar = TextCigarCodec.decode("10M10I10H"); + alignment = new AlignmentInterval(new SimpleInterval("1", 1, 10), + 1, 10, cigar, + true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ContigAlignmentsModifier.splitGappedAlignment(alignment, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); + + // last two are valid + cigar = TextCigarCodec.decode("10H10M10I10M10S"); + alignment = new AlignmentInterval(new SimpleInterval("1", 1, 20), + 11, 40, cigar, + true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ContigAlignmentsModifier.splitGappedAlignment(alignment, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); + + cigar = TextCigarCodec.decode("10H10M10D10M10S"); + alignment = new AlignmentInterval(new SimpleInterval("1", 1, 30), + 11, 30, cigar, + true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + ContigAlignmentsModifier.splitGappedAlignment(alignment, 1, cigar.getReadLength() + SvCigarUtils.getTotalHardClipping(cigar)); + } + //================================================================================================================== @DataProvider(name = "forComputeNewRefSpanAndCigar") @@ -311,10 +345,10 @@ private Object[][] createTestDataForComputeNewRefSpanAndCigar() { refSpan = new SimpleInterval("chr2", 128791173, 128792476); data.add(new Object[]{alignment, 28, true, refSpan, TextCigarCodec.decode("1190M4D53M2I26M2I31M1450S")}); - alignment = new AlignmentInterval(originalRefSpan, + alignment = new AlignmentInterval(new SimpleInterval("chr2", 128791173, 128792504), 1, 1334, TextCigarCodec.decode("1190M4D53M2I26M2I31M2I28M1422S"), true, 60, 13, 1239, ContigAlignmentsModifier.AlnModType.NONE); - refSpan = new SimpleInterval("chr2", 128791173, 128792478); + refSpan = new SimpleInterval("chr2", 128791173, 128792476); data.add(new Object[]{alignment, 28, true, refSpan, TextCigarCodec.decode("1190M4D53M2I26M2I31M1452S")}); alignment = new AlignmentInterval(originalRefSpan, @@ -323,10 +357,10 @@ private Object[][] createTestDataForComputeNewRefSpanAndCigar() { refSpan = new SimpleInterval("chr2", 128792367, 128792506); data.add(new Object[]{alignment, 1190, false, refSpan, TextCigarCodec.decode("1190S53M2I26M2I31M2D28M1422S")}); - alignment = new AlignmentInterval(originalRefSpan, - 1, 1334, TextCigarCodec.decode("1190M4I53M2I26M2I31M2I28M1422S"), + alignment = new AlignmentInterval(new SimpleInterval("chr2", 128791173, 128792500), + 1, 1338, TextCigarCodec.decode("1190M4I53M2I26M2I31M2I28M1422S"), true, 60, 13, 1239, ContigAlignmentsModifier.AlnModType.NONE); - refSpan = new SimpleInterval("chr2", 128792363, 128792506); + refSpan = new SimpleInterval("chr2", 128792363, 128792500); data.add(new Object[]{alignment, 1190, false, refSpan, TextCigarCodec.decode("1194S53M2I26M2I31M2I28M1422S")}); alignment = new AlignmentInterval(new SimpleInterval("chr20", 38653045, 38653268), @@ -334,6 +368,15 @@ private Object[][] createTestDataForComputeNewRefSpanAndCigar() { false, 60, 59, 85, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{alignment, 89, false, new SimpleInterval("chr20", 38653045, 38653179), TextCigarCodec.decode("1915S106M3I29M1431S")}); + alignment = new AlignmentInterval(new SimpleInterval("chr5", 33757389, 33757589), + 419, 658, TextCigarCodec.decode("418H78M39I123M180H"), + true, 60, 41, 136, ContigAlignmentsModifier.AlnModType.NONE); + data.add(new Object[]{alignment, 79, false, new SimpleInterval("chr5", 33757467, 33757589), TextCigarCodec.decode("418H117S123M180H")}); + alignment = new AlignmentInterval(new SimpleInterval("chr5", 33757467, 33757589), + 536, 658, TextCigarCodec.decode("418H117S123M180H"), + true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); + data.add(new Object[]{alignment, 101, true, new SimpleInterval("chr5", 33757467, 33757488), TextCigarCodec.decode("418H117S22M101S180H")}); + return data.toArray(new Object[data.size()][]); } @@ -400,19 +443,52 @@ private Object[][] createTestDataForClipAlignmentInterval() { data.add(new Object[]{null, 10, true, null, IllegalArgumentException.class}); - final AlignmentInterval alignmentInterval = SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true); + AlignmentInterval alignment = SVTestUtils.fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true); - data.add(new Object[]{alignmentInterval, -1, true, null, IllegalArgumentException.class}); + data.add(new Object[]{alignment, -1, true, null, IllegalArgumentException.class}); - data.add(new Object[]{alignmentInterval, 51, true, null, IllegalArgumentException.class}); + data.add(new Object[]{alignment, 51, true, null, IllegalArgumentException.class}); - final AlignmentInterval expected = new AlignmentInterval( + AlignmentInterval expected = new AlignmentInterval( new SimpleInterval("chr1", 202317371, 202317402), 1104, 1135, TextCigarCodec.decode("1085H18S32M1393H"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); - data.add(new Object[]{alignmentInterval, 18, false, expected, null}); + data.add(new Object[]{alignment, 18, false, expected, null}); + + alignment = new AlignmentInterval(new SimpleInterval("chr5", 33757389, 33757589), + 419, 658, TextCigarCodec.decode("418H78M39I123M180H"), + true, 60, 41, 136, ContigAlignmentsModifier.AlnModType.NONE); + expected = new AlignmentInterval( + new SimpleInterval("chr5", 33757467, 33757589), + 536, 658, + TextCigarCodec.decode("418H117S123M180H"), true, + 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, + ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); + data.add(new Object[]{alignment, 79, false, expected, null}); + alignment = new AlignmentInterval(new SimpleInterval("chr5", 33757467, 33757589), + 536, 658, TextCigarCodec.decode("418H117S123M180H"), + true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); + expected = new AlignmentInterval( + new SimpleInterval("chr5", 33757467, 33757488), + 536, 557, + TextCigarCodec.decode("418H117S22M101S180H"), true, + 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, + ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); + data.add(new Object[]{alignment, 101, true, expected, null}); + + alignment = new AlignmentInterval(new SimpleInterval("chr12", 31118319, 31118856), + 179, 714, TextCigarCodec.decode("178S91M1D118M1D327M285S"), + false, 60, 51, 257, ContigAlignmentsModifier.AlnModType.NONE); + expected = new AlignmentInterval( + new SimpleInterval("chr12", 31118372, 31118856), + 179, 661, + TextCigarCodec.decode("178S91M1D118M1D274M338S"), false, + 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, + ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL); + data.add(new Object[]{alignment, 53, true, expected, null}); + return data.toArray(new Object[data.size()][]); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/BreakpointComplicationsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/BreakpointComplicationsUnitTest.java index 3b150e535c1..726d52fb8fe 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/BreakpointComplicationsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/BreakpointComplicationsUnitTest.java @@ -26,8 +26,8 @@ public void testGetHomology() { Assert.assertEquals(BreakpointComplications.inferHomology(region1, region2, contigSequence), "AAAA"); - final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("1", 1, 12), 1, 8, TextCigarCodec.decode("8M"), true, 60, 1, 100, ContigAlignmentsModifier.AlnModType.NONE); // dummy test data, almost guaranteed to be non-factual - final AlignmentInterval region4 = new AlignmentInterval(new SimpleInterval("1", 101, 112), 13, 20, TextCigarCodec.decode("8M"), false, 60, 1, 100, ContigAlignmentsModifier.AlnModType.NONE); // dummy test data, almost guaranteed to be non-factual + final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("1", 1, 8), 1, 8, TextCigarCodec.decode("8M"), true, 60, 1, 100, ContigAlignmentsModifier.AlnModType.NONE); // dummy test data, almost guaranteed to be non-factual + final AlignmentInterval region4 = new AlignmentInterval(new SimpleInterval("1", 101, 108), 13, 20, TextCigarCodec.decode("8M"), false, 60, 1, 100, ContigAlignmentsModifier.AlnModType.NONE); // dummy test data, almost guaranteed to be non-factual Assert.assertTrue(BreakpointComplications.inferHomology(region3, region4, contigSequence).isEmpty()); } @@ -35,9 +35,9 @@ public void testGetHomology() { @Test(groups = "sv") public void testGetInsertedSequence() { final byte[] contigSequence = "GACGAACGATTTGACTTTAATATGAAATGTTTTATGTGGGCTATAAAATTATCCAAACTCGACACAGGACATTTTGAGCTTATTTCCAAATCATCTGGCCTTCATCTACCCACTGGAACTATTACTCTGCTGGGTCCTCATGGAAACATATCTTTCAGCCCTAACAATGAGACTACAGACATCTACGTCCCCAACACAACAGCTAAAAAGCAGTAGAATGTCAGAAAGGCTATCCACTTAGCCCTTGGCTGACAGGCCCCACTGAGCATCCTTTGCGAAGTCCATTTACTAGCTAATTCATAATTTACACAAGGCATTCAGACATAGCAGCTAAGATATAAAACATTTATCAACACAGGGACTAGTTTGTCATTTTAAAATAATTATGTTTAAGTAAGCCAATAAAGTCTATCTTCTCCAATTTACTTATTGAGCTTTATGAGGCAATTTAAGTCCCGATTTTGGGGGGTATGTATGAAAGGAGAGCATGGAAATGCCATTTGCTCCCTGAAGTTTTTATCTTTTTTTTTTTGAGATAGAGTCTTGTGTTTTCTGTGGAGTACATGAGTATGCATCAAAGCTAACAACGCCCACTGCCCTGTTAGTCAAATACCTTTGA".getBytes(); - final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("8", 118873207, 118873739), 1, 532, TextCigarCodec.decode("532M87S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("1", 175705642, 175705671), 519, 547, TextCigarCodec.decode("518S29M72S"), false, 3, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("1", 118875262, 118875338), 544, 619, TextCigarCodec.decode("543S76M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("8", 118873207, 118873738), 1, 532, TextCigarCodec.decode("532M87S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("1", 175705642, 175705670), 519, 547, TextCigarCodec.decode("518S29M72S"), false, 3, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("1", 118875262, 118875337), 544, 619, TextCigarCodec.decode("543S76M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); Assert.assertTrue(BreakpointComplications.inferInsertedSequence(region3, region1, contigSequence).isEmpty()); Assert.assertEquals(BreakpointComplications.inferInsertedSequence(region1, region3, contigSequence), "GAGATAGAGTC"); @@ -54,10 +54,10 @@ public void testExtractCigarForSimpleTandup() { final int contigTotalLength = 355; // forward strand - final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("1", 1000001, 1000125), 16, 75, + final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("1", 1000001, 1000125), 16, 90, TextCigarCodec.decode("5H10S15M20D25M30D35M260S5H"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("1", 1000041, 1000145), 191, 340, + final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("1", 1000041, 1000145), 191, 345, TextCigarCodec.decode("5H185S45M30I55M20I5M10S5H"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java index bcb4fa03017..b110c069f45 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ChimericAlignmentUnitTest.java @@ -46,8 +46,8 @@ public void testFilterByRegionTooSmall() { @Test(groups = "sv") public void testFilterByNextAlignmentMayBeInsertion() { - final AlignmentInterval overlappingRegion1 = new AlignmentInterval(new SimpleInterval("19", 48699881, 48700035), 1, 154, TextCigarCodec.decode("47S154M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval overlappingRegion2 = new AlignmentInterval(new SimpleInterval("19", 48700584, 48700669), 117, 201, TextCigarCodec.decode("116H85M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval overlappingRegion1 = new AlignmentInterval(new SimpleInterval("19", 48699881, 48700034), 1, 154, TextCigarCodec.decode("47S154M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval overlappingRegion2 = new AlignmentInterval(new SimpleInterval("19", 48700584, 48700668), 117, 201, TextCigarCodec.decode("116H85M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); Assert.assertTrue(ChimericAlignment.nextAlignmentMayBeInsertion(overlappingRegion1, overlappingRegion2, CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, 50,true)); } @@ -180,7 +180,7 @@ static List> result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); intervalOne = new AlignmentInterval(new SimpleInterval("chr20", 48513458, 48513545), 1, 88, TextCigarCodec.decode("88M227H"), true, 39, 1, 83, ContigAlignmentsModifier.AlnModType.NONE); - intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 48513297, 48513579), 84, 365, TextCigarCodec.decode("83S282M"), false, 60, 0, 282, ContigAlignmentsModifier.AlnModType.NONE); + intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 48513297, 48513578), 84, 365, TextCigarCodec.decode("83S282M"), false, 60, 0, 282, ContigAlignmentsModifier.AlnModType.NONE); result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); @@ -199,7 +199,7 @@ static List> result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); // diff-chr translocation suspect with SS - intervalOne = new AlignmentInterval(new SimpleInterval("chr21", 5374092, 5374748), 1, 656, TextCigarCodec.decode("656M322S"), true, 60, 14, 586, ContigAlignmentsModifier.AlnModType.NONE); + intervalOne = new AlignmentInterval(new SimpleInterval("chr21", 5374092, 5374747), 1, 656, TextCigarCodec.decode("656M322S"), true, 60, 14, 586, ContigAlignmentsModifier.AlnModType.NONE); intervalTwo = new AlignmentInterval(new SimpleInterval("chr20", 28764673, 28765145), 506, 978, TextCigarCodec.decode("473M505H"), false, 60, 16, 393, ContigAlignmentsModifier.AlnModType.NONE); result.add(new Tuple3<>(intervalOne, intervalTwo, SimpleSVDiscoveryTestDataProvider.b38_seqDict)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java index 2e700efd9a3..4b581979e7b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/CpxVariantInterpreterUnitTest.java @@ -137,7 +137,7 @@ private static List validInputsToOverlapYieldingStrategy() { private Object[][] forOverlapYieldingStrategy() { final List data = new ArrayList<>(20); - final AlignmentInterval one = new AlignmentInterval(new SimpleInterval("chr1", 100000, 100100), 1, 100, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval one = new AlignmentInterval(new SimpleInterval("chr1", 100001, 100100), 1, 100, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignmentInterval two = new AlignmentInterval(new SimpleInterval("chr1", 100041, 100070), 33, 62, TextCigarCodec.decode("30M"), true, 30, 5, 26, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{one, two, true, CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict, IllegalArgumentException.class}); @@ -190,7 +190,7 @@ private Object[][] forRemoveOverlap() { final List data = new ArrayList<>(20); // containment - final AlignmentInterval one = new AlignmentInterval(new SimpleInterval("chr1", 100000, 100100), 1, 100, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval one = new AlignmentInterval(new SimpleInterval("chr1", 100001, 100100), 1, 100, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignmentInterval two = new AlignmentInterval(new SimpleInterval("chr1", 100041, 100070), 33, 62, TextCigarCodec.decode("30M"), true, 30, 5, 26, ContigAlignmentsModifier.AlnModType.NONE); data.add(new Object[]{one, two, 30, CpxSVInferenceTestUtils.bareBoneHg38SAMSeqDict, false, false, null, IllegalArgumentException.class}); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java index a5ea154951b..398040499f5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/NovelAdjacencyAndAltHaplotypeUnitTest.java @@ -77,8 +77,8 @@ void testKryoSerializer() throws IOException { } private static NovelAdjacencyAndAltHaplotype getBreakpoints(final String contigName, final String insertionMapping) { - final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 10000, 10100), 1, 100, TextCigarCodec.decode("100M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 20100, 20200), 101, 200, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 10001, 10100), 1, 100, TextCigarCodec.decode("100M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 20101, 20200), 101, 200, TextCigarCodec.decode("100M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final ArrayList insertionMappings = new ArrayList<>(); insertionMappings.add(insertionMapping); final ChimericAlignment breakpoint = new ChimericAlignment(region1, region2, insertionMappings, contigName, b37_seqDict); @@ -153,9 +153,9 @@ private static SimpleInterval shiftRightOneBase(final SimpleInterval toBeShifted @Test(groups = "sv") public void testGetAssembledBreakpointsFromAlignmentIntervalsWithOverlappingAlignmentInterval() { final byte[] contigSequence = "ACTAGAGCATCTACGTGTTCCTGTGGTTTTGGAGCAAGAGTGATTTGAGTTTCAGAGATTTTTACTAATTCTTCTTCCCCTACCAGAAAAAAAGATCTTACCATTTGAGAGTGAGATGTAAACCCAGCCCTGTCTGACCTGAGTCTGTGCCCTAAGCCTATGCTAAGCCAAGCAGTGCCTGGAGCCACCACAGGTCCACACAATTCGTTAACATGATGAAGCAAGGATGGAAATTGGACAAAATAGTGTGCCTACTGAATCTAAGAATGAAAAATGATTGCACTCCTACTCTGAGTGCTTTGGAGCACTGCCCAGTTGGGCAAAGGGTCAGCGCCTGGGCAGAGGTCCCCACAACCTGGCAGGAGTGTGGTCGGCCACCCTATGGGCCTCCATCATGTGCAGTGACAGCGGGGCTGTCATGTCACCGTGTGGGAGGGCTTGCAGGTGAAGTGGTCTGGGAGGGGTCCCCCAGACAAAGCCAAGGTTCTGAGAGTTGGCCCGAACACTGCTGGATTCCACTTCACCTGCAAGCCCTCCCACACGGTGACATGACAGCCTATAATACAGTTCCGCATGGCCACGTCATACAACCCTGTCATATTGGTGAGCAATTGCTGTGTAGCCAAAGACCCCAAAACTCAAACAGCATTTATTATTATTGCCCCCATGTCTGAGAGTCAGATGTGCATTTGCTGATCTCAGCTTGTTTGAGCTGCTGCAGGGTTGGGGCTCTGCTCCAGGCAGGCTTAGCTGTCACCACATGCACACATACATTCTGGGCCTCTGCTGCGCGCGTCACGTTCACTGAAGATCTTGGGATTGGGAGTTAGGGCGGTGGGAGGGCCCAGCAAAGTCACCTGGCGATGGCAGGGACACAGGGAGGAATGTAGAATGGGGCCGATGATGGGACCCACACGTCTGCAAAGCTGCGGTCTCCTTGAGGGGTGGAGACAGCAACAACTCACCGCACGCGGTGCTTCAGTTCACCATCTCCCTGGGACATTAGGGGGCCCCGTGTTATCTCATTTTGCTCTGGTTTGCATTAGTTTTTTATCACTTCGTAGATGAAGCCACTGACACCCAGAGAGGGAAAGTGGCCTGACCAAGGGCCACAGCAGGGGAGCGAAGGAGCCCCACAGTTCGGCAGGAACACAGCCTCTCCCTGGCTTTCAGGTTCACTGACATCTTCTCATGGCCTCTGTAACTCACCAGGCATCAGGGTGTAGTCCTTAGACCAGTGTCCCACAGCTGCCACAGAGTGGGAGCTCACCATCAGTTATAAGTCACTAGAAAGGCTTTTGGACATTATAAGCTACAATGGAAAATAAGTCATCTGTGGATTTTTGTGACAGATTCCAAAAATTTGAATATTTTGTCTACTTAGGTTTTTGGTTAATTTTATCCTCAAAACTGTTCTGCAGTGATTAAGCTGTACAAACTGCATCATGGGCGAATTGGCATATTCAGAAATGACTGATATTCTTGATTTCAGTTTTTTACTTTGTATGTAGCTCCTCAAGGAAAC".getBytes(); - final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 23102785, 23103304), 1, 519, TextCigarCodec.decode("519M1006S"), true, 60, 1, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 23103196, 23103238), 516, 557, TextCigarCodec.decode("515S42M968S"), false, 60, 2, 100, ContigAlignmentsModifier.AlnModType.NONE); - final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("20", 23103633, 23104603), 556, 1525, TextCigarCodec.decode("555S970M"), true, 60, 3, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("20", 23102785, 23103303), 1, 519, TextCigarCodec.decode("519M1006S"), true, 60, 1, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("20", 23103196, 23103237), 516, 557, TextCigarCodec.decode("515S42M968S"), false, 60, 2, 100, ContigAlignmentsModifier.AlnModType.NONE); + final AlignmentInterval region3 = new AlignmentInterval(new SimpleInterval("20", 23103633, 23104602), 556, 1525, TextCigarCodec.decode("555S970M"), true, 60, 3, 100, ContigAlignmentsModifier.AlnModType.NONE); final AlignedContig alignedContig = new AlignedContig("asm00001:tig0001", contigSequence, Arrays.asList(region1, region2, region3)); final List assembledBreakpointsFromAlignmentIntervals = ChimericAlignment.parseOneContig(alignedContig, b37_seqDict, true, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.CHIMERIC_ALIGNMENTS_HIGHMQ_THRESHOLD, true); @@ -165,7 +165,7 @@ public void testGetAssembledBreakpointsFromAlignmentIntervalsWithOverlappingAlig Assert.assertEquals(chimericAlignment.regionWithLowerCoordOnContig, region1); Assert.assertEquals(chimericAlignment.regionWithHigherCoordOnContig, region3); Assert.assertEquals(chimericAlignment.insertionMappings.size(), 1); - final String expectedInsertionMappingsString = String.join(AlignmentInterval.PACKED_STRING_REP_SEPARATOR, "516", "557", "20:23103196-23103238", "-", "515S42M968S", "60", "2", "100", "O"); + final String expectedInsertionMappingsString = String.join(AlignmentInterval.PACKED_STRING_REP_SEPARATOR, "516", "557", "20:23103196-23103237", "-", "515S42M968S", "60", "2", "100", "O"); Assert.assertEquals(chimericAlignment.insertionMappings.get(0), expectedInsertionMappingsString); final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(chimericAlignment, contigSequence, b37_seqDict); Assert.assertTrue(breakpoints.getComplication().getHomologyForwardStrandRep().isEmpty());