Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix 4648 #4670

Merged
merged 1 commit into from
Apr 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@

import java.util.*;

import static org.broadinstitute.hellbender.tools.spark.sv.StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection.GAPPED_ALIGNMENT_BREAK_DEFAULT_SENSITIVITY;

/**
* Each assembled contig should have at least one such accompanying structure, or 0 when it is unmapped.
*/
Expand Down Expand Up @@ -308,6 +306,8 @@ public AlignmentInterval(final BwaMemAlignment alignment, final List<String> ref
public AlignmentInterval(final SimpleInterval referenceSpan, final int startInAssembledContig, final int endInAssembledContig,
final Cigar cigarAlong5to3DirectionOfContig, final boolean forwardStrand,
final int mapQual, final int mismatches, final int alignerScore, final ContigAlignmentsModifier.AlnModType modType) {
checkValidArgument(cigarAlong5to3DirectionOfContig, referenceSpan, startInAssembledContig, endInAssembledContig);

this.referenceSpan = referenceSpan;
this.startInAssembledContig = startInAssembledContig;
this.endInAssembledContig = endInAssembledContig;
Expand All @@ -321,6 +321,20 @@ public AlignmentInterval(final SimpleInterval referenceSpan, final int startInAs
this.alnModType = modType;
}

@VisibleForTesting
static final void checkValidArgument(final Cigar cigar, final SimpleInterval referenceSpan,
final int readStart, final int readEnd) {

final int softClippedBases = SvCigarUtils.checkCigarAndConvertTerminalInsertionToSoftClip(cigar).stream().filter(ce -> ce.getOperator().equals(CigarOperator.S)).mapToInt(CigarElement::getLength).sum();
final int readLength = cigar.getReadLength() - softClippedBases;
final int referenceLength = cigar.getReferenceLength();
final boolean validState = referenceLength == referenceSpan.size() && readLength == (readEnd - readStart + 1);
if ( ! validState) {
throw new IllegalArgumentException("Encountering invalid arguments for constructing alignment,\t" +
"cigar: " + cigar.toString() + " ref.span: " + referenceSpan.toString() + " read span: " + readStart + "-" + readEnd);
}
}

public boolean containsGapOfEqualOrLargerSize(final int gapSize) {
return cigarAlong5to3DirectionOfContig.getCigarElements().stream()
.anyMatch(cigarElement ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.tools.spark.sv.utils.SvCigarUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.CigarUtils;
import scala.Tuple2;
import scala.Tuple3;

Expand Down Expand Up @@ -39,17 +40,37 @@ public static AlignmentInterval clipAlignmentInterval(final AlignmentInterval in
Utils.validateArg(clipLengthOnRead < input.endInAssembledContig - input.startInAssembledContig + 1,
"input alignment to be clipped away: " + input.toPackedString() + "\twith clip length: " + clipLengthOnRead);

final Tuple2<SimpleInterval, Cigar> result = computeNewRefSpanAndCigar(input, clipLengthOnRead, clipFrom3PrimeEnd);
final Tuple2<SimpleInterval, Cigar> newRefSpanAndCigar = computeNewRefSpanAndCigar(input, clipLengthOnRead, clipFrom3PrimeEnd);
final Tuple2<Integer, Integer> newContigStartAndEnd =
computeNewReadSpan(input.startInAssembledContig, input.endInAssembledContig, newRefSpanAndCigar._2,
clipLengthOnRead, clipFrom3PrimeEnd);
return new AlignmentInterval(newRefSpanAndCigar._1, newContigStartAndEnd._1, newContigStartAndEnd._2, newRefSpanAndCigar._2,
input.forwardStrand, input.mapQual, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, AlnModType.UNDERGONE_OVERLAP_REMOVAL);
}

/**
* The new read span can NOT be simply calculated by subtracting the requested {@code clipLengthOnRead},
* for a reason that can be demonstrated below:
* suppose an alignment has cigar "20S100M10I...", and it is being clipped from the 5'-end with a length of 105.
* If we simply use the 105 to calculate the new start, it would be 21 + 105 = 126,
* but because the whole 100M alignment block would be clipped away, the new start should be 131.
*/
private static Tuple2<Integer, Integer> computeNewReadSpan(final int originalContigStart, final int originalContigEnd,
final Cigar newCigarAlong5to3DirectionOfContig,
final int clipLengthOnRead, final boolean clipFrom3PrimeEnd) {
final int newTigStart, newTigEnd;
if (clipFrom3PrimeEnd) {
newTigStart = input.startInAssembledContig;
newTigEnd = input.endInAssembledContig - clipLengthOnRead;
newTigStart = originalContigStart;
newTigEnd = Math.min(originalContigEnd - clipLengthOnRead,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed in person maybe add a couple of notes in comments to highlight why this is necessary?

SvCigarUtils.getUnclippedReadLength(newCigarAlong5to3DirectionOfContig) -
CigarUtils.countRightClippedBases(newCigarAlong5to3DirectionOfContig));
} else {
newTigStart = input.startInAssembledContig + clipLengthOnRead;
newTigEnd = input.endInAssembledContig;
newTigStart = Math.max(originalContigStart + clipLengthOnRead,
CigarUtils.countLeftClippedBases(newCigarAlong5to3DirectionOfContig) + 1);
newTigEnd = originalContigEnd;
}
return new AlignmentInterval(result._1, newTigStart, newTigEnd, result._2, input.forwardStrand, input.mapQual,
AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, AlnModType.UNDERGONE_OVERLAP_REMOVAL);

return new Tuple2<>(newTigStart, newTigEnd);
}

/**
Expand Down Expand Up @@ -103,7 +124,7 @@ static Tuple2<SimpleInterval, Cigar> computeNewRefSpanAndCigar(final AlignmentIn

// then deal with ref span
refBasesConsumed += ce.getOperator().isAlignment() ? (clipLengthOnRead - readBasesConsumed)
: ce.getLength();
: 0;

break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ public static final class TestDataForSimpleSVs {
System.arraycopy(rightRefFlank, 0, contigSeq, 50, 40);

AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 1 ,50, TextCigarCodec.decode("50M40S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 41 ,100, TextCigarCodec.decode("40S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 41 ,90, TextCigarCodec.decode("40S50M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
final NovelAdjacencyAndAltHaplotype breakpoints = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict);
result.add(new TestDataForSimpleSVs(region1, region2, breakpoints, "asm000001:tig00001"));

Expand All @@ -453,7 +453,7 @@ public static final class TestDataForSimpleSVs {
System.arraycopy(doubleDup, 0, contigSeq, 40, 10);
System.arraycopy(leftRefFlank, 0, contigSeq, 50, 40);
region1 = new AlignmentInterval(new SimpleInterval("21", 100051, 100100), 1 ,50, TextCigarCodec.decode("50M40S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 41 ,100, TextCigarCodec.decode("40S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
region2 = new AlignmentInterval(new SimpleInterval("21", 100001, 100050), 41 ,90, TextCigarCodec.decode("40S50M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
final NovelAdjacencyAndAltHaplotype breakpointsDetectedFromReverseStrand = new NovelAdjacencyAndAltHaplotype(new ChimericAlignment(region1, region2, Collections.emptyList(), "asm000001:tig00001", b37_seqDict), contigSeq, b37_seqDict);
result.add(new TestDataForSimpleSVs(region1, region2, breakpointsDetectedFromReverseStrand, "asm000001:tig00001"));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ private Object[][] createInputsAndExpectedResults_Serialization() {
for(int pair=0; pair<cigars.length/2; ++pair) {

final List<AlignmentInterval> alignmentIntervalsForSimpleInversion = new ArrayList<>(8);
final SimpleInterval referenceIntervalLeft = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair]+1, alignmentStartsOnRef_0Based[2*pair]+cigars[2*pair].getReferenceLength()+1);
final SimpleInterval referenceIntervalLeft = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair]+1, alignmentStartsOnRef_0Based[2*pair]+cigars[2*pair].getReferenceLength());
final AlignmentInterval alignmentIntervalLeft = new AlignmentInterval(referenceIntervalLeft, alignmentStartsOnTig_0BasedInclusive[2*pair]+1, alignmentEndsOnTig_0BasedExclusive[2*pair],
strandedness[2*pair] ? cigars[2*pair] : CigarUtils.invertCigar(cigars[2*pair]),
strandedness[2*pair], mapQual[2*pair], mismatches[2*pair], 100, ContigAlignmentsModifier.AlnModType.NONE);
alignmentIntervalsForSimpleInversion.add(alignmentIntervalLeft);
final SimpleInterval referenceIntervalRight = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair+1]+1, alignmentStartsOnRef_0Based[2*pair+1]+cigars[2*pair+1].getReferenceLength()+1);
final SimpleInterval referenceIntervalRight = new SimpleInterval(refNames.get(0), alignmentStartsOnRef_0Based[2*pair+1]+1, alignmentStartsOnRef_0Based[2*pair+1]+cigars[2*pair+1].getReferenceLength());
final AlignmentInterval alignmentIntervalRight = new AlignmentInterval(referenceIntervalRight, alignmentStartsOnTig_0BasedInclusive[2*pair+1]+1, alignmentEndsOnTig_0BasedExclusive[2*pair+1],
strandedness[2*pair+1] ? cigars[2*pair+1] : CigarUtils.invertCigar(cigars[2*pair+1]),
strandedness[2*pair+1], mapQual[2*pair+1], mismatches[2*pair+1], 100, ContigAlignmentsModifier.AlnModType.NONE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ Object[][] testDataForAIOverlaps() {
final List<Object[]> data = new ArrayList<>(20);

AlignmentInterval ar1 = new AlignmentInterval(new SimpleInterval("1",1,5), 1,5, TextCigarCodec.decode("5M5H"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval ar2 = new AlignmentInterval(new SimpleInterval("1",10,16), 5,10, TextCigarCodec.decode("4S6M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval ar2 = new AlignmentInterval(new SimpleInterval("1",11,16), 5,10, TextCigarCodec.decode("4S6M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);

data.add(new Object[]{ar1, ar2, 1, 0});

ar1 = new AlignmentInterval(new SimpleInterval("1",1,5), 1,5, TextCigarCodec.decode("5M5H"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
ar2 = new AlignmentInterval(new SimpleInterval("1",11,16), 6,10, TextCigarCodec.decode("5S5M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
ar2 = new AlignmentInterval(new SimpleInterval("1",11,15), 6,10, TextCigarCodec.decode("5S5M"),true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
data.add(new Object[]{ar1, ar2, 0, 0});

// overlaps on ref only
Expand All @@ -40,7 +40,7 @@ Object[][] testDataForAIOverlaps() {
data.add(new Object[]{ar1, ar2, 0, 59});

ar1 = new AlignmentInterval(new SimpleInterval("chr1",9170350,9171390), 1,1041, TextCigarCodec.decode("1041M1298H"),false, 60, 4, 1021, ContigAlignmentsModifier.AlnModType.NONE);
ar2 = new AlignmentInterval(new SimpleInterval("chr1",9169370,9170505), 1204,2239, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE);
ar2 = new AlignmentInterval(new SimpleInterval("chr1",9169370,9170505), 1204,2339, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE);
data.add(new Object[]{ar1, ar2, 0, 505-350+1});

// overlaps on read only
Expand Down Expand Up @@ -72,7 +72,7 @@ Object[][] testDataForAIOverlaps() {

// different chr
ar1 = new AlignmentInterval(new SimpleInterval("chr1",9170350,9171390), 1,1041, TextCigarCodec.decode("1041M1298H"),false, 60, 4, 1021, ContigAlignmentsModifier.AlnModType.NONE);
ar2 = new AlignmentInterval(new SimpleInterval("chr2",9169370,9170505), 1204,2239, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE);
ar2 = new AlignmentInterval(new SimpleInterval("chr2",9169370,9170505), 1204,2339, TextCigarCodec.decode("1203S1136M"),false, 60, 22, 1026, ContigAlignmentsModifier.AlnModType.NONE);
data.add(new Object[]{ar1, ar2, 0, 0});

return data.toArray(new Object[data.size()][]);
Expand Down Expand Up @@ -432,4 +432,27 @@ public void testContainsGapOfEqualOrLargerSize(final AlignmentInterval alignment
final boolean expectedResult) {
Assert.assertEquals(alignment.containsGapOfEqualOrLargerSize(gapSize), expectedResult);
}

@DataProvider(name = "forTestCtorArgChecking")
private Object[][] forTestCtorArgChecking() {
final List<Object[]> data = new ArrayList<>(20);

data.add(new Object[]{TextCigarCodec.decode("1155M1154S"), new SimpleInterval("chr22", 47043976, 47045130), 1, 1155, null});
data.add(new Object[]{TextCigarCodec.decode("1424M1424S"), new SimpleInterval("chr15", 80355809, 80357232), 1, 1424, null});

data.add(new Object[]{TextCigarCodec.decode("1155M1154S"), new SimpleInterval("chr22", 47043976, 47045131), 1, 1155, IllegalArgumentException.class});
data.add(new Object[]{TextCigarCodec.decode("1424M1424S"), new SimpleInterval("chr15", 80355809, 80357232), 1, 1429, IllegalArgumentException.class});

return data.toArray(new Object[data.size()][]);
}
@Test(groups = "sv", dataProvider = "forTestCtorArgChecking")
@SuppressWarnings("rawtypes")
public void testCtorArgChecking(final Cigar cigar, final SimpleInterval referenceSpan, final int readStart, final int readEnd,
final Class expectedExceptionClass) {
try {
AlignmentInterval.checkValidArgument(cigar, referenceSpan, readStart, readEnd);
} catch (final Exception e) {
Assert.assertEquals(e.getClass(), expectedExceptionClass);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ private Object[][] createTestData() {

intervalOne = new AlignmentInterval(new SimpleInterval("chr2", 1422222, 1422435),
1, 270, TextCigarCodec.decode("75M56I139M"), false, 60, 56, 142, ContigAlignmentsModifier.AlnModType.NONE);
intervalTwo = new AlignmentInterval(new SimpleInterval("chr2_KI270774v1_alt", 105288, 105557),
intervalTwo = new AlignmentInterval(new SimpleInterval("chr2_KI270774v1_alt", 105288, 105555),
1, 270, TextCigarCodec.decode("114M1I27M1I127M"), false, 56, 13, 179, ContigAlignmentsModifier.AlnModType.NONE);
contig = new AlignedContig("asm002608:tig00001", "ATGCTGGGGAATTTGTGTGCTCCTTGGGTGGGGACGAGCATGGAAGGCGCGTGGGACTGAAGCCTTGAAGACCCCGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCTCTCCTGGACAGACCTCGTGCAGGCGCCTCTCCTGGACCGACCTCGTGCAGGCGCCGCGCTGGACCGACCTCGTGCAGGCGCCGCGCTGGGCCATGGGGAGAGCGAGAGCCTGGTGTGCCCCTCAGGGAC".getBytes(),
Arrays.asList(intervalOne, intervalTwo)/*, true*/);
Expand Down Expand Up @@ -130,16 +130,16 @@ private Object[][] forFilterSecondaryConfigurationsByMappingQualityThreshold() {
final List<Object[]> data = new ArrayList<>(20);

AlignmentInterval intervalOne = new AlignmentInterval(
new SimpleInterval("chr21", 100000, 100100),
new SimpleInterval("chr21", 100001, 100100),
1, 100, TextCigarCodec.decode("100M220S"),
true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval intervalTwo = new AlignmentInterval(
new SimpleInterval("chr21", 100099, 100122),
99, 122, TextCigarCodec.decode("98S24M78S"),
true, 10, 3, 241, ContigAlignmentsModifier.AlnModType.NONE);
AlignmentInterval intervalThree = new AlignmentInterval(
new SimpleInterval("chr21", 100121, 100200),
122, 200, TextCigarCodec.decode("222S78M"),
new SimpleInterval("chr21", 100123, 100200),
223, 300, TextCigarCodec.decode("222S78M"),
true, 60, 0, 78, ContigAlignmentsModifier.AlnModType.NONE);
final GoodAndBadMappings rep1 =
new GoodAndBadMappings(Arrays.asList(intervalOne, intervalThree),
Expand Down Expand Up @@ -188,7 +188,7 @@ private Object[][] forTestingNotDiscardForBadMQ() {
data.add(new Object[]{outForSingleBadMapping, false});

final AlignmentInterval intervalOne = new AlignmentInterval(
new SimpleInterval("chr21", 100000, 100100),
new SimpleInterval("chr21", 100001, 100100),
1, 100, TextCigarCodec.decode("100M220S"),
true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
final AlignmentInterval intervalTwo = new AlignmentInterval(
Expand Down Expand Up @@ -349,7 +349,7 @@ private Object[][] forSpecialCaseGapSplit() {
AlignmentInterval gapped;
// case one: gapped alignment provides worse coverage
noGap = new AlignmentInterval(new SimpleInterval("chr1", 1_000_001, 1_000_950),
1, 1150, TextCigarCodec.decode("950M50S"),
1, 950, TextCigarCodec.decode("950M50S"),
true, 60, 0, 950, ContigAlignmentsModifier.AlnModType.NONE);
gapped = new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_001_200),
101, 1000, TextCigarCodec.decode("100S300M200D600M"),
Expand Down Expand Up @@ -377,13 +377,13 @@ private Object[][] forSpecialCaseGapSplit() {
});

// case three: gapped alignment provides better coverage with a I-gap
gapped = new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_001_850),
gapped = new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_000_850),
101, 1000, TextCigarCodec.decode("100S300M150I450M"),
true, 60, 150, 750, ContigAlignmentsModifier.AlnModType.NONE);
data.add(new Object[]{new Tuple2<>(noGap, gapped),
true,
new GoodAndBadMappings(Arrays.asList(new AlignmentInterval(new SimpleInterval("chr1", 1_000_101, 1_000_400), 101, 400, TextCigarCodec.decode("100S300M600S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT),
new AlignmentInterval(new SimpleInterval("chr1", 1_000_401, 1_001_850), 551, 1000, TextCigarCodec.decode("550S450M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)),
new AlignmentInterval(new SimpleInterval("chr1", 1_000_401, 1_000_850), 551, 1000, TextCigarCodec.decode("550S450M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)),
Collections.singletonList(noGap))
});

Expand Down
Loading