diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java index 9119720a2d8..50bb8490867 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecord.java @@ -52,6 +52,8 @@ public class SVCallRecord implements SVLocatable { private final List altAlleles; private final GenotypesContext genotypes; private final Map attributes; + private final Set filters; + private final Double log10PError; // CPX related fields private final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype; @@ -70,8 +72,10 @@ public SVCallRecord(final String id, final List alleles, final List genotypes, final Map attributes, + final Set filters, + final Double log10PError, final SAMSequenceDictionary dictionary) { - this(id, contigA, positionA, strandA, contigB, positionB, strandB, type, cpxSubtype, length, algorithms, alleles, genotypes, attributes); + this(id, contigA, positionA, strandA, contigB, positionB, strandB, type, cpxSubtype, length, algorithms, alleles, genotypes, attributes, filters, log10PError); validateCoordinates(dictionary); } @@ -88,11 +92,14 @@ protected SVCallRecord(final String id, final List algorithms, final List alleles, final List genotypes, - final Map attributes) { + final Map attributes, + final Set filters, + final Double log10PError) { Utils.nonNull(algorithms); Utils.nonNull(alleles); Utils.nonNull(genotypes); Utils.nonNull(attributes); + Utils.nonNull(filters); this.id = Utils.nonNull(id); this.contigA = contigA; this.positionA = positionA; @@ -112,6 +119,8 @@ protected SVCallRecord(final String id, final Pair strands = inferStrands(type, strandA, strandB); this.strandA = strands.getLeft(); this.strandB = strands.getRight(); + this.filters = filters; + this.log10PError = log10PError; } /** @@ -366,4 +375,16 @@ public SimpleInterval getPositionAInterval() { public SimpleInterval getPositionBInterval() { return new SimpleInterval(contigB, positionB, positionB); } + + public Set getFilters() { + return filters; + } + + public Double getLog10PError() { + return log10PError; + } + + public GATKSVVCFConstants.ComplexVariantSubtype getCpxSubtype() { + return cpxSubtype; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java index df8fa1cc7b8..6fdedd52321 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtils.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.sv; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; @@ -99,15 +100,27 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record) && record.getStrandA() != null && record.getStrandB() != null) { builder.attribute(GATKSVVCFConstants.STRANDS_ATTRIBUTE, getStrandString(record)); } - final GenotypesContext genotypes = GenotypesContext.create(record.getGenotypes().size()); - for (final Genotype g : record.getGenotypes()) { - genotypes.add(sanitizeEmptyGenotype(g)); + if (!record.getFilters().isEmpty()) { + builder.filters(record.getFilters()); } + if (record.getLog10PError() != null) { + builder.log10PError(record.getLog10PError()); + } + // htsjdk vcf encoder does not allow genotypes to have empty alleles builder.genotypes(record.getGenotypes().stream().map(SVCallRecordUtils::sanitizeEmptyGenotype).collect(Collectors.toList())); + sanitizeNullAttributes(builder); return builder; } + /** + * Removes entries with null values. This can cause problems with vcf parsing, for example Integer fields + * set to "." cause exceptions. + */ + private static void sanitizeNullAttributes(final VariantContextBuilder builder) { + builder.attributes(Maps.filterValues(builder.getAttributes(), o -> o != null)); + } + /** * Adds NO_CALL allele if empty */ @@ -170,12 +183,12 @@ public static GenotypesContext populateGenotypesForMissingSamplesWithAlleles(fin public static SVCallRecord copyCallWithNewGenotypes(final SVCallRecord record, final GenotypesContext genotypes) { return new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), record.getStrandA(), record.getContigB(), record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getLength(), record.getAlgorithms(), record.getAlleles(), - genotypes, record.getAttributes()); + genotypes, record.getAttributes(), record.getFilters(), record.getLog10PError()); } public static SVCallRecord copyCallWithNewAttributes(final SVCallRecord record, final Map attr) { return new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), record.getStrandA(), record.getContigB(), record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getLength(), record.getAlgorithms(), record.getAlleles(), - record.getGenotypes(), attr); + record.getGenotypes(), attr, record.getFilters(), record.getLog10PError()); } /** @@ -287,10 +300,10 @@ public static Stream convertInversionsToBreakends(final SVCallReco Utils.validateArg(record.isIntrachromosomal(), "Inversion " + record.getId() + " is not intrachromosomal"); final SVCallRecord positiveBreakend = new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), true, record.getContigB(), record.getPositionB(), true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,null, - record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), dictionary); + record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), record.getFilters(), record.getLog10PError(), dictionary); final SVCallRecord negativeBreakend = new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), false, record.getContigB(), record.getPositionB(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,null, - record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), dictionary); + record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), record.getFilters(), record.getLog10PError(), dictionary); return Stream.of(positiveBreakend, negativeBreakend); } @@ -372,12 +385,18 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari } } else { contigB = contigA; - positionB = variant.getEnd(); + // Force reset of END coordinate + if (type.equals(StructuralVariantType.INS)) { + positionB = positionA + 1; + } else { + positionB = variant.getEnd(); + } } + final Double log10PError = variant.hasLog10PError() ? variant.getLog10PError() : null; final Map sanitizedAttributes = sanitizeAttributes(attributes); return new SVCallRecord(id, contigA, positionA, strand1, contigB, positionB, strand2, type, cpxSubtype, length, algorithms, - variant.getAlleles(), variant.getGenotypes(), sanitizedAttributes); + variant.getAlleles(), variant.getGenotypes(), sanitizedAttributes, variant.getFilters(), log10PError); } private static Map sanitizeAttributes(final Map attributes) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java index 0ecd85cba56..8521430932e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapser.java @@ -182,8 +182,12 @@ public SVCallRecord collapse(final SVClusterEngine.OutputCluster cluster) { final Boolean strandA = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandA(); final Boolean strandB = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandB(); + final Set filters = collapseFilters(items); + final Double quality = collapseQuality(items); + return new SVCallRecord(representative.getId(), representative.getContigA(), start, strandA, representative.getContigB(), - end, strandB, type, representative.getComplexSubtype(), length, algorithms, alleles, genotypes, attributes, dictionary); + end, strandB, type, representative.getComplexSubtype(), length, algorithms, alleles, genotypes, attributes, + filters, quality, dictionary); } protected List collapseAlleles(final List altAlleles, final Allele refAllele) { @@ -193,6 +197,21 @@ protected List collapseAlleles(final List altAlleles, final Alle return alleles; } + protected Set collapseFilters(final List items) { + return items.stream() + .map(SVCallRecord::getFilters) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + } + + protected Double collapseQuality(final List items) { + if (items.size() == 1) { + return items.get(0).getLog10PError(); + } else { + return null; + } + } + /** * Asserts that the given records are valid for collapsing. */ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java index 5703753b31b..fb239c57971 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVLinkage.java @@ -38,16 +38,20 @@ public class CanonicalSVLinkage extends SVClusterLinkage protected ClusteringParameters evidenceParams; public static final int INSERTION_ASSUMED_LENGTH_FOR_OVERLAP = 50; + public static final int INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY = 1; public static final double DEFAULT_RECIPROCAL_OVERLAP_DEPTH_ONLY = 0.8; + public static final double DEFAULT_SIZE_SIMILARITY_DEPTH_ONLY = 0; public static final int DEFAULT_WINDOW_DEPTH_ONLY = 0; public static final double DEFAULT_SAMPLE_OVERLAP_DEPTH_ONLY = 0; public static final double DEFAULT_RECIPROCAL_OVERLAP_MIXED = 0.8; + public static final double DEFAULT_SIZE_SIMILARITY_MIXED = 0; public static final int DEFAULT_WINDOW_MIXED = 1000; public static final double DEFAULT_SAMPLE_OVERLAP_MIXED = 0; public static final double DEFAULT_RECIPROCAL_OVERLAP_PESR = 0.5; + public static final double DEFAULT_SIZE_SIMILARITY_PESR = 0; public static final int DEFAULT_WINDOW_PESR = 500; public static final double DEFAULT_SAMPLE_OVERLAP_PESR = 0; @@ -56,16 +60,19 @@ public class CanonicalSVLinkage extends SVClusterLinkage public static final ClusteringParameters DEFAULT_DEPTH_ONLY_PARAMS = ClusteringParameters.createDepthParameters( DEFAULT_RECIPROCAL_OVERLAP_DEPTH_ONLY, + DEFAULT_SIZE_SIMILARITY_DEPTH_ONLY, DEFAULT_WINDOW_DEPTH_ONLY, DEFAULT_SAMPLE_OVERLAP_DEPTH_ONLY); public static final ClusteringParameters DEFAULT_MIXED_PARAMS = ClusteringParameters.createMixedParameters( DEFAULT_RECIPROCAL_OVERLAP_MIXED, + DEFAULT_SIZE_SIMILARITY_MIXED, DEFAULT_WINDOW_MIXED, DEFAULT_SAMPLE_OVERLAP_MIXED); public static final ClusteringParameters DEFAULT_PESR_PARAMS = ClusteringParameters.createPesrParameters( DEFAULT_RECIPROCAL_OVERLAP_PESR, + DEFAULT_SIZE_SIMILARITY_PESR, DEFAULT_WINDOW_PESR, DEFAULT_SAMPLE_OVERLAP_PESR); @@ -139,28 +146,55 @@ protected boolean strandsMatch(final SVCallRecord a, final SVCallRecord b) { private static boolean clusterTogetherWithParams(final SVCallRecord a, final SVCallRecord b, final ClusteringParameters params, final SAMSequenceDictionary dictionary) { - // Contigs match if (!(a.getContigA().equals(b.getContigA()) && a.getContigB().equals(b.getContigB()))) { return false; } - // Reciprocal overlap + // Reciprocal overlap and size similarity // Check bypassed if both are inter-chromosomal + final Boolean hasReciprocalOverlapAndSizeSimilarity; if (a.isIntrachromosomal()) { - final SimpleInterval intervalA = new SimpleInterval(a.getContigA(), a.getPositionA(), a.getPositionA() + getLengthForOverlap(a) - 1); - final SimpleInterval intervalB = new SimpleInterval(b.getContigA(), b.getPositionA(), b.getPositionA() + getLengthForOverlap(b) - 1); - final boolean hasReciprocalOverlap = IntervalUtils.isReciprocalOverlap(intervalA, intervalB, params.getReciprocalOverlap()); - if (params.requiresOverlapAndProximity() && !hasReciprocalOverlap) { + final boolean hasReciprocalOverlap = testReciprocalOverlap(a, b, params.getReciprocalOverlap()); + final boolean hasSizeSimilarity = testSizeSimilarity(a, b, params.getSizeSimilarity()); + hasReciprocalOverlapAndSizeSimilarity = hasReciprocalOverlap && hasSizeSimilarity; + if (params.requiresOverlapAndProximity() && !hasReciprocalOverlapAndSizeSimilarity) { return false; - } else if (!params.requiresOverlapAndProximity() && hasReciprocalOverlap) { - return true; } + } else { + hasReciprocalOverlapAndSizeSimilarity = null; } // Breakend proximity - final SimpleInterval intervalA1 = a.getPositionAInterval().expandWithinContig(params.getWindow(), dictionary); - final SimpleInterval intervalA2 = a.getPositionBInterval().expandWithinContig(params.getWindow(), dictionary); + final boolean hasBreakendProximity = testBreakendProximity(a, b, params.getWindow(), dictionary); + // Use short-circuiting statements since sample overlap is the least scalable / slowest check + if (hasReciprocalOverlapAndSizeSimilarity == null) { + return hasBreakendProximity && hasSampleOverlap(a, b, params.getSampleOverlap()); + } else { + if (params.requiresOverlapAndProximity()) { + return hasReciprocalOverlapAndSizeSimilarity && hasBreakendProximity && hasSampleOverlap(a, b, params.getSampleOverlap()); + } else { + return (hasReciprocalOverlapAndSizeSimilarity || hasBreakendProximity) && hasSampleOverlap(a, b, params.getSampleOverlap()); + } + } + } + + private static boolean testReciprocalOverlap(final SVCallRecord a, final SVCallRecord b, final double threshold) { + final SimpleInterval intervalA = new SimpleInterval(a.getContigA(), a.getPositionA(), a.getPositionA() + getLength(a, INSERTION_ASSUMED_LENGTH_FOR_OVERLAP) - 1); + final SimpleInterval intervalB = new SimpleInterval(b.getContigA(), b.getPositionA(), b.getPositionA() + getLength(b, INSERTION_ASSUMED_LENGTH_FOR_OVERLAP) - 1); + return IntervalUtils.isReciprocalOverlap(intervalA, intervalB, threshold); + } + + private static boolean testSizeSimilarity(final SVCallRecord a, final SVCallRecord b, final double threshold) { + final int sizeSimilarityLengthA = getLength(a, INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY); + final int sizeSimilarityLengthB = getLength(b, INSERTION_ASSUMED_LENGTH_FOR_SIZE_SIMILARITY); + return Math.min(sizeSimilarityLengthA, sizeSimilarityLengthB) / (double) Math.max(sizeSimilarityLengthA, sizeSimilarityLengthB) >= threshold; + } + + private static boolean testBreakendProximity(final SVCallRecord a, final SVCallRecord b, final int window, + final SAMSequenceDictionary dictionary) { + final SimpleInterval intervalA1 = a.getPositionAInterval().expandWithinContig(window, dictionary); + final SimpleInterval intervalA2 = a.getPositionBInterval().expandWithinContig(window, dictionary); if (intervalA1 == null) { logger.warn("Invalid start position " + a.getPositionA() + " in record " + a.getId() + " - record will not be matched"); @@ -173,30 +207,21 @@ private static boolean clusterTogetherWithParams(final SVCallRecord a, final SVC } final SimpleInterval intervalB1 = b.getPositionAInterval(); final SimpleInterval intervalB2 = b.getPositionBInterval(); - if (!(intervalA1.overlaps(intervalB1) && intervalA2.overlaps(intervalB2))) { - return false; - } - - // Sample overlap (possibly the least scalable check) - return hasSampleOverlap(a, b, params.getSampleOverlap()); + return intervalA1.overlaps(intervalB1) && intervalA2.overlaps(intervalB2); } /** * Gets event length used for overlap testing. */ - private static int getLengthForOverlap(final SVCallRecord record) { - Utils.validate(record.isIntrachromosomal(), "Record even must be intra-chromosomal"); - final Integer length = record.getLength(); - if (length == null) { - if (record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.INS - || record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.CPX){ - return INSERTION_ASSUMED_LENGTH_FOR_OVERLAP; - } else { - return 1; - } + private static int getLength(final SVCallRecord record, final int missingInsertionLength) { + Utils.validate(record.isIntrachromosomal(), "Record must be intra-chromosomal"); + if (record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.INS) { + return record.getLength() == null ? missingInsertionLength : Math.max(record.getLength(), 1); + } else if (record.getType() == GATKSVVCFConstants.StructuralVariantAnnotationType.BND) { + return record.getPositionB() - record.getPositionA() + 1; } else { // TODO lengths less than 1 shouldn't be valid - return Math.max(record.getLength(), 1); + return Math.max(record.getLength() == null ? 1 : record.getLength(), 1); } } @@ -227,7 +252,7 @@ private static int getMaxClusterableStartingPositionWithParams(final SVCallRecor // Reciprocal overlap window final int maxPositionByOverlap; - final int maxPosition = (int) (call.getPositionA() + (1.0 - params.getReciprocalOverlap()) * getLengthForOverlap(call)); + final int maxPosition = (int) (call.getPositionA() + (1.0 - params.getReciprocalOverlap()) * getLength(call, INSERTION_ASSUMED_LENGTH_FOR_OVERLAP)); maxPositionByOverlap = Math.min(maxPosition, contigLength); if (params.requiresOverlapAndProximity()) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java index fa39562d6ab..2365c5041e8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/ClusteringParameters.java @@ -11,6 +11,7 @@ public class ClusteringParameters { private final double reciprocalOverlap; // minimum fractional reciprocal overlap of event intervals + private final double sizeSimilarity; // minimum min(size1, size2) / max(size1, size) private final int window; // maximum distance between variant end-points private final double sampleOverlap; // minimum fractional carrier sample overlap @@ -21,9 +22,10 @@ public class ClusteringParameters { // returns true if two given records are the correct type of pair for this parameter set private final BiPredicate validRecordsPredicate; - public ClusteringParameters(final double reciprocalOverlap, final int window, final double sampleOverlap, + public ClusteringParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap, final boolean overlapAndProximity, final BiPredicate validRecordsPredicate) { this.reciprocalOverlap = reciprocalOverlap; + this.sizeSimilarity = sizeSimilarity; this.window = window; this.sampleOverlap = sampleOverlap; this.requiresOverlapAndProximity = overlapAndProximity; @@ -34,6 +36,10 @@ public double getReciprocalOverlap() { return reciprocalOverlap; } + public double getSizeSimilarity() { + return sizeSimilarity; + } + public int getWindow() { return window; } @@ -50,15 +56,15 @@ public boolean isValidPair(final SVCallRecord a, final SVCallRecord b) { return validRecordsPredicate.test(a, b); } - public static ClusteringParameters createDepthParameters(final double reciprocalOverlap, final int window, final double sampleOverlap) { - return new ClusteringParameters(reciprocalOverlap, window, sampleOverlap, false, (a,b) -> a.isDepthOnly() && b.isDepthOnly()); + public static ClusteringParameters createDepthParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) { + return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, false, (a,b) -> a.isDepthOnly() && b.isDepthOnly()); } - public static ClusteringParameters createMixedParameters(final double reciprocalOverlap, final int window, final double sampleOverlap) { - return new ClusteringParameters(reciprocalOverlap, window, sampleOverlap, true, (a,b) -> a.isDepthOnly() != b.isDepthOnly()); + public static ClusteringParameters createMixedParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) { + return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, true, (a,b) -> a.isDepthOnly() != b.isDepthOnly()); } - public static ClusteringParameters createPesrParameters(final double reciprocalOverlap, final int window, final double sampleOverlap) { - return new ClusteringParameters(reciprocalOverlap, window, sampleOverlap, true, (a,b) -> !a.isDepthOnly() && !b.isDepthOnly()); + public static ClusteringParameters createPesrParameters(final double reciprocalOverlap, final double sizeSimilarity, final int window, final double sampleOverlap) { + return new ClusteringParameters(reciprocalOverlap, sizeSimilarity, window, sampleOverlap, true, (a,b) -> !a.isDepthOnly() && !b.isDepthOnly()); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngine.java index 121699b5b2c..0199eaae3e3 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngine.java @@ -1,9 +1,6 @@ package org.broadinstitute.hellbender.tools.sv.cluster; import com.google.common.annotations.VisibleForTesting; -import com.google.common.collect.BoundType; -import com.google.common.collect.SortedMultiset; -import com.google.common.collect.TreeMultiset; import htsjdk.samtools.SAMSequenceDictionary; import org.broadinstitute.hellbender.tools.sv.SVCallRecord; import org.broadinstitute.hellbender.tools.sv.SVCallRecordUtils; @@ -436,11 +433,11 @@ public int hashCode() { } private final class ItemSortingBuffer { - private SortedMultiset buffer; + private PriorityQueue buffer; public ItemSortingBuffer() { Utils.nonNull(itemComparator); - this.buffer = TreeMultiset.create(itemComparator); + this.buffer = new PriorityQueue<>(itemComparator); } public void add(final SVCallRecord record) { @@ -459,11 +456,11 @@ public List flush() { if (minActiveStartItem == null) { forceFlush(); } - final SortedMultiset finalizedRecordView = buffer.headMultiset(minActiveStartItem, BoundType.CLOSED); - final ArrayList finalizedRecords = new ArrayList<>(finalizedRecordView); - // Clearing a view of the buffer also clears the items from the buffer itself - finalizedRecordView.clear(); - return finalizedRecords; + final List out = new ArrayList<>(); + while (!buffer.isEmpty() && buffer.comparator().compare(buffer.peek(), minActiveStartItem) < 0) { + out.add(buffer.poll()); + } + return out; } /** @@ -471,8 +468,10 @@ public List flush() { * active clusters can be clustered with any future inputs. */ public List forceFlush() { - final List result = new ArrayList<>(buffer); - buffer.clear(); + final List result = new ArrayList<>(buffer.size()); + while (!buffer.isEmpty()) { + result.add(buffer.poll()); + } return result; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineArgumentsCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineArgumentsCollection.java index 4f23460d2d2..b0969715b19 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineArgumentsCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineArgumentsCollection.java @@ -11,10 +11,12 @@ public class SVClusterEngineArgumentsCollection implements Serializable { private static final long serialVersionUID = 1L; protected static final String BASE_INTERVAL_OVERLAP_FRACTION_NAME = "-interval-overlap"; + protected static final String BASE_SIZE_SIMILARITY_FRACTION_NAME = "-size-similarity"; protected static final String BASE_BREAKEND_WINDOW_NAME = "-breakend-window"; protected static final String BASE_SAMPLE_OVERLAP_FRACTION_NAME = "-sample-overlap"; protected static final String BASE_INTERVAL_OVERLAP_FRACTION_DOC = " interval reciprocal overlap fraction"; + protected static final String BASE_SIZE_SIMILARITY_DOC = " size similarity"; protected static final String BASE_BREAKEND_WINDOW_DOC = " window size for breakend proximity"; protected static final String BASE_SAMPLE_OVERLAP_FRACTION_DOC = " shared sample overlap fraction"; @@ -22,6 +24,10 @@ public class SVClusterEngineArgumentsCollection implements Serializable { public static final String MIXED_INTERVAL_OVERLAP_FRACTION_NAME = "mixed" + BASE_INTERVAL_OVERLAP_FRACTION_NAME; public static final String PESR_INTERVAL_OVERLAP_FRACTION_NAME = "pesr" + BASE_INTERVAL_OVERLAP_FRACTION_NAME; + public static final String DEPTH_SIZE_SIMILARITY_NAME = "depth" + BASE_SIZE_SIMILARITY_FRACTION_NAME; + public static final String MIXED_SIZE_SIMILARITY_NAME = "mixed" + BASE_SIZE_SIMILARITY_FRACTION_NAME; + public static final String PESR_SIZE_SIMILARITY_NAME = "pesr" + BASE_SIZE_SIMILARITY_FRACTION_NAME; + public static final String DEPTH_BREAKEND_WINDOW_NAME = "depth" + BASE_BREAKEND_WINDOW_NAME; public static final String MIXED_BREAKEND_WINDOW_NAME = "mixed" + BASE_BREAKEND_WINDOW_NAME; public static final String PESR_BREAKEND_WINDOW_NAME = "pesr" + BASE_BREAKEND_WINDOW_NAME; @@ -51,6 +57,27 @@ public class SVClusterEngineArgumentsCollection implements Serializable { doc="PESR/PESR" + BASE_INTERVAL_OVERLAP_FRACTION_DOC, optional=true, minValue = 0, maxValue = 1) public double pesrOverlapFraction = CanonicalSVLinkage.DEFAULT_RECIPROCAL_OVERLAP_PESR; + /** + * Minimum size similarity to cluster depth-only/depth-only variant pairs. + */ + @Argument(fullName = DEPTH_SIZE_SIMILARITY_NAME, + doc="Depth/Depth" + BASE_SIZE_SIMILARITY_DOC, optional=true, minValue = 0, maxValue = 1) + public double depthSizeSimilarity = CanonicalSVLinkage.DEFAULT_SIZE_SIMILARITY_DEPTH_ONLY; + + /** + * Minimum size similarity to cluster depth-only/PESR variant pairs. + */ + @Argument(fullName = MIXED_SIZE_SIMILARITY_NAME, + doc="Depth/PESR" + BASE_SIZE_SIMILARITY_DOC, optional=true, minValue = 0, maxValue = 1) + public double mixedSizeSimilarity = CanonicalSVLinkage.DEFAULT_SIZE_SIMILARITY_MIXED; + + /** + * Minimum size similarity to cluster PESR/PESR variant pairs. + */ + @Argument(fullName = PESR_SIZE_SIMILARITY_NAME, + doc="PESR/PESR" + BASE_SIZE_SIMILARITY_DOC, optional=true, minValue = 0, maxValue = 1) + public double pesrSizeSimilarity = CanonicalSVLinkage.DEFAULT_SIZE_SIMILARITY_PESR; + /** * Maximum allowed distance between endpoints (in bp) to cluster depth-only/depth-only variant pairs. */ @@ -94,14 +121,14 @@ public class SVClusterEngineArgumentsCollection implements Serializable { public double pesrSampleOverlapFraction = CanonicalSVLinkage.DEFAULT_SAMPLE_OVERLAP_PESR; public final ClusteringParameters getDepthParameters() { - return ClusteringParameters.createDepthParameters(depthOverlapFraction, depthBreakendWindow, depthSampleOverlapFraction); + return ClusteringParameters.createDepthParameters(depthOverlapFraction, depthSizeSimilarity, depthBreakendWindow, depthSampleOverlapFraction); } public final ClusteringParameters getMixedParameters() { - return ClusteringParameters.createMixedParameters(mixedOverlapFraction, mixedBreakendWindow, mixedSampleOverlapFraction); + return ClusteringParameters.createMixedParameters(mixedOverlapFraction, mixedSizeSimilarity, mixedBreakendWindow, mixedSampleOverlapFraction); } public final ClusteringParameters getPESRParameters() { - return ClusteringParameters.createPesrParameters(pesrOverlapFraction, pesrBreakendWindow, pesrSampleOverlapFraction); + return ClusteringParameters.createPesrParameters(pesrOverlapFraction, pesrSizeSimilarity, pesrBreakendWindow, pesrSampleOverlapFraction); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java index d49be301900..81138035bb1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/JointGermlineCNVSegmentation.java @@ -128,6 +128,7 @@ public int getEndPosition() { public static final String DEFRAGMENTATION_PADDING_LONG_NAME = "defragmentation-padding-fraction"; public static final String CLUSTERING_INTERVAL_OVERLAP_LONG_NAME = "clustering-interval-overlap"; public static final String CLUSTERING_BREAKEND_WINDOW_LONG_NAME = "clustering-breakend-window"; + public static final String CLUSTERING_SIZE_SIMILARITY_LONG_NAME = "clustering-size-similarity"; public static final String MODEL_CALL_INTERVALS_LONG_NAME = "model-call-intervals"; public static final String BREAKPOINT_SUMMARY_STRATEGY_LONG_NAME = "breakpoint-summary-strategy"; public static final String ALT_ALLELE_SUMMARY_STRATEGY_LONG_NAME = "alt-allele-summary-strategy"; @@ -149,6 +150,10 @@ public int getEndPosition() { doc="Cluster events whose endpoints are within this distance of each other", optional=true) public int clusterWindow = CanonicalSVLinkage.DEFAULT_DEPTH_ONLY_PARAMS.getWindow(); + @Argument(fullName = CLUSTERING_SIZE_SIMILARITY_LONG_NAME, + doc="Minimum size similarity for clustering", optional=true) + public double clusterSizeSimilarity = CanonicalSVLinkage.DEFAULT_DEPTH_ONLY_PARAMS.getSizeSimilarity(); + @Argument(fullName = MODEL_CALL_INTERVALS_LONG_NAME, doc = "gCNV model intervals created with the FilterIntervals tool.", optional=true) private GATKPath modelCallIntervalList = null; @@ -209,7 +214,7 @@ public void onTraversalStart() { setIntervals(parser); - final ClusteringParameters clusterArgs = ClusteringParameters.createDepthParameters(clusterIntervalOverlap, clusterWindow, CLUSTER_SAMPLE_OVERLAP_FRACTION); + final ClusteringParameters clusterArgs = ClusteringParameters.createDepthParameters(clusterIntervalOverlap, clusterSizeSimilarity, clusterWindow, CLUSTER_SAMPLE_OVERLAP_FRACTION); if (callIntervals == null) { defragmenter = SVClusterEngineFactory.createCNVDefragmenter(dictionary, altAlleleSummaryStrategy, reference, defragmentationPadding, minSampleSetOverlap); } else { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java index bb09f36095d..03ac3093841 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVCluster.java @@ -23,7 +23,8 @@ import org.broadinstitute.hellbender.tools.sv.cluster.*; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; -import java.util.*; +import java.util.List; +import java.util.Set; import static org.broadinstitute.hellbender.tools.walkers.sv.JointGermlineCNVSegmentation.BREAKPOINT_SUMMARY_STRATEGY_LONG_NAME; @@ -379,8 +380,9 @@ public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { final SVCallRecord call = SVCallRecordUtils.create(variant); final SVCallRecord filteredCall; - if (fastMode) { + if (fastMode && call.getType() != GATKSVVCFConstants.StructuralVariantAnnotationType.CNV) { // Strip out non-carrier genotypes to save memory and compute + // Don't do for multi-allelic CNVs since carrier status can't be determined final GenotypesContext filteredGenotypes = GenotypesContext.copy(call.getCarrierGenotypeList()); filteredCall = SVCallRecordUtils.copyCallWithNewGenotypes(call, filteredGenotypes); } else { @@ -451,7 +453,8 @@ public VariantContext buildVariantContext(final SVCallRecord call) { // Build new variant final SVCallRecord finalCall = new SVCallRecord(newId, call.getContigA(), call.getPositionA(), call.getStrandA(), call.getContigB(), call.getPositionB(), call.getStrandB(), call.getType(), call.getComplexSubtype(), call.getLength(), - call.getAlgorithms(), call.getAlleles(), filledGenotypes, call.getAttributes(), dictionary); + call.getAlgorithms(), call.getAlleles(), filledGenotypes, call.getAttributes(), call.getFilters(), + call.getLog10PError(), dictionary); final VariantContextBuilder builder = SVCallRecordUtils.getVariantBuilder(finalCall); if (omitMembers) { builder.rmAttribute(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java index 848c9610875..f5a9d247c0a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java @@ -27,6 +27,7 @@ import org.broadinstitute.hellbender.tools.sv.concordance.SVConcordanceAnnotator; import org.broadinstitute.hellbender.tools.sv.concordance.SVConcordanceLinkage; import org.broadinstitute.hellbender.tools.walkers.validation.Concordance; +import org.broadinstitute.hellbender.utils.SequenceDictionaryUtils; import picard.vcf.GenotypeConcordance; import java.util.HashSet; @@ -121,6 +122,10 @@ public void onTraversalStart() { throw new UserException("Reference sequence dictionary required"); } + // Check that vcfs are sorted the same + SequenceDictionaryUtils.validateDictionaries("eval", getEvalHeader().getSequenceDictionary(), + "truth", getTruthHeader().getSequenceDictionary(), false, true); + linkage = new SVConcordanceLinkage(dictionary); linkage.setDepthOnlyParams(clusterParameterArgs.getDepthParameters()); linkage.setMixedParams(clusterParameterArgs.getMixedParameters()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java index bd218c6bfad..170df7984f6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUnitTest.java @@ -1,6 +1,9 @@ package org.broadinstitute.hellbender.tools.sv; import com.google.common.collect.Lists; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.GenotypesContext; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -41,14 +44,14 @@ public void testIsDepthOnly(final SVCallRecord record, final boolean expected) { @DataProvider(name = "testIsCNVData") public Object[][] testIsCNVData() { return new Object[][]{ - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL), true}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP), true}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV), true}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.INV), false}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.BND), false}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.INS), false}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX), false}, - {SVTestUtils.newCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX), false} + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL), true}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP), true}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV), true}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.INV), false}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.BND), false}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.INS), false}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX), false}, + {SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.CTX), false} }; } @@ -73,7 +76,7 @@ public Object[][] testCreateInvalidCoordinatesData() { public void testCreateInvalidCoordinates(final String contigA, final int posA, final String contigB, final int posB) { new SVCallRecord("var1", contigA, posA, true, contigB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.fail("Expected exception not thrown"); } @@ -90,6 +93,26 @@ public Object[][] testCreateValidCoordinatesData() { public void testCreateValidCoordinates(final String contigA, final int posA, final String contigB, final int posB) { new SVCallRecord("var1", contigA, posA, true, contigB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); + } + + public void testGetters() { + final SVCallRecord record = new SVCallRecord("var1", "chr1", 100, true, "chr1", 200, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, + GATKSVVCFConstants.ComplexVariantSubtype.dDUP, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), + GenotypesContext.create(GenotypeBuilder.create("sample1", Lists.newArrayList(Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DEL))), + Collections.singletonMap("TEST_KEY", "TEST_VALUE"), Collections.singleton("TEST_FILTER"), Double.valueOf(30), SVTestUtils.hg38Dict); + Assert.assertEquals(record.getId(), "var1"); + Assert.assertEquals(record.getContigA(), "chr1"); + Assert.assertEquals(record.getPositionA(), 100); + Assert.assertEquals(record.getStrandA(), Boolean.TRUE); + Assert.assertEquals(record.getContigB(), "chr1"); + Assert.assertEquals(record.getPositionB(), 200); + Assert.assertEquals(record.getStrandA(), Boolean.FALSE); + Assert.assertEquals(record.getAlgorithms(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST); + Assert.assertEquals(record.getGenotypes().get("sample1").getAlleles(), Lists.newArrayList(Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DEL)); + Assert.assertEquals(record.getAttributes(), Collections.singletonMap("TEST_KEY", "TEST_VALUE")); + Assert.assertEquals(record.getAlleles(), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL)); + Assert.assertEquals(record.getFilters(), Collections.singleton("TEST_FILTER")); + Assert.assertEquals(record.getLog10PError(), Double.valueOf(30)); } } \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java index 5a7eee5a25d..548d13239ab 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVCallRecordUtilsUnitTest.java @@ -66,15 +66,16 @@ public Object[][] testGetVariantBuilderData() { { new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, - ALLELES_DEL, - Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), - Collections.emptyMap()), + ALLELES_DEL, + Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), Collections.emptyMap(), Collections.singleton("TEST_FILTER"), Double.valueOf(-3)), new VariantContextBuilder("", "chr1", 1000, 1999, ALLELES_DEL) .id("var1") .genotypes(GENOTYPE_DEL_1, GENOTYPE_DEL_2) .attribute(VCFConstants.END_KEY, 1999) .attribute(GATKSVVCFConstants.ALGORITHMS_ATTRIBUTE, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST) .attribute(GATKSVVCFConstants.SVTYPE, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL) + .filter("TEST_FILTER") + .log10PError(Double.valueOf(-3)) .make(), Collections.emptyList() }, @@ -84,7 +85,7 @@ public Object[][] testGetVariantBuilderData() { SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, Collections.singletonList(Allele.SV_SIMPLE_DEL), Collections.singletonList(GENOTYPE_DEL_3), - Collections.emptyMap()), + Collections.emptyMap(), Collections.emptySet(), null), new VariantContextBuilder("", "chr1", 1000, 1999, ALLELES_DEL) .id("var1") .genotypes(GENOTYPE_DEL_3) @@ -100,7 +101,7 @@ public Object[][] testGetVariantBuilderData() { SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1), - Collections.emptyMap()), + Collections.emptyMap(), Collections.emptySet(), null), new VariantContextBuilder("", "chr1", 1000, 1000, ALLELES_INS) .id("var2") .genotypes(GENOTYPE_INS_1) @@ -116,7 +117,7 @@ public Object[][] testGetVariantBuilderData() { SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1), - Collections.emptyMap()), + Collections.emptyMap(), Collections.emptySet(), null), new VariantContextBuilder("", "chr1", 1000, 1000, ALLELES_INS) .id("var2") .genotypes(GENOTYPE_INS_1) @@ -132,7 +133,7 @@ public Object[][] testGetVariantBuilderData() { SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Lists.newArrayList(GENOTYPE_BND_1), - Collections.emptyMap()), + Collections.emptyMap(), Collections.emptySet(), null), new VariantContextBuilder("", "chr1", 1000, 1000, ALLELES_BND) .id("var3") .genotypes(GENOTYPE_BND_1) @@ -154,6 +155,18 @@ public void testGetVariantBuilder(final SVCallRecord record, final VariantContex VariantContextTestUtils.assertVariantContextsAreEqual(result, expected, attributesToIgnore, Collections.emptyList()); } + @Test + public void testGetVariantBuilderHasSanitizedNullAttributes() { + final SVCallRecord record = new SVCallRecord("var3", "chr1", 1000, false, "chr2", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, + SVTestUtils.PESR_ONLY_ALGORITHM_LIST, + ALLELES_BND, + Lists.newArrayList(GENOTYPE_BND_1), + Collections.emptyMap(), Collections.emptySet(), null); + final VariantContext result = SVCallRecordUtils.getVariantBuilder(record).make(); + // BNDs shouldn't have a length + Assert.assertFalse(result.hasAttribute(GATKSVVCFConstants.SVLEN)); + } + @Test public void testFillMissingSamplesWithGenotypes() { final ArrayList genotypes = new ArrayList<>(); @@ -225,14 +238,14 @@ public void testCopyCallWithNewGenotypes() { SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), - Collections.singletonMap(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY, Collections.singletonList("sample"))); + Collections.singletonMap(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY, Collections.singletonList("sample")), Collections.emptySet(), null); final GenotypesContext genotypes = GenotypesContext.copy(Collections.singletonList(GENOTYPE_DEL_3)); final SVCallRecord result = SVCallRecordUtils.copyCallWithNewGenotypes(record, genotypes); final SVCallRecord expected = new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, SVTestUtils.DEPTH_ONLY_ALGORITHM_LIST, ALLELES_DEL, genotypes, - Collections.singletonMap(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY, Collections.singletonList("sample"))); + Collections.singletonMap(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY, Collections.singletonList("sample")), Collections.emptySet(), null); SVTestUtils.assertEqualsExceptMembership(result, expected); } @@ -340,18 +353,18 @@ public Object[][] testGetCallComparatorData() { }, // SV type { - SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL), - SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP), + SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL), + SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP), GATKSVVCFConstants.StructuralVariantAnnotationType.DEL.compareTo(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP) }, { - SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP), - SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL), + SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP), + SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL), GATKSVVCFConstants.StructuralVariantAnnotationType.DUP.compareTo(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL) }, { - SVTestUtils.newCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.BND), - SVTestUtils.newCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.INS), + SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.BND), + SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1000, GATKSVVCFConstants.StructuralVariantAnnotationType.INS), GATKSVVCFConstants.StructuralVariantAnnotationType.BND.compareTo(GATKSVVCFConstants.StructuralVariantAnnotationType.INS) } }; @@ -364,7 +377,7 @@ public void testGetCallComparator(final SVCallRecord record1, final SVCallRecord @Test public void testConvertInversionsToBreakends() { - final SVCallRecord nonInversion = SVTestUtils.newCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL); + final SVCallRecord nonInversion = SVTestUtils.newPESRCallRecordWithIntervalAndType(1000, 1999, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL); final List nonInversionResult = SVCallRecordUtils.convertInversionsToBreakends(nonInversion, SVTestUtils.hg38Dict).collect(Collectors.toList()); Assert.assertEquals(nonInversionResult.size(), 1); Assert.assertNotNull(nonInversionResult.get(0)); @@ -374,7 +387,7 @@ public void testConvertInversionsToBreakends() { SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); final List inversionResult = SVCallRecordUtils.convertInversionsToBreakends(inversion, SVTestUtils.hg38Dict).collect(Collectors.toList()); Assert.assertEquals(inversionResult.size(), 2); @@ -447,7 +460,7 @@ public Object[][] testCreateData() { TEST_ATTRIBUTES), new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), - TEST_ATTRIBUTES) + TEST_ATTRIBUTES, Collections.emptySet(), null) }, { SVTestUtils.newVariantContext("var1", "chr1", 1000, 1999, @@ -457,7 +470,7 @@ public Object[][] testCreateData() { TEST_ATTRIBUTES), new SVCallRecord("var1", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), ALLELES_DEL, Lists.newArrayList(GENOTYPE_DEL_1, GENOTYPE_DEL_2), - TEST_ATTRIBUTES) + TEST_ATTRIBUTES, Collections.emptySet(), null) }, { SVTestUtils.newVariantContext("var2", "chr1", 1000, 1000, @@ -466,7 +479,7 @@ public Object[][] testCreateData() { null, null, TEST_ATTRIBUTES), new SVCallRecord("var2", "chr1", 1000, true, "chr1", 1000, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, 500, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_INS, Lists.newArrayList(GENOTYPE_INS_1, GENOTYPE_INS_2), - TEST_ATTRIBUTES) + TEST_ATTRIBUTES, Collections.emptySet(), null) }, { SVTestUtils.newVariantContext("var3", "chr1", 1000, 1000, @@ -475,7 +488,7 @@ public Object[][] testCreateData() { "chrX", 2000, TEST_ATTRIBUTES), new SVCallRecord("var3", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Collections.singletonList(GENOTYPE_BND_1), - TEST_ATTRIBUTES) + TEST_ATTRIBUTES, Collections.emptySet(), null) }, { SVTestUtils.newVariantContext("var4", "chr1", 1000, 1000, @@ -484,7 +497,7 @@ public Object[][] testCreateData() { "chrX", 2000, TEST_ATTRIBUTES), new SVCallRecord("var4", "chr1", 1000, true, "chrX", 2000, true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_BND, Collections.singletonList(GENOTYPE_BND_1), - TEST_ATTRIBUTES) + TEST_ATTRIBUTES, Collections.emptySet(), null) }, { SVTestUtils.newVariantContext("var4", "chr1", 1000, 1000, @@ -493,7 +506,7 @@ public Object[][] testCreateData() { "chrX", 2000, TEST_ATTRIBUTES_CPX), new SVCallRecord("var4", "chr1", 1000, null, "chrX", 2000, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CPX, GATKSVVCFConstants.ComplexVariantSubtype.dDUP, 250, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, ALLELES_CPX, Collections.singletonList(GENOTYPE_CPX_1), - TEST_ATTRIBUTES) + TEST_ATTRIBUTES, Collections.emptySet(), null) }, }; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java index 4b882f92fd7..217ab8b483f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/SVTestUtils.java @@ -52,9 +52,9 @@ public static SVClusterEngine getNewDefaultMaxCliqueEngine() { return new SVClusterEngine(SVClusterEngine.CLUSTERING_TYPE.MAX_CLIQUE, defaultCollapser::collapse, getNewDefaultLinkage(), hg38Dict); } - public static final ClusteringParameters defaultDepthOnlyParameters = ClusteringParameters.createDepthParameters(0.8, 0, 0); - public static final ClusteringParameters defaultMixedParameters = ClusteringParameters.createMixedParameters(0.8, 1000, 0); - public static final ClusteringParameters defaultEvidenceParameters = ClusteringParameters.createPesrParameters(0.5, 500, 0); + public static final ClusteringParameters defaultDepthOnlyParameters = ClusteringParameters.createDepthParameters(0.8, 0, 0, 0); + public static final ClusteringParameters defaultMixedParameters = ClusteringParameters.createMixedParameters(0.8, 0, 1000, 0); + public static final ClusteringParameters defaultEvidenceParameters = ClusteringParameters.createPesrParameters(0.5, 0, 500, 0); public static final SVClusterEngine defaultSingleLinkageEngine = getNewDefaultSingleLinkageEngine(); public static final SVClusterEngine defaultMaxCliqueEngine = getNewDefaultMaxCliqueEngine(); @@ -135,7 +135,7 @@ public final static SVCallRecord makeRecord(final String id, genotypes.add(makeGenotypeWithRefAllele(builder, refAllele)); } return new SVCallRecord(id, contigA, positionA, strandA, contigB, positionB, strandB, type, null, length, algorithms, - newAlleles, genotypes, Collections.emptyMap(), hg38Dict); + newAlleles, genotypes, Collections.emptyMap(), Collections.emptySet(), null, hg38Dict); } public static final Genotype makeGenotypeWithRefAllele(final GenotypeBuilder builder, final Allele refAllele) { @@ -375,7 +375,7 @@ public static SVCallRecord newCallRecordWithAllelesAndSampleName(final String sa svtype, null, 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), variantAlleles, Collections.singletonList(builder.make()), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newNamedDeletionRecordWithAttributes(final String id, final Map attributes) { @@ -384,7 +384,7 @@ public static SVCallRecord newNamedDeletionRecordWithAttributes(final String id, 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), - attributes); + attributes, Collections.emptySet(), null); } public static SVCallRecord newNamedDeletionRecordWithAttributesAndGenotypes(final String id, @@ -395,7 +395,7 @@ public static SVCallRecord newNamedDeletionRecordWithAttributesAndGenotypes(fina 100, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), genotypes, - attributes); + attributes, Collections.emptySet(), null); } public static final Map keyValueArraysToMap(final String[] keys, final Object[] values) { @@ -411,27 +411,40 @@ public static SVCallRecord newCallRecordWithLengthAndType(final Integer length, final int positionB = length == null ? 1 : CoordMath.getEnd(1, length); return new SVCallRecord("", "chr1", 1, getValidTestStrandA(svtype), "chr1", positionB, getValidTestStrandB(svtype), svtype, null, length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newDeletionCallRecordWithIdAndAlgorithms(final String id, final List algorithms) { return new SVCallRecord(id, "chr1", 1, true, "chr1", 100, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 100, algorithms, Collections.emptyList(), - Collections.emptyList(), Collections.emptyMap()); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } // Note strands and length may not be set properly - public static SVCallRecord newCallRecordWithIntervalAndType(final int start, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { + public static SVCallRecord newPESRCallRecordWithIntervalAndType(final int start, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { return new SVCallRecord("", "chr1", start, getValidTestStrandA(svtype), "chr1", end, getValidTestStrandB(svtype), svtype, null, getLength(start, end, svtype), PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), - Collections.emptyList(), Collections.emptyMap()); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); + } + + // Note strands and length may not be set properly + public static SVCallRecord newInsertionWithPositionAndLength(final int start, final int length) { + return new SVCallRecord("", "chr1", start, true, "chr1", start + 1, false, + GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, length, PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); + } + + public static SVCallRecord newDepthCallRecordWithIntervalAndType(final int start, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { + return new SVCallRecord("", "chr1", start, getValidTestStrandA(svtype), "chr1", end, getValidTestStrandB(svtype), + svtype, null, getLength(start, end, svtype), DEPTH_ONLY_ALGORITHM_LIST, Collections.emptyList(), + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } // Note strands and length may not be set properly public static SVCallRecord newCallRecordWithContigsIntervalAndType(final String startContig, final int start, final String endContig, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType svtype) { return new SVCallRecord("", startContig, start, getValidTestStrandA(svtype), endContig, end, getValidTestStrandB(svtype), svtype, null, getLength(start, end, svtype), PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), - Collections.emptyList(), Collections.emptyMap()); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null); } public static Integer getLength(final int start, final int end, final GATKSVVCFConstants.StructuralVariantAnnotationType type) { @@ -448,7 +461,7 @@ public static SVCallRecord newBndCallRecordWithStrands(final boolean strandA, fi Collections.singletonList(PESR_ALGORITHM), Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCtxCallRecord() { @@ -456,7 +469,7 @@ public static SVCallRecord newCtxCallRecord() { Collections.singletonList(PESR_ALGORITHM), Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCpxCallRecordWithLength(final int length) { @@ -464,7 +477,7 @@ public static SVCallRecord newCpxCallRecordWithLength(final int length) { Collections.singletonList(PESR_ALGORITHM), Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCnvCallRecordWithStrands(final Boolean strandA, final Boolean strandB) { @@ -472,7 +485,7 @@ public static SVCallRecord newCnvCallRecordWithStrands(final Boolean strandA, fi Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCallRecordWithCoordinates(final String id, final String chrA, final int posA, final String chrB, final int posB) { @@ -480,7 +493,7 @@ public static SVCallRecord newCallRecordWithCoordinates(final String id, final S Collections.singletonList("peser"), Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCallRecordWithCoordinatesAndType(final String id, final String chrA, final int posA, final String chrB, final int posB, final GATKSVVCFConstants.StructuralVariantAnnotationType type) { @@ -488,7 +501,7 @@ public static SVCallRecord newCallRecordWithCoordinatesAndType(final String id, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCallRecordWithAlgorithms(final List algorithms) { @@ -496,7 +509,7 @@ public static SVCallRecord newCallRecordWithAlgorithms(final List algori algorithms, Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static SVCallRecord newCallRecordInsertionWithLength(final Integer length) { @@ -504,7 +517,7 @@ public static SVCallRecord newCallRecordInsertionWithLength(final Integer length PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(), - Collections.emptyMap()); + Collections.emptyMap(), Collections.emptySet(), null); } public static VariantContext newVariantContext(final String id, final String chrA, final int posA, diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java index 24fefe40298..56ab174a353 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CNVDefragmenterTest.java @@ -23,47 +23,47 @@ public void testClusterTogether() { final SVCallRecord deletion = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); final SVCallRecord duplication = new SVCallRecord("test_dup", "chr1", 1000, false, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DUP), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(deletion, duplication), "Different sv types should not cluster"); final SVCallRecord duplicationNonDepthOnly = new SVCallRecord("test_dup", "chr1", 1000, false, "chr1", 1999, true, GATKSVVCFConstants.StructuralVariantAnnotationType.DUP, null, 1000, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM, SVTestUtils.PESR_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DUP), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(duplication, duplicationNonDepthOnly), "Clustered records must be depth-only"); final SVCallRecord cnv = new SVCallRecord("test_cnv", "chr1", 1000, null, "chr1", 1999, null, GATKSVVCFConstants.StructuralVariantAnnotationType.CNV, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DUP), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(deletion, cnv), "Different sv types should not cluster"); final SVCallRecord insertion = new SVCallRecord("test_ins", "chr1", 1000, true, "chr1", 1001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.INS, null, 1000, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_INS), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(insertion, insertion), "Only CNVs should be valid"); final SVCallRecord deletion2 = new SVCallRecord("test_del2", "chr1", 1000, true, "chr1", 1999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertTrue(defragmenter.areClusterable(deletion, deletion2), "Valid identical records should cluster"); final SVCallRecord deletion3 = new SVCallRecord("test_del3", "chr1", 2999, true, "chr1", 3998, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertTrue(defragmenter.areClusterable(deletion, deletion3), "Should cluster due to overlap"); final SVCallRecord deletion4 = new SVCallRecord("test_del3", "chr1", 3000, true, "chr1", 3999, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(deletion, deletion4), "Should barely not cluster"); } @@ -192,7 +192,7 @@ public void testGetMaxClusterableStartingPosition(final int start, final int end final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start, true, "chr1", end, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, end - start + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); final int maxClusterableStart = defragmenter.getMaxClusterableStartingPosition(call1); final int call2Start = maxClusterableStart; @@ -200,7 +200,7 @@ public void testGetMaxClusterableStartingPosition(final int start, final int end final SVCallRecord call2 = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2End, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, call2End - call2Start + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertTrue(defragmenter.areClusterable(call1, call2)); final int call3Start = maxClusterableStart + 1; @@ -208,7 +208,7 @@ public void testGetMaxClusterableStartingPosition(final int start, final int end final SVCallRecord call3 = new SVCallRecord("call3", "chr1", call3Start, true, "chr1", call3End, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, call3End - call3Start + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), dictionary); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, dictionary); Assert.assertFalse(defragmenter.areClusterable(call1, call3)); } } \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java index 8a4263b35b0..6a6d96448d8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/CanonicalSVCollapserUnitTest.java @@ -1325,7 +1325,7 @@ public Object[][] collapseTypesTestData() { @Test(dataProvider= "collapseTypesTestData") public void collapseTypesTest(final List types, final GATKSVVCFConstants.StructuralVariantAnnotationType expectedResult) { - final List records = types.stream().map(t -> SVTestUtils.newCallRecordWithIntervalAndType(1, 100, t)).collect(Collectors.toList()); + final List records = types.stream().map(t -> SVTestUtils.newPESRCallRecordWithIntervalAndType(1, 100, t)).collect(Collectors.toList()); Assert.assertEquals(collapser.collapseTypes(records), expectedResult); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java index c333007527e..942feab95f6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/cluster/SVClusterEngineTest.java @@ -25,9 +25,18 @@ public class SVClusterEngineTest { private final SVClusterEngine engine = SVTestUtils.defaultSingleLinkageEngine; + private static final ClusteringParameters depthOnlyParametersSizeSimilarity = ClusteringParameters.createDepthParameters(0.1, 0.5, 0, 0); + private static final ClusteringParameters mixedParametersSizeSimilarity = ClusteringParameters.createMixedParameters(0.1, 0.5, 5000, 0); + private static final ClusteringParameters evidenceParametersSizeSimilarity = ClusteringParameters.createPesrParameters(0.1, 0.5, 5000, 0); + private final CanonicalSVLinkage linkageSizeSimilarity = new CanonicalSVLinkage<>(SVTestUtils.hg38Dict, false); + private final SVClusterEngine engineSizeSimilarity = new SVClusterEngine(SVClusterEngine.CLUSTERING_TYPE.SINGLE_LINKAGE, SVTestUtils.defaultCollapser::collapse, linkageSizeSimilarity, SVTestUtils.hg38Dict); + @BeforeTest public void initializeClusterEngine() { engine.add(SVTestUtils.call1); + linkageSizeSimilarity.setDepthOnlyParams(depthOnlyParametersSizeSimilarity); + linkageSizeSimilarity.setMixedParams(mixedParametersSizeSimilarity); + linkageSizeSimilarity.setEvidenceParams(evidenceParametersSizeSimilarity); } @Test @@ -77,17 +86,76 @@ public void testClusterTogether() { Assert.assertTrue(engine.getLinkage().areClusterable(SVTestUtils.call1, SVTestUtils.overlapsCall1)); } + @DataProvider(name = "testClusterTogetherWithSizeSimilarityDataProvider") + public Object[][] testClusterTogetherWithSizeSimilarityDataProvider() { + return new Object[][]{ + {1001, 2000, 1001, 2000, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, true}, + {1001, 2000, 1501, 2000, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, true}, + {1001, 2000, 1900, 2900, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, true}, + // Fails size similarity + {1001, 2000, 1502, 2000, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, false}, + {1001, 2000, 1901, 2399, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, false}, + // Fails reciprocal overlap + {1001, 2000, 1902, 2900, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, false} + }; + } + + @Test(dataProvider= "testClusterTogetherWithSizeSimilarityDataProvider") + public void testClusterTogetherWithSizeSimilarity(final int start1, final int end1, + final int start2, final int end2, + final GATKSVVCFConstants.StructuralVariantAnnotationType svtype, + final boolean expectedResult) { + // PESR + final SVCallRecord record1 = SVTestUtils.newPESRCallRecordWithIntervalAndType(start1, end1, svtype); + final SVCallRecord record2 = SVTestUtils.newPESRCallRecordWithIntervalAndType(start2, end2, svtype); + Assert.assertEquals(engineSizeSimilarity.getLinkage().areClusterable(record1, record2), expectedResult); + Assert.assertEquals(engineSizeSimilarity.getLinkage().areClusterable(record2, record1), expectedResult); + + // Depth only + final SVCallRecord record3 = SVTestUtils.newDepthCallRecordWithIntervalAndType(start1, end1, svtype); + final SVCallRecord record4 = SVTestUtils.newDepthCallRecordWithIntervalAndType(start2, end2, svtype); + Assert.assertEquals(engineSizeSimilarity.getLinkage().areClusterable(record3, record4), expectedResult); + Assert.assertEquals(engineSizeSimilarity.getLinkage().areClusterable(record4, record3), expectedResult); + } + + @DataProvider(name = "testClusterTogetherWithSizeSimilarityInsertionsDataProvider") + public Object[][] testClusterTogetherWithSizeSimilarityInsertionsDataProvider() { + return new Object[][]{ + {1000, 1000, 1000, 1000, true}, + {1000, 1000, 1000, 500, true}, + {1000, 1000, 1000, 2000, true}, + {1000, 1000, 100, 1000, true}, + {1000, 1000, 1900, 1000, true}, + // Fails reciprocal overlap + {1000, 1000, 99, 1000, false}, + {1000, 1000, 1901, 1000, false}, + // Fails size similarity + {1000, 1000, 1000, 499, false}, + {1000, 2001, 1000, 1000, false}, + }; + } + + @Test(dataProvider= "testClusterTogetherWithSizeSimilarityInsertionsDataProvider") + public void testClusterTogetherWithSizeSimilarityInsertions(final int start1, final int length1, + final int start2, final int length2, + final boolean expectedResult) { + final SVCallRecord record1 = SVTestUtils.newInsertionWithPositionAndLength(start1, length1); + final SVCallRecord record2 = SVTestUtils.newInsertionWithPositionAndLength(start2, length2); + Assert.assertEquals(engineSizeSimilarity.getLinkage().areClusterable(record1, record2), expectedResult); + Assert.assertEquals(engineSizeSimilarity.getLinkage().areClusterable(record2, record1), expectedResult); + } + @Test(expectedExceptions = { IllegalArgumentException.class }) public void testClusterTogetherInvalidInterval() { // End position beyond contig end after padding final SVCallRecord deletion1 = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 248956423 + SVTestUtils.defaultEvidenceParameters.getWindow(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, null, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord deletion2 = new SVCallRecord("test_del", "chr1", 1000, true, "chr1", 248956422, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, null, Collections.singletonList(SVTestUtils.PESR_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); engine.getLinkage().areClusterable(deletion1, deletion2); Assert.fail("Expected exception not thrown"); } @@ -118,29 +186,29 @@ private void testGetMaxClusterableStartingPositionWithAlgorithm(final int start, final SVCallRecord call1 = new SVCallRecord("call1", "chr1", start, true, "chr1", end, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, end - start + 1, Collections.singletonList(algorithm), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final int maxClusterableStart = engine.getLinkage().getMaxClusterableStartingPosition(call1); final int call2Start = maxClusterableStart; final SVCallRecord call2Depth = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, call1.getLength(), Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2Pesr = new SVCallRecord("call2", "chr1", call2Start, true, "chr1", call2Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, call1.getLength(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.assertTrue(engine.getLinkage().areClusterable(call1, call2Depth) || engine.getLinkage().areClusterable(call1, call2Pesr)); final int call3Start = maxClusterableStart + 1; final SVCallRecord call3Depth = new SVCallRecord("call2", "chr1", call3Start, true, "chr1", call3Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, call1.getLength(), Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call3Pesr = new SVCallRecord("call2", "chr1", call3Start, true, "chr1", call3Start + call1.getLength() - 1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, call1.getLength(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL), - Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.assertFalse(engine.getLinkage().areClusterable(call1, call3Depth) || engine.getLinkage().areClusterable(call1, call3Pesr)); } @@ -196,12 +264,12 @@ public void testClusterTogetherVaryPositions(final int start1, final int end1, f "chr1", end1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, end1 - start1 + 1, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DUP), - SVTestUtils.threeGenotypes, Collections.emptyMap(), SVTestUtils.hg38Dict); + SVTestUtils.threeGenotypes, Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2 = new SVCallRecord("call2", "chr1", start2, true, "chr1", end2, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, end2 - start2 + 1, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DUP), - SVTestUtils.threeGenotypes, Collections.emptyMap(), SVTestUtils.hg38Dict); + SVTestUtils.threeGenotypes, Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); Assert.assertEquals(engine.getLinkage().areClusterable(call1, call2), result); } @@ -212,12 +280,12 @@ public void testClusterTogetherVaryTypes() { final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, SVTestUtils.getValidTestStrandA(type1), "chr1", 2001, SVTestUtils.getValidTestStrandB(type1), type1, null, SVTestUtils.getLength(1000, 2001, type1), Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (final GATKSVVCFConstants.StructuralVariantAnnotationType type2 : GATKSVVCFConstants.StructuralVariantAnnotationType.values()) { final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1000, SVTestUtils.getValidTestStrandA(type2), "chr1", 2001, SVTestUtils.getValidTestStrandB(type2), type2, null, SVTestUtils.getLength(1000, 2001, type2), Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Should only cluster together if same type, except CNVs if ((type1 == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV && call2.isSimpleCNV()) || (type2 == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV && call1.isSimpleCNV())) { @@ -237,13 +305,13 @@ public void testClusterTogetherVaryStrands() { final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, strand1A, "chr1", 2001, strand1B, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (final Boolean strand2A : bools) { for (final Boolean strand2B : bools) { final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1000, strand2A, "chr1", 2001, strand2B, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, Lists.newArrayList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Should only cluster if strands match Assert.assertEquals(engine.getLinkage().areClusterable(call1, call2), strand1A == strand2A && strand1B == strand2B); } @@ -262,7 +330,7 @@ public void testClusterTogetherVaryContigs() { final SVCallRecord call1 = new SVCallRecord("call1", contig1A, 1000, true, contig1B, 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (int k = 0; k < contigs.size(); k++) { final String contig2A = contigs.get(k); for (int m = k; m < contigs.size(); m++) { @@ -270,7 +338,7 @@ public void testClusterTogetherVaryContigs() { final SVCallRecord call2 = new SVCallRecord("call2", contig2A, 1000, true, contig2B, 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Should only cluster if contigs match Assert.assertEquals(engine.getLinkage().areClusterable(call1, call2), contig1A.equals(contig2A) && contig1B.equals(contig2B)); } @@ -289,11 +357,11 @@ public void testClusterTogetherVaryAlgorithms() { for (final List algorithms1 : algorithmsList) { final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, true, "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, - 1002, algorithms1, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + 1002, algorithms1, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); for (final List algorithms2 : algorithmsList) { final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1000, true, "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, - 1002, algorithms2, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + 1002, algorithms2, Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // All combinations should cluster Assert.assertTrue(engine.getLinkage().areClusterable(call1, call2)); } @@ -338,13 +406,13 @@ public void testClusterTogetherVaryParameters() { final SVClusterEngine testEngine1 = SVTestUtils.getNewDefaultSingleLinkageEngine(); final SVCallRecord call1 = new SVCallRecord("call1", "chr1", 1000, true, "chr1", 2001, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, - 1002, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + 1002, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2 = new SVCallRecord("call2", "chr1", 1100, true, "chr1", 2101, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, - 1002, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + 1002, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); // Cluster with default parameters Assert.assertTrue(testEngine1.getLinkage().areClusterable(call1, call2)); - final ClusteringParameters exactMatchParameters = ClusteringParameters.createDepthParameters(1.0, 0, 1.0); + final ClusteringParameters exactMatchParameters = ClusteringParameters.createDepthParameters(1.0, 0, 0, 1.0); final CanonicalSVLinkage exactMatchLinkage = SVTestUtils.getNewDefaultLinkage(); exactMatchLinkage.setDepthOnlyParams(exactMatchParameters); // Do not cluster requiring exact overlap @@ -381,15 +449,15 @@ public void testAddVaryPositions(final int positionA1, final int positionB1, final SVCallRecord call1 = new SVCallRecord("call1", "chr1", positionA1, true, "chr1", positionB1, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, positionB1 - positionA1 + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call2 = new SVCallRecord("call1", "chr1", positionA2, true, "chr1", positionB2, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, positionB2 - positionA2 + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final SVCallRecord call3 = new SVCallRecord("call1", "chr1", positionA3, true, "chr1", positionB3, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL, null, positionB3 - positionA3 + 1, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), - Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), SVTestUtils.hg38Dict); + Collections.emptyList(), Collections.emptyList(), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); engine.add(call1); engine.add(call2); engine.add(call3); @@ -443,7 +511,7 @@ public void testAddMaxCliqueLarge() { for (int i = 0; i < numRecords; i++) { final int start = 1000 + 10 * i; final int end = start + length - 1; - engine.add(SVTestUtils.newCallRecordWithIntervalAndType(start, end, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL)); + engine.add(SVTestUtils.newPESRCallRecordWithIntervalAndType(start, end, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL)); } final List result = engine.forceFlush(); Assert.assertEquals(result.size(), 50); @@ -518,7 +586,7 @@ public void testGetCarrierSamplesBiallelic(final int ploidy, final Allele refAll final SVCallRecord recordWithCopyNumber = new SVCallRecord("", "chr1", 1000, SVTestUtils.getValidTestStrandA(svtype), "chr1", 1999, SVTestUtils.getValidTestStrandB(svtype), svtype, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), - alleles, GenotypesContext.copy(genotypesWithCopyNumber), Collections.emptyMap(), SVTestUtils.hg38Dict); + alleles, GenotypesContext.copy(genotypesWithCopyNumber), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final Set resultWithCopyNumber = recordWithCopyNumber.getCarrierSampleSet(); Assert.assertEquals(resultWithCopyNumber, expectedResult); @@ -533,7 +601,7 @@ public void testGetCarrierSamplesBiallelic(final int ploidy, final Allele refAll final SVCallRecord recordWithGenotype = new SVCallRecord("", "chr1", 1000, SVTestUtils.getValidTestStrandA(svtype), "chr1", 1999, SVTestUtils.getValidTestStrandB(svtype), svtype, null, 1000, Collections.singletonList(GATKSVVCFConstants.DEPTH_ALGORITHM), - alleles, GenotypesContext.copy(genotypesWithGenotype), Collections.emptyMap(), SVTestUtils.hg38Dict); + alleles, GenotypesContext.copy(genotypesWithGenotype), Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict); final Set resultWithGenotype = recordWithGenotype.getCarrierSampleSet(); Assert.assertEquals(resultWithGenotype, expectedResult); @@ -546,7 +614,7 @@ public void testLargeRandom() { for (int i = 0; i < 1000; i++) { final int pos1 = rand.nextInt(9000) + 1; final int pos2 = rand.nextInt(10000) + 1; - records.add(SVTestUtils.newCallRecordWithIntervalAndType(Math.min(pos1, pos2), Math.max(pos1, pos2), GATKSVVCFConstants.StructuralVariantAnnotationType.DEL)); + records.add(SVTestUtils.newPESRCallRecordWithIntervalAndType(Math.min(pos1, pos2), Math.max(pos1, pos2), GATKSVVCFConstants.StructuralVariantAnnotationType.DEL)); } final SVClusterEngine engine = SVTestUtils.getNewDefaultMaxCliqueEngine(); records.stream().sorted(SVCallRecordUtils.getCallComparator(SVTestUtils.hg38Dict)).forEach(engine::add); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java index 40a53351ebf..3265e9ecc6d 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVClusterIntegrationTest.java @@ -278,9 +278,9 @@ public void testAgainstSimpleImplementation() { final Pair> testVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final ReferenceSequenceFile referenceSequenceFile = ReferenceUtils.createReferenceReader(new GATKPath(REFERENCE_PATH)); - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 2000, 0); - final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 2000, 0); - final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 500, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); + final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVClusterEngine engine = SVClusterEngineFactory.createCanonical( SVClusterEngine.CLUSTERING_TYPE.SINGLE_LINKAGE, CanonicalSVCollapser.BreakpointSummaryStrategy.MEDIAN_START_MEDIAN_END, @@ -429,7 +429,7 @@ public void testClusterSampleOverlap() { Assert.assertEquals(header.getSampleNamesInOrder().size(), 156); - Assert.assertEquals(records.size(), 1731); + Assert.assertEquals(records.size(), 1703); // Check for one record int expectedRecordsFound = 0; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java index eca32b971a7..7f846624de4 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java @@ -10,6 +10,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.AbstractConcordanceWalker; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants; @@ -68,9 +69,9 @@ public void testRefPanel() { final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict; - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 2000, 0); - final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 2000, 0); - final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 500, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); + final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary); linkage.setDepthOnlyParams(depthParameters); linkage.setMixedParams(mixedParameters); @@ -121,6 +122,8 @@ public void testRefPanel() { Assert.assertEquals(outputVariant.getStart(), expectedVariant.getStart()); Assert.assertEquals(outputVariant.getEnd(), expectedVariant.getEnd()); Assert.assertEquals(outputVariant.getAlleles(), expectedVariant.getAlleles()); + Assert.assertEquals(outputVariant.getFilters(), expectedVariant.getFilters()); + Assert.assertEquals(outputVariant.getPhredScaledQual(), expectedVariant.getPhredScaledQual()); Assert.assertEquals(outputVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "test_default"), expectedVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "expected_default")); checkTruthVariantId(outputVariant, expectedVariant); Assert.assertEquals(outputVariant.getAttributeAsString(Concordance.TRUTH_STATUS_VCF_ATTRIBUTE, "test_default"), expectedVariant.getAttributeAsString(Concordance.TRUTH_STATUS_VCF_ATTRIBUTE, "expected_default")); @@ -176,12 +179,15 @@ public void testSelf() { .add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT) .add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5) + .add(SVClusterEngineArgumentsCollection.DEPTH_SIZE_SIMILARITY_NAME, 0) .add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 2000) .add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) + .add(SVClusterEngineArgumentsCollection.MIXED_SIZE_SIMILARITY_NAME, 0) .add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000) .add(SVClusterEngineArgumentsCollection.PESR_SAMPLE_OVERLAP_FRACTION_NAME, 0) .add(SVClusterEngineArgumentsCollection.PESR_INTERVAL_OVERLAP_FRACTION_NAME, 0.1) + .add(SVClusterEngineArgumentsCollection.PESR_SIZE_SIMILARITY_NAME, 0) .add(SVClusterEngineArgumentsCollection.PESR_BREAKEND_WINDOW_NAME, 500) .add(AbstractConcordanceWalker.TRUTH_VARIANTS_LONG_NAME, vcfPath) .add(AbstractConcordanceWalker.EVAL_VARIANTS_SHORT_NAME, vcfPath); @@ -191,9 +197,9 @@ public void testSelf() { final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict; - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 2000, 0); - final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 2000, 0); - final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 500, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); + final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary); linkage.setDepthOnlyParams(depthParameters); linkage.setMixedParams(mixedParameters); @@ -215,6 +221,8 @@ public void testSelf() { Assert.assertEquals(outputVariant.getStart(), expectedVariant.getStart()); Assert.assertEquals(outputVariant.getEnd(), expectedVariant.getEnd()); Assert.assertEquals(outputVariant.getAlleles(), expectedVariant.getAlleles()); + Assert.assertEquals(outputVariant.getFilters(), expectedVariant.getFilters()); + Assert.assertEquals(outputVariant.getPhredScaledQual(), expectedVariant.getPhredScaledQual()); final String svtype = outputVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "test_default"); Assert.assertEquals(svtype, expectedVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "expected_default")); // check the variant matched itself with perfect concordance @@ -271,9 +279,9 @@ private void assertPerfectConcordance(final File output, final String evalVcfPat final Pair> outputVcf = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()); final SAMSequenceDictionary dictionary = SVTestUtils.hg38Dict; - final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 2000, 0); - final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 2000, 0); - final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 500, 0); + final ClusteringParameters depthParameters = ClusteringParameters.createDepthParameters(0.5, 0, 2000, 0); + final ClusteringParameters mixedParameters = ClusteringParameters.createMixedParameters(0.1, 0, 2000, 0); + final ClusteringParameters pesrParameters = ClusteringParameters.createPesrParameters(0.1, 0, 500, 0); final SVConcordanceLinkage linkage = new SVConcordanceLinkage(dictionary); linkage.setDepthOnlyParams(depthParameters); linkage.setMixedParams(mixedParameters); @@ -296,6 +304,8 @@ private void assertPerfectConcordance(final File output, final String evalVcfPat Assert.assertEquals(outputVariant.getStart(), expectedVariant.getStart()); Assert.assertEquals(outputVariant.getEnd(), expectedVariant.getEnd()); Assert.assertEquals(outputVariant.getAlleles(), expectedVariant.getAlleles()); + Assert.assertEquals(outputVariant.getFilters(), expectedVariant.getFilters()); + Assert.assertEquals(outputVariant.getPhredScaledQual(), expectedVariant.getPhredScaledQual()); final String svtype = outputVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "test_default"); Assert.assertEquals(svtype, expectedVariant.getAttributeAsString(GATKSVVCFConstants.SVTYPE, "expected_default")); // check the variant matched itself with perfect concordance @@ -345,4 +355,19 @@ public void testSitesOnly() { Assert.assertTrue(variant.getAttributeAsString(GATKSVVCFConstants.GENOTYPE_CONCORDANCE_INFO, ".").equals(".")); } } + + @Test(expectedExceptions = UserException.IncompatibleSequenceDictionaries.class) + public void testHeaderContigsOutOfOrder() { + final File output = createTempFile("concord_sites_only", ".vcf"); + final String evalVcfPath = getToolTestDataDir() + "ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz"; + final String truthVcfPath = getToolTestDataDir() + "ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz"; + + final ArgumentsBuilder args = new ArgumentsBuilder() + .addOutput(output) + .add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT) + .add(AbstractConcordanceWalker.TRUTH_VARIANTS_LONG_NAME, truthVcfPath) + .add(AbstractConcordanceWalker.EVAL_VARIANTS_SHORT_NAME, evalVcfPath); + + runCommandLine(args, SVConcordance.class.getSimpleName()); + } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz new file mode 100644 index 00000000000..4da924c39b9 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz.tbi new file mode 100644 index 00000000000..1d1599c2967 Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.contigs_out_of_order.vcf.gz.tbi differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz index 6f130a07606..1b742a4bf6d 100644 Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz.tbi index bf5f3441154..8de01169834 100644 Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz.tbi and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sample_subset.vcf.gz.tbi differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz index 2c40150944d..a959b1d4ca8 100644 Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz.tbi index 75f3f01f96f..a8d3d6f5d6e 100644 Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz.tbi and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.sites_only.vcf.gz.tbi differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz index ced001b4bdb..0fc84b559b1 100644 Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz differ diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz.tbi index 0de1d152cff..5827787165f 100644 Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz.tbi and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance/ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz.tbi differ