Skip to content

Commit

Permalink
Address reviewer comments; greatly simplify clustering; add ctx/cpx s…
Browse files Browse the repository at this point in the history
…upport
  • Loading branch information
mwalker174 committed Dec 21, 2022
1 parent b75acac commit c0241d1
Show file tree
Hide file tree
Showing 27 changed files with 895 additions and 1,678 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,18 @@ public final class GATKSVVCFConstants {
public static final String NEAREST_TSS = "PREDICTED_NEAREST_TSS";
public static final String TSS_DUP = "PREDICTED_TSS_DUP";

// SVTYPE classes
public enum StructuralVariantAnnotationType {
DEL,
DUP,
INS,
INV,
CPX,
BND,
CTX,
CNV
}

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////

}
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,65 @@
import htsjdk.variant.variantcontext.Genotype;

import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
* Simple allele counter for SVs. Supports multi-allelic variants, except for multi-allelic CNVs that lack genotype alleles.
*/
public class SVAlleleCounter {

private final List<Allele> alleles;
private final int[] alleleCounts;
private final int alleleNumber;
private final int[] counts;
private final double[] frequencies;
private final int number;

protected SVAlleleCounter(final List<Allele> alleles,
final int[] alleleCounts,
final int alleleNumber) {
public SVAlleleCounter(final List<Allele> alleles,
final List<Genotype> genotypes) {
this.alleles = alleles;
this.alleleCounts = alleleCounts;
this.alleleNumber = alleleNumber;
final Map<Allele, Long> alleleCountsMap = computeCounts(genotypes);
number = alleleCountsMap.values().stream().mapToInt(Long::intValue).sum();
counts = new int[alleles.size()];
for (int i = 0; i < alleles.size(); i++) {
counts[i] = alleleCountsMap.getOrDefault(alleles.get(i), 0L).intValue();
}
this.frequencies = computeFrequencies(counts, number);
}

public List<Allele> getAlleles() {
return alleles;
}

public int[] getAlleleCounts() {
return alleleCounts;
public int[] getCounts() {
return counts;
}

public int getAlleleNumber() {
return alleleNumber;
public int getNumber() {
return number;
}

public double[] getFrequencies() { return frequencies; }

/**
* Factory method for counting alleles. All calculations except AF are performed here.
* @param alleles alternate alleles to count
* @param genotypes genotypes with assigned alleles
* @return result
* Counts unique alleles in the given set of genotypes.
*/
public static SVAlleleCounter create(final List<Allele> alleles, final List<Genotype> genotypes) {
final Map<Allele, Long> alleleCountsMap = SVCallRecordUtils.getAlleleCounts(genotypes);
final int alleleNumber = alleleCountsMap.values().stream().mapToInt(Long::intValue).sum();

final int[] alleleCounts = new int[alleles.size()];
for (int i = 0; i < alleles.size(); i++) {
alleleCounts[i] = alleleCountsMap.getOrDefault(alleles.get(i), 0L).intValue();
}
return new SVAlleleCounter(alleles, alleleCounts, alleleNumber);
private static Map<Allele, Long> computeCounts(final Collection<Genotype> genotypes) {
return genotypes.stream().map(Genotype::getAlleles).flatMap(Collection::stream)
.collect(Collectors.groupingBy(a -> a, Collectors.counting()));
}

/**
* Get allele frequencies (AF). Avoid calling this repeatedly on the same object, as computations are performed
* on the fly.
* Compute allele frequencies (AF) based on counts.
*/
public double[] computeFrequencies() {
final double[] freq = new double[alleleCounts.length];
if (alleleNumber == 0) {
private static double[] computeFrequencies(final int[] counts, final int number) {
final double[] freq = new double[counts.length];
if (number == 0) {
Arrays.fill(freq, Double.NaN);
} else {
for (int i = 0; i < alleleCounts.length; i++) {
freq[i] = alleleCounts[i] / (double) alleleNumber;
for (int i = 0; i < counts.length; i++) {
freq[i] = counts[i] / (double) number;
}
}
return freq;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.StructuralVariantType;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
Expand Down Expand Up @@ -44,7 +43,7 @@ public class SVCallRecord implements SVLocatable {
private final String contigB;
private final int positionB;
private final Boolean strandB;
private final StructuralVariantType type;
private final GATKSVVCFConstants.StructuralVariantAnnotationType type;
private final Integer length;
private final List<String> algorithms;
private final List<Allele> alleles;
Expand All @@ -60,7 +59,7 @@ public SVCallRecord(final String id,
final String contigB,
final int positionB,
final Boolean strandB,
final StructuralVariantType type,
final GATKSVVCFConstants.StructuralVariantAnnotationType type,
final Integer length,
final List<String> algorithms,
final List<Allele> alleles,
Expand All @@ -78,7 +77,7 @@ protected SVCallRecord(final String id,
final String contigB,
final int positionB,
final Boolean strandB,
final StructuralVariantType type,
final GATKSVVCFConstants.StructuralVariantAnnotationType type,
final Integer length,
final List<String> algorithms,
final List<Allele> alleles,
Expand Down Expand Up @@ -117,7 +116,7 @@ public SVCallRecord(final SVCallRecord baseRecord,
final String id,
final Boolean strandA,
final Boolean strandB,
final StructuralVariantType type,
final GATKSVVCFConstants.StructuralVariantAnnotationType type,
final Integer length,
final List<String> algorithms,
final List<Allele> alleles,
Expand Down Expand Up @@ -170,21 +169,23 @@ private static Map<String, Object> validateAttributes(final Map<String, Object>
* @param inputLength assumed length, may be null
* @return variant length, may be null
*/
private static Integer inferLength(final StructuralVariantType type,
private static Integer inferLength(final GATKSVVCFConstants.StructuralVariantAnnotationType type,
final int positionA,
final int positionB,
final Integer inputLength) {
if (type.equals(StructuralVariantType.CNV) || type.equals(StructuralVariantType.DEL)
|| type.equals(StructuralVariantType.DUP) || type.equals(StructuralVariantType.INV)) {
if (type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.CNV) || type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL)
|| type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP) || type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.INV)) {
// Intrachromosomal classes
final int length = CoordMath.getLength(positionA, positionB);
if (inputLength != null) {
Utils.validateArg(inputLength.intValue() == length, "Input length does not match calculated length");
}
return length;
} else {
Utils.validate(type.equals(StructuralVariantType.INS) || inputLength == null,
"Input length should be null for type " + type.name() + " but found " + inputLength);
if ((type == GATKSVVCFConstants.StructuralVariantAnnotationType.BND
|| type == GATKSVVCFConstants.StructuralVariantAnnotationType.CTX) && inputLength != null) {
throw new IllegalArgumentException("Input length should be null for type " + type.name() + " but found " + inputLength);
}
return inputLength;
}
}
Expand All @@ -198,21 +199,21 @@ private static Integer inferLength(final StructuralVariantType type,
* @param inputStrandB assumed second strand, may be null
* @return pair containing (first strand, second strand), which may contain null values
*/
private static Pair<Boolean, Boolean> inferStrands(final StructuralVariantType type,
private static Pair<Boolean, Boolean> inferStrands(final GATKSVVCFConstants.StructuralVariantAnnotationType type,
final Boolean inputStrandA,
final Boolean inputStrandB) {
if (type.equals(StructuralVariantType.CNV)) {
if (type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.CNV)) {
Utils.validateArg(inputStrandA == null && inputStrandB == null, "Attempted to create CNV with non-null strands");
return Pair.of(null, null);
} else if (type.equals(StructuralVariantType.DEL) || type.equals(StructuralVariantType.INS)) {
} else if (type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.DEL) || type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.INS)) {
if (inputStrandA != null) {
Utils.validateArg(inputStrandA.booleanValue() == true, "Attempted to create DEL/INS with negative first strand");
}
if (inputStrandB != null) {
Utils.validateArg(inputStrandB.booleanValue() == false, "Attempted to create DEL/INS with positive second strand");
}
return Pair.of(Boolean.TRUE, Boolean.FALSE);
} else if (type.equals(StructuralVariantType.DUP)) {
} else if (type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.DUP)) {
if (inputStrandA != null) {
Utils.validateArg(inputStrandA.booleanValue() == false, "Attempted to create DUP with positive first strand");
}
Expand All @@ -221,11 +222,8 @@ private static Pair<Boolean, Boolean> inferStrands(final StructuralVariantType t
}
return Pair.of(Boolean.FALSE, Boolean.TRUE);
} else {
if (type.equals(StructuralVariantType.INV)) {
if (inputStrandA == null && inputStrandB == null) {
return Pair.of(Boolean.TRUE, Boolean.TRUE);
}
Utils.validateArg(inputStrandA.equals(inputStrandB), "Inversions must have matching strands but found " +
if (type.equals(GATKSVVCFConstants.StructuralVariantAnnotationType.INV)) {
Utils.validateArg(Objects.equals(inputStrandA, inputStrandB), "Inversions must have matching strands but found " +
inputStrandA + " / " + inputStrandB);
}
return Pair.of(inputStrandA, inputStrandB);
Expand Down Expand Up @@ -303,7 +301,9 @@ public boolean isDepthOnly() {
}

public boolean isSimpleCNV() {
return type == StructuralVariantType.DEL || type == StructuralVariantType.DUP || type == StructuralVariantType.CNV;
return type == GATKSVVCFConstants.StructuralVariantAnnotationType.DEL
|| type == GATKSVVCFConstants.StructuralVariantAnnotationType.DUP
|| type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV;
}

public boolean nullStrands() {
Expand Down Expand Up @@ -347,7 +347,7 @@ public Boolean getStrandB() {
}

@Override
public StructuralVariantType getType() {
public GATKSVVCFConstants.StructuralVariantAnnotationType getType() {
return type;
}

Expand Down
Loading

0 comments on commit c0241d1

Please sign in to comment.