Skip to content

Commit

Permalink
fix(data): track singular variants
Browse files Browse the repository at this point in the history
  • Loading branch information
markwoon committed Aug 27, 2024
1 parent f66033b commit fb1ebbd
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
import com.google.gson.annotations.Expose;
import com.google.gson.annotations.SerializedName;
import org.checkerframework.checker.nullness.qual.Nullable;
Expand Down Expand Up @@ -56,6 +58,11 @@ public class DefinitionFile {
@Expose
@SerializedName("namedAlleles")
private SortedSet<NamedAllele> m_namedAlleles;
// VCF chr:pos strings filled in by doVcfTranslation(); has no initializer, so getSingularVariants() guards against null
@Expose
@SerializedName("singularVariants")
private SortedSet<String> m_singularVariants;



//-- cache
private transient Map<String, NamedAllele> m_namedAlleleMap;
Expand Down Expand Up @@ -136,12 +143,22 @@ public String getRefSeqChromosome() {


/**
 * Gets the {@link VariantLocus} objects used to define {@link NamedAllele}s in this translation.
 *
 * @return the backing variants array itself (no copy is made)
 */
public VariantLocus[] getVariants() {
  return this.m_variants;
}

/**
 * Gets the VCF {@code chr:pos} of every position that is used by only one {@link NamedAllele}, where that
 * allele is itself defined by just one {@link VariantLocus}.
 *
 * @return the recorded singular variant positions, or an empty set when none have been recorded
 */
public SortedSet<String> getSingularVariants() {
  // the backing field stays null until it is populated, so fall back to an empty set
  return m_singularVariants != null ? m_singularVariants : Collections.emptySortedSet();
}


/**
* All the named alleles defined in this translation
Expand Down Expand Up @@ -407,6 +424,34 @@ void doVcfTranslation(VcfHelper vcfHelper) throws IOException {
}
resetNamedAlleles(Collections.unmodifiableSortedSet(updatedNamedAlleles));
m_variants = sortedVariants;

m_singularVariants = new TreeSet<>();
// look for alleles with only 1 position
SortedSet<NamedAllele> allelesWith1Position = m_namedAlleles.stream()
.filter(na -> !na.isReference())
.filter(na -> Arrays.stream(na.getCpicAlleles()).filter(Objects::nonNull).count() == 1)
.collect(Collectors.toCollection(TreeSet::new));

if (!allelesWith1Position.isEmpty()) {
// check how frequently a position is used by an allele
SortedSetMultimap<VariantLocus, NamedAllele> locusMap = TreeMultimap.create();
for (int x = 0; x < m_variants.length; x += 1) {
for (NamedAllele na : m_namedAlleles) {
if (na.isReference()) {
continue;
}
if (na.getCpicAllele(x) != null) {
locusMap.put(m_variants[x], na);
}
}
}
// get positions only used by a single allele (one that only has a single position)
for (VariantLocus vl : locusMap.keySet()) {
if (locusMap.get(vl).size() == 1 && allelesWith1Position.contains(locusMap.get(vl).first())) {
m_singularVariants.add(vl.getVcfChrPosition());
}
}
}
}

private static final Pattern sf_hgvsRepeatPattern = Pattern.compile("g\\.[\\d_]+([ACGT]+\\[\\d+])$");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ public String[] getCpicAlleles() {
}

/**
 * Gets the CPIC allele stored at index {@code x} of this allele's CPIC allele array.
 *
 * @param x index into the CPIC allele array
 * @return the entry at that index
 */
public String getCpicAllele(int x) {
Preconditions.checkState(m_isInitialized, "This NamedAllele has not been initialized()");
// will throw NPE if this NamedAllele has not been initialized
// NOTE(review): the checkState call above and the NPE remark look like the before/after sides of one change;
// with the check in place an uninitialized instance would fail there first - confirm which line is live
return m_cpicAlleles[x];
}

Expand Down

0 comments on commit fb1ebbd

Please sign in to comment.