Skip to content

Commit

Permalink
fix: validate outside call diplotypes
Browse files Browse the repository at this point in the history
  • Loading branch information
markwoon committed Aug 14, 2024
1 parent 0cbb7d0 commit 9c49fbc
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 11 deletions.
49 changes: 49 additions & 0 deletions src/main/java/org/pharmgkb/pharmcat/Env.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.pharmgkb.pharmcat.definition.DefinitionReader;
import org.pharmgkb.pharmcat.definition.model.DefinitionFile;
import org.pharmgkb.pharmcat.phenotype.PhenotypeMap;
import org.pharmgkb.pharmcat.phenotype.model.GenePhenotype;
import org.pharmgkb.pharmcat.reporter.MessageHelper;
Expand All @@ -28,6 +32,7 @@ public class Env {
private final PgkbGuidelineCollection m_drugs;
private MessageHelper m_messageHelper;
private final Map<DataSource, Map<String, Map<String, Haplotype>>> m_haplotypeCache = new HashMap<>();
private final Multimap<String, String> m_validHaplotypes = HashMultimap.create();


public Env() throws IOException, ReportableException {
Expand Down Expand Up @@ -74,6 +79,50 @@ public String getReferenceAllele(String gene) {
}


/**
 * Determines whether {@code allele} is a recognized named allele for {@code gene}.
 * <p>
 * The allele is accepted when any of the following holds, checked in this order:
 * <ol>
 *   <li>the gene/allele pair was accepted by an earlier call (remembered in
 *       {@code m_validHaplotypes});</li>
 *   <li>the gene symbol starts with {@code "HLA-"} (accepted without any further lookup);</li>
 *   <li>a definition file exists for the gene and it has a named allele with this name;</li>
 *   <li>the CPIC phenotype data, and failing that the DPWG phenotype data, lists the allele among
 *       its haplotypes or its activity values.  For CYP2D6 the name is first passed through
 *       {@code Cyp2d6CopyNumberCaller.inferHaplotypeName}.</li>
 * </ol>
 * Only positive answers are remembered; a rejected allele is looked up again on every call.
 *
 * @param gene the gene symbol
 * @param allele the named allele to check
 * @return {@code true} if the allele is recognized for the gene, {@code false} otherwise
 */
public boolean isValidNamedAllele(String gene, String allele) {

  if (m_validHaplotypes.containsEntry(gene, allele)) {
    return true;
  }

  // HLA genes are a special case: every allele is accepted as-is
  boolean recognized = gene.startsWith("HLA-")
      || m_definitionReader.lookupDefinitionFile(gene)
          .map(definitionFile -> definitionFile.getNamedAllele(allele))
          .isPresent();

  if (!recognized) {
    // phenotype data is keyed by the inferred name, which only differs from the input for CYP2D6
    String lookupName = gene.equals("CYP2D6")
        ? Cyp2d6CopyNumberCaller.inferHaplotypeName(allele)
        : allele;
    for (DataSource source : new DataSource[] {DataSource.CPIC, DataSource.DPWG}) {
      GenePhenotype genePhenotype = m_phenotypeMap.getPhenotype(gene, source);
      if (genePhenotype == null) {
        continue;
      }
      if (genePhenotype.getHaplotypes().containsKey(lookupName)
          || genePhenotype.getActivityValues().containsKey(lookupName)) {
        recognized = true;
        break;
      }
    }
  }

  if (recognized) {
    // remember the original (un-inferred) allele name so the next call short-circuits
    m_validHaplotypes.put(gene, allele);
  }
  return recognized;
}


/**
 * Gets the guideline collection held by this environment.
 *
 * @return the {@code PgkbGuidelineCollection} stored in {@code m_drugs}
 */
public PgkbGuidelineCollection getDrugs() {
  return this.m_drugs;
}
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/org/pharmgkb/pharmcat/Pipeline.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,11 @@ public PipelineResult call() throws IOException {
output.add(AnsiConsole.styleWarning(msg));
}
}
for (String hap : call.getHaplotypes()) {
if (!m_env.isValidNamedAllele(gene, hap)) {
call.addWarning("Undocumented " + gene + " named allele in outside call: " + hap);
}
}
outsideCalls.add(call);
call.getWarnings().forEach(w -> output.add(AnsiConsole.styleWarning("WARNING: " + w)));
outsideCallMap.computeIfAbsent(gene, g -> new HashSet<>()).add(call);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ public OutsideCall(String line, int lineNumber) throws RuntimeException {
List<String> alleles = sf_diplotypeSplitter.splitToList(diplotype).stream()
.map(a -> a.replaceFirst("^" + m_gene + "\\s*", ""))
.toList();
if (alleles.size() > 2) {
throw new BadOutsideCallException("Line " + lineNumber + ": Too many alleles specified in " + diplotype);
}

if (m_gene.equals("CYP2D6")) {
alleles = alleles.stream()
Expand Down Expand Up @@ -121,12 +124,10 @@ public OutsideCall(String line, int lineNumber) throws RuntimeException {
.toList();
}

// re-join alleles to eliminate white space when gene symbol is used in diplotype
// re-join alleles to eliminate white space when a gene symbol is used in diplotype
m_diplotype = String.join(sf_diplotypeSeparator, alleles);
m_diplotypes = ImmutableList.of(m_diplotype);
if (alleles.size() > 2) {
throw new BadOutsideCallException("Line " + lineNumber + ": Too many alleles specified in " + m_diplotype);
}

m_haplotypes.add(alleles.get(0));
if (alleles.size() == 2) {
m_haplotypes.add(alleles.get(1));
Expand All @@ -146,6 +147,10 @@ public OutsideCall(String line, int lineNumber) throws RuntimeException {
}
}

/**
 * Adds a warning message to this outside call's warnings.
 *
 * @param warning the warning text to add to {@code m_warnings}
 */
public void addWarning(String warning) {
  this.m_warnings.add(warning);
}

@Override
public String toString() {
if (m_diplotype != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,24 +87,31 @@ private static Object[] inferHaplotype(Haplotype haplotype) {
if (haplotype == null) {
return new Object[] {false, null};
}
Matcher m = sf_copyNumberPattern.matcher(haplotype.getName());
String name = inferHaplotypeName(haplotype.getName());
return new Object[] {!name.equals(haplotype.getName()), name};
}


public static String inferHaplotypeName(String haplotypeName) {

Matcher m = sf_copyNumberPattern.matcher(haplotypeName);
if (!m.matches()) {
return new Object[] {false, haplotype.getName()};
return haplotypeName;
}
Integer hap = Integer.parseInt(m.group(1));
if (!m_gteThree.contains(hap)) {
return new Object[] {false, haplotype.getName()};
return haplotypeName;
}
int cn = Integer.parseInt(m.group(3));
if (cn <= 2) {
return new Object[] {false, haplotype.getName()};
return haplotypeName;
}
if (haplotype.getName().contains(TextConstants.GTE)) {
if (haplotypeName.contains(TextConstants.GTE)) {
if (cn == 3) {
// ignore >= 3
return new Object[] {false, haplotype.getName()};
return haplotypeName;
}
}
return new Object[] {true, "*" + hap + "x" + TextConstants.GTE + "3"};
return "*" + hap + "x" + TextConstants.GTE + "3";
}
}

0 comments on commit 9c49fbc

Please sign in to comment.