From 85a770f89cf1985a28a11667cf88264932b26fa8 Mon Sep 17 00:00:00 2001
From: Mark Woon <markwoon@gmail.com>
Date: Wed, 24 Apr 2024 22:27:15 -0700
Subject: [PATCH] fix(datamanager): handle all allele/position removal during
 data ingestion phase

---
 .../pharmcat/definition/DefinitionReader.java |   6 +-
 .../definition/model/DefinitionExemption.java |  16 ++-
 .../definition/model/DefinitionFile.java      | 113 +++++++++++++++---
 .../definition/model/NamedAllele.java         |   5 +-
 .../pharmcat/haplotype/MatchData.java         |  25 ++--
 .../haplotype/NamedAlleleMatcher.java         |  81 ++-----------
 .../pharmcat/haplotype/ResultBuilder.java     |  16 +--
 .../pharmcat/haplotype/model/GeneCall.java    |  11 +-
 .../pharmgkb/pharmcat/util/DataManager.java   |  10 +-
 9 files changed, 144 insertions(+), 139 deletions(-)
diff --git a/src/main/java/org/pharmgkb/pharmcat/definition/DefinitionReader.java b/src/main/java/org/pharmgkb/pharmcat/definition/DefinitionReader.java
index e58fe380a..2f073074a 100644
--- a/src/main/java/org/pharmgkb/pharmcat/definition/DefinitionReader.java
+++ b/src/main/java/org/pharmgkb/pharmcat/definition/DefinitionReader.java
@@ -84,7 +84,7 @@ public DefinitionReader(List<Path> definitionFiles, @Nullable Path exemptionsFil
    * This should be called <em>after</em> all allele definitions have been read.
    */
   public String getGenomeBuild() {
-    Preconditions.checkState(m_definitionFiles.size() > 0);
+    Preconditions.checkState(!m_definitionFiles.isEmpty());
 
     if (m_genomeBuild == null) {
       for (DefinitionFile definitionFile : m_definitionFiles.values()) {
@@ -143,6 +143,10 @@ public SortedSet<NamedAllele> getHaplotypes(String gene) {
     return m_definitionFiles.get(gene).getNamedAlleles();
   }
 
+  public NamedAllele getReferenceHaplotype(String gene) {
+    return m_definitionFiles.get(gene).getReferenceNamedAllele();
+  }
+
   public @Nullable DefinitionExemption getExemption(String gene) {
     return m_exemptions.get(gene.toLowerCase());
   }
diff --git a/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionExemption.java b/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionExemption.java
index fdd9d28bf..2a5b33c12 100644
--- a/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionExemption.java
+++ b/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionExemption.java
@@ -49,7 +49,9 @@ public DefinitionExemption(String gene, @Nullable SortedSet<VariantLocus> ignore
       m_ignoredAllelesLc = m_ignoredAlleles;
     } else {
       m_ignoredAlleles = ignoredAlleles;
-      m_ignoredAllelesLc = ignoredAlleles.stream().map(String::toLowerCase).collect(Collectors.toCollection(TreeSet::new));
+      m_ignoredAllelesLc = ignoredAlleles.stream()
+          .map(String::toLowerCase)
+          .collect(Collectors.toCollection(TreeSet::new));
     }
     m_allHits = allHits;
   }
@@ -61,7 +63,9 @@ public String getGene() {
 
 
   /**
-   * Gets the positions from definition that to ignore.
+   * Gets the positions from original definition that should be ignored.
+   * These get removed by the {@link org.pharmgkb.pharmcat.util.DataManager} when definitions are first pulled back from
+   * PharmGKB.
    */
   public SortedSet<VariantLocus> getIgnoredPositions() {
     return m_ignoredPositions;
@@ -72,7 +76,7 @@ public SortedSet<VariantLocus> getIgnoredPositions() {
    * <p>
    * <b>Currently only checks based on RSID!</b>
    */
-  public boolean shouldIgnorePosition(VariantLocus position) {
+  boolean shouldIgnorePosition(VariantLocus position) {
     return m_ignoredPositions.stream()
         .anyMatch(vl -> {
           if (vl.getRsid() != null) {
@@ -92,7 +96,9 @@ public SortedSet<VariantLocus> getExtraPositions() {
 
 
   /**
-   * Gets the named alleles to ignore.
+   * Gets the named alleles from the original definition that should be ignored.
+   * These get removed by the {@link org.pharmgkb.pharmcat.util.DataManager} when definitions are first pulled back
+   * from PharmGKB.
    */
   public SortedSet<String> getIgnoredAlleles() {
     return m_ignoredAlleles;
@@ -101,7 +107,7 @@ public SortedSet<String> getIgnoredAlleles() {
   /**
    * Checks if the given named allele should be ignored.
    */
-  public boolean shouldIgnoreAllele(String allele) {
+  boolean shouldIgnoreAllele(String allele) {
     return m_ignoredAllelesLc.contains(allele.toLowerCase());
   }
 
diff --git a/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionFile.java b/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionFile.java
index af026d185..5e8688cb9 100644
--- a/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionFile.java
+++ b/src/main/java/org/pharmgkb/pharmcat/definition/model/DefinitionFile.java
@@ -7,6 +7,7 @@
 import java.util.stream.Collectors;
 import com.google.gson.annotations.Expose;
 import com.google.gson.annotations.SerializedName;
+import org.checkerframework.checker.nullness.qual.Nullable;
 import org.pharmgkb.pharmcat.ParseException;
 import org.pharmgkb.pharmcat.haplotype.Iupac;
 import org.pharmgkb.pharmcat.reporter.model.DataSource;
@@ -57,6 +58,11 @@ public class DefinitionFile {
   @SerializedName("namedAlleles")
   private SortedSet<NamedAllele> m_namedAlleles;
 
+  //-- cache
+  private Map<String, NamedAllele> m_namedalleleMap;
+  private NamedAllele m_referenceNamedAllele;
+
+
 
   /**
    * The format version of the definition file.
@@ -145,6 +151,39 @@ public SortedSet<NamedAllele> getNamedAlleles() {
     return m_namedAlleles;
   }
 
+  public @Nullable NamedAllele getNamedAllele(String name) {
+    if (m_namedalleleMap == null) {
+      mapNamedAlleles();
+    }
+    return m_namedalleleMap.get(name);
+  }
+
+  public NamedAllele getReferenceNamedAllele() {
+    if (m_referenceNamedAllele == null) {
+      mapNamedAlleles();
+    }
+    return m_referenceNamedAllele;
+  }
+
+  private void mapNamedAlleles() {
+    // build into locals and publish last, so a failed validation never leaves a half-filled cache behind
+    Map<String, NamedAllele> byName = new HashMap<>();
+    NamedAllele reference = null;
+    for (NamedAllele allele : getNamedAlleles()) {
+      byName.put(allele.getName(), allele);
+      if (allele.isReference() && reference != null) {
+        throw new IllegalStateException("Multiple reference named alleles: " + allele.getName() + " and " +
+            reference.getName());
+      }
+      reference = allele.isReference() ? allele : reference;
+    }
+    if (reference == null) {
+      throw new IllegalStateException(m_geneSymbol + " has no reference named allele!");
+    }
+    m_referenceNamedAllele = reference;
+    m_namedalleleMap = byName;
+  }
+
 
   @Override
   public String toString() {
@@ -191,44 +230,81 @@ public void removeIgnoredNamedAlleles(DefinitionExemption exemption) {
 
 
   /**
-   * Remove ignored positions specified in {@link DefinitionExemption}.
+   * Remove ignored positions specified in {@link DefinitionExemption} and any unused positions.
    * Should only be called during initial generation of this {@link DefinitionFile} by {@link DataManager}.
    */
   public void removeIgnoredPositions(DefinitionExemption exemption) {
-    // find ignored positions
-    Set<Integer> ignoredPositions = new HashSet<>();
+
+    // cannot use helper methods on NamedAlleles because they're not initialized yet
+    // must loop through elements manually
+
+    // find positions that no variant NamedAllele uses (e.g. once ignored NamedAlleles have been removed)
+    // the reference allele is skipped when deciding this; NOTE(review): presumably it defines every position,
+    // so counting it would mean nothing is ever found unused - confirm against the definition data
+    // positions ignored via the exemption are left out here: the removal loop below counts them as ignored,
+    // so also listing them as unused would trip the unused-position count check
+    SortedSet<VariantLocus> unusedPositions = new TreeSet<>();
+    for (int x = 0; x < m_variants.length; x += 1) {
+      int idx = x;
+      boolean inUse = m_namedAlleles.stream()
+          .anyMatch(na -> !na.isReference() && na.getCpicAlleles()[idx] != null);
+      if (!inUse && !exemption.shouldIgnorePosition(m_variants[x])) {
+        System.out.println("  Found unused position: " + m_variants[x]);
+        unusedPositions.add(m_variants[x]);
+      }
+    }
+
+    // remove unused/ignored positions
+    int numIgnored = 0;
+    int numUnused = 0;
+    Set<Integer> skipPositions = new HashSet<>();
     List<VariantLocus> newVariants = new ArrayList<>();
     for (int x = 0; x < m_variants.length; x += 1) {
       if (exemption.shouldIgnorePosition(m_variants[x])) {
-        System.out.println("  Removing position " + x + " (" + m_variants[x] + ")");
-        ignoredPositions.add(x);
+        System.out.println("  Removing ignored position " + x + " (" + m_variants[x] + ")");
+        skipPositions.add(x);
+        numIgnored += 1;
+      } else if (unusedPositions.contains(m_variants[x])) {
+        System.out.println("  Removing unused position " + x + " (" + m_variants[x] + ")");
+        skipPositions.add(x);
+        numUnused += 1;
       } else {
         newVariants.add(m_variants[x]);
       }
     }
-    if (exemption.getIgnoredPositions().size() != ignoredPositions.size()) {
+    if (exemption.getIgnoredPositions().size() != numIgnored) {
       throw new IllegalStateException("Should have " + exemption.getIgnoredPositions().size() + " ignored positions, " +
-          "but only found " + ignoredPositions.size());
+          "but only found " + numIgnored);
+    }
+    if (unusedPositions.size() != numUnused) {
+      throw new IllegalStateException("Should have " + unusedPositions.size() + " unused positions, but only found " +
+          numUnused);
     }
     // update variants
     m_variants = newVariants.toArray(new VariantLocus[0]);
 
     SortedSet<NamedAllele> updatedNamedAlleles = new TreeSet<>();
     for (NamedAllele namedAllele : m_namedAlleles) {
-      String[] cpicAlleles = new String[namedAllele.getCpicAlleles().length - ignoredPositions.size()];
-      if (m_variants.length != cpicAlleles.length) {
+      // sanity check: after dropping skipped positions there must be one allele slot per remaining variant
+      int totalAlleles = namedAllele.getCpicAlleles().length - skipPositions.size();
+      if (m_variants.length != totalAlleles) {
         throw new IllegalStateException("Number of variants (" + m_variants.length + ") and number of CPIC alleles (" +
-            cpicAlleles.length + ") don't match up for " + namedAllele.getName());
+            totalAlleles + ") don't match up for " + namedAllele.getName());
       }
+      String[] cpicAlleles = new String[totalAlleles];
       for (int x = 0, y = 0; x < namedAllele.getCpicAlleles().length; x += 1) {
-        if (ignoredPositions.contains(x)) {
+        if (skipPositions.contains(x)) {
           continue;
         }
         cpicAlleles[y] = namedAllele.getCpicAlleles()[x];
         y += 1;
       }
-      // if there's nothing left that differs from reference allele then don't include this named allele in output
-      if (!Arrays.stream(cpicAlleles).allMatch(Objects::isNull)) {
+
+      // if there's nothing left that differs from reference allele, then don't include this named allele in output
+      if (Arrays.stream(cpicAlleles).allMatch(Objects::isNull)) {
+        System.out.println("WARNING: Removing " + namedAllele.getName() +
+            " because it has no alleles after removing unused/ignored positions");
+      } else {
         updatedNamedAlleles.add(new NamedAllele(namedAllele.getId(), namedAllele.getName(), null, cpicAlleles,
             namedAllele.isReference()));
       }
@@ -239,6 +315,7 @@ public void removeIgnoredPositions(DefinitionExemption exemption) {
 
   /**
    * Translate variants from CPIC to VCF (i.e. {@code cpicAlleles} to {@code alleles}).
+   * Should only be called during initial generation of this {@link DefinitionFile} by {@link DataManager}.
    */
   public void doVcfTranslation(VcfHelper vcfHelper) throws IOException {
 
@@ -332,7 +409,7 @@ private void translateVariantLocus(NamedAllele referenceNamedAllele, VariantLocu
         altAlleles.add(allele);
       }
     }
-    if (repeats.size() > 0 && repeats.size() != vl.getCpicAlleles().size()) {
+    if (!repeats.isEmpty() && repeats.size() != vl.getCpicAlleles().size()) {
       boolean haveSingle = false;
       if (nonRepeats.size() == 1) {
         String repeatedSequence = repeats.get(0);
@@ -347,7 +424,7 @@ private void translateVariantLocus(NamedAllele referenceNamedAllele, VariantLocu
 
     List<String> hgvsNames = VariantLocus.HGVS_NAME_SPLITTER.splitToList(vl.getChromosomeHgvsName());
 
-    if (!isSnp && repeats.size() == 0 && altAlleles.size() != 1) {
+    if (!isSnp && repeats.isEmpty() && altAlleles.size() != 1) {
       // in/dels - must have HGVS to represent each change
       throw new IllegalStateException(errorLocation + ": has " + altAlleles.size() + " alt alleles; max is 1");
     }
@@ -392,7 +469,7 @@ private void translateVariantLocus(NamedAllele referenceNamedAllele, VariantLocu
             " vs. " + vcfPosition);
       }
 
-    } else if (repeats.size() > 0) {
+    } else if (!repeats.isEmpty()) {
       Map<String, VcfHelper.VcfData> firstPass = new HashMap<>();
       for (String h : hgvsNames) {
         String repeatAlt;
@@ -467,12 +544,12 @@ private void translateVariantLocus(NamedAllele referenceNamedAllele, VariantLocu
       }
     }
 
-    if (missingAlts.size() > 0) {
+    if (!missingAlts.isEmpty()) {
       if (altAlleles.size() == 1) {
         throw new IllegalStateException(errorLocation + ": Missing alts " + missingAlts);
       } else {
         if (!vcfMap.entrySet().stream().allMatch((e) -> e.getKey().equals(e.getValue()))) {
-          // CPIC alleles needs to be translated
+          // CPIC alleles need to be translated
           throw new IllegalStateException(errorLocation + ": Don't know how to translate " + missingAlts);
         } else {
           // no translation, use as is
diff --git a/src/main/java/org/pharmgkb/pharmcat/definition/model/NamedAllele.java b/src/main/java/org/pharmgkb/pharmcat/definition/model/NamedAllele.java
index ed38b4aff..1c5322791 100644
--- a/src/main/java/org/pharmgkb/pharmcat/definition/model/NamedAllele.java
+++ b/src/main/java/org/pharmgkb/pharmcat/definition/model/NamedAllele.java
@@ -14,6 +14,7 @@
 import com.google.gson.annotations.SerializedName;
 import org.apache.commons.lang3.ObjectUtils;
 import org.checkerframework.checker.nullness.qual.NonNull;
+import org.checkerframework.checker.nullness.qual.Nullable;
 import org.pharmgkb.pharmcat.haplotype.Iupac;
 import org.pharmgkb.pharmcat.haplotype.MatchData;
 import org.pharmgkb.pharmcat.util.HaplotypeNameComparator;
@@ -249,7 +250,7 @@ public String getAllele(int idx) {
     return m_alleles[idx];
   }
 
-  public String getAllele(VariantLocus variantLocus) {
+  public @Nullable String getAllele(VariantLocus variantLocus) {
     Preconditions.checkState(m_isInitialized, "This NamedAllele has not been initialized()");
     return m_alleleMap.get(variantLocus);
   }
@@ -263,7 +264,7 @@ public String getCpicAllele(int x) {
     return m_cpicAlleles[x];
   }
 
-  public String getCpicAllele(VariantLocus variantLocus) {
+  public @Nullable String getCpicAllele(VariantLocus variantLocus) {
     Preconditions.checkState(m_isInitialized, "This NamedAllele has not been initialized()");
     return m_cpicAlleleMap.get(variantLocus);
   }
diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java
index 8d335c67c..1a23ae85d 100644
--- a/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java
+++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java
@@ -1,7 +1,17 @@
 package org.pharmgkb.pharmcat.haplotype;
 
 import java.lang.invoke.MethodHandles;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.SortedSet;
+import java.util.TreeMap;
+import java.util.TreeSet;
 import java.util.stream.Collectors;
 import com.google.common.base.Preconditions;
 import com.google.gson.annotations.Expose;
@@ -62,7 +72,7 @@ public class MatchData {
    * @param alleleMap map of chr:positions to {@link SampleAllele}s from VCF
    * @param allPositions all {@link VariantLocus} positions of interest for the gene
    * @param extraPositions extra positions to track sample alleles for
-   * @param ignoredPositions ignored positions due to ignored named alleles
+   * @param ignoredPositions ignored positions to remove from matching (used for special cases like DPYD)
    */
   public MatchData(String sampleId, String gene, SortedMap<String, SampleAllele> alleleMap, VariantLocus[] allPositions,
       @Nullable SortedSet<VariantLocus> extraPositions, @Nullable SortedSet<VariantLocus> ignoredPositions) {
@@ -84,7 +94,7 @@ public MatchData(String sampleId, String gene, SortedMap<String, SampleAllele> a
       if (m_ignoredPositions.contains(variant)) {
         continue;
       }
-      if (allele.getUndocumentedVariations().size() > 0) {
+      if (!allele.getUndocumentedVariations().isEmpty()) {
         m_positionsWithUndocumentedVariations.add(variant);
         if (allele.isTreatUndocumentedVariationsAsReference()) {
           m_treatUndocumentedVariationsAsReference = true;
@@ -175,11 +185,8 @@ private boolean isIgnorableCombination(String gene, NamedAllele hap) {
   void defaultMissingAllelesToReference() {
 
     SortedSet<NamedAllele> updatedHaplotypes = new TreeSet<>();
-    Optional<NamedAllele> refHapOpt = m_haplotypes.stream().filter(NamedAllele::isReference).findAny();
-    if (refHapOpt.isEmpty()) {
-      throw new IllegalStateException(m_gene + " does not have a reference");
-    }
-    NamedAllele referenceHaplotype = refHapOpt.get();
+    NamedAllele referenceHaplotype = m_haplotypes.stream().filter(NamedAllele::isReference).findAny()
+        .orElseThrow(() -> new IllegalStateException(m_gene + " does not have a reference"));
     int numAlleles = referenceHaplotype.getAlleles().length;
     for (NamedAllele hap : m_haplotypes) {
       if (referenceHaplotype == hap) {
@@ -319,7 +326,7 @@ public SortedSet<Variant> getExtraPositions() {
    */
   public SortedSet<NamedAllele> getHaplotypes() {
     if (m_haplotypes == null) {
-      if (m_sampleMap.size() == 0) {
+      if (m_sampleMap.isEmpty()) {
         return Collections.emptySortedSet();
       }
       throw new IllegalStateException("Not initialized - call marshallHaplotypes()");
diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java
index 14cf34cee..22fb95efc 100644
--- a/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java
+++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/NamedAlleleMatcher.java
@@ -488,27 +488,21 @@ private Set<String> getNames(@Nullable BaseMatch bm) {
   private MatchData initializeCallData(String sampleId, SortedMap<String, SampleAllele> alleleMap, String gene,
       boolean assumeReference, boolean findCombinations) {
 
-    DefinitionExemption exemption = m_definitionReader.getExemption(gene);
-    SortedSet<VariantLocus> extraPositions = null;
     SortedSet<NamedAllele> alleles = m_definitionReader.getHaplotypes(gene);
     VariantLocus[] allPositions = m_definitionReader.getPositions(gene);
-    SortedSet<VariantLocus> unusedPositions = null;
+    DefinitionExemption exemption = m_definitionReader.getExemption(gene);
+
+    SortedSet<VariantLocus> extraPositions = null;
     if (exemption != null) {
       extraPositions = exemption.getExtraPositions();
-      unusedPositions = findUnusedPositions(exemption, allPositions, alleles);
     }
 
     // grab SampleAlleles for all positions related to the current gene
-    MatchData data = new MatchData(sampleId, gene, alleleMap, allPositions, extraPositions, unusedPositions);
+    MatchData data = new MatchData(sampleId, gene, alleleMap, allPositions, extraPositions, null);
     if (data.getNumSampleAlleles() == 0) {
       return data;
     }
 
-    if (exemption != null) {
-      alleles = alleles.stream()
-          .filter(a -> !exemption.shouldIgnoreAllele(a.getName()))
-          .collect(Collectors.toCollection(TreeSet::new));
-    }
     // handle missing positions (if any)
     data.marshallHaplotypes(gene, alleles, findCombinations);
 
@@ -521,71 +515,22 @@ private MatchData initializeCallData(String sampleId, SortedMap<String, SampleAl
   }
 
 
-  /**
-   * Find positions that are only used by ignored alleles (and therefore should be eliminated from consideration).
-   */
-  private SortedSet<VariantLocus> findUnusedPositions(DefinitionExemption exemption, VariantLocus[] allPositions,
-      SortedSet<NamedAllele> namedAlleles) {
-
-    SortedSet<VariantLocus> unusedPositions = new TreeSet<>();
-    if (exemption.getIgnoredAlleles().isEmpty()) {
-      return unusedPositions;
-    }
-
-    List<NamedAllele> allAlleles = new ArrayList<>(namedAlleles);
-    List<NamedAllele> variantNamedAlleles = allAlleles.subList(1, namedAlleles.size() - 1);
-    Set<VariantLocus> ignorablePositions = new HashSet<>();
-    for (NamedAllele namedAllele : variantNamedAlleles) {
-      if (exemption.shouldIgnoreAllele(namedAllele.getName())) {
-        ignorablePositions.addAll(findIgnorablePositions(allPositions, namedAllele));
-      }
-    }
-
-    for (VariantLocus vl : ignorablePositions) {
-      boolean isUnused = true;
-      for (NamedAllele namedAllele : variantNamedAlleles) {
-        if (!exemption.shouldIgnoreAllele(namedAllele.getName())) {
-          if (namedAllele.getAllele(vl) != null) {
-            isUnused = false;
-            break;
-          }
-        }
-      }
-      if (isUnused) {
-        unusedPositions.add(vl);
-      }
-    }
-    return unusedPositions;
-  }
-
-  /**
-   * Find positions that are used by ignored alleles (and are therefore potentially ignorable).
-   */
-  private Set<VariantLocus> findIgnorablePositions(VariantLocus[] allPositions, NamedAllele namedAllele)  {
-    Set<VariantLocus> ignorablePositions = new HashSet<>();
-    int x = 0;
-    for (String allele : namedAllele.getAlleles()) {
-      if (allele != null) {
-        ignorablePositions.add(allPositions[x]);
-      }
-      x += 1;
-    }
-    return ignorablePositions;
-  }
-
-
   private MatchData initializeDpydCallData(String sampleId, SortedMap<String, SampleAllele> alleleMap,
       boolean assumeReference, boolean findCombinations) {
 
     String gene = "DPYD";
-    DefinitionExemption exemption = m_definitionReader.getExemption(gene);
-    SortedSet<VariantLocus> extraPositions = null;
     // remove HapB3 and HapB3Intron
     SortedSet<NamedAllele> alleles = m_definitionReader.getHaplotypes(gene).stream()
         .filter(a -> !a.getName().equals(DpydHapB3Matcher.HAPB3_ALLELE) &&
             !a.getName().equals(DpydHapB3Matcher.HAPB3_INTRONIC_ALLELE))
         .collect(Collectors.toCollection(TreeSet::new));
     VariantLocus[] allPositions = m_definitionReader.getPositions(gene);
+    DefinitionExemption exemption = m_definitionReader.getExemption(gene);
+
+    SortedSet<VariantLocus> extraPositions = null;
+    if (exemption != null) {
+      extraPositions = exemption.getExtraPositions();
+    }
     SortedSet<VariantLocus> unusedPositions = new TreeSet<>();
     // add HapB3 positions to ignore
     for (VariantLocus vl : allPositions) {
@@ -593,12 +538,6 @@ private MatchData initializeDpydCallData(String sampleId, SortedMap<String, Samp
         unusedPositions.add(vl);
       }
     }
-    if (exemption != null) {
-      extraPositions = exemption.getExtraPositions();
-      if (!exemption.getIgnoredAlleles().isEmpty()) {
-        throw new IllegalStateException("Not expecting DPYD to have ignored alleles");
-      }
-    }
 
     // grab SampleAlleles for all positions related to the current gene
     MatchData data = new MatchData(sampleId, gene, alleleMap, allPositions, extraPositions, unusedPositions);
diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java
index 8150cd044..d344c482c 100644
--- a/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java
+++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/ResultBuilder.java
@@ -2,7 +2,6 @@
 
 import java.util.Collection;
 import java.util.Date;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -12,7 +11,6 @@
 import org.pharmgkb.common.util.PathUtils;
 import org.pharmgkb.pharmcat.VcfFile;
 import org.pharmgkb.pharmcat.definition.DefinitionReader;
-import org.pharmgkb.pharmcat.definition.model.DefinitionExemption;
 import org.pharmgkb.pharmcat.definition.model.DefinitionFile;
 import org.pharmgkb.pharmcat.definition.model.NamedAllele;
 import org.pharmgkb.pharmcat.definition.model.VariantLocus;
@@ -136,22 +134,10 @@ private GeneCall initGeneCall(String gene, MatchData matchData, @Nullable List<M
         .map(NamedAllele::getName)
         .filter(n -> !matchableHaps.contains(n))
         .collect(Collectors.toSet());
-    Set<String> ignoredHaplotypes;
-    DefinitionExemption exemption = m_definitionReader.getExemption(gene);
-    if (exemption != null) {
-      uncallableHaplotypes = uncallableHaplotypes.stream()
-          .filter(h -> !exemption.shouldIgnoreAllele(h))
-          .collect(Collectors.toSet());
-      ignoredHaplotypes = exemption.getIgnoredAlleles().stream()
-          .map(String::toUpperCase)
-          .collect(Collectors.toSet());
-    } else {
-      ignoredHaplotypes = new HashSet<>();
-    }
 
     DefinitionFile definitionFile = m_definitionReader.getDefinitionFile(gene);
     GeneCall geneCall = new GeneCall(definitionFile.getSource(), definitionFile.getVersion(),
-        definitionFile.getChromosome(), gene, matchData, uncallableHaplotypes, ignoredHaplotypes, warnings);
+        definitionFile.getChromosome(), gene, matchData, uncallableHaplotypes, warnings);
 
     // get position info
     for (VariantLocus variant : matchData.getPositions()) {
diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/model/GeneCall.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/model/GeneCall.java
index 4a55d84a7..30997c0b7 100644
--- a/src/main/java/org/pharmgkb/pharmcat/haplotype/model/GeneCall.java
+++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/model/GeneCall.java
@@ -61,23 +61,18 @@ public class GeneCall {
   @SerializedName("uncallableHaplotypes")
   private final Set<String> m_uncallableHaplotypes;
   @Expose
-  @SerializedName("ignoredHaplotypes")
-  private final Set<String> m_ignoredHaplotypes;
-  @Expose
   @SerializedName("warnings")
   private final List<MessageAnnotation> m_warnings;
 
 
   public GeneCall(DataSource source, String version, String chromosome, String gene,
-      MatchData matchData, Set<String> uncallableHaplotypes, Set<String> ignoredHaplotypes,
-      @Nullable List<MessageAnnotation> warnings) {
+      MatchData matchData, Set<String> uncallableHaplotypes, @Nullable List<MessageAnnotation> warnings) {
     m_source = source;
     m_version = version;
     m_chromosome = chromosome;
     m_gene = gene;
     m_matchData = matchData;
     m_uncallableHaplotypes = uncallableHaplotypes;
-    m_ignoredHaplotypes = ignoredHaplotypes;
     m_variantsOfInterest = matchData.getExtraPositions();
     m_warnings = warnings;
   }
@@ -113,10 +108,6 @@ public Set<String> getUncallableHaplotypes() {
     return m_uncallableHaplotypes;
   }
 
-  public Set<String> getIgnoredHaplotypes() {
-    return m_ignoredHaplotypes;
-  }
-
 
   public LinkedHashSet<DiplotypeMatch> getDiplotypes() {
     return m_diplotypes;
diff --git a/src/main/java/org/pharmgkb/pharmcat/util/DataManager.java b/src/main/java/org/pharmgkb/pharmcat/util/DataManager.java
index 9367a55fd..60507de5e 100644
--- a/src/main/java/org/pharmgkb/pharmcat/util/DataManager.java
+++ b/src/main/java/org/pharmgkb/pharmcat/util/DataManager.java
@@ -367,14 +367,8 @@ private Set<String> getCurrentFiles(Path dir, String suffix) throws IOException
    */
   private void fixCyp2c19(DefinitionFile definitionFile) {
     Preconditions.checkNotNull(definitionFile);
-    NamedAllele star1 = definitionFile.getNamedAlleles().stream()
-        .filter(na -> na.getName().equals("*1"))
-        .findFirst()
-        .orElseThrow(() -> new IllegalStateException("Cannot find CYP2C19*1"));
-    NamedAllele star38 = definitionFile.getNamedAlleles().stream()
-        .filter(na -> na.getName().equals("*38"))
-        .findFirst()
-        .orElseThrow(() -> new IllegalStateException("Cannot find CYP2C19*38"));
+    NamedAllele star1 = Objects.requireNonNull(definitionFile.getNamedAllele("*1"));
+    NamedAllele star38 = Objects.requireNonNull(definitionFile.getNamedAllele("*38"));
     star1.initialize(definitionFile.getVariants());
     star38.initialize(definitionFile.getVariants());
     for (int x = 0; x < star38.getAlleles().length; x += 1) {