Skip to content

Commit

Permalink
fix(phenotyper): improve support for combinations in outside calls
Browse files Browse the repository at this point in the history
  • Loading branch information
markwoon committed Aug 27, 2024
1 parent ac0c52e commit b1a6deb
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 75 deletions.
14 changes: 1 addition & 13 deletions src/main/java/org/pharmgkb/pharmcat/Pipeline.java
Original file line number Diff line number Diff line change
Expand Up @@ -286,25 +286,13 @@ public PipelineResult call() throws IOException {
SortedSet<OutsideCall> outsideCalls = new TreeSet<>();
if (m_phenotyperOutsideCallsFile != null && !m_phenotyperOutsideCallsFile.isEmpty()) {
for (Path outsideCallPath : m_phenotyperOutsideCallsFile) {
for (OutsideCall call : OutsideCallParser.parse(outsideCallPath)) {
for (OutsideCall call : OutsideCallParser.parse(m_env, outsideCallPath)) {
String gene = call.getGene();
if (!m_env.hasGene(gene)) {
String msg = "Discarded outside call for " + gene + " because it is not supported by PharmCAT.";
output.add(AnsiConsole.styleWarning(msg));
continue;
}
if (!m_env.isActivityScoreGene(gene)) {
if (call.getDiplotype() == null && call.getPhenotype() == null) {
String msg = gene + " is not an activity score gene but has outside call with only an " +
"activity score. PharmCAT will not be able to provide any recommendations based on this gene.";
output.add(AnsiConsole.styleWarning(msg));
}
}
for (String hap : call.getHaplotypes()) {
if (!m_env.isValidNamedAllele(gene, hap)) {
call.addWarning("Undocumented " + gene + " named allele in outside call: " + hap);
}
}
outsideCalls.add(call);
call.getWarnings().forEach(w -> output.add(AnsiConsole.styleWarning("WARNING: " + w)));
outsideCallMap.computeIfAbsent(gene, g -> new HashSet<>()).add(call);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*/
public class CombinationMatch extends BaseMatch {
public static final String COMBINATION_JOINER = " + ";
public static final String COMBINATION_JOINER_REGEX = " \\+ ";
public static final Splitter COMBINATION_NAME_SPLITTER = Splitter.on(COMBINATION_JOINER).trimResults();
@Expose
@SerializedName("componentHaplotypes")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.pharmgkb.pharmcat.Env;
import org.pharmgkb.pharmcat.phenotype.model.OutsideCall;


Expand Down Expand Up @@ -41,7 +42,8 @@
public class OutsideCallParser {
private static final Predicate<String> sf_nonCommentLine = (l) -> StringUtils.isNotBlank(l) && !l.startsWith("#");

public static List<OutsideCall> parse(Path filePath) throws IOException {

public static List<OutsideCall> parse(Env env, Path filePath) throws IOException {
Preconditions.checkNotNull(filePath);

List<OutsideCall> calls = new ArrayList<>();
Expand All @@ -51,20 +53,20 @@ public static List<OutsideCall> parse(Path filePath) throws IOException {
while ((line = reader.readLine()) != null) {
x += 1;
if (sf_nonCommentLine.test(line)) {
calls.add(new OutsideCall(line, x));
calls.add(new OutsideCall(env, line, x));
}
}
}
return calls;
}

public static Set<OutsideCall> parse(String outsideCallData) {
public static Set<OutsideCall> parse(Env env, String outsideCallData) {
Set<OutsideCall> calls = new HashSet<>();
String[] lines = StringUtils.stripToEmpty(outsideCallData).split("\n");
for (int x = 0; x < lines.length; x += 1) {
String line = lines[x];
if (sf_nonCommentLine.test(line)) {
calls.add(new OutsideCall(line, x + 1));
calls.add(new OutsideCall(env, line, x + 1));
}
}
return calls;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.pharmgkb.common.util.ComparisonChain;
import org.pharmgkb.pharmcat.Constants;
import org.pharmgkb.pharmcat.Env;
import org.pharmgkb.pharmcat.haplotype.model.CombinationMatch;
import org.pharmgkb.pharmcat.phenotype.PhenotypeUtils;
import org.pharmgkb.pharmcat.reporter.BadOutsideCallException;
import org.pharmgkb.pharmcat.util.HaplotypeNameComparator;
Expand All @@ -36,8 +39,8 @@ public class OutsideCall implements Comparable<OutsideCall> {
private static final int IDX_ACTIVITY = 3;

private final String m_gene;
private final @Nullable String m_diplotype;
private final List<String> m_diplotypes;
private @Nullable String m_diplotype;
private List<String> m_diplotypes;
private @Nullable String m_phenotype = null;
private @Nullable String m_activityScore = null;
private final SortedSet<String> m_haplotypes = new TreeSet<>(HaplotypeNameComparator.getComparator());
Expand All @@ -49,7 +52,7 @@ public class OutsideCall implements Comparable<OutsideCall> {
* @param line a TSV-formatted string
* @throws RuntimeException if the data is not in the expected format
*/
public OutsideCall(String line, int lineNumber) throws RuntimeException {
public OutsideCall(Env env, String line, int lineNumber) throws RuntimeException {
List<String> fields = sf_lineSplitter.splitToList(line);
if (fields.size() < 2) {
throw new BadOutsideCallException("Line " + lineNumber + ": Expected at least 2 TSV fields, got " + fields.size());
Expand All @@ -59,26 +62,42 @@ public OutsideCall(String line, int lineNumber) throws RuntimeException {
if (m_gene == null) {
throw new BadOutsideCallException("Line " + lineNumber + ": No gene specified");
}
m_diplotype = StringUtils.stripToNull(fields.get(IDX_DIPS));
if (fields.size() >= 3) {
m_phenotype = PhenotypeUtils.normalize(fields.get(IDX_PHENO).replaceAll(m_gene, ""));
}
if (fields.size() >= 4) {
m_activityScore = StringUtils.stripToNull(fields.get(IDX_ACTIVITY));
}

String diplotype = StringUtils.stripToNull(fields.get(IDX_DIPS));
if (fields.size() == 2 && (diplotype == null || diplotype.equals(sf_diplotypeSeparator))) {
if (!env.hasGene(m_gene)) {
return;
}
if (m_phenotype == null && m_activityScore == null && m_diplotype == null) {
throw new BadOutsideCallException("Specify a diplotype, phenotype, or activity score for " + m_gene);
}

if (fields.size() == 2 && (m_diplotype == null || m_diplotype.equals(sf_diplotypeSeparator))) {
if (StringUtils.isBlank(fields.get(IDX_DIPS))) {
throw new BadOutsideCallException("Line " + lineNumber + ": No diplotype specified");
} else {
throw new BadOutsideCallException("Line " + lineNumber + ": Invalid diplotype specified");
}
}

if (diplotype == null) {
m_diplotype = null;
if (m_diplotype == null) {
m_diplotypes = ImmutableList.of();
// Warn only when the gene is NOT an activity score gene and no phenotype was supplied:
// with no diplotype and no phenotype, the call carries nothing but an activity score,
// which cannot be used for a gene that is not scored by activity.
if (m_phenotype == null && !env.isActivityScoreGene(m_gene)) {
  m_warnings.add(m_gene + " is not an activity score gene but has outside call with only an " +
      "activity score. PharmCAT will not be able to provide any recommendations based on this gene.");
}
} else {
// strip any prefix of the gene symbol
List<String> alleles = sf_diplotypeSplitter.splitToList(diplotype).stream()
List<String> alleles = sf_diplotypeSplitter.splitToList(m_diplotype).stream()
.map(a -> a.replaceFirst("^" + m_gene + "\\s*", ""))
.toList();
if (alleles.size() > 2) {
throw new BadOutsideCallException("Line " + lineNumber + ": Too many alleles specified in " + diplotype);
throw new BadOutsideCallException("Line " + lineNumber + ": Too many alleles specified in " + m_diplotype);
}

if (m_gene.equals("CYP2D6")) {
Expand Down Expand Up @@ -124,6 +143,46 @@ public OutsideCall(String line, int lineNumber) throws RuntimeException {
.toList();
}

// convert alleles from combination format if applicable
alleles = alleles.stream()
.map(a -> {
if (!env.isValidNamedAllele(m_gene, a)) {
String fixedA;
if (a.startsWith("[") && a.endsWith("]")) {
// convert PharmCAT style combinations into combinations recognized by phenotyper
fixedA = a.substring(1, a.length() - 1);
if (env.isValidNamedAllele(m_gene, fixedA)) {
m_warnings.add("Converting outside call for " + m_gene + " from '" + a + "', to '" + fixedA +
"'.");
return fixedA;
}
} else {
fixedA = a;
}
if (fixedA.contains(CombinationMatch.COMBINATION_JOINER)) {
fixedA = fixedA.replaceAll(CombinationMatch.COMBINATION_JOINER_REGEX, "+");
if (env.isValidNamedAllele(m_gene, fixedA)) {
m_warnings.add("Converting outside call for " + m_gene + " from '" + a + "', to '" + fixedA +
"'.");
return fixedA;
}
}
StringBuilder builder = new StringBuilder().append("Undocumented ")
.append(m_gene)
.append(" named ");
if (Constants.isVariantGene(m_gene)) {
builder.append("variant");
} else {
builder.append("allele");
}
builder.append(" in outside call: ")
.append(a);
m_warnings.add(builder.toString());
}
return a;
})
.toList();

// re-join alleles to eliminate white space when a gene symbol is used in diplotype
m_diplotype = String.join(sf_diplotypeSeparator, alleles);
m_diplotypes = ImmutableList.of(m_diplotype);
Expand All @@ -141,10 +200,6 @@ public OutsideCall(String line, int lineNumber) throws RuntimeException {
if (fields.size() >= 4) {
m_activityScore = StringUtils.stripToNull(fields.get(IDX_ACTIVITY));
}

if (m_phenotype == null && m_activityScore == null && m_diplotype == null) {
throw new BadOutsideCallException("Specify a diplotype, phenotype, or activity score for " + m_gene);
}
}

public void addWarning(String warning) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import java.io.IOException;
import java.nio.file.Path;
import java.util.List;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInfo;
import org.pharmgkb.pharmcat.Env;
import org.pharmgkb.pharmcat.TestUtils;
import org.pharmgkb.pharmcat.phenotype.model.OutsideCall;
import org.pharmgkb.pharmcat.reporter.BadOutsideCallException;
Expand All @@ -14,6 +16,13 @@


class OutsideCallParserTest {
private static Env s_env;

/**
 * Creates the {@link Env} instance that the tests in this class pass to the parser.
 *
 * @throws Exception if constructing the {@code Env} fails
 */
@BeforeAll
static void prepare() throws Exception {
  OutsideCallParserTest.s_env = new Env();
}


@Test
void testMinimalInput(TestInfo testInfo) throws IOException {
Expand All @@ -22,7 +31,7 @@ void testMinimalInput(TestInfo testInfo) throws IOException {
fw.write("CYP2C9\t*1/*2");
}

List<OutsideCall> calls = OutsideCallParser.parse(outsideCallPath);
List<OutsideCall> calls = OutsideCallParser.parse(s_env, outsideCallPath);
assertNotNull(calls);
assertEquals(1, calls.size());
assertEquals("CYP2C9", calls.get(0).getGene());
Expand All @@ -36,7 +45,7 @@ void testGenePrefixStripping(TestInfo testInfo) throws IOException {
fw.write("CYP2C9\tCYP2C9*1/CYP2C9 *2");
}

List<OutsideCall> calls = OutsideCallParser.parse(outsideCallPath);
List<OutsideCall> calls = OutsideCallParser.parse(s_env, outsideCallPath);
assertNotNull(calls);
assertEquals(1, calls.size());
assertEquals("CYP2C9", calls.get(0).getGene());
Expand All @@ -50,7 +59,7 @@ void testPhenotype(TestInfo testInfo) throws IOException {
fw.write("CYP2C9\t*1/*2\tNormal Metabolizer");
}

List<OutsideCall> calls = OutsideCallParser.parse(outsideCallPath);
List<OutsideCall> calls = OutsideCallParser.parse(s_env, outsideCallPath);
assertNotNull(calls);
assertEquals(1, calls.size());
assertEquals("CYP2C9", calls.get(0).getGene());
Expand All @@ -65,7 +74,7 @@ void testPrefixedPhenotype(TestInfo testInfo) throws IOException {
fw.write("CYP2C9\t*1/*2\tCYP2C9 Normal Metabolizer");
}

List<OutsideCall> calls = OutsideCallParser.parse(outsideCallPath);
List<OutsideCall> calls = OutsideCallParser.parse(s_env, outsideCallPath);
assertNotNull(calls);
assertEquals(1, calls.size());
assertEquals("CYP2C9", calls.get(0).getGene());
Expand All @@ -83,7 +92,7 @@ void testTwoGenes(TestInfo testInfo) throws IOException {
""");
}

List<OutsideCall> calls = OutsideCallParser.parse(tmpOutsideCallPath);
List<OutsideCall> calls = OutsideCallParser.parse(s_env, tmpOutsideCallPath);
assertNotNull(calls);
assertEquals(2, calls.size());
assertEquals("CYP2C9", calls.get(0).getGene());
Expand All @@ -103,7 +112,7 @@ void testBadFormat(TestInfo testInfo) throws IOException {
""");
}

assertThrows(BadOutsideCallException.class, () -> OutsideCallParser.parse(outsideCallPath));
assertThrows(BadOutsideCallException.class, () -> OutsideCallParser.parse(s_env, outsideCallPath));
}


Expand All @@ -122,7 +131,7 @@ void testCommentsAndEmptyLines(TestInfo testInfo) throws IOException {
""");
}

List<OutsideCall> calls = OutsideCallParser.parse(outsideCallPath);
List<OutsideCall> calls = OutsideCallParser.parse(s_env, outsideCallPath);
assertEquals(2, calls.size());
assertEquals("*1/*2", calls.get(0).getDiplotype());
assertEquals("*3/*4", calls.get(1).getDiplotype());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void testCyp2C19Het() throws Exception {

Phenotyper phenotyper = new Phenotyper(s_env,
readMatchData("Cyp2C19Het.match.json"),
OutsideCallParser.parse("CYP2D6\t*1/*3"), warnings);
OutsideCallParser.parse(s_env, "CYP2D6\t*1/*3"), warnings);

assertCalledByMatcher(phenotyper, "CYP2C19");
assertReportable(phenotyper, "CYP2D6");
Expand All @@ -66,7 +66,7 @@ void testCyp2C19Het() throws Exception {
void testCyp2D6Only() throws Exception {
Phenotyper phenotyper = new Phenotyper(s_env,
new ArrayList<>(),
OutsideCallParser.parse("CYP2D6\t*1/*3"), null);
OutsideCallParser.parse(s_env, "CYP2D6\t*1/*3"), null);

assertReportable(phenotyper, "CYP2D6");

Expand Down
Loading

0 comments on commit b1a6deb

Please sign in to comment.