Skip to content

Commit

Permalink
feat(pharmcat): support more than one outside call file
Browse files Browse the repository at this point in the history
  • Loading branch information
whaleyr authored and markwoon committed Apr 23, 2024
1 parent 940f6cf commit 154ac49
Show file tree
Hide file tree
Showing 17 changed files with 406 additions and 186 deletions.
10 changes: 7 additions & 3 deletions src/main/java/org/pharmgkb/pharmcat/BaseConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import java.util.Objects;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.google.common.base.Splitter;
import org.apache.commons.io.FilenameUtils;
Expand All @@ -28,7 +30,8 @@ public class BaseConfig {
public static final String MATCHER_SUFFIX = ".match";
public static final String PHENOTYPER_SUFFIX = ".phenotype";
public static final String REPORTER_SUFFIX = ".report";
public static final String OUTSIDE_SUFFIX = ".outside";
public static final Pattern OUTSIDE_SUFFIX_PATTERN = Pattern.compile("^(.+)\\.outside\\d*$");
public static final Pattern OUTSIDE_FILENAME_PATTERN = Pattern.compile("^(.+)\\.outside\\d*\\.tsv$");
private static final Splitter sf_commaSplitter = Splitter.on(",").trimResults().omitEmptyStrings();
boolean runMatcher = true;
Path definitionDir;
Expand Down Expand Up @@ -170,8 +173,9 @@ public static String getBaseFilename(Path inputFile) {
if (filename.endsWith(MATCHER_SUFFIX)) {
filename = filename.substring(0, filename.length() - MATCHER_SUFFIX.length());
}
if (filename.endsWith(OUTSIDE_SUFFIX)) {
filename = filename.substring(0, filename.length() - OUTSIDE_SUFFIX.length());
Matcher m = BaseConfig.OUTSIDE_SUFFIX_PATTERN.matcher(filename);
if (m.matches()) {
filename = m.group(1);
}
if (filename.endsWith(PHENOTYPER_SUFFIX)) {
filename = filename.substring(0, filename.length() - PHENOTYPER_SUFFIX.length());
Expand Down
38 changes: 27 additions & 11 deletions src/main/java/org/pharmgkb/pharmcat/BatchPharmCAT.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import org.apache.commons.io.FileUtils;
Expand All @@ -36,7 +37,7 @@ public class BatchPharmCAT {
private final boolean m_verbose;
private final Map<String, VcfFile> m_vcfFilesToProcess = new TreeMap<>();
private final Map<String, Path> m_matchFilesToProcess = new TreeMap<>();
private final Map<String, Path> m_outsideCallFilesToProcess = new TreeMap<>();
private final Map<String, List<Path>> m_outsideCallFilesToProcess = new TreeMap<>();
private final Map<String, Path> m_phenotypeFilesToProcess = new TreeMap<>();


Expand Down Expand Up @@ -158,9 +159,9 @@ private BatchPharmCAT(BaseConfig config, Path inputDir, @Nullable Path vcfFile,
if (config.runPhenotyper) {
m_matchFilesToProcess.put(basename, file);
}
} else if (name.endsWith(BaseConfig.OUTSIDE_SUFFIX + ".tsv")) {
} else if (BaseConfig.OUTSIDE_FILENAME_PATTERN.matcher(name).matches()) {
if (config.runPhenotyper) {
m_outsideCallFilesToProcess.put(basename, file);
m_outsideCallFilesToProcess.computeIfAbsent(basename, k -> new ArrayList<>()).add(file);
}
} else if (name.endsWith(BaseConfig.PHENOTYPER_SUFFIX + ".json")) {
if (config.runReporter) {
Expand All @@ -175,12 +176,11 @@ private BatchPharmCAT(BaseConfig config, Path inputDir, @Nullable Path vcfFile,
return;
}
// input VCF file trumps other VCF files in inputDir
Set<String> vcfBasenames = m_vcfFilesToProcess.keySet();
String vcfBasename = BaseConfig.getBaseFilename(vcfFile);
m_vcfFilesToProcess.clear();
m_vcfFilesToProcess.put(vcfBasename, new VcfFile(vcfFile));
if (config.runPhenotyper) {
Path f = m_outsideCallFilesToProcess.get(vcfBasename);
List<Path> f = m_outsideCallFilesToProcess.get(vcfBasename);
m_outsideCallFilesToProcess.clear();
if (f != null) {
m_outsideCallFilesToProcess.put(vcfBasename, f);
Expand All @@ -198,7 +198,7 @@ private BatchPharmCAT(BaseConfig config, Path inputDir, @Nullable Path vcfFile,
}
if (config.runPhenotyper) {
types.add("*" + BaseConfig.MATCHER_SUFFIX + ".json");
types.add("*" + BaseConfig.OUTSIDE_SUFFIX + ".tsv");
types.add(BaseConfig.OUTSIDE_FILENAME_PATTERN.pattern());
}
if (config.runReporter) {
types.add("*" + BaseConfig.PHENOTYPER_SUFFIX + ".json");
Expand Down Expand Up @@ -335,7 +335,7 @@ public class Builder {
private String m_sampleId;
private boolean m_runPhenotyper;
private Path m_piFile;
private Path m_poFile;
private List<Path> m_poFile = null;
private boolean m_runReporter;
private Path m_riFile;
private boolean m_singleSample;
Expand Down Expand Up @@ -418,12 +418,12 @@ private void findPhenotyperFiles(String basename) {
}
// po file
if (m_outsideCallFilesToProcess.containsKey(basename)) {
Path file = m_outsideCallFilesToProcess.get(basename);
m_poFile = pickFirstFile(m_poFile, file);
List<Path> files = m_outsideCallFilesToProcess.get(basename);
m_poFile = pickFirst(m_poFile, files);
m_outsideCallFilesToProcess.remove(basename);

if ((m_piFile == null && !m_config.runMatcher) || (m_piFile == null && m_vcfFile == null)) {
System.out.println("* Warning: lone outside call file (" + m_poFile.getFileName() +
System.out.println("* Warning: lone outside call file (" + printFileNames(m_poFile) +
") with no matching .vcf or " + BaseConfig.MATCHER_SUFFIX + ".json");
}
}
Expand Down Expand Up @@ -457,4 +457,20 @@ private Path pickFirstFile(Path origFile, Path newFile) {
}
}
}

/**
 * Decides which list of outside call files to keep when a second candidate list shows up.
 * <p>
 * The first list seen wins: if {@code origList} is already set, {@code newList} is reported on
 * stdout as ignored and {@code origList} is returned unchanged.  Otherwise {@code newList} is
 * adopted as-is.
 *
 * @param origList the list chosen so far, or {@code null} if nothing has been chosen yet
 * @param newList the newly discovered list of files
 * @return {@code origList} when it is non-null, otherwise {@code newList}
 */
private List<Path> pickFirst(List<Path> origList, List<Path> newList) {
  if (origList != null) {
    // an earlier choice exists - tell the user which files are being dropped in its favor
    String ignoredNames = printFileNames(newList);
    String keptNames = printFileNames(origList);
    System.out.println("* Ignoring " + ignoredNames + " - using " + keptNames + " instead");
    return origList;
  }
  return newList;
}

/**
 * Renders the file names of the given paths as a single comma-separated string for display.
 * <p>
 * Only the last name element of each path is used.  A path with no name element (for example a
 * file system root, for which {@link Path#getFileName()} returns {@code null}) is rendered using
 * the path's own string form instead of failing with a {@link NullPointerException}.
 *
 * @param paths the paths to describe; may be {@code null} or empty
 * @return the file names joined by {@code ", "}, or an empty string if there are no paths
 */
private String printFileNames(Collection<Path> paths) {
  if (paths == null || paths.isEmpty()) {
    return "";
  }
  return paths.stream()
      .map(p -> {
        // getFileName() is null for root / zero-element paths - fall back to the full path
        Path fileName = p.getFileName();
        return (fileName != null ? fileName : p).toString();
      })
      .collect(Collectors.joining(", "));
}
}
20 changes: 14 additions & 6 deletions src/main/java/org/pharmgkb/pharmcat/PharmCAT.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.lang.invoke.MethodHandles;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
Expand Down Expand Up @@ -85,16 +86,23 @@ Please specify a VCF file (-vcf)"""
}

Path phenotyperInputFile = null;
Path phenotyperOutsideCallsFile = null;
List<Path> phenotyperOutsideCallsFiles = new ArrayList<>();
if (config.runPhenotyper) {
if (cliHelper.hasOption("pi")) {
phenotyperInputFile = cliHelper.getValidFile("pi", true);
}
if (cliHelper.hasOption("po")) {
phenotyperOutsideCallsFile = cliHelper.getValidFile("po", true);
for (String outsidePathString : cliHelper.getValues("po")) {
Path outsidePath = Paths.get(outsidePathString);
if (outsidePath.toFile().exists() && outsidePath.toFile().isFile()) {
phenotyperOutsideCallsFiles.add(outsidePath);
} else {
throw new ReportableException("Not a valid file: '" + outsidePathString);
}
}
}

if (vcfFile == null && phenotyperInputFile == null && phenotyperOutsideCallsFile == null) {
if (vcfFile == null && phenotyperInputFile == null && phenotyperOutsideCallsFiles.isEmpty()) {
System.out.println("""
No input for Phenotyper!
Expand All @@ -113,7 +121,7 @@ Please specify a VCF file (-vcf)"""
reporterInputFile = cliHelper.getValidFile("ri", true);
}

if (vcfFile == null && phenotyperInputFile == null && phenotyperOutsideCallsFile == null &&
if (vcfFile == null && phenotyperInputFile == null && phenotyperOutsideCallsFiles.isEmpty() &&
reporterInputFile == null) {
System.out.println(
"""
Expand Down Expand Up @@ -158,7 +166,7 @@ Please specify a VCF file (-vcf)"""
Pipeline pipeline = new Pipeline(env,
config.runMatcher, vcfFile, sampleId, singleSample,
config.topCandidateOnly, config.callCyp2d6, config.findCombinations, config.matcherHtml,
config.runPhenotyper, phenotyperInputFile, phenotyperOutsideCallsFile,
config.runPhenotyper, phenotyperInputFile, phenotyperOutsideCallsFiles,
config.runReporter, reporterInputFile, config.reporterTitle,
config.reporterSources, config.reporterCompact, config.reporterJson, config.reporterHtml,
config.outputDir, config.baseFilename, config.deleteIntermediateFiles,
Expand All @@ -182,7 +190,7 @@ Please specify a VCF file (-vcf)"""
Pipeline pipeline = new Pipeline(env,
false, null, null, true,
config.topCandidateOnly, config.callCyp2d6, config.findCombinations, config.matcherHtml,
config.runPhenotyper, phenotyperInputFile, phenotyperOutsideCallsFile,
config.runPhenotyper, phenotyperInputFile, phenotyperOutsideCallsFiles,
config.runReporter, reporterInputFile, config.reporterTitle,
config.reporterSources, config.reporterCompact, config.reporterJson, config.reporterHtml,
config.outputDir, config.baseFilename, config.deleteIntermediateFiles,
Expand Down
53 changes: 34 additions & 19 deletions src/main/java/org/pharmgkb/pharmcat/Pipeline.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.pharmgkb.common.util.AnsiConsole;
Expand Down Expand Up @@ -57,7 +62,7 @@ public enum Mode {

private final boolean m_runPhenotyper;
private Path m_phenotyperInputFile;
private Path m_phenotyperOutsideCallsFile;
private List<Path> m_phenotyperOutsideCallsFile;
private Path m_phenotyperJsonFile;

private final boolean m_runReporter;
Expand All @@ -81,7 +86,7 @@ public enum Mode {
public Pipeline(Env env,
boolean runMatcher, @Nullable VcfFile vcfFile, @Nullable String sampleId, boolean singleSample,
boolean topCandidateOnly, boolean callCyp2d6, boolean findCombinations, boolean matcherHtml,
boolean runPhenotyper, @Nullable Path phenotyperInputFile, @Nullable Path phenotyperOutsideCallsFile,
boolean runPhenotyper, @Nullable Path phenotyperInputFile, @Nullable List<Path> phenotyperOutsideCallsFile,
boolean runReporter, @Nullable Path reporterInputFile, @Nullable String reporterTitle,
@Nullable List<DataSource> reporterSources, boolean reporterCompact, boolean reporterJson, boolean reporterHtml,
@Nullable Path outputDir, @Nullable String baseFilename, boolean deleteIntermediateFiles,
Expand Down Expand Up @@ -113,8 +118,8 @@ public Pipeline(Env env,
Path inputFile = m_matcherJsonFile;
if (m_phenotyperInputFile != null) {
inputFile = m_phenotyperInputFile;
} else if (m_phenotyperOutsideCallsFile != null) {
inputFile = m_phenotyperOutsideCallsFile;
} else if (m_phenotyperOutsideCallsFile != null && !m_phenotyperOutsideCallsFile.isEmpty()) {
inputFile = m_phenotyperOutsideCallsFile.get(0);
}
if (inputFile == null) {
throw new IllegalStateException("No phenotyper input file");
Expand Down Expand Up @@ -277,22 +282,32 @@ public PipelineResult call() throws IOException {
calls = new ArrayList<>();
}

List<OutsideCall> outsideCalls = new ArrayList<>();
if (m_phenotyperOutsideCallsFile != null) {
for (OutsideCall call : OutsideCallParser.parse(m_phenotyperOutsideCallsFile)) {
if (!m_env.hasGene(call.getGene())) {
String msg = "Discarded outside call for " + call.getGene() + " because it is not supported by PharmCAT.";
output.add(AnsiConsole.styleWarning(msg));
continue;
}
if (!m_env.isActivityScoreGene(call.getGene())) {
if (call.getDiplotype() == null && call.getPhenotype() == null) {
String msg = call.getGene() + " is not an activity score gene but has outside call with only an " +
"activity score. PharmCAT will not be able to provide any recommendations based on this gene.";
Map<String, Set<OutsideCall>> outsideCallMap = new HashMap<>();
SortedSet<OutsideCall> outsideCalls = new TreeSet<>();
if (m_phenotyperOutsideCallsFile != null && !m_phenotyperOutsideCallsFile.isEmpty()) {
for (Path outsideCallPath : m_phenotyperOutsideCallsFile) {
for (OutsideCall call : OutsideCallParser.parse(outsideCallPath)) {
String gene = call.getGene();
if (!m_env.hasGene(gene)) {
String msg = "Discarded outside call for " + gene + " because it is not supported by PharmCAT.";
output.add(AnsiConsole.styleWarning(msg));
continue;
}
if (!m_env.isActivityScoreGene(gene)) {
if (call.getDiplotype() == null && call.getPhenotype() == null) {
String msg = gene + " is not an activity score gene but has outside call with only an " +
"activity score. PharmCAT will not be able to provide any recommendations based on this gene.";
output.add(AnsiConsole.styleWarning(msg));
}
}
outsideCalls.add(call);
outsideCallMap.computeIfAbsent(gene, g -> new HashSet<>()).add(call);
}
}
for (String gene : outsideCallMap.keySet()) {
if (outsideCallMap.get(gene).size() > 1) {
output.add(AnsiConsole.styleWarning("WARNING: Multiple outside calls for " + gene + "."));
}
outsideCalls.add(call);
}
}

Expand Down Expand Up @@ -396,11 +411,11 @@ private String getInputDescription() {
}
builder.append(m_phenotyperInputFile.getFileName());
}
if (m_phenotyperOutsideCallsFile != null) {
if (m_phenotyperOutsideCallsFile != null && !m_phenotyperOutsideCallsFile.isEmpty()) {
if (!builder.isEmpty()) {
builder.append(", ");
}
builder.append(m_phenotyperOutsideCallsFile.getFileName());
builder.append(m_phenotyperOutsideCallsFile.stream().map(p -> p.getFileName().toString()).collect(Collectors.joining(", ")));
}
if (m_reporterInputFile != null) {
if (!builder.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
Expand Down Expand Up @@ -56,8 +58,8 @@ public static List<OutsideCall> parse(Path filePath) throws IOException {
return calls;
}

public static List<OutsideCall> parse(String outsideCallData) {
List<OutsideCall> calls = new ArrayList<>();
public static Set<OutsideCall> parse(String outsideCallData) {
Set<OutsideCall> calls = new HashSet<>();
String[] lines = StringUtils.stripToEmpty(outsideCallData).split("\n");
for (int x = 0; x < lines.length; x += 1) {
String line = lines[x];
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/org/pharmgkb/pharmcat/phenotype/Phenotyper.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ public class Phenotyper {
* @param outsideCalls a List of {@link OutsideCall} objects
* @param variantWarnings map of VCF warnings, keyed to chromosomal position
*/
public Phenotyper(Env env, List<GeneCall> geneCalls, List<OutsideCall> outsideCalls,
public Phenotyper(Env env, List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls,
@Nullable Map<String, Collection<String>> variantWarnings) {
initialize(geneCalls, outsideCalls, env, DataSource.CPIC, variantWarnings);
initialize(geneCalls, outsideCalls, env, DataSource.DPWG, variantWarnings);
}


private void initialize(List<GeneCall> geneCalls, List<OutsideCall> outsideCalls, Env env, DataSource source,
private void initialize(List<GeneCall> geneCalls, Set<OutsideCall> outsideCalls, Env env, DataSource source,
@Nullable Map<String, Collection<String>> variantWarnings) {
SortedMap<String, GeneReport> reportMap = m_geneReports.computeIfAbsent(source, (s) -> new TreeMap<>());

Expand Down Expand Up @@ -97,7 +97,6 @@ private void initialize(List<GeneCall> geneCalls, List<OutsideCall> outsideCalls

} else {
// add alternate outside call
System.out.println("WARNING: Multiple outside calls for " + outsideCall.getGene());
geneReport.addOutsideCall(outsideCall, env);
continue;
}
Expand Down
Loading

0 comments on commit 154ac49

Please sign in to comment.