Skip to content

Commit

Permalink
analysis: improve ROH analysis and add Junit tests, #TASK-5795, #TASK…
Browse files Browse the repository at this point in the history
…-5343
  • Loading branch information
jtarraga committed Mar 20, 2024
1 parent 8418479 commit 21be031
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.AnalysisUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.analysis.wrappers.samtools.SamtoolsWrapperAnalysisExecutor;
import org.opencb.opencga.catalog.db.api.FileDBAdaptor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.FileManager;
Expand All @@ -32,6 +33,9 @@
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;

import java.nio.file.Path;
import java.nio.file.Paths;
Expand All @@ -52,7 +56,6 @@ public class RohWrapperAnalysis extends OpenCgaToolScopeStudy {
@Override
protected void check() throws Exception {
super.check();
setUpStorageEngineExecutor(study);

// Check sample
if (StringUtils.isEmpty(rohParams.getSampleId())) {
Expand All @@ -68,6 +71,8 @@ protected void check() throws Exception {

@Override
protected void run() throws Exception {
setUpStorageEngineExecutor(study);

step(getId(), () -> {
// Get VCF file
Path vcfPath = null;
Expand All @@ -85,11 +90,30 @@ protected void run() throws Exception {
vcfPath = Paths.get(fileResult.first().getUri().getPath());
} else {
// Export variants to VCF file
throw new ToolException("VCF for sample " + rohParams.getSampleId() + " not foud; export not yet implemented");
vcfPath = getOutDir().resolve(rohParams.getSampleId() + "." + getJobId() + ".vcf.gz");

VariantQuery variantQuery = new VariantQuery()
.study(study)
.sample(rohParams.getSampleId())
.includeSampleData("GT")
.unknownGenotype("./.");

QueryOptions queryOptions = QueryOptions.empty();

logger.info("Export variants for sample {} to the file {}", rohParams.getSampleId(), vcfPath);
logger.info("Export query: {}", query.toJson());
logger.info("Export query options: {}", queryOptions.toJson());

try {
getVariantStorageManager().exportData(vcfPath.toString(), VariantWriterFactory.VariantOutputFormat.VCF_GZ, null,
variantQuery, queryOptions, token);
} catch (StorageEngineException | CatalogException e) {
throw new ToolException(e);
}
}

// Create the ROH analysis executor
RohWrapperAnalysisExecutor executor = new RohWrapperAnalysisExecutor()
// Get he ROH analysis executor and execute !!!
getToolExecutor(RohWrapperAnalysisExecutor.class)
.setSampleId(rohParams.getSampleId())
.setChromosome(rohParams.getChromosome())
.setVcfPath(vcfPath)
Expand All @@ -104,10 +128,8 @@ protected void run() throws Exception {
.setHomozygSnp(rohParams.getHomozygSnp())
.setHomozygHet(rohParams.getHomozygHet())
.setHomozygDensity(rohParams.getHomozygDensity())
.setHomozygGap(rohParams.getHomozygGap());

// Execute
executor.execute();
.setHomozygGap(rohParams.getHomozygGap())
.execute();
});
}
}
Original file line number Diff line number Diff line change
@@ -1,31 +1,28 @@
package org.opencb.opencga.analysis.wrappers.roh;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.opencb.commons.annotations.DataField;
import org.opencb.commons.utils.DockerUtils;
import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureAnalysis;
import org.opencb.opencga.analysis.variant.mutationalSignature.MutationalSignatureLocalAnalysisExecutor;
import org.opencb.opencga.analysis.wrappers.executors.DockerWrapperAnalysisExecutor;
import org.opencb.opencga.analysis.wrappers.samtools.SamtoolsWrapperAnalysis;
import org.opencb.opencga.core.api.FieldConstants;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;

@ToolExecutor(id = RohWrapperAnalysisExecutor.ID,
tool = SamtoolsWrapperAnalysis.ID,
tool = RohWrapperAnalysis.ID,
source = ToolExecutor.Source.STORAGE,
framework = ToolExecutor.Framework.LOCAL)
public class RohWrapperAnalysisExecutor extends DockerWrapperAnalysisExecutor {

public final static String ID = RohWrapperAnalysis.ID + "-local";
public static final String ID = RohWrapperAnalysis.ID + "-local";

private String study;
private Path vcfPath;
Expand All @@ -44,17 +41,15 @@ public class RohWrapperAnalysisExecutor extends DockerWrapperAnalysisExecutor {
private String homozygDensity;
private String homozygGap;

private Path opencgaHome;

private Logger logger = LoggerFactory.getLogger(this.getClass());

@Override
public void run() throws ToolException {
opencgaHome = Paths.get(getExecutorParams().getString("opencgaHome"));
Path opencgaHome = Paths.get(getExecutorParams().getString("opencgaHome"));

Path symbolicLink = null;
try {
// Build command line to run R script via docker image

String jobDir = "/jobdir";
String scriptDir = "/scripts";

Expand All @@ -63,6 +58,21 @@ public void run() throws ToolException {
inputBindings.add(new AbstractMap.SimpleEntry<>(opencgaHome.resolve("analysis/" + RohWrapperAnalysis.ID)
.toAbsolutePath().toString(), scriptDir));

// Check if the vcf file is in the job dir
if (!getOutDir().toAbsolutePath().toString().equals(vcfPath.getParent().toAbsolutePath().toString())) {
// Create symbolic link
symbolicLink = Files.createSymbolicLink(getOutDir().resolve(vcfPath.getFileName()), vcfPath);
logger.info("The symbolic link to the vcf was created: {}", symbolicLink);

// Add to the docker input bindings
inputBindings.add(new AbstractMap.SimpleEntry<>(vcfPath.getParent().toAbsolutePath().toString(),
vcfPath.getParent().toAbsolutePath().toString()));

// Finally, update the VCF path with the symbolic link to be used by the docker command line
vcfPath = symbolicLink;
}


// Output binding
AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(getOutDir().toAbsolutePath().toString(),
jobDir);
Expand All @@ -79,41 +89,48 @@ public void run() throws ToolException {
if (StringUtils.isNotEmpty(getFilter())) {
cli.append("--filter ").append(getFilter());
}
// if (getSkipGenotypeQuality() != null && !getSkipGenotypeQuality()) {
// echo "--genotype-quality INTEGER GQ (VCF genotype quality annotation field) threshold to filter in variants in the ROH analysis. Default: 40 (GQ>40)."
// echo "--skip-genotype-quality Flag to not use the GQ (VCF genotype quality annotation field) to filter in variants in the ROH analysis. Default: false"
// TODO: manage --genotype-quality and --skip-genotype-quality
cli.append(" --skip-genotype-quality ");
if (getHomozygWindowSnp() != null) {
cli.append("--homozyg-window-snp ").append(getHomozygWindowSnp());
cli.append(" --homozyg-window-snp ").append(getHomozygWindowSnp());
}
if (getHomozygWindowHet() != null) {
cli.append("--homozyg-window-het ").append(getHomozygWindowHet());
cli.append(" --homozyg-window-het ").append(getHomozygWindowHet());
}
if (getHomozygWindowMissing() != null) {
cli.append("--homozyg-window-missing ").append(getHomozygWindowMissing());
cli.append(" --homozyg-window-missing ").append(getHomozygWindowMissing());
}
if (getHomozygWindowThreshold() != null) {
cli.append("--homozyg-window-threshold ").append(getHomozygWindowThreshold());
cli.append(" --homozyg-window-threshold ").append(getHomozygWindowThreshold());
}
if (getHomozygKb() != null) {
cli.append("--homozyg-kb ").append(getHomozygKb());
cli.append(" --homozyg-kb ").append(getHomozygKb());
}
if (getHomozygWindowSnp() != null) {
cli.append("--homozyg-snp ").append(getHomozygWindowSnp());
cli.append(" --homozyg-snp ").append(getHomozygWindowSnp());
}
if (getHomozygHet() != null) {
cli.append("--homozyg-het ").append(getHomozygHet());
cli.append(" --homozyg-het ").append(getHomozygHet());
}
if (getHomozygDensity() != null) {
cli.append("--homozyg-density ").append(getHomozygDensity());
cli.append(" --homozyg-density ").append(getHomozygDensity());
}
if (getHomozygGap() != null) {
cli.append("--homozyg-gap ").append(getHomozygGap());
cli.append(" --homozyg-gap ").append(getHomozygGap());
}

// Execute R script in docker
DockerUtils.run(getDockerImageName(), inputBindings, outputBinding, cli.toString(), null);
DockerUtils.run(getDockerImageName() + ":" + getDockerImageVersion(), inputBindings, outputBinding, cli.toString(), null);
} catch (Exception e) {
throw new ToolException(e);
} finally {
if (symbolicLink != null) {
try {
Files.delete(symbolicLink);
} catch (IOException e) {
logger.warn("Could not delete the symbolic link {}", symbolicLink, e);
}
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,13 @@ public Path isolateOpenCGA() throws IOException {
Files.copy(inputStream, analysisPath.resolve(exomiserFile), StandardCopyOption.REPLACE_EXISTING);
}

// ROH analysis
analysisPath = Files.createDirectories(opencgaHome.resolve("analysis/roh")).toAbsolutePath();
Files.copy(Paths.get("../opencga-app/app/analysis").resolve("roh/singles_roh_analysis.sh"),
analysisPath.resolve("singles_roh_analysis.sh"), StandardCopyOption.REPLACE_EXISTING);
Files.copy(Paths.get("../opencga-app/app/analysis").resolve("roh/roh_opencgadatamodel_visualisation.py"),
analysisPath.resolve("roh_opencgadatamodel_visualisation.py"), StandardCopyOption.REPLACE_EXISTING);

return opencgaHome;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ public class VariantAnalysisTest {
private static String cancer_sample = "AR2.10039966-01T";
private static String germline_sample = "AR2.10039966-01G";

public static final String ROH_STUDY = "roh";
private static String roh_sample = "SAMPLE_1";

@Rule
public ExpectedException thrown = ExpectedException.none();

Expand Down Expand Up @@ -246,6 +249,15 @@ public void setUp() throws Throwable {
SampleUpdateParams updateParams = new SampleUpdateParams().setSomatic(true);
catalogManager.getSampleManager().update(CANCER_STUDY, cancer_sample, updateParams, null, token);

// Cancer (SV)
file = opencga.createFile(ROH_STUDY, "variant-test-roh.vcf.gz", token);
variantStorageManager.index(ROH_STUDY, file.getId(), opencga.createTmpOutdir("_index"), new ObjectMap(VariantStorageOptions.ANNOTATE.key(), true), token);

updateParams = new SampleUpdateParams().setSomatic(true);
catalogManager.getSampleManager().update(ROH_STUDY, roh_sample, updateParams, null, token);



opencga.getStorageConfiguration().getVariant().setDefaultEngine(storageEngine);
VariantStorageEngine engine = opencga.getStorageEngineFactory().getVariantStorageEngine(storageEngine, DB_NAME);
if (storageEngine.equals(HadoopVariantStorageEngine.STORAGE_ENGINE_ID)) {
Expand Down Expand Up @@ -307,6 +319,14 @@ public void setUpCatalogManager() throws IOException, CatalogException {
.create(CANCER_STUDY, new Individual("AR2.10039966-01", "AR2.10039966-01", new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "",
samples, false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()), Collections.emptyList(), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first();
assertEquals(2, individual.getSamples().size());

// roh
catalogManager.getStudyManager().create(projectId, ROH_STUDY, null, "Phase 1", "Done", null, null, null, null, null, token);
sample = new Sample().setId(roh_sample).setSomatic(true);
individual = catalogManager.getIndividualManager()
.create(CANCER_STUDY, new Individual(roh_sample, roh_sample, new Individual(), new Individual(), new Location(), SexOntologyTermAnnotation.initMale(), null, null, null, null, "",
Collections.singletonList(sample), false, 0, Collections.emptyList(), Collections.emptyList(), Collections.emptyList(), IndividualInternal.init(), Collections.emptyMap()), Collections.emptyList(), new QueryOptions(ParamConstants.INCLUDE_RESULT_PARAM, true), token).first();
assertEquals(1, individual.getSamples().size());
}

@Test
Expand Down Expand Up @@ -1126,26 +1146,29 @@ public void testCellbaseConfigure() throws Exception {
}

@Test
public void testRoh() throws IOException, ToolException {
Path rohOutDir = Paths.get(opencga.createTmpOutdir("_roh"));
public void testRohUsingVcf() throws IOException, ToolException {
Path rohOutDir = Paths.get(opencga.createTmpOutdir("_roh_vcf"));

// SNV fitting
RohWrapperParams params = new RohWrapperParams();
params.setSampleId(roh_sample);
params.setChromosome("1");

toolRunner.execute(RohWrapperAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, ROH_STUDY), rohOutDir, null, token);
System.out.println("rohOutDir = " + rohOutDir.toAbsolutePath());
}

@Test
public void testRohUsingExport() throws IOException, ToolException {
Path rohOutDir = Paths.get(opencga.createTmpOutdir("_roh_export"));

// SNV fitting
RohWrapperParams params = new RohWrapperParams();
params.setSampleId(son);
params.setChromosome("1");
// params.setId(snvSignature.getId());
// params.setFitId("snv-fitting-1");
// params.setFitMethod("FitMS");
// params.setFitSigVersion("RefSigv2");
// params.setFitOrgan("Breast");
// params.setFitNBoot(100);
// params.setFitThresholdPerc(5.0f);
// params.setFitThresholdPval(0.05f);
// params.setFitMaxRareSigs(1);
// params.setSkip("catalogue");

toolRunner.execute(RohWrapperAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY),
rohOutDir, null, token);

toolRunner.execute(RohWrapperAnalysis.class, params, new ObjectMap(ParamConstants.STUDY_PARAM, STUDY), rohOutDir, null, token);
System.out.println("rohOutDir = " + rohOutDir.toAbsolutePath());
}

//-------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 21be031

Please sign in to comment.