Skip to content

Commit

Permalink
Merge pull request #2522 from opencb/TASK-6722
Browse files Browse the repository at this point in the history
TASK-6722 - Variant Walker to enable user defined variant analysis
  • Loading branch information
j-coll authored Dec 20, 2024
2 parents e2b0541 + 005855f commit bdccb74
Show file tree
Hide file tree
Showing 99 changed files with 5,187 additions and 815 deletions.
20 changes: 17 additions & 3 deletions .github/workflows/test-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,28 @@ jobs:
with:
mongodb-version: 6.0
mongodb-replica-set: rs-test
- name: Maven build
- name: Maven build (skip tests)
run: mvn -B clean install -DskipTests -P ${{ inputs.hadoop }} -Dcheckstyle.skip ${{ inputs.mvn_opts }}
- name: Build Junit log file name
id: BuildJunitLogFileName
run: |
MODULE=$(basename ${{ (inputs.module == '' || inputs.module == 'all') && 'opencga' || inputs.module }} )
if [[ -z "$MODULE" ]]; then
MODULE="opencga"
fi
TAGS=$(echo ${{ inputs.test_profile }} | sed -e 's/run\([^,]*\)Tests/\1/g' | tr ',' '_' | tr '[:upper:]' '[:lower:]' )
echo "TESTS_LOG_FILE_NAME=junit_${{ inputs.hadoop }}_${TAGS}_${MODULE}.log" >> $GITHUB_OUTPUT
- name: Run Junit tests
run: mvn -B verify surefire-report:report --fail-never -Dsurefire.testFailureIgnore=true -f ${{ (inputs.module == '' || inputs.module == 'all') && '.' || inputs.module }} -P ${{ inputs.hadoop }},${{ inputs.test_profile }} -Dcheckstyle.skip ${{ inputs.mvn_opts }}
run: mvn -B verify surefire-report:report --fail-never -Dsurefire.testFailureIgnore=true -f ${{ (inputs.module == '' || inputs.module == 'all') && '.' || inputs.module }} -P ${{ inputs.hadoop }},${{ inputs.test_profile }} -Dcheckstyle.skip ${{ inputs.mvn_opts }} |& tee ${{ steps.BuildJunitLogFileName.outputs.TESTS_LOG_FILE_NAME }} |& grep -P '^\[[^\]]*(INFO|WARNING|ERROR)' --colour=never --line-buffered
- name: Upload Junit test logs
uses: actions/upload-artifact@v4
with:
name: ${{ steps.BuildJunitLogFileName.outputs.TESTS_LOG_FILE_NAME }}
path: ${{ steps.BuildJunitLogFileName.outputs.TESTS_LOG_FILE_NAME }}
- name: Publish Test Report on GitHub
uses: scacap/action-surefire-report@v1
env:
NODE_OPTIONS: '--max_old_space_size=4096'
NODE_OPTIONS: '--max_old_space_size=6144'
## Skip cancelled()
## https://docs.github.com/en/actions/learn-github-actions/expressions#cancelled
if: success() || failure()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.opencb.opencga.analysis.variant.relatedness.RelatednessAnalysis;
import org.opencb.opencga.analysis.wrappers.plink.PlinkWrapperAnalysisExecutor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.core.config.Analysis;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.individual.Individual;
Expand Down Expand Up @@ -117,7 +118,7 @@ public static RelatednessReport compute(String study, Family family, List<String
} catch (IOException e) {
throw new ToolException("Something wrong happened when copying files during the relatedness analysis execution");
}
File outFile = runIBD(FILTERED_BASENAME, freqPath, outDir);
File outFile = runIBD(FILTERED_BASENAME, freqPath, outDir, storageManager.getCatalogManager().getConfiguration().getAnalysis());

if (!outFile.exists()) {
throw new ToolException("Something wrong happened executing relatedness analysis");
Expand Down Expand Up @@ -298,7 +299,7 @@ public static void filterFamilyTpedFile(Path tPedPath, Path tPedFilteredPath, Pa
bw.close();
}

private static File runIBD(String basename, Path freqPath, Path outDir) throws ToolException {
private static File runIBD(String basename, Path freqPath, Path outDir, Analysis analysisConf) throws ToolException {
// Input bindings
List<AbstractMap.SimpleEntry<String, String>> inputBindings = new ArrayList<>();
inputBindings.add(new AbstractMap.SimpleEntry<>(freqPath.getParent().toString(), "/input"));
Expand All @@ -311,8 +312,8 @@ private static File runIBD(String basename, Path freqPath, Path outDir) throws T
String plinkParams = "plink1.9 --tfile /output/" + basename + " --genome rel-check --read-freq /input/" + FREQ_FILENAME
+ " --out /output/" + basename;
try {
PlinkWrapperAnalysisExecutor plinkExecutor = new PlinkWrapperAnalysisExecutor();
DockerUtils.run(plinkExecutor.getDockerImageName(), inputBindings, outputBinding, plinkParams, null);
String dockerImageName = PlinkWrapperAnalysisExecutor.getDockerImageName(analysisConf);
DockerUtils.run(dockerImageName, inputBindings, outputBinding, plinkParams, null);
} catch (IOException e) {
throw new ToolException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,17 +192,11 @@ public final ExecutionResult start() throws ToolException {
if (!erm.isClosed()) {
String message = "Unexpected system shutdown. Job killed by the system.";
privateLogger.error(message);
if (exception == null) {
exception = new RuntimeException(message);
}
try {
if (scratchDir != null) {
deleteScratchDirectory();
}
if (exception == null) {
exception = new RuntimeException(message);
}
logException(exception);
ExecutionResult result = erm.close(exception);
privateLogger.info("------- Tool '" + getId() + "' executed in "
+ TimeUtils.durationToString(result.getEnd().getTime() - result.getStart().getTime()) + " -------");
close(exception);
} catch (ToolException e) {
privateLogger.error("Error closing ExecutionResult", e);
}
Expand Down Expand Up @@ -271,13 +265,25 @@ public final ExecutionResult start() throws ToolException {
}
throw e;
} finally {
// If the shutdown hook has been executed, the ExecutionResultManager is already closed
if (!erm.isClosed()) {
result = close(exception);
} else {
result = erm.read();
}
}
return result;
}

private ExecutionResult close(Throwable exception) throws ToolException {
if (scratchDir != null) {
deleteScratchDirectory();
stopMemoryMonitor();
result = erm.close(exception);
logException(exception);
privateLogger.info("------- Tool '" + getId() + "' executed in "
+ TimeUtils.durationToString(result.getEnd().getTime() - result.getStart().getTime()) + " -------");
}
logException(exception);
stopMemoryMonitor();
ExecutionResult result = erm.close(exception);
privateLogger.info("------- Tool '" + getId() + "' executed in "
+ TimeUtils.durationToString(result.getEnd().getTime() - result.getStart().getTime()) + " -------");
return result;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,14 @@
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.tools.OpenCgaTool;
import org.opencb.opencga.catalog.io.IOManager;
import org.opencb.opencga.core.common.UriUtils;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.variant.VariantExportParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;

import java.net.URI;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -70,9 +66,8 @@ protected List<String> getSteps() {

@Override
protected void run() throws Exception {
List<URI> uris = new ArrayList<>(2);
step(ID, () -> {
Path outDir = getScratchDir();
Path outDir = getOutDir();
String outputFile = StringUtils.isEmpty(toolParams.getOutputFileName())
? outDir.toString()
: outDir.resolve(toolParams.getOutputFileName()).toString();
Expand All @@ -81,17 +76,9 @@ protected void run() throws Exception {
for (VariantQueryParam param : VariantQueryParam.values()) {
queryOptions.remove(param.key());
}
uris.addAll(variantStorageManager.exportData(outputFile,
variantStorageManager.exportData(outputFile,
outputFormat,
toolParams.getVariantsFile(), query, queryOptions, token));
});
step("move-files", () -> {
IOManager ioManager = catalogManager.getIoManagerFactory().get(uris.get(0));
for (URI uri : uris) {
String fileName = UriUtils.fileName(uri);
logger.info("Moving file -- " + fileName);
ioManager.move(uri, getOutDir().resolve(fileName).toUri());
}
toolParams.getVariantsFile(), query, queryOptions, token);
});
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.opencga.analysis.variant;

import org.apache.solr.common.StringUtils;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.tools.OpenCgaTool;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.variant.VariantWalkerParams;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;
import org.opencb.opencga.storage.core.variant.io.VariantWriterFactory;

import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;

@Tool(id = VariantWalkerTool.ID, description = VariantWalkerTool.DESCRIPTION,
scope = Tool.Scope.PROJECT, resource = Enums.Resource.VARIANT)
public class VariantWalkerTool extends OpenCgaTool {
public static final String ID = "variant-walk";
public static final String DESCRIPTION = "Filter and walk variants from the variant storage to produce a file";

@ToolParams
protected VariantWalkerParams toolParams = new VariantWalkerParams();

private VariantWriterFactory.VariantOutputFormat format;

@Override
protected void check() throws Exception {
super.check();

if (StringUtils.isEmpty(toolParams.getInputFormat())) {
toolParams.setInputFormat(VariantWriterFactory.VariantOutputFormat.VCF.toString());
}

format = VariantWriterFactory.toOutputFormat(toolParams.getInputFormat(), toolParams.getOutputFileName());
if (format.isBinary()) {
throw new IllegalArgumentException("Binary format not supported for VariantWalkerTool");
}
if (!format.isPlain()) {
format = format.inPlain();
}

if (StringUtils.isEmpty(toolParams.getOutputFileName())) {
toolParams.setOutputFileName("output.txt.gz");
} else if (!toolParams.getOutputFileName().endsWith(".gz")) {
toolParams.setOutputFileName(toolParams.getOutputFileName() + ".gz");
}
}

@Override
protected List<String> getSteps() {
return Arrays.asList(ID, "move-files");
}

@Override
protected void run() throws Exception {
step(ID, () -> {
Path outDir = getOutDir();
String outputFile = outDir.resolve(toolParams.getOutputFileName()).toString();
Query query = toolParams.toQuery();
QueryOptions queryOptions = new QueryOptions().append(QueryOptions.INCLUDE, toolParams.getInclude())
.append(QueryOptions.EXCLUDE, toolParams.getExclude());
variantStorageManager.walkData(outputFile,
format, query, queryOptions, toolParams.getDockerImage(), toolParams.getCommandLine(), token);
});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.opencb.commons.datastore.solr.SolrManager;
import org.opencb.opencga.analysis.StorageManager;
import org.opencb.opencga.analysis.variant.VariantExportTool;
import org.opencb.opencga.analysis.variant.VariantWalkerTool;
import org.opencb.opencga.analysis.variant.manager.operations.*;
import org.opencb.opencga.analysis.variant.metadata.CatalogStorageMetadataSynchronizer;
import org.opencb.opencga.analysis.variant.metadata.CatalogVariantMetadataFactory;
Expand Down Expand Up @@ -97,6 +98,7 @@

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.*;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -187,6 +189,37 @@ public List<URI> exportData(String outputFile, VariantOutputFormat outputFormat,
});
}

/**
* Exports the result of the given query and the associated metadata.
*
* @param outputFile Optional output file. If null or empty, will print into the Standard output. Won't export any metadata.
* @param format Variant Output format.
* @param query Query with the variants to export
* @param queryOptions Query options
* @param dockerImage Docker image to use
* @param commandLine Command line to use
* @param token User's session id
* @throws CatalogException if there is any error with Catalog
* @throws StorageEngineException If there is any error exporting variants
* @return generated files
*/
public List<URI> walkData(String outputFile, VariantOutputFormat format,
Query query, QueryOptions queryOptions, String dockerImage, String commandLine, String token)
throws CatalogException, StorageEngineException {
String anyStudy = catalogUtils.getAnyStudy(query, token);
return secureAnalysis(VariantWalkerTool.ID, anyStudy, queryOptions, token, engine -> {
Query finalQuery = catalogUtils.parseQuery(query, queryOptions, engine.getCellBaseUtils(), token);
checkSamplesPermissions(finalQuery, queryOptions, token);
URI outputUri;
try {
outputUri = UriUtils.createUri(outputFile);
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}
return engine.walkData(outputUri, format, finalQuery, queryOptions, dockerImage, commandLine);
});
}

// --------------------------//
// Data Operation methods //
// --------------------------//
Expand Down Expand Up @@ -435,8 +468,9 @@ private CatalogStorageMetadataSynchronizer getSynchronizer(VariantStorageEngine
return synchronizer;
}

public DataResult<Trio> familyIndexBySamples(String study, Collection<String> samples, ObjectMap params, String token)
public DataResult<Trio> familyIndexBySamples(String inputStudy, Collection<String> samples, ObjectMap params, String token)
throws CatalogException, StorageEngineException {
String study = getStudyFqn(inputStudy, token);
return secureOperation(VariantFamilyIndexOperationTool.ID, study, params, token, engine -> {
Collection<String> thisSamples = samples;
boolean allSamples;
Expand Down Expand Up @@ -506,6 +540,8 @@ public boolean hasVariantSetup(String studyStr, String token) throws CatalogExce

public ObjectMap configureProject(String projectStr, ObjectMap params, String token) throws CatalogException, StorageEngineException {
return secureOperationByProject("configure", projectStr, params, token, engine -> {
validateNewConfiguration(engine, params, token);

DataStore dataStore = getDataStoreByProjectId(projectStr, token);

dataStore.getOptions().putAll(params);
Expand All @@ -517,6 +553,7 @@ public ObjectMap configureProject(String projectStr, ObjectMap params, String to

public ObjectMap configureStudy(String studyStr, ObjectMap params, String token) throws CatalogException, StorageEngineException {
return secureOperation("configure", studyStr, params, token, engine -> {
validateNewConfiguration(engine, params, token);
Study study = catalogManager.getStudyManager()
.get(studyStr,
new QueryOptions(INCLUDE, StudyDBAdaptor.QueryParams.INTERNAL_CONFIGURATION_VARIANT_ENGINE_OPTIONS.key()),
Expand All @@ -540,6 +577,15 @@ public ObjectMap configureStudy(String studyStr, ObjectMap params, String token)
});
}

private void validateNewConfiguration(VariantStorageEngine engine, ObjectMap params, String token)
throws StorageEngineException, CatalogException {
if (catalogManager.getAuthorizationManager().isOpencgaAdministrator(catalogManager.getUserManager().validateToken(token))) {
logger.info("Skip configuration validation. User is an admin.");
return;
}
engine.validateNewConfiguration(params);
}

/**
* Modify SampleIndex configuration. Automatically submit a job to rebuild the sample index.
*
Expand Down
Loading

0 comments on commit bdccb74

Please sign in to comment.