Skip to content

Commit

Permalink
Merge pull request #2431 from opencb/TASK-5876
Browse files Browse the repository at this point in the history
TASK-5876 - Outdated variant stats are not returned
  • Loading branch information
j-coll authored Apr 24, 2024
2 parents 33dd860 + 6efa387 commit 4366720
Show file tree
Hide file tree
Showing 76 changed files with 929 additions and 657 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.user.User;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.clinical.ClinicalVariantEngine;
import org.opencb.opencga.storage.core.clinical.ClinicalVariantException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.core.tools.OpenCgaToolExecutor;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import org.opencb.opencga.analysis.clinical.ClinicalUtils;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.core.tools.OpenCgaToolExecutor;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
Expand All @@ -40,7 +40,6 @@
import java.util.Map;

import static org.opencb.opencga.analysis.clinical.InterpretationAnalysis.PRIMARY_FINDINGS_FILENAME;
import static org.opencb.opencga.analysis.clinical.InterpretationAnalysis.SECONDARY_FINDINGS_FILENAME;
import static org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils.FAMILY_SEGREGATION;

@ToolExecutor(id = "opencga-local",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.study.StudyPermissions;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.core.tools.ToolParams;
import org.opencb.opencga.storage.core.StorageEngineFactory;
import org.opencb.opencga.storage.core.StoragePipelineResult;
Expand Down Expand Up @@ -646,6 +646,7 @@ public VariantQueryResult<Variant> get(Query inputQuery, QueryOptions queryOptio
@SuppressWarnings("unchecked")
public <T> VariantQueryResult<T> get(Query query, QueryOptions queryOptions, String token, Class<T> clazz)
throws CatalogException, IOException, StorageEngineException {

VariantQueryResult<Variant> result = get(query, queryOptions, token);
List<T> variants;
if (clazz == Variant.class) {
Expand All @@ -659,16 +660,7 @@ public <T> VariantQueryResult<T> get(Query query, QueryOptions queryOptions, Str
} else {
throw new IllegalArgumentException("Unknown variant format " + clazz);
}
return new VariantQueryResult<>(
result.getTime(),
result.getNumResults(),
result.getNumMatches(),
result.getEvents(),
variants,
result.getSamples(),
result.getSource(),
result.getApproximateCount(),
result.getApproximateCountSamplingSize(), null);
return new VariantQueryResult<>(result, variants);

}

Expand Down Expand Up @@ -899,7 +891,7 @@ public DataResult<Variant> getSampleData(String variant, String study, QueryOpti

VariantQueryResult<Variant> result = new VariantQueryResult<>(
((int) stopWatch.getTime(TimeUnit.MILLISECONDS)),
1, 1, new ArrayList<>(), Collections.singletonList(variantResult), null, null)
1, 1, new ArrayList<>(), Collections.singletonList(variantResult), engine.getStorageEngineId())
.setNumSamples(sampleEntries.size());
if (exactNumSamples) {
result.setApproximateCount(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import org.opencb.opencga.core.models.sample.Sample;
import org.opencb.opencga.core.models.variant.MutationalSignatureAnalysisParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.core.tools.annotations.ToolExecutor;
import org.opencb.opencga.core.tools.variant.MutationalSignatureAnalysisExecutor;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata;
import org.opencb.biodata.models.variant.protobuf.VariantProto;
import org.opencb.opencga.core.response.RestResponse;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.io.VcfDataWriter;

import java.io.PrintStream;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,11 @@ public Iterable<CohortMetadata> getInvalidCohorts(int studyId) {
return () -> Iterators.filter(cohortIterator(studyId), CohortMetadata::isInvalid);
}

/**
 * Lists the cohorts of a study whose stats are either ready or flagged as invalid.
 * <p>
 * Filtering is lazy: every call to {@code iterator()} on the returned object starts
 * a fresh pass over {@code cohortIterator(studyId)}.
 *
 * @param studyId id of the study whose cohorts are inspected
 * @return iterable over the cohorts for which {@code isStatsReady()} or {@code isInvalid()} holds
 */
public Iterable<CohortMetadata> getCalculatedOrInvalidCohorts(int studyId) {
    return () -> Iterators.filter(
            cohortIterator(studyId),
            cohort -> {
                if (cohort.isStatsReady()) {
                    return true;
                }
                return cohort.isInvalid();
            });
}

/**
 * Delegates to {@code updateCohortSamples} with its trailing boolean argument set to {@code false}.
 * NOTE(review): presumably {@code false} means "replace the current samples" rather than "add to them" —
 * confirm against {@code updateCohortSamples}.
 *
 * @param studyId    id of the study that owns the cohort
 * @param cohortName name of the cohort to update
 * @param samples    sample identifiers handed over to {@code updateCohortSamples}
 * @return the cohort metadata returned by {@code updateCohortSamples}
 * @throws StorageEngineException propagated from {@code updateCohortSamples}
 */
public CohortMetadata setSamplesToCohort(int studyId, String cohortName, Collection<Integer> samples) throws StorageEngineException {
    final CohortMetadata cohort = updateCohortSamples(studyId, cohortName, samples, false);
    return cohort;
}
Expand Down Expand Up @@ -1244,6 +1249,7 @@ private CohortMetadata updateCohortSamples(int studyId, String cohortName, Colle
if (!oldSamples.equals(sampleIdsList) || !oldFiles.equals(fileIds)) {
// Cohort has been modified! Invalidate stats
cohort.setStatsStatus(TaskMetadata.Status.ERROR);
cohort.getAttributes().put(CohortMetadata.INVALID_STATS_NUM_SAMPLES, oldSamples.size());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* @author Jacobo Coll &lt;jacobo167@gmail.com&gt;
*/
public class CohortMetadata extends StudyResourceMetadata<CohortMetadata> {
public static final String INVALID_STATS_NUM_SAMPLES = "invalidStatsNumSamples";

// private int studyId;
// private int id;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import org.opencb.opencga.core.config.storage.StorageConfiguration;
import org.opencb.opencga.core.models.operations.variant.VariantAggregateFamilyParams;
import org.opencb.opencga.core.models.operations.variant.VariantAggregateParams;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.storage.core.StorageEngine;
import org.opencb.opencga.storage.core.StoragePipelineResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
Expand Down Expand Up @@ -1233,8 +1233,8 @@ public VariantQueryResult<Variant> get(Query query, QueryOptions options) {
}
addDefaultLimit(options, getOptions());
addDefaultSampleLimit(query, getOptions());
query = preProcessQuery(query, options);
return getVariantQueryExecutor(query, options).get(query, options);
ParsedVariantQuery variantQuery = parseQuery(query, options);
return getVariantQueryExecutor(variantQuery).get(variantQuery);
}

@Override
Expand All @@ -1251,8 +1251,8 @@ public MultiVariantDBIterator iterator(Iterator<?> variants, Query query, QueryO
public VariantDBIterator iterator(Query query, QueryOptions options) {
query = VariantQueryUtils.copy(query);
options = VariantQueryUtils.copy(options);
query = preProcessQuery(query, options);
return getVariantQueryExecutor(query, options).iterator(query, options);
ParsedVariantQuery variantQuery = parseQuery(query, options);
return getVariantQueryExecutor(variantQuery).iterator(variantQuery);
}

public final List<VariantQueryExecutor> getVariantQueryExecutors() throws StorageEngineException {
Expand All @@ -1274,7 +1274,7 @@ protected List<VariantQueryExecutor> initVariantQueryExecutors() throws StorageE
executors.add(new CompoundHeterozygousQueryExecutor(
getMetadataManager(), getStorageEngineId(), getOptions(), this));
executors.add(new BreakendVariantQueryExecutor(
getMetadataManager(), getStorageEngineId(), getOptions(), new DBAdaptorVariantQueryExecutor(
getStorageEngineId(), getOptions(), new DBAdaptorVariantQueryExecutor(
getDBAdaptor(), getStorageEngineId(), getOptions()), getDBAdaptor()));
executors.add(new SamplesSearchIndexVariantQueryExecutor(
getDBAdaptor(), getVariantSearchManager(), getStorageEngineId(), dbName, configuration, getOptions()));
Expand All @@ -1293,12 +1293,22 @@ protected List<VariantQueryExecutor> initVariantQueryExecutors() throws StorageE
* @return VariantQueryExecutor to use
*/
public VariantQueryExecutor getVariantQueryExecutor(Query query, QueryOptions options) {
    // Parse the raw query first, then let the ParsedVariantQuery overload pick the executor.
    ParsedVariantQuery variantQuery = parseQuery(query, options);
    return getVariantQueryExecutor(variantQuery);
}

/**
* Determine which {@link VariantQueryExecutor} should be used to execute the given query.
*
* @param variantQuery Parsed variant query
* @return VariantQueryExecutor to use
*/
public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQuery) {
try {
for (VariantQueryExecutor executor : getVariantQueryExecutors()) {
if (executor.canUseThisExecutor(query, options)) {
if (executor.canUseThisExecutor(variantQuery.getQuery(), variantQuery.getInputOptions())) {
logger.info("Using VariantQueryExecutor : " + executor.getClass().getName());
logger.info(" Query : " + VariantQueryUtils.printQuery(query));
logger.info(" Options : " + options.toJson());
logger.info(" Query : " + VariantQueryUtils.printQuery(variantQuery.getInputQuery()));
logger.info(" Options : " + variantQuery.getInputOptions().toJson());
return executor;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import org.opencb.commons.datastore.core.DataResult;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.core.response.VariantQueryResult;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator;
import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery;
import org.opencb.opencga.storage.core.variant.query.VariantQueryParser;
import org.opencb.opencga.storage.core.variant.query.VariantQueryResult;
import org.opencb.opencga.storage.core.variant.query.projection.VariantQueryProjectionParser;
import org.opencb.opencga.storage.core.variant.stats.VariantStatsWrapper;

Expand All @@ -37,30 +37,13 @@
import java.util.List;
import java.util.Map;

import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.addSamplesMetadataIfRequested;

/**
* @author Ignacio Medina <igmecas@gmail.com>
* @author Jacobo Coll <jacobo167@gmail.com>
* @author Cristina Yenyxe Gonzalez Garcia <cgonzalez@cipf.es>
*/
public interface VariantDBAdaptor extends VariantIterable, AutoCloseable {

/**
* This method inserts Variants into the given Study. If the Study already exists then it just adds the new Sample
* genotypes, also new variants are inserted. If it is a new Study then Sample genotypes are added to the new Study.
*
* @param variants List of variants in OpenCB data model to be inserted
* @param studyName Name or alias of the study
* @param options Query modifiers, accepted values are: include, exclude, limit, skip, sort and count
* @return A DataResult with the number of inserted variants
*/
@Deprecated
default DataResult insert(List<Variant> variants, String studyName, QueryOptions options) {
throw new UnsupportedOperationException();
}

/**
/**
* Fetch all variants resulting of executing the query in the database. Returned fields are taken from
* the 'include' and 'exclude' fields at options.
Expand All @@ -71,26 +54,29 @@ default DataResult insert(List<Variant> variants, String studyName, QueryOptions
* @return A DataResult with the result of the query
*/
default VariantQueryResult<Variant> get(Iterator<?> variants, Query query, QueryOptions options) {
DataResult<Variant> queryResult = iterator(variants, query, options).toDataResult();
return addSamplesMetadataIfRequested(queryResult, query, options, getMetadataManager());
ParsedVariantQuery variantQuery = new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true);
try (VariantDBIterator iterator = iterator(variants, query, options)) {
return iterator.toDataResult(variantQuery);
} catch (Exception e) {
throw VariantQueryException.internalException(e);
}
}

@Deprecated
default VariantDBIterator iterator(Query query, QueryOptions options) {
return iterator(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true), options);
return iterator(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true));
}

VariantDBIterator iterator(ParsedVariantQuery query, QueryOptions options);
VariantDBIterator iterator(ParsedVariantQuery query);

/**
* Fetch all variants resulting of executing the query in the database. Returned fields are taken from
* the 'include' and 'exclude' fields at options.
*
* @param query Query to be executed in the database to filter variants
* @param options Query modifiers, accepted values are: include, exclude, limit, skip, sort and count
* @return A DataResult with the result of the query
*/
VariantQueryResult<Variant> get(ParsedVariantQuery query, QueryOptions options);
VariantQueryResult<Variant> get(ParsedVariantQuery query);

/**
* Fetch all variants resulting of executing the query in the database. Returned fields are taken from
Expand All @@ -102,7 +88,7 @@ default VariantDBIterator iterator(Query query, QueryOptions options) {
*/
@Deprecated
default VariantQueryResult<Variant> get(Query query, QueryOptions options) {
return get(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true), options);
return get(new VariantQueryParser(null, getMetadataManager()).parseQuery(query, options, true));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ public VariantQuery region(String value) {
put(VariantQueryParam.REGION.key(), value);
return this;
}
/**
 * Stores the given region strings, wrapped as a list, under the {@code REGION} query key.
 *
 * @param value one or more region strings
 * @return this query, to allow call chaining
 */
public VariantQuery region(String... value) {
    final String regionKey = VariantQueryParam.REGION.key();
    put(regionKey, Arrays.asList(value));
    return this;
}
public VariantQuery region(Region... value) {
put(VariantQueryParam.REGION.key(), Arrays.asList(value));
return this;
Expand Down Expand Up @@ -222,20 +226,24 @@ public boolean includeGenotype() {
return getBoolean(VariantQueryParam.INCLUDE_GENOTYPE.key());
}

public VariantQuery sampleLimit(String value) {
public VariantQuery sampleLimit(int value) {
put(VariantQueryParam.SAMPLE_LIMIT.key(), value);
return this;
}
public String sampleLimit() {
return getString(VariantQueryParam.SAMPLE_LIMIT.key());
public int sampleLimit() {
return getInt(VariantQueryParam.SAMPLE_LIMIT.key());
}

/**
 * Stores the given string under the {@code SAMPLE_SKIP} query key.
 *
 * @param value value to store, kept as a string
 * @return this query, to allow call chaining
 */
public VariantQuery sampleSkip(String value) {
    final String sampleSkipKey = VariantQueryParam.SAMPLE_SKIP.key();
    put(sampleSkipKey, value);
    return this;
}
public String sampleSkip() {
return getString(VariantQueryParam.SAMPLE_SKIP.key());
/**
 * Stores the given integer under the {@code SAMPLE_SKIP} query key.
 *
 * @param value value to store
 * @return this query, to allow call chaining
 */
public VariantQuery sampleSkip(int value) {
    final String sampleSkipKey = VariantQueryParam.SAMPLE_SKIP.key();
    put(sampleSkipKey, value);
    return this;
}
public int sampleSkip() {
return getInt(VariantQueryParam.SAMPLE_SKIP.key());
}

public VariantQuery file(String value) {
Expand Down Expand Up @@ -633,12 +641,12 @@ public VariantQuery release(String value) {
put(VariantQueryParam.RELEASE.key(), value);
return this;
}
public VariantQuery release(int value) {
public VariantQuery release(Integer value) {
put(VariantQueryParam.RELEASE.key(), value);
return this;
}
public String release() {
return getString(VariantQueryParam.RELEASE.key());
public Integer release() {
return getInt(VariantQueryParam.RELEASE.key());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
package org.opencb.opencga.storage.core.variant.adaptors.iterators;

import org.opencb.biodata.models.variant.Variant;

import java.util.NoSuchElementException;

/**
 * Wrapper around a {@link VariantDBIterator} that stops iterating once {@code limit}
 * variants have been returned through this wrapper.
 */
public class LimitVariantDBIterator extends DelegatedVariantDBIterator {

    /** Maximum number of variants this iterator will hand out. */
    private final int limit;
    /** Number of variants already handed out by {@link #next()}. */
    private int count;

    /**
     * @param delegated iterator that provides the variants
     * @param limit     maximum number of variants to return
     */
    LimitVariantDBIterator(VariantDBIterator delegated, int limit) {
        super(delegated);
        this.limit = limit;
        this.count = 0;
    }

    /**
     * @return {@code true} while fewer than {@code limit} variants have been returned
     *         and the delegate still has elements
     */
    @Override
    public boolean hasNext() {
        // Only the locally tracked count is consulted; the stale getCount()-based
        // return that preceded this line made it unreachable.
        return count < limit && super.hasNext();
    }

    /**
     * @return the next variant from the delegate
     * @throws NoSuchElementException if the limit has been reached or the delegate is exhausted
     */
    @Override
    public Variant next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        // Count before delegating, as the original did.
        ++count;
        return super.next();
    }
}
Loading

0 comments on commit 4366720

Please sign in to comment.