Skip to content

Commit

Permalink
Merge pull request #2503 from opencb/TASK-6765
Browse files Browse the repository at this point in the history
TASK-6765 - Improve VCF Export performance from secondary sample index
  • Loading branch information
pfurio authored Feb 4, 2025
2 parents 6bb38f8 + 2a5c55e commit ad003fb
Show file tree
Hide file tree
Showing 140 changed files with 4,231 additions and 1,869 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pull-request-approved.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
echo "github.event.pull_request.head.ref: ${{ github.event.pull_request.head.ref }}"
xetabase_branch=$(./.github/workflows/scripts/get-xetabase-branch.sh ${{ github.event.pull_request.base.ref }})
echo "__Xetabase ref:__ \"${xetabase_branch}\"" | tee -a ${GITHUB_STEP_SUMMARY}
echo "xetabase_branch: ${xetabase_branch}"
echo "xetabase_branch=${xetabase_branch}" >> $GITHUB_OUTPUT
env:
ZETTA_REPO_ACCESS_TOKEN: ${{ secrets.ZETTA_REPO_ACCESS_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scripts/get-xetabase-branch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ get_xetabase_branch() {
# If the branch begins with 'TASK' and exists in the opencga-enterprise repository, return it
if [[ $input_branch == TASK* ]]; then
if [ "$(git ls-remote "https://$ZETTA_REPO_ACCESS_TOKEN@github.com/zetta-genomics/opencga-enterprise.git" "$input_branch" )" ] ; then
echo $input_branch;
echo "$input_branch";
return 0;
fi
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import org.opencb.opencga.core.models.sample.SamplePermissions;
import org.opencb.opencga.core.models.study.Study;
import org.opencb.opencga.core.models.user.UserFilter;
import org.opencb.opencga.core.models.variant.VariantQueryParams;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
Expand Down Expand Up @@ -89,70 +90,45 @@
*/
public class VariantCatalogQueryUtils extends CatalogUtils {

public static final String SAMPLE_ANNOTATION_DESC =
"Selects some samples using metadata information from Catalog. e.g. age>20;phenotype=hpo:123,hpo:456;name=smith";
public static final QueryParam SAMPLE_ANNOTATION
= QueryParam.create("sampleAnnotation", SAMPLE_ANNOTATION_DESC, QueryParam.Type.TEXT_ARRAY);
public static final String PROJECT_DESC = ParamConstants.PROJECT_DESCRIPTION;
public static final QueryParam PROJECT = QueryParam.create(ParamConstants.PROJECT_PARAM, PROJECT_DESC, QueryParam.Type.TEXT_ARRAY);
= QueryParam.create("sampleAnnotation", VariantQueryParams.SAMPLE_ANNOTATION_DESC, QueryParam.Type.TEXT_ARRAY);
public static final QueryParam PROJECT = QueryParam.create(ParamConstants.PROJECT_PARAM, VariantQueryParams.PROJECT_DESC, QueryParam.Type.TEXT_ARRAY);

public static final String FAMILY_DESC = "Filter variants where any of the samples from the given family contains the variant "
+ "(HET or HOM_ALT)";
public static final QueryParam FAMILY =
QueryParam.create("family", FAMILY_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_MEMBERS_DESC = "Sub set of the members of a given family";
QueryParam.create("family", VariantQueryParams.FAMILY_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_MEMBERS =
QueryParam.create("familyMembers", FAMILY_MEMBERS_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_DISORDER_DESC = "Specify the disorder to use for the family segregation";
QueryParam.create("familyMembers", VariantQueryParams.FAMILY_MEMBERS_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_DISORDER =
QueryParam.create("familyDisorder", FAMILY_DISORDER_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_PROBAND_DESC = "Specify the proband child to use for the family segregation";
QueryParam.create("familyDisorder", VariantQueryParams.FAMILY_DISORDER_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_PROBAND =
QueryParam.create("familyProband", FAMILY_PROBAND_DESC, QueryParam.Type.TEXT);
public static final String FAMILY_SEGREGATION_DESCR = "Filter by segregation mode from a given family. Accepted values: "
+ "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, "
+ "deNovo, deNovoStrict, mendelianError, compoundHeterozygous ]";
QueryParam.create("familyProband", VariantQueryParams.FAMILY_PROBAND_DESC, QueryParam.Type.TEXT);
public static final QueryParam FAMILY_SEGREGATION =
QueryParam.create("familySegregation", FAMILY_SEGREGATION_DESCR, QueryParam.Type.TEXT);
QueryParam.create("familySegregation", VariantQueryParams.FAMILY_SEGREGATION_DESCR, QueryParam.Type.TEXT);

public static final String SAVED_FILTER_DESCR = "Use a saved filter at User level";
public static final QueryParam SAVED_FILTER =
QueryParam.create("savedFilter", SAVED_FILTER_DESCR, QueryParam.Type.TEXT);
QueryParam.create("savedFilter", VariantQueryParams.SAVED_FILTER_DESCR, QueryParam.Type.TEXT);

@Deprecated
public static final QueryParam FAMILY_PHENOTYPE = FAMILY_DISORDER;
@Deprecated
public static final QueryParam MODE_OF_INHERITANCE = FAMILY_SEGREGATION;

public static final String PANEL_DESC = "Filter by genes from the given disease panel";
public static final QueryParam PANEL =
QueryParam.create("panel", PANEL_DESC, QueryParam.Type.TEXT);
public static final String PANEL_MOI_DESC = "Filter genes from specific panels that match certain mode of inheritance. " +
"Accepted values : "
+ "[ autosomalDominant, autosomalRecessive, XLinkedDominant, XLinkedRecessive, YLinked, mitochondrial, "
+ "deNovo, mendelianError, compoundHeterozygous ]";
QueryParam.create("panel", VariantQueryParams.PANEL_DESC, QueryParam.Type.TEXT);
public static final QueryParam PANEL_MODE_OF_INHERITANCE =
QueryParam.create("panelModeOfInheritance", PANEL_MOI_DESC
QueryParam.create("panelModeOfInheritance", VariantQueryParams.PANEL_MOI_DESC
, QueryParam.Type.TEXT);
public static final String PANEL_CONFIDENCE_DESC = "Filter genes from specific panels that match certain confidence. " +
"Accepted values : [ high, medium, low, rejected ]";
public static final QueryParam PANEL_CONFIDENCE =
QueryParam.create("panelConfidence", PANEL_CONFIDENCE_DESC, QueryParam.Type.TEXT);
QueryParam.create("panelConfidence", VariantQueryParams.PANEL_CONFIDENCE_DESC, QueryParam.Type.TEXT);

public static final String PANEL_INTERSECTION_DESC = "Intersect panel genes and regions with given "
+ "genes and regions from que input query. This will prevent returning variants from regions out of the panel.";
public static final QueryParam PANEL_INTERSECTION =
QueryParam.create("panelIntersection", PANEL_INTERSECTION_DESC, Type.BOOLEAN);
QueryParam.create("panelIntersection", VariantQueryParams.PANEL_INTERSECTION_DESC, Type.BOOLEAN);

public static final String PANEL_ROLE_IN_CANCER_DESC = "Filter genes from specific panels that match certain role in cancer. " +
"Accepted values : [ both, oncogene, tumorSuppressorGene, fusion ]";
public static final QueryParam PANEL_ROLE_IN_CANCER =
QueryParam.create("panelRoleInCancer", PANEL_ROLE_IN_CANCER_DESC, QueryParam.Type.TEXT);
QueryParam.create("panelRoleInCancer", VariantQueryParams.PANEL_ROLE_IN_CANCER_DESC, QueryParam.Type.TEXT);

public static final String PANEL_FEATURE_TYPE_DESC = "Filter elements from specific panels by type. " +
"Accepted values : [ gene, region, str, variant ]";
public static final QueryParam PANEL_FEATURE_TYPE =
QueryParam.create("panelFeatureType", PANEL_FEATURE_TYPE_DESC, QueryParam.Type.TEXT);
QueryParam.create("panelFeatureType", VariantQueryParams.PANEL_FEATURE_TYPE_DESC, QueryParam.Type.TEXT);

public static final List<QueryParam> VARIANT_CATALOG_QUERY_PARAMS = Arrays.asList(
SAMPLE_ANNOTATION,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -555,10 +555,16 @@ public OpenCGAResult<Job> configureSampleIndex(String studyStr, SampleIndexConfi
boolean skipRebuild, String token)
throws CatalogException, StorageEngineException {
return secureOperation("configure", studyStr, new ObjectMap(), token, engine -> {
String version = engine.getCellBaseUtils().getCellBaseClient().getClientConfiguration().getVersion();
sampleIndexConfiguration.validate(version);
String cellbaseVersion = engine.getCellBaseUtils().getVersionFromServer();
sampleIndexConfiguration.validate(cellbaseVersion);
String studyFqn = getStudyFqn(studyStr, token);
engine.getMetadataManager().addSampleIndexConfiguration(studyFqn, sampleIndexConfiguration, true);
int studyId;
if (!engine.getMetadataManager().studyExists(studyFqn)) {
studyId = engine.getMetadataManager().createStudy(studyFqn, cellbaseVersion).getId();
} else {
studyId = engine.getMetadataManager().getStudyId(studyFqn);
}
engine.getMetadataManager().addSampleIndexConfiguration(studyId, sampleIndexConfiguration, true);

catalogManager.getStudyManager()
.setVariantEngineConfigurationSampleIndex(studyStr, sampleIndexConfiguration, token);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.opencb.opencga.analysis.variant.manager.VariantCatalogQueryUtils;
import org.opencb.opencga.app.cli.internal.options.VariantCommandOptions;
import org.opencb.opencga.storage.core.variant.adaptors.VariantField;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -55,6 +56,7 @@ public static Query parseQuery(VariantCommandOptions.AbstractVariantQueryCommand
addParam(query, VariantCatalogQueryUtils.PANEL_ROLE_IN_CANCER, queryVariantsOptions.panelRoleInCancer);
addParam(query, VariantCatalogQueryUtils.PANEL_FEATURE_TYPE, queryVariantsOptions.panelFeatureType);
addParam(query, VariantCatalogQueryUtils.SAVED_FILTER, queryVariantsOptions.savedFilter);
addParam(query, VariantQueryParam.SOURCE, queryVariantsOptions.source);

return query;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import static org.opencb.opencga.core.api.FieldConstants.EXOMISER_CLINICAL_ANALYSIS_DESCRIPTION;
import static org.opencb.opencga.core.api.FieldConstants.EXOMISER_VERSION_DESCRIPTION;
import static org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam.*;
import static org.opencb.opencga.core.models.variant.VariantQueryParams.*;

@Parameters(commandNames = {"clinical"}, commandDescription = "Clinical analysis commands")
public class ClinicalCommandOptions {
Expand Down
Loading

0 comments on commit ad003fb

Please sign in to comment.