From f0f6a8a2dd11c1f032fa3e1b07812e95f0e8d5b1 Mon Sep 17 00:00:00 2001 From: alisman Date: Mon, 10 Jul 2023 13:58:17 -0400 Subject: [PATCH] Feature/add clinical data counts endpoint (#10262) Migrate mutated-genes and clinical-data-counts to clickhouse-ish implementations --------- Co-authored-by: haynescd --- .../persistence/StudyViewRepository.java | 18 +- .../enums/ClinicalAttributeDataSource.java | 15 ++ .../enums/ClinicalAttributeDataType.java | 16 ++ .../mybatiscolumnstore/StudyViewMapper.java | 17 +- .../StudyViewMyBatisRepository.java | 44 +++- .../mybatiscolumnstore/StudyViewMapper.xml | 221 +++++++++++++----- .../StudyViewMyBatisRepositoryTest.java | 186 ++++++++++++++- .../cbioportal/service/StudyViewService.java | 2 + .../service/impl/StudyViewServiceImpl.java | 68 +++++- .../StudyViewColumnStoreController.java | 51 +++- .../util/NewStudyViewFilterUtil.java | 12 + .../CategorizedClinicalDataCountFilter.java | 71 ++++++ 12 files changed, 643 insertions(+), 78 deletions(-) create mode 100644 persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataSource.java create mode 100644 persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataType.java create mode 100644 web/src/main/java/org/cbioportal/web/columnstore/util/NewStudyViewFilterUtil.java create mode 100644 webparam/src/main/java/org/cbioportal/webparam/CategorizedClinicalDataCountFilter.java diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index ec5ea68cd45..66525e30e1e 100644 --- a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -1,13 +1,27 @@ package org.cbioportal.persistence; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalData; +import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.Sample; +import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; +import org.cbioportal.persistence.enums.ClinicalAttributeDataType; +import org.cbioportal.webparam.CategorizedClinicalDataCountFilter; import org.cbioportal.webparam.StudyViewFilter; import java.util.List; public interface StudyViewRepository { - List getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter); + List getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + + List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + + List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); + + List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); + + List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); + + List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType); - List getMutatedGenes(StudyViewFilter studyViewFilter); } diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataSource.java b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataSource.java new file mode 100644 index 00000000000..ed320c1cebd --- /dev/null +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataSource.java @@ -0,0 +1,15 @@ +package org.cbioportal.persistence.enums; + +public enum ClinicalAttributeDataSource { + PATIENT("PATIENT"),SAMPLE("SAMPLE"); + + private final String value; + + ClinicalAttributeDataSource(String value) { + this.value = value; + } + + public String getValue() { + return this.value; + } +} diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataType.java b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataType.java new file mode 100644 index 00000000000..591f67e117d --- /dev/null +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/enums/ClinicalAttributeDataType.java @@ -0,0 +1,16 @@ +package org.cbioportal.persistence.enums; + +public enum ClinicalAttributeDataType { + CATEGORICAL("CATEGORICAL"), + NUMERIC("NUMERIC"); + + private final String value; + + ClinicalAttributeDataType(String value) { + this.value = value; + } + + public String getValue() { + return this.value; + } +} diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.java index eb9671a43d0..0440213f584 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.java @@ -1,12 +1,25 @@ package org.cbioportal.persistence.mybatiscolumnstore; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.Sample; +import org.cbioportal.webparam.CategorizedClinicalDataCountFilter; import org.cbioportal.webparam.StudyViewFilter; import java.util.List; public interface StudyViewMapper { - List getFilteredSamples(StudyViewFilter studyViewFilter); - List getMutatedGenes(StudyViewFilter studyViewFilter); + List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); + List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); + + List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); + + List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues ); + + List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); + + List getClinicalAttributeNames(String tableName); } diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepository.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepository.java index d8361240a21..c08c2b36ff6 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepository.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepository.java @@ -1,8 +1,13 @@ package org.cbioportal.persistence.mybatiscolumnstore; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalData; +import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; +import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; +import org.cbioportal.persistence.enums.ClinicalAttributeDataType; +import org.cbioportal.webparam.CategorizedClinicalDataCountFilter; import org.cbioportal.webparam.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -12,16 +17,47 @@ @Repository public class StudyViewMyBatisRepository implements StudyViewRepository { + private final List FILTERED_CLINICAL_ATTR_VALUES = List.of("NA", "NAN", "N/A"); @Autowired private StudyViewMapper studyViewMapper; @Override - public List getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter) { - return studyViewMapper.getFilteredSamples(studyViewFilter); + public List getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + + return studyViewMapper.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); } @Override - public List getMutatedGenes(StudyViewFilter studyViewFilter) { - return studyViewMapper.getMutatedGenes(studyViewFilter); + public List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return studyViewMapper.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + } + + @Override + public List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { + return studyViewMapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES ); + } + + @Override + public List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { + return studyViewMapper.getSampleClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES ); + } + + @Override + public List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { + return studyViewMapper.getPatientClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES); + } + + @Override + public List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType) { + String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase(); + return studyViewMapper.getClinicalAttributeNames(tableName); + } + + private boolean shouldApplyPatientIdFilters(CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty() + || categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters().isEmpty(); } } \ No newline at end of file diff --git a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.xml b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.xml index 49d487a128e..f121f75fd5d 100644 --- a/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.xml +++ b/persistence/persistence-mybatis/src/main/resources/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMapper.xml @@ -12,10 +12,13 @@ sample_unique_id_base64 as uniqueSampleKey, patient_unique_id_base64 as uniquePatientKey FROM sample - WHERE sample_unique_id IN ( - - - ) + + sample_unique_id IN ( ) + + + AND patient_unique_id IN () + + ORDER BY sample_stable_id ASC; @@ -29,103 +32,203 @@ COUNT(DISTINCT sample_unique_id) as numberOfAlteredCases, COUNT(*) as totalCount FROM mutation - WHERE sample_unique_id IN ( - - ) + + sample_unique_id IN ( + + INTERSECT + + ) + + GROUP BY hugo_gene_symbol ORDER BY totalCount DESC; + + + + + SELECT + attribute_name as attributeId, + CASE WHEN attribute_value = 'NULL' THEN 'NA' ELSE attribute_value END AS value, + Count(*) as count + FROM ${table_name_prefix}_clinical_attribute_categorical + + patient_unique_id IN ( + + INTERSECT + + ) + AND UPPER(attribute_value) NOT IN + + #{filteredAttributeValue} + + AND attribute_name IN + + #{attributeId} + + + GROUP BY attribute_name, + attribute_value + + + - + INTERSECT SELECT sample_unique_id FROM sample WHERE cancer_study_identifier IN - + #{studyId} - + INTERSECT SELECT sample_unique_id FROM sample WHERE sample_unique_id IN - + #{sampleIdentifier.getStudyId()}_#{sampleIdentifier.getSampleId()} - + INTERSECT SELECT sample_unique_id FROM genomic_event - + genetic_profile_stable_id IN #{molecularProfileId} - AND hugo_gene_symbol IN - - + AND hugo_gene_symbol IN ( + + #{geneFilterQuery.hugoGeneSymbol} + ) - - - - SELECT sample_unique_id - FROM sample_clinical_attribute_numeric - WHERE attribute_name = '${clinicalDataFilter.attributeId}' - - - - AND attribute_value = -1000000 - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SELECT ${unique_id} + FROM ${table_name} + WHERE attribute_name = '${clinicalDataFilter.attributeId}' + + + + AND attribute_value = -1000000 + + + + AND ABS(attribute_value - ${dataFilterValue.start}) < EXP(-11) + + AND attribute_value > ${dataFilterValue.start} AND attribute_value <= ${dataFilterValue.end} - - - - - - - + + + + + + + SELECT ${unique_id} + FROM ${table_name} + WHERE attribute_name = '${clinicalDataFilter.attributeId}' + + + AND attribute_value = '${dataFilterValue.value}' + + + + + + SELECT sample_unique_id + FROM sample + + patient_unique_id IN () + + + + + SELECT patient_unique_id + FROM sample + + sample_unique_id IN () + + + + diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java index b8862780fcc..fe72ed27380 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java @@ -1,11 +1,14 @@ package org.cbioportal.persistence.mybatiscolumnstore; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.Sample; -import org.cbioportal.webparam.ClinicalDataFilter; -import org.cbioportal.webparam.DataFilterValue; -import org.cbioportal.webparam.StudyViewFilter; +import org.cbioportal.model.util.Select; +import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; +import org.cbioportal.persistence.enums.ClinicalAttributeDataType; +import org.cbioportal.webparam.*; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; @@ -15,6 +18,9 @@ import java.math.BigDecimal; import java.util.*; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; import java.util.function.Function; import java.util.stream.Collectors; @@ -31,7 +37,7 @@ public void getFilteredSamples() { StudyViewFilter studyViewFilter = generateStudyViewFilter(new String[]{"msk_ch_2020", "msk_impact_2017"}, null); // test total number of samples - List samples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter); + List samples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter, CategorizedClinicalDataCountFilter.getBuilder().build()); Assert.assertEquals(11, samples.size()); // test sample numerical clinical attributes @@ -42,7 +48,7 @@ public void getFilteredSamples() { ) ); - samples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter); + samples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter, CategorizedClinicalDataCountFilter.getBuilder().build()); Assert.assertEquals(2, samples.size()); } @@ -50,10 +56,27 @@ public void getFilteredSamples() { public void getMutatedGenes() { StudyViewFilter studyViewFilter = generateStudyViewFilter(new String[]{"msk_ch_2020"}, null); - List mutations = studyViewMyBatisRepository.getMutatedGenes(studyViewFilter); + List mutations = studyViewMyBatisRepository.getMutatedGenes(studyViewFilter, CategorizedClinicalDataCountFilter.getBuilder().build()); Assert.assertEquals(2, mutations.size()); } + + @Test + public void getMutatedGenesFiltered() { + StudyViewFilter studyViewFilter = generateStudyViewFilter(new String[]{"msk_impact_2017"}, null); + + List studyViewGeneFilters = new ArrayList<>(); + StudyViewGeneFilter mutationStudyViewGeneFilter = new StudyViewGeneFilter(); + mutationStudyViewGeneFilter.setMolecularProfileIds(new HashSet<>(Arrays.asList("msk_impact_2017_mutations"))); + + mutationStudyViewGeneFilter.setGeneQueries(getFilters.apply("moo")); + studyViewGeneFilters.add(mutationStudyViewGeneFilter); + + studyViewFilter.setGeneFilters(studyViewGeneFilters); + + List mutations = studyViewMyBatisRepository.getMutatedGenes(studyViewFilter, CategorizedClinicalDataCountFilter.getBuilder().build()); + Assert.assertEquals(414, mutations.size()); + } private StudyViewFilter generateStudyViewFilter( String[] studyIds, @@ -90,6 +113,38 @@ private ClinicalDataFilter generateClinicalDataFilter(String attributeId, String return dataFilterValue; }; + private final Function>> getFilters = v -> { + + boolean includeDriver = true; + boolean includeVUS = true; + boolean includeUnknownOncogenicity = true; + boolean includeGermline = true; + boolean includeSomatic = true; + boolean includeUnknownStatus = true; + Select selectedTiers = Select.none(); + boolean includeUnknownTier = true; + + GeneFilterQuery geneFilterQuery1 = new GeneFilterQuery("SMARCD1", null, + null, includeDriver, includeVUS, includeUnknownOncogenicity, selectedTiers, includeUnknownTier, + includeGermline, includeSomatic, includeUnknownStatus); + + GeneFilterQuery geneFilterQuery2 = new GeneFilterQuery("TP53", null, + null, includeDriver, includeVUS, includeUnknownOncogenicity, selectedTiers, includeUnknownTier, + includeGermline, includeSomatic, includeUnknownStatus); + + List> q1 = new ArrayList<>(); + List q2 = new ArrayList<>(); + List q3 = new ArrayList<>(); + + q2.add(geneFilterQuery1); + q3.add(geneFilterQuery2); + q1.add(q2); + q1.add(q3); + return q1; + + }; + + private final Function mapNumericalValues = v -> { DataFilterValue dataFilterValue = new DataFilterValue(); @@ -101,4 +156,123 @@ private ClinicalDataFilter generateClinicalDataFilter(String attributeId, String return dataFilterValue; }; + @Test + @Ignore + public void getClinicalAttributeNames() { + List names = studyViewMyBatisRepository.getClinicalDataAttributeNames(ClinicalAttributeDataSource.PATIENT, ClinicalAttributeDataType.CATEGORICAL); + Assert.assertTrue(names.size() > 0); + Assert.assertTrue(names.contains("OS_STATUS")); + } + + @Test + @Ignore + public void getFilterSamplesWithClinicalDataFilter() { + ClinicalDataFilter clinicalDataFilter = new ClinicalDataFilter(); + clinicalDataFilter.setAttributeId("TMB_NONSYNONYMOUS"); + DataFilterValue filterValue = new DataFilterValue(); + filterValue.setStart(BigDecimal.valueOf(0.033333333)); + filterValue.setEnd(BigDecimal.valueOf(0.033333333)); + clinicalDataFilter.setValues(List.of(filterValue)); + StudyViewFilter studyViewFilter = new StudyViewFilter(); + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = CategorizedClinicalDataCountFilter.getBuilder() + .setSampleNumericalClinicalDataFilters(List.of(clinicalDataFilter)) + .build(); + + List studyIds = new ArrayList<>(); + studyIds.add("msk_ch_2020"); + studyViewFilter.setStudyIds(studyIds); + + List test = categorizedClinicalDataCountFilter.getSampleNumericalClinicalDataFilters(); + List samples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter, categorizedClinicalDataCountFilter); + Assert.assertEquals(4507, samples.size()); + } + + @Test + //@Ignore + public void getFilterSamplesWithCategoricalSampleClinicalDataFilter() { + StudyViewFilter studyViewFilter = generateStudyViewFilter( + new String[]{"msk_ch_2020"},null + ); + CategorizedClinicalDataCountFilter clincalDataCountFilters = CategorizedClinicalDataCountFilter.getBuilder() + .setSampleCategoricalClinicalDataFilters(Arrays.asList(generateCategoricalClinicalDataFilter("CANCER_TYPE", new String[]{"Bladder Cancer", "Breast Cancer"}))) + .build(); + + List filteredSamples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter, clincalDataCountFilters); + Assert.assertEquals(1435, filteredSamples.size()); + } + @Test + @Ignore + public void getFilterSamplesWithNumericPatientClinicalDataFilter() { + StudyViewFilter studyViewFilter = generateStudyViewFilter( + new String[]{"msk_ch_2020"},null + ); + CategorizedClinicalDataCountFilter clincalDataCountFilters = CategorizedClinicalDataCountFilter.getBuilder() + .setPatientNumericalClinicalDataFilters(Arrays.asList(generateNumericalClinicalDataFilter("AGE", new String[]{"60-65"}))) + .build(); + + List filteredSamples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter, clincalDataCountFilters); + Assert.assertEquals(3343, filteredSamples.size()); + } + @Test + @Ignore + public void getFilterSamplesWithPatientClinicalDataFilterAndSampleClinicalDataFilter() { + StudyViewFilter studyViewFilter = generateStudyViewFilter( + new String[]{"msk_ch_2020"},null + ); + + ClinicalDataFilter clinicalDataFilter = new ClinicalDataFilter(); + clinicalDataFilter.setAttributeId("TMB_NONSYNONYMOUS"); + DataFilterValue filterValue = new DataFilterValue(); + filterValue.setStart(BigDecimal.valueOf(0.033333333)); + filterValue.setEnd(BigDecimal.valueOf(0.033333333)); + clinicalDataFilter.setValues(List.of(filterValue)); + + CategorizedClinicalDataCountFilter clincalDataCountFilters = CategorizedClinicalDataCountFilter.getBuilder() + .setPatientNumericalClinicalDataFilters(List.of(generateNumericalClinicalDataFilter("AGE", new String[]{"60-65"}))) + .setSampleNumericalClinicalDataFilters(List.of(clinicalDataFilter)) + .setSampleCategoricalClinicalDataFilters(List.of(generateCategoricalClinicalDataFilter("CANCER_TYPE", new String[]{"Bladder Cancer"}))) + .build(); + + List filteredSamples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter, clincalDataCountFilters); + Assert.assertEquals(21, filteredSamples.size()); + } + + @Test + @Ignore + public void getMutatedGenesWithPatientClinicalDataFilterAndSampleClinicalDataFilter() { + StudyViewFilter studyViewFilter = generateStudyViewFilter( + new String[]{"msk_ch_2020"},null + ); + + ClinicalDataFilter clinicalDataFilter = new ClinicalDataFilter(); + clinicalDataFilter.setAttributeId("TMB_NONSYNONYMOUS"); + DataFilterValue filterValue = new DataFilterValue(); + filterValue.setStart(BigDecimal.valueOf(0.033333333)); + filterValue.setEnd(BigDecimal.valueOf(0.033333333)); + clinicalDataFilter.setValues(List.of(filterValue)); + + CategorizedClinicalDataCountFilter clincalDataCountFilters = CategorizedClinicalDataCountFilter.getBuilder() + .setPatientNumericalClinicalDataFilters(List.of(generateNumericalClinicalDataFilter("AGE", new String[]{"60-65"}))) + .setSampleNumericalClinicalDataFilters(List.of(clinicalDataFilter)) + .setSampleCategoricalClinicalDataFilters(List.of(generateCategoricalClinicalDataFilter("CANCER_TYPE", new String[]{"Bladder Cancer"}))) + .build(); + + List mutatedGenes = studyViewMyBatisRepository.getMutatedGenes(studyViewFilter, clincalDataCountFilters); + Assert.assertEquals(16, mutatedGenes.size()); + } + + @Test + @Ignore + public void getClinicalDataCounts() { + StudyViewFilter studyViewFilter = generateStudyViewFilter( + new String[]{"msk_ch_2020"},null + ); + + List counts = studyViewMyBatisRepository.getClinicalDataCounts(studyViewFilter, CategorizedClinicalDataCountFilter.getBuilder().build(), + List.of("CANCER_TYPE")); + + Assert.assertEquals(counts.size(), 50); + + + } } \ No newline at end of file diff --git a/service/src/main/java/org/cbioportal/service/StudyViewService.java b/service/src/main/java/org/cbioportal/service/StudyViewService.java index efd835f658b..f1dfadfe10c 100644 --- a/service/src/main/java/org/cbioportal/service/StudyViewService.java +++ b/service/src/main/java/org/cbioportal/service/StudyViewService.java @@ -26,4 +26,6 @@ List getCNAAlterationCountByGenes(List studyIds, List getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter); List getMutatedGenesFromColumnstore(StudyViewFilter interceptedStudyViewFilter); + + List getClinicalDataCountsFromColumnStore(StudyViewFilter studyViewFilter, List filteredAttributes); } diff --git a/service/src/main/java/org/cbioportal/service/impl/StudyViewServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/StudyViewServiceImpl.java index 9780435fdef..abb17400c8c 100644 --- a/service/src/main/java/org/cbioportal/service/impl/StudyViewServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/StudyViewServiceImpl.java @@ -5,10 +5,14 @@ import org.cbioportal.model.*; import org.cbioportal.model.util.Select; import org.cbioportal.persistence.StudyViewRepository; +import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; +import org.cbioportal.persistence.enums.ClinicalAttributeDataType; import org.cbioportal.service.*; import org.cbioportal.service.exception.MolecularProfileNotFoundException; import org.cbioportal.service.exception.StudyNotFoundException; import org.cbioportal.service.util.MolecularProfileUtil; +import org.cbioportal.webparam.CategorizedClinicalDataCountFilter; +import org.cbioportal.webparam.ClinicalDataFilter; import org.cbioportal.webparam.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -16,10 +20,14 @@ import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; @Service public class StudyViewServiceImpl implements StudyViewService { private static final List CNA_TYPES_AMP_AND_HOMDEL = Collections.unmodifiableList(Arrays.asList(CNA.AMP, CNA.HOMDEL)); + + private final Map> clinicalAttributeNameMap = new HashMap<>(); + @Autowired private MolecularProfileService molecularProfileService; @Autowired @@ -252,11 +260,67 @@ public List fetchGenericAssayDataCounts(List @Override public List getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter) { - return studyViewRepository.getFilteredSamplesFromColumnstore(studyViewFilter); + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return studyViewRepository.getFilteredSamplesFromColumnstore(studyViewFilter, categorizedClinicalDataCountFilter); } @Override public List getMutatedGenesFromColumnstore(StudyViewFilter studyViewFilter) { - return studyViewRepository.getMutatedGenes(studyViewFilter); + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter); + } + + @Override + public List getClinicalDataCountsFromColumnStore(StudyViewFilter studyViewFilter, List filteredAttributes) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + + return studyViewRepository.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, filteredAttributes) + .stream().collect(Collectors.groupingBy(ClinicalDataCount::getAttributeId)) + .entrySet().parallelStream().map(e -> { + ClinicalDataCountItem item = new ClinicalDataCountItem(); + item.setAttributeId(e.getKey()); + item.setCounts(e.getValue()); + return item; + }).collect(Collectors.toList()); + } + + private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) { + if(clinicalAttributeNameMap.isEmpty()) { + buildClinicalAttributeNameMap(); + } + + if(studyViewFilter.getClinicalDataFilters() == null) { + return CategorizedClinicalDataCountFilter.getBuilder().build(); + } + + final String patientCategoricalKey = ClinicalAttributeDataSource.PATIENT.getValue() + ClinicalAttributeDataType.CATEGORICAL.getValue(); + final String patientNumericKey = ClinicalAttributeDataSource.PATIENT.getValue() + ClinicalAttributeDataType.NUMERIC.getValue(); + final String sampleCategoricalKey = ClinicalAttributeDataSource.SAMPLE.getValue() + ClinicalAttributeDataType.CATEGORICAL.getValue(); + final String sampleNumericKey = ClinicalAttributeDataSource.SAMPLE.getValue() + ClinicalAttributeDataType.NUMERIC.getValue(); + + return CategorizedClinicalDataCountFilter.getBuilder() + .setPatientCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters() + .stream().filter(clinicalDataFilter -> clinicalAttributeNameMap.get(patientCategoricalKey).contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .setPatientNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> clinicalAttributeNameMap.get(patientNumericKey).contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .setSampleCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> clinicalAttributeNameMap.get(sampleCategoricalKey).contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .setSampleNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> clinicalAttributeNameMap.get(sampleNumericKey).contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .build(); + } + + private void buildClinicalAttributeNameMap() { + List clinicalAttributeDataSources = List.of(ClinicalAttributeDataSource.values()); + for(ClinicalAttributeDataSource clinicalAttributeDataSource : clinicalAttributeDataSources) { + String categoricalKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.CATEGORICAL; + String numericKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.NUMERIC; + clinicalAttributeNameMap.put(categoricalKey, studyViewRepository.getClinicalDataAttributeNames(clinicalAttributeDataSource, ClinicalAttributeDataType.CATEGORICAL)); + clinicalAttributeNameMap.put(numericKey, studyViewRepository.getClinicalDataAttributeNames(clinicalAttributeDataSource, ClinicalAttributeDataType.NUMERIC)); + } } } diff --git a/web/src/main/java/org/cbioportal/web/columnstore/StudyViewColumnStoreController.java b/web/src/main/java/org/cbioportal/web/columnstore/StudyViewColumnStoreController.java index 84e7fdf2ba8..a7f9383a3f0 100644 --- a/web/src/main/java/org/cbioportal/web/columnstore/StudyViewColumnStoreController.java +++ b/web/src/main/java/org/cbioportal/web/columnstore/StudyViewColumnStoreController.java @@ -4,9 +4,15 @@ import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.AlterationFilter; +import org.cbioportal.model.ClinicalDataCountItem; import org.cbioportal.model.Sample; import org.cbioportal.service.StudyViewService; +import org.cbioportal.service.exception.StudyNotFoundException; +import org.cbioportal.web.columnstore.util.NewStudyViewFilterUtil; import org.cbioportal.web.config.annotation.InternalApi; +import org.cbioportal.webparam.ClinicalDataCountFilter; +import org.cbioportal.webparam.ClinicalDataFilter; import org.cbioportal.webparam.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.HttpStatus; @@ -17,20 +23,25 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestAttribute; import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; import springfox.documentation.annotations.ApiIgnore; import javax.validation.Valid; +import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; @InternalApi @RestController @Validated @Api(tags = "Study View Column Store") public class StudyViewColumnStoreController { - + + private static final String COLUMN_STORE_ENDPOINT_PREFIX="/column-store"; + @Autowired private StudyViewService studyViewService; @@ -64,10 +75,44 @@ public ResponseEntity> fetchMutatedGenes( @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, @ApiIgnore // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above. @Valid @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter - ) { + ) throws StudyNotFoundException { + AlterationFilter annotationFilters = interceptedStudyViewFilter.getAlterationFilter(); + List samples = studyViewService.getFilteredSamplesFromColumnstore(interceptedStudyViewFilter); + List studyIds = new ArrayList<>(); + List sampleIds = new ArrayList<>(); + for(Sample sample : samples) { + studyIds.add(sample.getCancerStudyIdentifier()); + sampleIds.add(sample.getStableId()); + } return new ResponseEntity<>( - studyViewService.getMutatedGenesFromColumnstore(interceptedStudyViewFilter), + studyViewService.getMutationAlterationCountByGenes(studyIds, sampleIds, annotationFilters), HttpStatus.OK ); } + + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") + @PostMapping(value = "/column-store/clinical-data-counts/fetch", + consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + @ApiOperation("Fetch clinical data counts by study view filter") + public ResponseEntity> fetchClinicalDataCounts( + @ApiParam(required = true, value = "Clinical data count filter") + @Valid @RequestBody(required = false) ClinicalDataCountFilter clinicalDataCountFilter, + @ApiIgnore // prevent reference to this attribute in the swagger-ui interface + @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, + @ApiIgnore // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above. + @Valid @RequestAttribute(required = false, value = "interceptedClinicalDataCountFilter") ClinicalDataCountFilter interceptedClinicalDataCountFilter) { + + List attributes = interceptedClinicalDataCountFilter.getAttributes(); + StudyViewFilter studyViewFilter = interceptedClinicalDataCountFilter.getStudyViewFilter(); + + if (attributes.size() == 1) { + NewStudyViewFilterUtil.removeSelfFromFilter(attributes.get(0).getAttributeId(), studyViewFilter); + } + // boolean singleStudyUnfiltered = studyViewFilterUtil.isSingleStudyUnfiltered(studyViewFilter); + List result = studyViewService.getClinicalDataCountsFromColumnStore(studyViewFilter, + attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList())); + //studyIds, sampleIds, attributes.stream().map(a -> a.getAttributeId()).collect(Collectors.toList())); + return new ResponseEntity<>(result, HttpStatus.OK); + + } } diff --git a/web/src/main/java/org/cbioportal/web/columnstore/util/NewStudyViewFilterUtil.java b/web/src/main/java/org/cbioportal/web/columnstore/util/NewStudyViewFilterUtil.java new file mode 100644 index 00000000000..cc4bd198f20 --- /dev/null +++ b/web/src/main/java/org/cbioportal/web/columnstore/util/NewStudyViewFilterUtil.java @@ -0,0 +1,12 @@ +package org.cbioportal.web.columnstore.util; + +import org.cbioportal.webparam.StudyViewFilter; + +public class NewStudyViewFilterUtil { + + public static void removeSelfFromFilter(String attributeId, StudyViewFilter studyViewFilter) { + if (studyViewFilter!= null && studyViewFilter.getClinicalDataFilters() != null) { + studyViewFilter.getClinicalDataFilters().removeIf(f -> f.getAttributeId().equals(attributeId)); + } + } +} diff --git a/webparam/src/main/java/org/cbioportal/webparam/CategorizedClinicalDataCountFilter.java b/webparam/src/main/java/org/cbioportal/webparam/CategorizedClinicalDataCountFilter.java new file mode 100644 index 00000000000..cc56f9a8803 --- /dev/null +++ b/webparam/src/main/java/org/cbioportal/webparam/CategorizedClinicalDataCountFilter.java @@ -0,0 +1,71 @@ +package org.cbioportal.webparam; + +import java.util.List; + +public final class CategorizedClinicalDataCountFilter { + + public static Builder getBuilder() { + return new Builder(); + } + private final List sampleNumericalClinicalDataFilters; + private final List sampleCategoricalClinicalDataFilters; + private final List patientNumericalClinicalDataFilters; + private final List patientCategoricalClinicalDataFilters; + private CategorizedClinicalDataCountFilter(Builder builder) { + this.sampleCategoricalClinicalDataFilters = builder.sampleCategoricalClinicalDataFilters; + this.sampleNumericalClinicalDataFilters = builder.sampleNumericalClinicalDataFilters; + this.patientCategoricalClinicalDataFilters = builder.patientCategoricalClinicalDataFilters; + this.patientNumericalClinicalDataFilters = builder.patientNumericalClinicalDataFilters; + } + + public List getSampleNumericalClinicalDataFilters() { + return sampleNumericalClinicalDataFilters; + } + + public List getSampleCategoricalClinicalDataFilters() { + return sampleCategoricalClinicalDataFilters; + } + + public List getPatientNumericalClinicalDataFilters() { + return patientNumericalClinicalDataFilters; + } + + public List getPatientCategoricalClinicalDataFilters() { + return patientCategoricalClinicalDataFilters; + } + + + public static class Builder { + private List sampleNumericalClinicalDataFilters; + private List sampleCategoricalClinicalDataFilters; + private List patientNumericalClinicalDataFilters; + private List patientCategoricalClinicalDataFilters; + + private Builder(){ + + } + public Builder setSampleCategoricalClinicalDataFilters(List sampleCategoricalClinicalDataFilters) { + this.sampleCategoricalClinicalDataFilters = sampleCategoricalClinicalDataFilters; + return this; + } + + public Builder setSampleNumericalClinicalDataFilters(List sampleNumericalClinicalDataFilters) { + this.sampleNumericalClinicalDataFilters = sampleNumericalClinicalDataFilters; + return this; + } + + public Builder setPatientCategoricalClinicalDataFilters(List patientCategoricalClinicalDataFilters) { + this.patientCategoricalClinicalDataFilters = patientCategoricalClinicalDataFilters; + return this; + } + + public Builder setPatientNumericalClinicalDataFilters(List patientNumericalClinicalDataFilters) { + this.patientNumericalClinicalDataFilters = patientNumericalClinicalDataFilters; + return this; + } + + public CategorizedClinicalDataCountFilter build() { + return new CategorizedClinicalDataCountFilter(this); + } + } +}