Skip to content

Commit

Permalink
Genomic data counts study-view endpoint (#10300)
Browse files Browse the repository at this point in the history
* implement getting CNA types for a specific gene

* add tests for genomic-data-counts endpoint

* change CopyNumberDataCounterFilter to StudyViewFilter and GenomicDataCountFilter

* change to molecularDataService to fetch data

* add filter logic for DISCRETE molecular profiles

* Refactor

* count NA values and clean up code

* Updates

* Update StudyViewServiceImplTest.java

---------

Co-authored-by: Qi-Xuan Lu <qlu@carisls.com>
Co-authored-by: Karthik <kalletlakarthik@gmail.com>
  • Loading branch information
3 people authored Aug 8, 2023
1 parent d035bb1 commit ca2bdff
Show file tree
Hide file tree
Showing 11 changed files with 555 additions and 93 deletions.
35 changes: 35 additions & 0 deletions model/src/main/java/org/cbioportal/model/GenomicDataCountItem.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.cbioportal.model;

import java.io.Serializable;
import java.util.List;

/**
 * Genomic data counts for a single gene within a single profile type.
 *
 * <p>A plain serializable bean: it carries the Hugo gene symbol, the profile type the
 * counts were computed for, and the list of individual counts.
 */
public class GenomicDataCountItem implements Serializable {

    /** Hugo symbol of the gene the counts belong to. */
    private String hugoGeneSymbol;

    /** Profile type the counts were computed for. */
    private String profileType;

    /** The individual counts for this gene and profile type. */
    private List<GenomicDataCount> counts;

    /** @return the Hugo gene symbol */
    public String getHugoGeneSymbol() {
        return hugoGeneSymbol;
    }

    /** @param hugoGeneSymbol the Hugo gene symbol to store */
    public void setHugoGeneSymbol(String hugoGeneSymbol) {
        this.hugoGeneSymbol = hugoGeneSymbol;
    }

    /** @return the profile type */
    public String getProfileType() {
        return profileType;
    }

    /** @param profileType the profile type to store */
    public void setProfileType(String profileType) {
        this.profileType = profileType;
    }

    /** @return the stored list of counts (the same list instance that was set, not a copy) */
    public List<GenomicDataCount> getCounts() {
        return counts;
    }

    /** @param counts the list of counts to store (kept by reference) */
    public void setCounts(List<GenomicDataCount> counts) {
        this.counts = counts;
    }
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.cbioportal.service;

import org.apache.commons.math3.util.Pair;
import org.cbioportal.model.*;
import org.cbioportal.service.exception.MolecularProfileNotFoundException;
import org.cbioportal.service.exception.StudyNotFoundException;

import java.util.List;
Expand All @@ -19,6 +21,8 @@ List<AlterationCountByGene> getStructuralVariantAlterationCountByGenes(List<Stri
/**
 * Returns copy-number alteration counts by gene for the given samples.
 *
 * @param studyIds         study identifiers (presumably index-aligned with {@code sampleIds} - TODO confirm)
 * @param sampleIds        sample identifiers
 * @param annotationFilter alteration filter passed on to the counting logic (exact semantics are defined by the implementation)
 * @return one {@link CopyNumberCountByGene} entry per counted gene/alteration
 * @throws StudyNotFoundException presumably when a referenced study cannot be found - TODO confirm
 */
List<CopyNumberCountByGene> getCNAAlterationCountByGenes(List<String> studyIds, List<String> sampleIds, AlterationFilter annotationFilter)
throws StudyNotFoundException;

/**
 * Counts, for each requested (Hugo gene symbol, profile type) pair, how many molecular data
 * values of each kind were found for the given samples.
 *
 * @param studyIds           study identifier of each sample; index-aligned with {@code sampleIds}
 * @param sampleIds          sample identifiers
 * @param genomicDataFilters pairs whose key is the Hugo gene symbol and whose value is the profile type
 * @return one {@link GenomicDataCountItem} per pair for which at least one sample has a matching profile
 */
List<GenomicDataCountItem> getCNAAlterationCountsByGeneSpecific(List<String> studyIds, List<String> sampleIds, List<Pair<String, String>> genomicDataFilters);

/**
 * Fetches generic assay data counts for the given samples.
 *
 * @param sampleIds    sample identifiers
 * @param studyIds     study identifiers (presumably index-aligned with {@code sampleIds} - TODO confirm)
 * @param stableIds    stable identifiers of the entities to count (presumably generic assay entities - verify against implementation)
 * @param profileTypes profile types to count within
 * @return the generic assay data count items
 */
List<GenericAssayDataCountItem> fetchGenericAssayDataCounts(List<String> sampleIds, List<String> studyIds, List<String> stableIds, List<String> profileTypes);

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.apache.commons.collections4.map.MultiKeyMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.util.Pair;
import org.cbioportal.model.*;
import org.cbioportal.model.util.Select;
import org.cbioportal.persistence.AlterationRepository;
Expand All @@ -15,6 +16,7 @@
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@Service
public class StudyViewServiceImpl implements StudyViewService {
Expand All @@ -37,6 +39,12 @@ public class StudyViewServiceImpl implements StudyViewService {
@Autowired
private AlterationRepository alterationRepository;

@Autowired
private GeneService geneService;

@Autowired
private MolecularDataService molecularDataService;

@Override
public List<GenomicDataCount> getGenomicDataCounts(List<String> studyIds, List<String> sampleIds) {
List<MolecularProfileCaseIdentifier> molecularProfileSampleIdentifiers =
Expand Down Expand Up @@ -154,7 +162,7 @@ public List<CopyNumberCountByGene> getCNAAlterationCountByGenes(List<String> stu
throws StudyNotFoundException {
List<MolecularProfileCaseIdentifier> caseIdentifiers =
molecularProfileService.getFirstDiscreteCNAProfileCaseIdentifiers(studyIds, sampleIds);
Select<CNA> cnaTypes = Select.byValues(CNA_TYPES_AMP_AND_HOMDEL);

List<CopyNumberCountByGene> copyNumberCountByGenes = alterationCountService.getSampleCnaGeneCounts(
caseIdentifiers,
Select.all(),
Expand Down Expand Up @@ -186,6 +194,96 @@ public List<CopyNumberCountByGene> getCNAAlterationCountByGenes(List<String> stu
return copyNumberCountByGenes;
}

/**
 * Counts molecular data values per requested gene and profile type.
 *
 * <p>For every (Hugo gene symbol, profile type) pair in {@code genomicDataFilters} the
 * molecular data of the matching profiles is fetched for the given samples, grouped by
 * value, and turned into one {@link GenomicDataCount} per distinct value. Empty and
 * {@code "NA"} values are not grouped; instead a single {@code "NA"} entry is appended
 * when fewer values were counted than there are samples in {@code sampleIds}.
 *
 * <p>A pair is skipped (produces no item) when none of the samples belongs to a study
 * that has a profile of the requested type, or when the gene symbol is not known to the
 * gene service.
 *
 * @param studyIds           study identifier of each sample; index-aligned with {@code sampleIds}
 * @param sampleIds          sample identifiers
 * @param genomicDataFilters pairs of (Hugo gene symbol, profile type) to count
 * @return one item per pair that could be resolved, in the order of {@code genomicDataFilters}
 */
@Override
public List<GenomicDataCountItem> getCNAAlterationCountsByGeneSpecific(List<String> studyIds,
                                                                       List<String> sampleIds,
                                                                       List<Pair<String, String>> genomicDataFilters) {

    List<MolecularProfile> molecularProfiles = molecularProfileService.getMolecularProfilesInStudies(studyIds,
        "SUMMARY");

    Map<String, List<MolecularProfile>> molecularProfileMap = molecularProfileUtil
        .categorizeMolecularProfilesByStableIdSuffixes(molecularProfiles);

    Set<String> hugoGeneSymbols = genomicDataFilters.stream().map(Pair::getKey)
        .collect(Collectors.toSet());

    Map<String, Integer> geneSymbolIdMap = geneService
        .fetchGenes(new ArrayList<>(hugoGeneSymbols), "HUGO_GENE_SYMBOL",
            "SUMMARY")
        .stream().collect(Collectors.toMap(Gene::getHugoGeneSymbol, Gene::getEntrezGeneId));

    return genomicDataFilters
        .stream()
        .flatMap(gdFilter -> {
            String hugoGeneSymbol = gdFilter.getKey();
            String profileType = gdFilter.getValue();

            // A symbol the gene service did not return cannot be queried; skip it
            // instead of failing the whole request with a NullPointerException.
            Integer entrezGeneId = geneSymbolIdMap.get(hugoGeneSymbol);
            if (entrezGeneId == null) {
                return Stream.empty();
            }

            GenomicDataCountItem genomicDataCountItem = new GenomicDataCountItem();
            genomicDataCountItem.setHugoGeneSymbol(hugoGeneSymbol);
            genomicDataCountItem.setProfileType(profileType);

            Map<String, String> studyIdToMolecularProfileIdMap = molecularProfileMap
                .getOrDefault(profileType, new ArrayList<MolecularProfile>()).stream()
                .collect(Collectors.toMap(MolecularProfile::getCancerStudyIdentifier,
                    MolecularProfile::getStableId));

            // Keep only samples whose study has a profile of the requested type,
            // building parallel (profile id, sample id) lists for the data query.
            List<String> mappedSampleIds = new ArrayList<>();
            List<String> mappedProfileIds = new ArrayList<>();

            for (int i = 0; i < sampleIds.size(); i++) {
                String molecularProfileId = studyIdToMolecularProfileIdMap.get(studyIds.get(i));
                if (molecularProfileId != null) {
                    mappedSampleIds.add(sampleIds.get(i));
                    mappedProfileIds.add(molecularProfileId);
                }
            }

            if (mappedSampleIds.isEmpty()) {
                return Stream.empty();
            }

            List<GeneMolecularData> geneMolecularDataList = molecularDataService
                .getMolecularDataInMultipleMolecularProfiles(mappedProfileIds, mappedSampleIds,
                    Collections.singletonList(entrezGeneId), "SUMMARY");

            List<GenomicDataCount> genomicDataCounts = geneMolecularDataList
                .stream()
                .filter(g -> StringUtils.isNotEmpty(g.getValue()) && !g.getValue().equals("NA"))
                .collect(Collectors.groupingBy(GeneMolecularData::getValue))
                .entrySet()
                .stream()
                .map(entry -> {
                    // NOTE(review): assumes every remaining value is an integer CNA code
                    // known to CNA.getByCode - confirm for non-discrete profile types.
                    Integer alteration = Integer.valueOf(entry.getKey());
                    int count = entry.getValue().size();

                    String label = CNA.getByCode(alteration.shortValue()).getDescription();

                    GenomicDataCount genomicDataCount = new GenomicDataCount();
                    genomicDataCount.setLabel(label);
                    genomicDataCount.setValue(String.valueOf(alteration));
                    genomicDataCount.setCount(count);

                    return genomicDataCount;
                }).collect(Collectors.toList());

            // Every requested sample that did not contribute a counted value is reported as NA.
            int totalCount = genomicDataCounts.stream().mapToInt(GenomicDataCount::getCount).sum();
            int naCount = sampleIds.size() - totalCount;

            if (naCount > 0) {
                GenomicDataCount genomicDataCount = new GenomicDataCount();
                genomicDataCount.setLabel("NA");
                genomicDataCount.setValue("NA");
                genomicDataCount.setCount(naCount);
                genomicDataCounts.add(genomicDataCount);
            }

            genomicDataCountItem.setCounts(genomicDataCounts);
            return Stream.of(genomicDataCountItem);
        }).collect(Collectors.toList());
}

@Override
public List<GenericAssayDataCountItem> fetchGenericAssayDataCounts(List<String> sampleIds, List<String> studyIds,
List<String> stableIds, List<String> profileTypes) {
Expand Down Expand Up @@ -261,5 +359,4 @@ public List<GenericAssayDataCountItem> fetchGenericAssayDataCounts(List<String>
})
.collect(Collectors.toList());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,7 @@ public class BaseServiceImplTest {
public static final String CATEGORY_VALUE_1 = "category_value1";
public static final String CATEGORY_VALUE_2 = "category_value2";
public static final String EMPTY_VALUE_1 = "";
public static final String EMPTY_VALUE_2 = "NA";
public static final String EMPTY_VALUE_2 = "NA";
public static final String PROFILE_TYPE_1 = "profile_type1";
public static final String PROFILE_TYPE_2 = "profile_type2";
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
import org.mockito.Spy;
import org.mockito.junit.MockitoJUnitRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.*;

import static org.mockito.ArgumentMatchers.*;

Expand All @@ -41,6 +38,10 @@ public class StudyViewServiceImplTest extends BaseServiceImplTest {
private SignificantCopyNumberRegionService significantCopyNumberRegionService;
@Mock
private GenericAssayService genericAssayService;
@Mock
private MolecularDataService molecularDataService;
@Mock
private GeneService geneService;
private AlterationFilter alterationFilter = new AlterationFilter();

@Test
Expand Down Expand Up @@ -255,6 +256,79 @@ public void getCNAAlterationCountByGenes() throws Exception {
Assert.assertEquals(1, result.size());
}

/**
 * Two gene/profile-type filters over three samples of one study; the mocked molecular data
 * contains two "-2" and two "2" values, so each resulting item must hold two counts of 2.
 */
@Test
public void getCNAAlterationCountByGeneSpecific() throws Exception {

    // Three samples that all belong to the same study.
    List<String> studyIds = Collections.nCopies(3, BaseServiceImplTest.STUDY_ID);
    List<String> sampleIds = Arrays.asList(
        BaseServiceImplTest.SAMPLE_ID1,
        BaseServiceImplTest.SAMPLE_ID2,
        BaseServiceImplTest.SAMPLE_ID3);

    // One (gene symbol, profile type) filter per gene.
    List<Pair<String, String>> genomicDataFilters = new ArrayList<>();
    genomicDataFilters.add(new Pair<>(BaseServiceImplTest.HUGO_GENE_SYMBOL_1, BaseServiceImplTest.PROFILE_TYPE_1));
    genomicDataFilters.add(new Pair<>(BaseServiceImplTest.HUGO_GENE_SYMBOL_2, BaseServiceImplTest.PROFILE_TYPE_2));

    // A single molecular profile of the study, served for both profile types.
    MolecularProfile studyProfile = new MolecularProfile();
    studyProfile.setCancerStudyIdentifier(BaseServiceImplTest.STUDY_ID);
    studyProfile.setStableId(BaseServiceImplTest.STUDY_ID + "_" + BaseServiceImplTest.MOLECULAR_PROFILE_ID_A);
    List<MolecularProfile> profiles = new ArrayList<>();
    profiles.add(studyProfile);

    Mockito.when(molecularProfileService.getMolecularProfilesInStudies(studyIds, "SUMMARY"))
        .thenReturn(profiles);

    Map<String, List<MolecularProfile>> profilesByType = new HashMap<>();
    for (String profileType : Arrays.asList(BaseServiceImplTest.PROFILE_TYPE_1, BaseServiceImplTest.PROFILE_TYPE_2)) {
        profilesByType.put(profileType, profiles);
    }

    Mockito.when(molecularProfileUtil.categorizeMolecularProfilesByStableIdSuffixes(profiles))
        .thenReturn(profilesByType);

    // Both requested genes are known to the gene service.
    Gene firstGene = new Gene();
    firstGene.setEntrezGeneId(BaseServiceImplTest.ENTREZ_GENE_ID_1);
    firstGene.setHugoGeneSymbol(BaseServiceImplTest.HUGO_GENE_SYMBOL_1);
    Gene secondGene = new Gene();
    secondGene.setEntrezGeneId(BaseServiceImplTest.ENTREZ_GENE_ID_2);
    secondGene.setHugoGeneSymbol(BaseServiceImplTest.HUGO_GENE_SYMBOL_2);
    List<Gene> knownGenes = new ArrayList<>();
    knownGenes.add(firstGene);
    knownGenes.add(secondGene);

    Mockito.when(geneService.fetchGenes(anyList(), anyString(), anyString()))
        .thenReturn(knownGenes);

    // Molecular data returned for every query: two "-2" values followed by two "2" values.
    List<GeneMolecularData> cnaData = new ArrayList<>();
    for (String cnaValue : Arrays.asList("-2", "-2", "2", "2")) {
        GeneMolecularData datum = new GeneMolecularData();
        datum.setValue(cnaValue);
        cnaData.add(datum);
    }

    Mockito.when(molecularDataService.getMolecularDataInMultipleMolecularProfiles(
            anyList(),
            anyList(),
            anyList(),
            anyString()))
        .thenReturn(cnaData);

    List<GenomicDataCountItem> result = studyViewService.getCNAAlterationCountsByGeneSpecific(studyIds, sampleIds, genomicDataFilters);

    Assert.assertEquals(2, result.size());

    GenomicDataCountItem firstItem = result.get(0);
    Assert.assertEquals(BaseServiceImplTest.HUGO_GENE_SYMBOL_1, firstItem.getHugoGeneSymbol());
    Assert.assertEquals(BaseServiceImplTest.PROFILE_TYPE_1, firstItem.getProfileType());
    Assert.assertEquals(2, firstItem.getCounts().get(0).getCount().intValue());
    Assert.assertEquals(2, firstItem.getCounts().get(1).getCount().intValue());

    GenomicDataCountItem secondItem = result.get(1);
    Assert.assertEquals(BaseServiceImplTest.HUGO_GENE_SYMBOL_2, secondItem.getHugoGeneSymbol());
    Assert.assertEquals(BaseServiceImplTest.PROFILE_TYPE_2, secondItem.getProfileType());
    Assert.assertEquals(2, secondItem.getCounts().get(0).getCount().intValue());
    Assert.assertEquals(2, secondItem.getCounts().get(1).getCount().intValue());
}

@Test
public void fetchGenericAssayDataCounts() throws Exception {

Expand Down
43 changes: 42 additions & 1 deletion web/src/main/java/org/cbioportal/web/StudyViewController.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
import org.apache.commons.math3.stat.correlation.SpearmansCorrelation;
import org.apache.commons.math3.util.Pair;
import org.cbioportal.model.*;
import org.cbioportal.service.*;
import org.cbioportal.service.exception.StudyNotFoundException;
import org.cbioportal.service.util.ClinicalAttributeUtil;
import org.cbioportal.web.config.annotation.InternalApi;
import org.cbioportal.model.AlterationFilter;
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.parameter.sort.ClinicalDataSortBy;
import org.cbioportal.web.util.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
Expand Down Expand Up @@ -861,6 +861,47 @@ public ResponseEntity<List<GenomicDataBin>> fetchGenomicDataBinCounts(
return new ResponseEntity<>(studyViewFilterApplier.getDataBins(dataBinMethod, interceptedGenomicDataBinCountFilter), HttpStatus.OK);
}

/**
 * Returns the genomic data counts for every gene/profile-type filter of the intercepted
 * request, computed over the samples selected by its study view filter.
 */
@PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection<CancerStudyId>', T(org.cbioportal.utils.security.AccessLevel).READ)")
@RequestMapping(value = "/genomic-data-counts/fetch", method = RequestMethod.POST,
    consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
@ApiOperation("Fetch genomic data counts by GenomicDataCountFilter")
public ResponseEntity<List<GenomicDataCountItem>> fetchGenomicDataCounts(
    @ApiParam(required = true, value = "Genomic data count filter") @Valid @RequestBody(required = false) GenomicDataCountFilter genomicDataCountFilter,
    @ApiIgnore // prevent reference to this attribute in the swagger-ui interface
    @RequestAttribute(required = false, value = "involvedCancerStudies") Collection<String> involvedCancerStudies,
    @ApiParam(required = true, value = "Intercepted Genomic Data Count Filter")
    @ApiIgnore
    @Valid @RequestAttribute(required = false, value = "interceptedGenomicDataCountFilter") GenomicDataCountFilter interceptedGenomicDataCountFilter
) throws StudyNotFoundException {
    StudyViewFilter studyViewFilter = interceptedGenomicDataCountFilter.getStudyViewFilter();
    List<GenomicDataFilter> gdFilters = interceptedGenomicDataCountFilter.getGenomicDataFilters();

    // A single filter means the study view is filtering on one chart. That chart's own
    // filter is dropped from the study view filter so the response still contains the
    // counts of the unselected portion of the chart.
    boolean singleChartFilter = gdFilters.size() == 1;
    if (singleChartFilter) {
        GenomicDataFilter onlyFilter = gdFilters.get(0);
        studyViewFilterUtil.removeSelfFromGenomicDataFilter(
            onlyFilter.getHugoGeneSymbol(),
            onlyFilter.getProfileType(),
            studyViewFilter);
    }

    List<SampleIdentifier> filteredSampleIdentifiers = studyViewFilterApplier.apply(studyViewFilter);
    if (filteredSampleIdentifiers.isEmpty()) {
        // No sample is selected: answer with an empty list.
        return new ResponseEntity<>(new ArrayList<>(), HttpStatus.OK);
    }

    List<String> studyIds = new ArrayList<>();
    List<String> sampleIds = new ArrayList<>();
    studyViewFilterUtil.extractStudyAndSampleIds(filteredSampleIdentifiers, studyIds, sampleIds);

    // The service takes (gene symbol, profile type) pairs rather than web-layer filter objects.
    List<Pair<String, String>> geneProfilePairs = new ArrayList<>();
    for (GenomicDataFilter gdFilter : gdFilters) {
        geneProfilePairs.add(new Pair<>(gdFilter.getHugoGeneSymbol(), gdFilter.getProfileType()));
    }

    List<GenomicDataCountItem> result =
        studyViewService.getCNAAlterationCountsByGeneSpecific(studyIds, sampleIds, geneProfilePairs);

    return new ResponseEntity<>(result, HttpStatus.OK);
}

@PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection<CancerStudyId>', T(org.cbioportal.utils.security.AccessLevel).READ)")
@RequestMapping(value = "/generic-assay-data-counts/fetch", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE)
@ApiOperation("Fetch generic assay data counts by study view filter")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package org.cbioportal.web.parameter;

import java.io.Serializable;
import java.util.List;

/**
 * Request payload for the genomic data counts endpoint: the genomic data filters to count
 * for, together with the study view filter that selects the samples.
 */
public class GenomicDataCountFilter implements Serializable {

    /** The genomic data filters for which counts are requested. */
    private List<GenomicDataFilter> genomicDataFilters;

    /** The study view filter sent along with the request. */
    private StudyViewFilter studyViewFilter;

    /** @return the stored genomic data filters (the same list instance that was set, not a copy) */
    public List<GenomicDataFilter> getGenomicDataFilters() {
        return genomicDataFilters;
    }

    /** @param genomicDataFilters the genomic data filters to store (kept by reference) */
    public void setGenomicDataFilters(List<GenomicDataFilter> genomicDataFilters) {
        this.genomicDataFilters = genomicDataFilters;
    }

    /** @return the study view filter */
    public StudyViewFilter getStudyViewFilter() {
        return studyViewFilter;
    }

    /** @param studyViewFilter the study view filter to store */
    public void setStudyViewFilter(StudyViewFilter studyViewFilter) {
        this.studyViewFilter = studyViewFilter;
    }
}
Loading

0 comments on commit ca2bdff

Please sign in to comment.