Skip to content

Commit

Permalink
move custom data binning logic to binner and util class
Browse files Browse the repository at this point in the history
  • Loading branch information
Bryan Lai committed Sep 4, 2024
1 parent ef2ef4f commit 451ab90
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 53 deletions.
100 changes: 98 additions & 2 deletions src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java
Original file line number Diff line number Diff line change
@@ -1,29 +1,42 @@
package org.cbioportal.web.columnar;

import org.cbioportal.model.*;
import org.cbioportal.service.CustomDataService;
import org.cbioportal.service.StudyViewColumnarService;
import org.cbioportal.service.util.CustomDataSession;
import org.cbioportal.web.columnar.util.CustomDataFilterUtil;
import org.cbioportal.web.columnar.util.NewClinicalDataBinUtil;
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.util.DataBinner;
import org.cbioportal.web.util.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.stereotype.Component;

import java.util.*;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.toMap;

@Component
public class ClinicalDataBinner {
private final StudyViewColumnarService studyViewColumnarService;
private final DataBinner dataBinner;
private final CustomDataService customDataService;

@Autowired
private StudyViewFilterUtil studyViewFilterUtil;
@Autowired
private CustomDataFilterUtil customDataFilterUtil;

@Autowired
public ClinicalDataBinner(
StudyViewColumnarService studyViewColumnarService,
DataBinner dataBinner
DataBinner dataBinner,
CustomDataService customDataService
) {
this.studyViewColumnarService = studyViewColumnarService;
this.dataBinner = dataBinner;
this.customDataService = customDataService;
}

private List<ClinicalData> convertCountsToData(List<ClinicalDataCount> clinicalDataCounts)
Expand Down Expand Up @@ -111,4 +124,87 @@ public List<ClinicalDataBin> fetchClinicalDataBinCounts(

return clinicalDataBins;
}

/**
 * Computes data bins (with per-bin counts) for the custom data attributes named in the given filter.
 *
 * <p>For {@link DataBinMethod#STATIC} the bins are generated from the data selected by a partial filter
 * (study ids and sample identifiers only) and the counts are taken from the fully filtered data.
 * For any other bin method the bins are generated directly from the fully filtered data.
 *
 * @param dataBinMethod              binning strategy; anything other than {@code STATIC} is handled as dynamic
 * @param dataBinCountFilter         carries the attributes to bin and the study view filter to apply
 * @param shouldRemoveSelfFromFilter when {@code true}, the study view filter is first passed through
 *                                   {@link NewClinicalDataBinUtil#removeSelfCustomDataFromFilter}
 * @return the calculated bins, or an empty list when the data set used for bin generation is empty
 */
@Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()")
public List<ClinicalDataBin> fetchCustomDataBinCounts(
    DataBinMethod dataBinMethod,
    ClinicalDataBinCountFilter dataBinCountFilter,
    boolean shouldRemoveSelfFromFilter
) {
    List<ClinicalDataBinFilter> attributes = dataBinCountFilter.getAttributes();
    StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();

    if (shouldRemoveSelfFromFilter) {
        studyViewFilter = NewClinicalDataBinUtil.removeSelfCustomDataFromFilter(dataBinCountFilter);
    }

    List<String> attributeIds = attributes.stream()
        .map(ClinicalDataBinFilter::getAttributeId)
        .collect(Collectors.toList());

    // A second StudyView filter that restricts by study and sample ids only.
    // It describes the "initial state": bins are generated from it so that the number of bins and
    // the bin ranges stay consistent when additional data filters are applied. Additional filters
    // must only change the count in each bin, never the bins themselves.
    // NOTE: the partial filter is only needed for DataBinMethod.STATIC, but that is always the case
    // for the frontend implementation. DataBinMethod.DYNAMIC is not really usable there because of
    // the complication it brings to frontend visualization and filtering.
    StudyViewFilter partialFilter = new StudyViewFilter();
    partialFilter.setStudyIds(studyViewFilter.getStudyIds());
    partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers());

    Map<String, CustomDataSession> customDataSessions = customDataService.getCustomDataSessions(attributeIds);
    Map<String, ClinicalDataType> dataTypeByAttributeId = customDataSessions.entrySet().stream()
        .collect(toMap(Map.Entry::getKey, NewClinicalDataBinUtil::getDataType));

    // Unfiltered data (partial filter) drives bin generation for the initial state;
    // filtered data is only used to calculate the counts for each bin.
    List<SampleIdentifier> unfilteredSampleIdentifiers = resolveCustomDataSampleIdentifiers(partialFilter);
    List<ClinicalDataCountItem> unfilteredCustomDataCounts =
        customDataFilterUtil.getCustomDataCounts(unfilteredSampleIdentifiers, customDataSessions);
    List<SampleIdentifier> filteredSampleIdentifiers = resolveCustomDataSampleIdentifiers(studyViewFilter);
    List<ClinicalDataCountItem> filteredCustomDataCounts =
        customDataFilterUtil.getCustomDataCounts(filteredSampleIdentifiers, customDataSessions);

    // TODO ignoring conflictingPatientAttributeIds for now
    List<ClinicalData> unfilteredCustomData = flattenCustomDataCounts(unfilteredCustomDataCounts);
    List<ClinicalData> filteredCustomData = flattenCustomDataCounts(filteredCustomDataCounts);

    Map<String, List<Binnable>> unfilteredClinicalDataByAttributeId =
        unfilteredCustomData.stream().collect(Collectors.groupingBy(Binnable::getAttrId));

    Map<String, List<Binnable>> filteredClinicalDataByAttributeId =
        filteredCustomData.stream().collect(Collectors.groupingBy(Binnable::getAttrId));

    List<ClinicalDataBin> clinicalDataBins = Collections.emptyList();

    if (dataBinMethod == DataBinMethod.STATIC) {
        if (!unfilteredCustomData.isEmpty()) {
            clinicalDataBins = NewClinicalDataBinUtil.calculateStaticDataBins(
                dataBinner,
                attributes,
                dataTypeByAttributeId,
                unfilteredClinicalDataByAttributeId,
                filteredClinicalDataByAttributeId
            );
        }
    }
    else { // dataBinMethod == DataBinMethod.DYNAMIC
        // TODO we should consider removing dynamic binning support
        // Dynamic binning is never used by the frontend: the number of bins and the bin ranges can
        // change each time a new filter is applied, which complicates the frontend implementation.
        if (!filteredCustomData.isEmpty()) {
            clinicalDataBins = NewClinicalDataBinUtil.calculateDynamicDataBins(
                dataBinner,
                attributes,
                dataTypeByAttributeId,
                filteredClinicalDataByAttributeId
            );
        }
    }

    return clinicalDataBins;
}

/** Fetches the samples matching the given filter and converts each one to a {@link SampleIdentifier}. */
private List<SampleIdentifier> resolveCustomDataSampleIdentifiers(StudyViewFilter studyViewFilter) {
    return studyViewColumnarService.getFilteredSamples(studyViewFilter).stream()
        .map(sample -> studyViewFilterUtil.buildSampleIdentifier(sample.getCancerStudyIdentifier(), sample.getStableId()))
        .collect(Collectors.toList());
}

/** Flattens the counts of all given count items into one list and converts it via {@code convertCountsToData}. */
private List<ClinicalData> flattenCustomDataCounts(List<ClinicalDataCountItem> customDataCounts) {
    return convertCountsToData(
        customDataCounts.stream().flatMap(countItem -> countItem.getCounts().stream()).toList()
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import jakarta.validation.Valid;
import org.apache.commons.collections4.CollectionUtils;
import org.cbioportal.model.*;
import org.cbioportal.service.*;
import org.cbioportal.service.exception.StudyNotFoundException;
Expand All @@ -17,7 +16,6 @@
import org.cbioportal.web.columnar.util.NewStudyViewFilterUtil;
import org.cbioportal.web.config.annotation.InternalApi;
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.util.ClinicalDataBinUtil;
import org.cbioportal.web.util.DensityPlotParameters;
import org.cbioportal.web.util.StudyViewFilterUtil;
import org.springframework.beans.factory.annotation.Autowired;
Expand All @@ -36,7 +34,6 @@

import java.math.BigDecimal;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;

@InternalApi
Expand All @@ -50,27 +47,24 @@ public class StudyViewColumnStoreController {
private final ClinicalDataDensityPlotService clinicalDataDensityPlotService;
private final ViolinPlotService violinPlotService;
private final CustomDataService customDataService;
private final PatientService patientService;

@Autowired
private StudyViewFilterUtil studyViewFilterUtil;
@Autowired
private ClinicalDataBinUtil clinicalDataBinUtil;
private CustomDataFilterUtil customDataFilterUtil;

@Autowired
public StudyViewColumnStoreController(StudyViewColumnarService studyViewColumnarService,
ClinicalDataBinner clinicalDataBinner,
ClinicalDataDensityPlotService clinicalDataDensityPlotService,
ViolinPlotService violinPlotService,
CustomDataService customDataService,
PatientService patientService
CustomDataService customDataService
) {
this.studyViewColumnarService = studyViewColumnarService;
this.clinicalDataBinner = clinicalDataBinner;
this.clinicalDataDensityPlotService = clinicalDataDensityPlotService;
this.violinPlotService = violinPlotService;
this.customDataService = customDataService;
this.patientService = patientService;
}


Expand Down Expand Up @@ -506,26 +500,8 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchCustomDataCounts(

final List<String> attributeIds = attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList());
Map<String, CustomDataSession> customDataSessionsMap = customDataService.getCustomDataSessions(attributeIds);

Map<String, SampleIdentifier> customSamplesMap = filteredSampleIdentifiers.stream()
.collect(Collectors.toMap(sampleIdentifier -> studyViewFilterUtil.getCaseUniqueKey(
sampleIdentifier.getStudyId(),
sampleIdentifier.getSampleId()
), Function.identity()));

List<String> studyIds = new ArrayList<>();
List<String> sampleIds = new ArrayList<>();
studyViewFilterUtil.extractStudyAndSampleIds(filteredSampleIdentifiers, studyIds, sampleIds);

long patientCustomDataSessionsCount = customDataSessionsMap.values().stream()
.filter(customDataSession -> customDataSession.getData().getPatientAttribute()).count();
List<Patient> patients = new ArrayList<>();
if (patientCustomDataSessionsCount > 0) {
patients.addAll(patientService.getPatientsOfSamples(studyIds, sampleIds));
}

List<ClinicalDataCountItem> result = studyViewFilterUtil.getClinicalDataCountsFromCustomData(customDataSessionsMap.values(),
customSamplesMap, patients);

List<ClinicalDataCountItem> result = customDataFilterUtil.getCustomDataCounts(filteredSampleIdentifiers, customDataSessionsMap);

return new ResponseEntity<>(result, HttpStatus.OK);
}
Expand All @@ -546,20 +522,12 @@ public ResponseEntity<List<ClinicalDataBin>> fetchCustomDataBinCounts(
@Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above.
@Valid @RequestAttribute(required = false, value = "interceptedClinicalDataBinCountFilter") ClinicalDataBinCountFilter interceptedClinicalDataBinCountFilter
) {
// TODO code shared with ClinicalDataController.fetchCustomDataCounts
List<ClinicalDataBinFilter> attributes = interceptedClinicalDataBinCountFilter.getAttributes();
StudyViewFilter studyViewFilter = interceptedClinicalDataBinCountFilter.getStudyViewFilter();
if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfCustomDataFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
}
List<SampleIdentifier> filteredSampleIdentifiers = studyViewColumnarService.getFilteredSamples(studyViewFilter).stream().map(sample -> studyViewFilterUtil.buildSampleIdentifier(sample.getCancerStudyIdentifier(), sample.getStableId())).collect(Collectors.toList());

if (filteredSampleIdentifiers.isEmpty()) {
return new ResponseEntity<>(new ArrayList<>(), HttpStatus.OK);
}

final List<ClinicalDataBin> clinicalDataBins = clinicalDataBinUtil.fetchCustomDataBinCounts(dataBinMethod, interceptedClinicalDataBinCountFilter, false);
List<ClinicalDataBin> clinicalDataBinsTest = clinicalDataBinner.fetchCustomDataBinCounts(
dataBinMethod,
interceptedClinicalDataBinCountFilter,
true
);

return new ResponseEntity<>(clinicalDataBins, HttpStatus.OK);
return new ResponseEntity<>(clinicalDataBinsTest, HttpStatus.OK);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import com.google.common.collect.Range;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.map.MultiKeyMap;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.Patient;
import org.cbioportal.service.CustomDataService;
import org.cbioportal.service.PatientService;
import org.cbioportal.service.util.CustomDataSession;
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.util.CustomDatatype;
Expand All @@ -24,11 +27,13 @@
public class CustomDataFilterUtil {
private final StudyViewFilterUtil studyViewFilterUtil;
private final CustomDataService customDataService;
private final PatientService patientService;

@Autowired
public CustomDataFilterUtil(StudyViewFilterUtil studyViewFilterUtil, CustomDataService customDataService) {
public CustomDataFilterUtil(StudyViewFilterUtil studyViewFilterUtil, CustomDataService customDataService, PatientService patientService) {
this.studyViewFilterUtil = studyViewFilterUtil;
this.customDataService = customDataService;
this.patientService = patientService;
}

public List<CustomSampleIdentifier> extractCustomDataSamples(final StudyViewFilter studyViewFilter) {
Expand Down Expand Up @@ -216,4 +221,27 @@ private Boolean containsNA(ClinicalDataFilter filter) {
return filter.getValues().stream().anyMatch(
r -> r.getValue() != null && r.getValue().toUpperCase().equals("NA"));
}

/**
 * Computes clinical data counts from custom data sessions for the given samples.
 *
 * <p>Patients are only looked up when at least one session is flagged as a patient attribute;
 * otherwise an empty patient list is passed on.
 *
 * @param filteredSampleIdentifiers samples to count over; each must map to a distinct case unique key,
 *                                  since duplicates make {@code Collectors.toMap} throw
 *                                  {@link IllegalStateException}
 * @param customDataSessions        custom data sessions whose values are counted
 * @return the count items produced by {@code StudyViewFilterUtil.getClinicalDataCountsFromCustomData}
 */
public List<ClinicalDataCountItem> getCustomDataCounts(List<SampleIdentifier> filteredSampleIdentifiers, Map<String, CustomDataSession> customDataSessions) {
    Map<String, SampleIdentifier> customSamplesMap = filteredSampleIdentifiers.stream()
        .collect(Collectors.toMap(sampleIdentifier -> studyViewFilterUtil.getCaseUniqueKey(
            sampleIdentifier.getStudyId(),
            sampleIdentifier.getSampleId()
        ), Function.identity()));

    List<String> studyIds = new ArrayList<>();
    List<String> sampleIds = new ArrayList<>();
    studyViewFilterUtil.extractStudyAndSampleIds(filteredSampleIdentifiers, studyIds, sampleIds);

    // Only existence matters here, so stop at the first patient-level session instead of counting all.
    boolean hasPatientLevelSession = customDataSessions.values().stream()
        .anyMatch(customDataSession -> customDataSession.getData().getPatientAttribute());

    List<Patient> patients = new ArrayList<>();
    if (hasPatientLevelSession) {
        patients.addAll(patientService.getPatientsOfSamples(studyIds, sampleIds));
    }

    return studyViewFilterUtil.getClinicalDataCountsFromCustomData(customDataSessions.values(),
        customSamplesMap, patients);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.cbioportal.model.ClinicalDataBin;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.DataBin;
import org.cbioportal.service.util.CustomDataSession;
import org.cbioportal.web.parameter.ClinicalDataBinCountFilter;
import org.cbioportal.web.parameter.ClinicalDataBinFilter;
import org.cbioportal.web.parameter.ClinicalDataType;
Expand All @@ -30,6 +31,17 @@ public static StudyViewFilter removeSelfFromFilter(ClinicalDataBinCountFilter da
return studyViewFilter;
}

/**
 * Returns the study view filter of the given bin count filter. When exactly one attribute is
 * requested, that attribute id and the filter are first handed to
 * {@code NewStudyViewFilterUtil.removeSelfCustomDataFromFilter}.
 *
 * @param dataBinCountFilter source of the requested attributes and the study view filter
 * @return the study view filter obtained from {@code dataBinCountFilter}
 */
public static StudyViewFilter removeSelfCustomDataFromFilter(ClinicalDataBinCountFilter dataBinCountFilter) {
    List<ClinicalDataBinFilter> requestedAttributes = dataBinCountFilter.getAttributes();
    StudyViewFilter filter = dataBinCountFilter.getStudyViewFilter();

    // Self-removal is only applied for a single requested attribute.
    if (requestedAttributes.size() != 1) {
        return filter;
    }

    String selfAttributeId = requestedAttributes.get(0).getAttributeId();
    NewStudyViewFilterUtil.removeSelfCustomDataFromFilter(selfAttributeId, filter);
    return filter;
}

public static ClinicalDataBin dataBinToClinicalDataBin(ClinicalDataBinFilter attribute, DataBin dataBin) {
ClinicalDataBin clinicalDataBin = new ClinicalDataBin();
clinicalDataBin.setAttributeId(attribute.getAttributeId());
Expand Down Expand Up @@ -148,4 +160,8 @@ public static List<ClinicalData> generateClinicalDataFromClinicalDataCount(Clini

return data;
}

/**
 * Derives the clinical data type of a custom data session entry.
 *
 * @param entry map entry whose value is the custom data session to inspect
 * @return {@code PATIENT} when the session data is flagged as a patient attribute, otherwise {@code SAMPLE}
 */
public static ClinicalDataType getDataType(Map.Entry<String, CustomDataSession> entry) {
    CustomDataSession session = entry.getValue();
    if (session.getData().getPatientAttribute()) {
        return ClinicalDataType.PATIENT;
    }
    return ClinicalDataType.SAMPLE;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ public List<ClinicalDataCountItem> getClinicalDataCountsFromCustomData(Collectio

}).distinct().count();
ClinicalDataCount dataCount = new ClinicalDataCount();
dataCount.setAttributeId(customDataSession.getId());
dataCount.setValue(entry.getKey());
dataCount.setCount(Math.toIntExact(count));
return dataCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import org.cbioportal.persistence.helper.AlterationFilterHelper;
import org.cbioportal.persistence.helper.StudyViewFilterHelper;
import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig;
import org.cbioportal.web.parameter.DataFilter;
import org.cbioportal.web.parameter.DataFilterValue;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.cbioportal.web.parameter.*;
import org.cbioportal.web.parameter.filter.AndedPatientTreatmentFilters;
import org.cbioportal.web.parameter.filter.AndedSampleTreatmentFilters;
import org.cbioportal.web.parameter.filter.OredPatientTreatmentFilters;
Expand All @@ -25,11 +23,7 @@
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.*;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
Expand All @@ -55,6 +49,21 @@ public void getFilteredSamples() {
studyViewFilter.setStudyIds(Arrays.asList(STUDY_TCGA_PUB, STUDY_ACC_TCGA));
var filteredSamples = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, null));
assertEquals(19, filteredSamples.size());

ClinicalDataFilter customDataFilter = new ClinicalDataFilter();
customDataFilter.setAttributeId("123");
DataFilterValue value = new DataFilterValue();
customDataFilter.setValues(List.of(value));
studyViewFilter.setCustomDataFilters(List.of(customDataFilter));
var filteredSamples1 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, new ArrayList<>()));
assertEquals(0, filteredSamples1.size());

CustomSampleIdentifier customSampleIdentifier = new CustomSampleIdentifier();
customSampleIdentifier.setAttributeId("123");
customSampleIdentifier.setStudyId("acc_tcga");
customSampleIdentifier.setSampleId("tcga-a1-a0sb-01");
var filteredSamples2 = studyViewMapper.getFilteredSamples(StudyViewFilterHelper.build(studyViewFilter, null, Arrays.asList(customSampleIdentifier)));
assertEquals(1, filteredSamples2.size());
}

@Test
Expand Down

0 comments on commit 451ab90

Please sign in to comment.