Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clickhouse implementation of study view #10304

Closed
wants to merge 29 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ad55466
WIP
pvannierop Mar 15, 2023
f4546d7
Move StudyViewFilter to webparams modules
pvannierop Mar 16, 2023
aa216b8
Wip
pvannierop Mar 17, 2023
916fb71
Finalize second datasource
pvannierop Mar 20, 2023
6cf4d7c
Fix clickhouse datasource
pvannierop Mar 26, 2023
b998a6c
Wip
pvannierop Mar 26, 2023
0d0ecac
Wip
pvannierop Mar 30, 2023
a126eaa
Working version of study view filter
pvannierop Apr 1, 2023
3b85a99
Add mutated-genes via clickhouse
pvannierop Apr 3, 2023
6e103e8
Fix ClinicalEventMapper.xml
pvannierop Apr 3, 2023
2814099
Add comments
pvannierop May 9, 2023
e1e576e
Deprecate need for connection to MySQL for clickhouse
pvannierop May 10, 2023
4221dfa
Fix typo
pvannierop May 10, 2023
2c2e7aa
Fix mapper
pvannierop May 10, 2023
00ef061
Externalize clickhouse connection string config
pvannierop May 10, 2023
f1efaf9
make clickhouse db params configurable
inodb May 15, 2023
5848c47
Sample Retrieval
JREastonMarks May 17, 2023
c1e40be
Use precomputed base64 encoded unique identifiers
pvannierop May 23, 2023
a89cc82
Revert "Use precomputed base64 encoded unique identifiers"
inodb May 25, 2023
fede26d
improved clickhouse specific example configuration
onursumer May 31, 2023
0bf78c9
use base64 encoded columns for unique sample and patient keys
onursumer Jun 5, 2023
60a5146
add mock data and config for clickhouse testing on filtered samples a…
onursumer Jun 26, 2023
6b5a0a2
Revert original Study View endpoints (#10216)
onursumer Jun 27, 2023
047cb26
Extract column store endpoints into StudyViewColumnStoreController (#…
haynescd Jun 28, 2023
18819b1
add tests for clinical data filters (#10249)
onursumer Jun 29, 2023
2e3d886
Feature/add clinical data counts endpoint (#10262)
alisman Jul 10, 2023
6987580
add new clickhouse clinical data bin count end point (#10265)
onursumer Jul 12, 2023
e0e441d
Fix test to add Clinical Data
haynescd Jul 17, 2023
ccdb8eb
Fix GeneFilters
haynescd Jul 18, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.mskcc.cbio.portal.dao;

import org.apache.commons.dbcp2.BasicDataSource;
import org.mskcc.cbio.portal.util.GlobalProperties;

/**
 * DBCP2 {@link BasicDataSource} preconfigured for the ClickHouse JDBC driver.
 *
 * <p>The connection string, user name and password are looked up through
 * {@link GlobalProperties} at construction time using the
 * {@code db.clickhouse.*} property keys.
 */
public class ClickhouseJdbcDataSource extends BasicDataSource {

    private static final String DRIVER_CLASS_NAME = "com.clickhouse.jdbc.ClickHouseDriver";
    private static final String JMX_NAME = "org.cbioportal:DataSource=clickhouse";

    /** Creates the pool configuration from the {@code db.clickhouse.*} properties. */
    public ClickhouseJdbcDataSource() {
        // Static settings first, then the deployment-specific ones.
        setDriverClassName(DRIVER_CLASS_NAME);
        setJmxName(JMX_NAME);
        setUrl(GlobalProperties.getProperty("db.clickhouse.connection_string"));
        setUsername(GlobalProperties.getProperty("db.clickhouse.user"));
        setPassword(GlobalProperties.getProperty("db.clickhouse.password"));
    }
}
2 changes: 1 addition & 1 deletion core/src/test/resources/applicationContext-dao.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@

<!-- scan for mappers and let them be autowired -->
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<property name="basePackage" value="org.mskcc.cbio.portal.persistence,org.cbioportal.persistence.mybatis" />
<property name="basePackage" value="org.mskcc.cbio.portal.persistence,org.cbioportal.persistence.mybatis,org.cbioportal.persistence.mybatiscolumnstore" />
</bean>

<!-- enable component scanning (beware that this does not enable mapper scanning!) -->
Expand Down
2 changes: 1 addition & 1 deletion docs/user-guide/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ filterJson is set in the url hash string. Here are the allowed parameters and fo
"value": "string"
}]
}],
"geneFilters": [{
"studyViewGeneFilters": [{
"geneQueries": [
["string"]
],
Expand Down
8 changes: 8 additions & 0 deletions persistence/persistence-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
<groupId>org.mskcc.cbio</groupId>
<artifactId>model</artifactId>
</dependency>
<dependency>
<groupId>org.mskcc.cbio</groupId>
<artifactId>webparam</artifactId>
</dependency>
<dependency>
<groupId>org.mskcc.cbio</groupId>
<artifactId>utils</artifactId>
Expand All @@ -37,6 +41,10 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<!-- NOTE(review): org.mskcc.cbio:webparam is also declared earlier in this pom's dependency list; -->
<!-- if both entries live in the same <dependencies> section, one of them is redundant - confirm and drop it -->
<dependency>
<groupId>org.mskcc.cbio</groupId>
<artifactId>webparam</artifactId>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.DiscreteCopyNumberData;
import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.model.meta.BaseMeta;
import org.cbioportal.webparam.GeneFilterQuery;
import org.springframework.cache.annotation.Cacheable;

import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package org.cbioportal.persistence;

import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.model.Mutation;
import org.cbioportal.model.MutationCountByPosition;
import org.cbioportal.model.meta.MutationMeta;
import org.cbioportal.webparam.GeneFilterQuery;
import org.springframework.cache.annotation.Cacheable;

import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import java.util.List;

import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.model.StructuralVariant;
import org.cbioportal.model.StructuralVariantQuery;
import org.cbioportal.webparam.GeneFilterQuery;
import org.springframework.cache.annotation.Cacheable;

public interface StructuralVariantRepository {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.cbioportal.persistence;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.Sample;
import org.cbioportal.persistence.enums.ClinicalAttributeDataSource;
import org.cbioportal.persistence.enums.ClinicalAttributeDataType;
import org.cbioportal.webparam.CategorizedClinicalDataCountFilter;
import org.cbioportal.webparam.StudyViewFilter;

import java.util.List;

/**
 * Repository contract for study view queries that are parameterised by a
 * {@link StudyViewFilter} together with a {@link CategorizedClinicalDataCountFilter}.
 */
public interface StudyViewRepository {
/**
 * Returns the samples selected by the given filters.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter clinical data filters, grouped by category
 * @return the matching samples
 */
List<Sample> getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

/**
 * Returns sample-level clinical data for the given attribute ids, restricted by the filters.
 *
 * @param studyViewFilter the study view filter to apply
 * @param attributeIds clinical attribute ids to fetch
 * @param categorizedClinicalDataCountFilter clinical data filters, grouped by category
 * @return the matching clinical data entries
 */
List<ClinicalData> getSampleClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

/**
 * Returns patient-level clinical data for the given attribute ids, restricted by the filters.
 *
 * @param studyViewFilter the study view filter to apply
 * @param attributeIds clinical attribute ids to fetch
 * @param categorizedClinicalDataCountFilter clinical data filters, grouped by category
 * @return the matching clinical data entries
 */
List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

/**
 * Returns per-gene alteration counts for the filtered selection.
 * NOTE(review): the name suggests only mutations are counted - confirm against the mapper query.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter clinical data filters, grouped by category
 * @return alteration counts by gene
 */
List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter);

/**
 * Returns clinical data counts for the given attributes.
 *
 * @param studyViewFilter the study view filter to apply
 * @param categorizedClinicalDataCountFilter clinical data filters, grouped by category
 * @param filteredAttributes clinical attribute ids to count (the MyBatis implementation
 *        forwards this list to the mapper as {@code attributeIds})
 * @return the clinical data counts
 */
List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List<String> filteredAttributes);

/**
 * Sample-level variant of {@link #getClinicalDataCounts}; takes the same parameters.
 */
List<ClinicalDataCount> getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List<String> filteredAttributes);

/**
 * Patient-level variant of {@link #getClinicalDataCounts}; takes the same parameters.
 */
List<ClinicalDataCount> getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List<String> filteredAttributes);

/**
 * Returns clinical attribute names for one data source / data type combination.
 *
 * @param clinicalAttributeDataSource whether to look at patient or sample attributes
 * @param dataType whether to look at categorical or numeric attributes
 * @return the attribute names
 */
List<String> getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package org.cbioportal.persistence.enums;

/**
 * Source of a clinical attribute. Each constant carries a string form that is
 * exposed through {@link #getValue()}.
 */
public enum ClinicalAttributeDataSource {
    PATIENT("PATIENT"),
    SAMPLE("SAMPLE");

    /** String form of this constant. */
    private final String label;

    ClinicalAttributeDataSource(String label) {
        this.label = label;
    }

    /**
     * @return the string form associated with this constant
     */
    public String getValue() {
        return label;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.cbioportal.persistence.enums;

/**
 * Data type of a clinical attribute. Each constant carries a string form that
 * is exposed through {@link #getValue()}.
 */
public enum ClinicalAttributeDataType {
    CATEGORICAL("CATEGORICAL"),
    NUMERIC("NUMERIC");

    /** String form of this constant. */
    private final String label;

    ClinicalAttributeDataType(String label) {
        this.label = label;
    }

    /**
     * @return the string form associated with this constant
     */
    public String getValue() {
        return label;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

<!-- scan for mappers and let them be autowired -->
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<property name="basePackage" value="org.mskcc.cbio.portal.persistence,org.cbioportal.persistence.mybatis" />
<property name="basePackage" value="org.mskcc.cbio.portal.persistence,org.cbioportal.persistence.mybatis,org.cbioportal.persistence.mybatiscolumnstore" />
</bean>

<!-- enable component scanning (beware that this does not enable mapper scanning!) -->
Expand All @@ -42,11 +42,15 @@
<bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<property name="dataSource" ref="businessDataSource" />
</bean>

<!-- transaction manager bound to columnstoreDataSource, separate from the "transactionManager" bean that wraps businessDataSource -->
<bean id="columnstoreTransactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<property name="dataSource" ref="columnstoreDataSource" />
</bean>

<bean id="customObjectFactory" class="org.cbioportal.persistence.mybatis.util.CustomMyBatisObjectFactory" />

<!-- define the SqlSessionFactory -->
<bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
<bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean" primary="true">
<property name="dataSource" ref="businessDataSource" />
<property name="typeAliasesPackage" value="org.mskcc.cbio.portal.model" />
<property name="typeHandlersPackage" value="org.cbioportal.persistence.mybatis.typehandler" />
Expand All @@ -61,6 +65,19 @@
<!-- having to autowire repositories into java classes that do not make direct use of them -->
<property name="mapperLocations" value="classpath*:org/cbioportal/persistence/mybatis/**/*.xml" />
</bean>
<!-- SqlSessionFactory for columnstoreDataSource; reuses the type alias and type handler packages of the primary sqlSessionFactory -->
<bean id="columnstoreSqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
<property name="dataSource" ref="columnstoreDataSource" />
<property name="typeAliasesPackage" value="org.mskcc.cbio.portal.model" />
<property name="typeHandlersPackage" value="org.cbioportal.persistence.mybatis.typehandler" />
<property name="objectFactory" ref="customObjectFactory" />
<!-- NOTE(review): unlike the primary factory, mapperLocations is NOT set on this factory (the property below is commented out); -->
<!-- confirm that the mapper XML for org.cbioportal.persistence.mybatiscolumnstore is still found at runtime -->
<!-- <property name="mapperLocations" value="classpath*:org/cbioportal/persistence/mybatiscolumnstore/*.xml" />-->
</bean>
<!-- registers StudyViewMapper explicitly against columnstoreSqlSessionFactory -->
<!-- NOTE(review): the MapperScannerConfigurer in this file also lists org.cbioportal.persistence.mybatiscolumnstore in its basePackage; -->
<!-- verify which definition of this mapper wins and that it ends up bound to the columnstore factory -->
<bean id="studyViewMapper" class="org.mybatis.spring.mapper.MapperFactoryBean">
<property name="mapperInterface" value="org.cbioportal.persistence.mybatiscolumnstore.StudyViewMapper" />
<property name="sqlSessionFactory" ref="columnstoreSqlSessionFactory" />
</bean>
</beans>

<beans profile="dbcp">
Expand All @@ -72,6 +89,8 @@
<list><ref bean="businessDataSource"/></list>
</property>
</bean>
<!-- data source referenced by columnstoreSqlSessionFactory and columnstoreTransactionManager; closed on context shutdown -->
<!-- NOTE(review): this bean is declared inside the "dbcp" profile - confirm the "jndi" profile also provides a bean with this id -->
<bean id="columnstoreDataSource" destroy-method="close"
class="org.mskcc.cbio.portal.dao.ClickhouseJdbcDataSource" />
</beans>

<beans profile="jndi">
Expand Down
16 changes: 16 additions & 0 deletions persistence/persistence-mybatis/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@
PROJECT_VERSION env variable (see version.sh) -->
<version>0-unknown-version-SNAPSHOT</version>
</parent>
<!-- compile this module at Java 9 source/target level -->
<!-- NOTE(review): presumably required for Java 9 APIs such as List.of(...) used by the columnstore repository - confirm; -->
<!-- also check that this level matches what the parent pom and the other modules use -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>9</source>
<target>9</target>
</configuration>
</plugin>
</plugins>
</build>
<modelVersion>4.0.0</modelVersion>

<artifactId>persistence-mybatis</artifactId>
Expand Down Expand Up @@ -58,5 +70,9 @@
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
</dependency>
<dependency>
<groupId>org.mskcc.cbio</groupId>
<artifactId>webparam</artifactId>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.DiscreteCopyNumberData;
import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.webparam.GeneFilterQuery;
import org.cbioportal.model.meta.BaseMeta;

import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.DiscreteCopyNumberData;
import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.webparam.GeneFilterQuery;
import org.cbioportal.model.meta.BaseMeta;
import org.cbioportal.persistence.DiscreteCopyNumberRepository;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.cbioportal.persistence.mybatis;

import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.webparam.GeneFilterQuery;
import org.cbioportal.model.Mutation;
import org.cbioportal.model.MutationCountByPosition;
import org.cbioportal.model.meta.MutationMeta;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.cbioportal.persistence.mybatis;

import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.webparam.GeneFilterQuery;
import org.cbioportal.model.Mutation;
import org.cbioportal.model.MutationCountByPosition;
import org.cbioportal.model.meta.MutationMeta;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@

import java.util.List;

import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.model.StructuralVariant;
import org.cbioportal.model.StructuralVariantQuery;
import org.cbioportal.webparam.GeneFilterQuery;

public interface StructuralVariantMapper {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

package org.cbioportal.persistence.mybatis;

import org.cbioportal.model.GeneFilterQuery;
import org.cbioportal.webparam.GeneFilterQuery;
import org.cbioportal.model.StructuralVariant;
import org.cbioportal.model.StructuralVariantQuery;
import org.cbioportal.persistence.StructuralVariantRepository;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.cbioportal.persistence.mybatiscolumnstore;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.Sample;
import org.cbioportal.webparam.CategorizedClinicalDataCountFilter;
import org.cbioportal.webparam.StudyViewFilter;

import java.util.List;

/**
 * MyBatis mapper interface for the study view queries; it is wired to the
 * columnstore session factory in the Spring configuration.
 *
 * <p>Every filter-driven method takes the same leading arguments: the
 * {@link StudyViewFilter}, the {@link CategorizedClinicalDataCountFilter} and an
 * {@code applyPatientIdFilters} flag. {@code StudyViewMyBatisRepository} sets that flag
 * to {@code true} when the categorized filter contains at least one patient-level
 * categorical or numerical clinical data filter.
 */
public interface StudyViewMapper {
/** Returns the samples selected by the given filters. */
List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

/** Returns per-gene alteration counts for the filtered selection. */
List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

/**
 * Returns patient-level clinical data counts for {@code attributeIds}.
 * NOTE(review): {@code filteredAttributeValues} presumably lists values to exclude from the
 * counts (the repository passes "NA", "NAN", "N/A") - confirm against the mapper XML.
 */
List<ClinicalDataCount> getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter,
boolean applyPatientIdFilters, List<String> attributeIds, List<String> filteredAttributeValues);

/** Sample-level counterpart of {@link #getPatientClinicalDataCounts}; takes the same parameters. */
List<ClinicalDataCount> getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter,
boolean applyPatientIdFilters, List<String> attributeIds, List<String> filteredAttributeValues );

/** Returns clinical data counts for {@code attributeIds}; takes the same parameters as the patient/sample variants. */
List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter,
boolean applyPatientIdFilters, List<String> attributeIds, List<String> filteredAttributeValues);

/**
 * Returns the clinical attribute names stored in the given table.
 * The repository builds {@code tableName} as
 * {@code <source>_clinical_attribute_<type>} from two enum values.
 * NOTE(review): if the mapper XML substitutes {@code tableName} textually, it must never
 * receive user-controlled input - confirm.
 */
List<String> getClinicalAttributeNames(String tableName);

/** Returns sample-level clinical data for {@code attributeIds}, restricted by the filters. */
List<ClinicalData> getSampleClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);

/** Returns patient-level clinical data for {@code attributeIds}, restricted by the filters. */
List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List<String> attributeIds);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package org.cbioportal.persistence.mybatiscolumnstore;

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.Sample;
import org.cbioportal.persistence.StudyViewRepository;
import org.cbioportal.persistence.enums.ClinicalAttributeDataSource;
import org.cbioportal.persistence.enums.ClinicalAttributeDataType;
import org.cbioportal.webparam.CategorizedClinicalDataCountFilter;
import org.cbioportal.webparam.StudyViewFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;

import java.util.List;

@Repository
public class StudyViewMyBatisRepository implements StudyViewRepository {

private final List<String> FILTERED_CLINICAL_ATTR_VALUES = List.of("NA", "NAN", "N/A");
@Autowired
private StudyViewMapper studyViewMapper;

@Override
public List<Sample> getFilteredSamplesFromColumnstore(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {

return studyViewMapper.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter));
}

@Override
public List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return studyViewMapper.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter));
}

@Override
public List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List<String> filteredAttributes) {
return studyViewMapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter),
filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES );
}

@Override
public List<ClinicalDataCount> getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List<String> filteredAttributes) {
return studyViewMapper.getSampleClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter),
filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES );
}

@Override
public List<ClinicalDataCount> getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List<String> filteredAttributes) {
return studyViewMapper.getPatientClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter),
filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES);
}

@Override
public List<String> getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType) {
String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase();
return studyViewMapper.getClinicalAttributeNames(tableName);
}

private boolean shouldApplyPatientIdFilters(CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty()
|| categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters().isEmpty();
}

public List<ClinicalData> getSampleClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return studyViewMapper.getSampleClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds);
}

public List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, List<String> attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) {
return studyViewMapper.getPatientClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds);
}
}
Loading
Loading