From c1bdbda29b397eeafad70c8f65ff7ebbc1c7aa21 Mon Sep 17 00:00:00 2001
From: Onur Sumer
Date: Mon, 26 Jun 2023 17:47:14 -0400
Subject: [PATCH] add mock data and config for clickhouse testing on filtered samples and mutated genes (#10213)

Co-authored-by: Bryan Lai
---
 .../StudyViewMyBatisRepositoryTest.java       |  59 +++++++++
 .../src/test/resources/clickhouseSchema.sql   | 121 ++++++++++++++++++
 .../src/test/resources/clickhouseTestSql.sql  |  20 +++
 .../testContextDatabaseClickhouse.xml         |  42 ++++++
 4 files changed, 242 insertions(+)
 create mode 100644 persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java
 create mode 100644 persistence/persistence-mybatis/src/test/resources/clickhouseSchema.sql
 create mode 100644 persistence/persistence-mybatis/src/test/resources/clickhouseTestSql.sql
 create mode 100644 persistence/persistence-mybatis/src/test/resources/testContextDatabaseClickhouse.xml

diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java
new file mode 100644
index 00000000000..a67cf376d51
--- /dev/null
+++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatiscolumnstore/StudyViewMyBatisRepositoryTest.java
@@ -0,0 +1,59 @@
+package org.cbioportal.persistence.mybatiscolumnstore;
+
+import org.cbioportal.model.AlterationCountByGene;
+import org.cbioportal.model.Sample;
+import org.cbioportal.webparam.StudyViewFilter;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Configurable;
+import org.springframework.test.context.ContextConfiguration;
+import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tests {@link StudyViewMyBatisRepository} using the Spring context declared in
+ * {@code /testContextDatabaseClickhouse.xml}.
+ *
+ * <p>Both tests filter on the single study {@code msk_ch_2020}. The expected counts
+ * presumably correspond to the rows in {@code clickhouseTestSql.sql} (three samples,
+ * two mutated genes) -- TODO confirm that the test context loads that script.
+ */
+@RunWith(SpringJUnit4ClassRunner.class)
+@ContextConfiguration("/testContextDatabaseClickhouse.xml")
+@Configurable
+public class StudyViewMyBatisRepositoryTest {
+
+    @Autowired
+    private StudyViewMyBatisRepository studyViewMyBatisRepository;
+
+    /** A filter containing only the study id is expected to yield three samples. */
+    @Test
+    public void getFilteredSamples() {
+        StudyViewFilter studyViewFilter = new StudyViewFilter();
+
+        List<String> studyIds = new ArrayList<>();
+        studyIds.add("msk_ch_2020");
+        studyViewFilter.setStudyIds(studyIds);
+
+        List<Sample> samples = studyViewMyBatisRepository.getFilteredSamplesFromColumnstore(studyViewFilter);
+        Assert.assertEquals(3, samples.size());
+    }
+
+    /** A filter containing only the study id is expected to yield two gene count entries. */
+    @Test
+    public void getMutatedGenes() {
+        StudyViewFilter studyViewFilter = new StudyViewFilter();
+
+        List<String> studyIds = new ArrayList<>();
+        studyIds.add("msk_ch_2020");
+        studyViewFilter.setStudyIds(studyIds);
+
+        List<AlterationCountByGene> mutations = studyViewMyBatisRepository.getMutatedGenes(studyViewFilter);
+        Assert.assertEquals(2, mutations.size());
+    }
+
+}
\ No newline at end of file
diff --git a/persistence/persistence-mybatis/src/test/resources/clickhouseSchema.sql b/persistence/persistence-mybatis/src/test/resources/clickhouseSchema.sql
new file mode 100644
index 00000000000..37721f01497
--- /dev/null
+++ b/persistence/persistence-mybatis/src/test/resources/clickhouseSchema.sql
@@ -0,0 +1,121 @@
+create table cna_discrete
+(
+    sample_unique_id varchar(255),
+    alteration int,
+    hugo_gene_symbol varchar(255),
+    gene_panel_stable_id varchar(255),
+    cancer_study_identifier varchar(255),
+    genetic_profile_stable_id varchar(255),
+    PRIMARY KEY (sample_unique_id, alteration, hugo_gene_symbol, cancer_study_identifier)
+);
+
+create table genetic_profile
+(
+    sample_unique_id varchar(255),
+    genetic_alteration_type varchar(255),
+    datatype varchar(255),
+    value varchar(255),
+    cancer_study_identifier varchar(255),
+    PRIMARY KEY (sample_unique_id, genetic_alteration_type, datatype, value, cancer_study_identifier)
+);
+
+create table genetic_profile_counts
+(
+    sample_unique_id varchar(255),
+    profile_name varchar(255),
+    genetic_profile_stable_id varchar(255),
+    cancer_study_identifier varchar(255),
+    count int
+);
+
+create table genomic_event
+(
+    sample_unique_id varchar(255),
+    variant varchar(255),
+    hugo_gene_symbol varchar(255),
+    gene_panel_stable_id varchar(255),
+    cancer_study_identifier varchar(255),
+    genetic_profile_stable_id varchar(255),
+    PRIMARY KEY (sample_unique_id, variant, hugo_gene_symbol, cancer_study_identifier, genetic_profile_stable_id)
+);
+
+create table mutation
+(
+    sample_unique_id varchar(255),
+    variant varchar(255),
+    hugo_gene_symbol varchar(255),
+    gene_panel_stable_id varchar(255),
+    cancer_study_identifier varchar(255),
+    genetic_profile_stable_id varchar(255),
+    PRIMARY KEY (sample_unique_id, variant, hugo_gene_symbol, cancer_study_identifier)
+);
+
+create table patient_clinical_attribute_categorical
+(
+    patient_unique_id varchar(255),
+    attribute_name varchar(255),
+    attribute_value varchar(255),
+    cancer_study_identifier varchar(255),
+    PRIMARY KEY (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+);
+
+create table patient_clinical_attribute_numeric
+(
+    patient_unique_id varchar(255),
+    attribute_name varchar(255),
+    attribute_value float,
+    cancer_study_identifier varchar(255),
+    PRIMARY KEY (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+);
+
+create table sample
+(
+    sample_unique_id varchar(255),
+    sample_stable_id varchar(255),
+    patient_unique_id varchar(255),
+    patient_stable_id varchar(255),
+    cancer_study_identifier varchar(255),
+    sample_unique_id_base64 varchar(255),
+    patient_unique_id_base64 varchar(255),
+    PRIMARY KEY (sample_unique_id, patient_unique_id, cancer_study_identifier)
+);
+
+create table sample_clinical_attribute_categorical
+(
+    patient_unique_id varchar(255),
+    sample_unique_id varchar(255),
+    attribute_name varchar(255),
+    attribute_value varchar(255),
+    cancer_study_identifier varchar(255),
+    PRIMARY KEY (patient_unique_id, sample_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+);
+
+create table sample_clinical_attribute_numeric
+(
+    patient_unique_id varchar(255),
+    sample_unique_id varchar(255),
+    attribute_name varchar(255),
+    attribute_value float,
+    cancer_study_identifier varchar(255),
+    PRIMARY KEY (patient_unique_id, sample_unique_id, attribute_name, attribute_value, cancer_study_identifier)
+);
+
+create table sample_list
+(
+    sample_unique_id varchar(255),
+    sample_list_stable_id varchar(255),
+    name varchar(255),
+    cancer_study_identifier varchar(255),
+    PRIMARY KEY (sample_unique_id, sample_list_stable_id, name, cancer_study_identifier)
+);
+
+create table structural_variant
+(
+    sample_unique_id varchar(255),
+    hugo_symbol_gene1 varchar(255),
+    hugo_symbol_gene2 varchar(255),
+    gene_panel_stable_id varchar(255),
+    cancer_study_identifier varchar(255),
+    genetic_profile_stable_id varchar(255),
+    PRIMARY KEY (sample_unique_id, hugo_symbol_gene1, hugo_symbol_gene2, cancer_study_identifier)
+);
diff --git a/persistence/persistence-mybatis/src/test/resources/clickhouseTestSql.sql b/persistence/persistence-mybatis/src/test/resources/clickhouseTestSql.sql
new file mode 100644
index 00000000000..a52d4180198
--- /dev/null
+++ b/persistence/persistence-mybatis/src/test/resources/clickhouseTestSql.sql
@@ -0,0 +1,20 @@
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000004', 'AGE', 39.739902, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000004', 'TIME_FROM_DX_TO_SEQ', 991, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000004', 'TIME_TO_BLOOD_DRAW_FROM_TX', 609, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000015', 'AGE', 44.440792, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000015', 'TIME_FROM_DX_TO_SEQ', 2558, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000015', 'TIME_TO_BLOOD_DRAW_FROM_TX', 5, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000023', 'AGE', 61.319645, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000023', 'TIME_FROM_DX_TO_SEQ', 245, 'msk_ch_2020');
+INSERT INTO patient_clinical_attribute_numeric (patient_unique_id, attribute_name, attribute_value, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000023', 'TIME_TO_BLOOD_DRAW_FROM_TX', 166, 'msk_ch_2020');
+
+INSERT INTO sample (sample_unique_id, sample_unique_id_base64, sample_stable_id, patient_unique_id, patient_unique_id_base64, patient_stable_id, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000004-N01', '', 'P-0000004-N01', 'msk_ch_2020_P-0000004', '', 'P-0000004', 'msk_ch_2020');
+INSERT INTO sample (sample_unique_id, sample_unique_id_base64, sample_stable_id, patient_unique_id, patient_unique_id_base64, patient_stable_id, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000015-N01', '', 'P-0000015-N01', 'msk_ch_2020_P-0000015', '', 'P-0000015', 'msk_ch_2020');
+INSERT INTO sample (sample_unique_id, sample_unique_id_base64, sample_stable_id, patient_unique_id, patient_unique_id_base64, patient_stable_id, cancer_study_identifier) VALUES ('msk_ch_2020_P-0000023-N01', '', 'P-0000023-N01', 'msk_ch_2020_P-0000023', '', 'P-0000023', 'msk_ch_2020');
+--
+INSERT INTO genomic_event (sample_unique_id, variant, hugo_gene_symbol, gene_panel_stable_id, cancer_study_identifier, genetic_profile_stable_id) VALUES ('msk_ch_2020_P-0000004-N01', 'p.R1051Q', 'KDR', '', 'msk_ch_2020', 'msk_ch_2020_mutations');
+INSERT INTO genomic_event (sample_unique_id, variant, hugo_gene_symbol, gene_panel_stable_id, cancer_study_identifier, genetic_profile_stable_id) VALUES ('msk_ch_2020_P-0000004-N01', 'p.T1884I', 'TET2', '', 'msk_ch_2020', 'msk_ch_2020_mutations');
+
+INSERT INTO mutation (sample_unique_id, variant, hugo_gene_symbol, gene_panel_stable_id, cancer_study_identifier, genetic_profile_stable_id) VALUES ('msk_ch_2020_P-0000004-N01', 'p.R1051Q', 'KDR', '', 'msk_ch_2020', 'msk_ch_2020_mutations');
+INSERT INTO mutation (sample_unique_id, variant, hugo_gene_symbol, gene_panel_stable_id, cancer_study_identifier, genetic_profile_stable_id) VALUES ('msk_ch_2020_P-0000004-N01', 'p.T1884I', 'TET2', '', 'msk_ch_2020', 'msk_ch_2020_mutations');
+
diff --git a/persistence/persistence-mybatis/src/test/resources/testContextDatabaseClickhouse.xml b/persistence/persistence-mybatis/src/test/resources/testContextDatabaseClickhouse.xml
new file mode 100644
index 00000000000..4f93f760f8c
--- /dev/null
+++ b/persistence/persistence-mybatis/src/test/resources/testContextDatabaseClickhouse.xml
@@ -0,0 +1,42 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file