Skip to content

Commit

Permalink
Update tests and test data
Browse files Browse the repository at this point in the history
  • Loading branch information
Sander Tan authored and oplantalech committed Jun 12, 2019
1 parent 66fb089 commit 623f454
Show file tree
Hide file tree
Showing 19 changed files with 285 additions and 186 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -478,8 +478,6 @@ private void processData(Map<String, Set<String>> inputStudySampleMap,
Boolean hasMethylation = false;
Boolean hasCopyNo = false;
Boolean hasSurvival = false;
Boolean hasFusionData = false;

String decodedGeneList = URLDecoder.decode(geneList, "UTF-8");
String singleStudyId = (String) request.getAttribute(CANCER_STUDY_ID);
if (inputStudySampleMap.keySet().size() == 1 && inputStudySampleMap.containsKey(singleStudyId)) { // single study
Expand Down Expand Up @@ -561,7 +559,6 @@ private void processData(Map<String, Set<String>> inputStudySampleMap,
hasMethylation = hasMethylation || countProfiles(geneticProfileList, GeneticAlterationType.METHYLATION) > 0;
hasCopyNo = hasCopyNo || countProfiles(geneticProfileList, GeneticAlterationType.COPY_NUMBER_ALTERATION) > 0;
hasSurvival = hasSurvival || selectedCancerStudy.hasSurvivalData();
hasFusionData = hasFusionData || selectedCancerStudy.hasFusionData();
for(String profileId : geneticProfileIdSet){
if (profileId != null && profileId.length() != 0)
geneticProfileMap.put(profileId, GeneticProfileUtil.getProfile(profileId, geneticProfileList));
Expand Down Expand Up @@ -603,7 +600,6 @@ private void processData(Map<String, Set<String>> inputStudySampleMap,
request.setAttribute("hasMethylation", hasMethylation);
request.setAttribute("hasCopyNo", hasCopyNo);
request.setAttribute("hasSurvival", hasSurvival);
request.setAttribute("hasFusionData", hasFusionData);

ObjectMapper mapper = new ObjectMapper();
String studySampleMapString = mapper.writeValueAsString(studySampleMap);
Expand Down
9 changes: 5 additions & 4 deletions core/src/main/scripts/importer/validateData.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ def checkGeneIdentification(self, gene_symbol=None, entrez_id=None):
extra={'line_number': self.line_number,
'cause': gene_symbol})

return gene_symbol, identified_entrez_id
return identified_entrez_id

def checkDriverAnnotationColumn(self, driver_value=None, driver_annotation=None):
"""Ensures that cbp_driver_annotation is filled when the cbp_driver column
Expand Down Expand Up @@ -997,7 +997,7 @@ def checkLine(self, data):
# parse and check the feature identifiers (implemented by subclasses)
feature_id = self.parseFeatureColumns(data[:self.num_nonsample_cols])
# skip line if no feature was identified
if feature_id == (None, None):
if feature_id is None:
return
# skip line with an error if the feature was encountered before
if feature_id in self._feature_id_lines:
Expand Down Expand Up @@ -2937,8 +2937,9 @@ def checkExonPresence(self, exon, column_name):
if data[self.cols.index('Event_Info')] == 'Fusion':
checkFusionValues(self, data)
else:
self.logger.warning('Validation for other structural variant events is not implemented yet',
extra={'cause': self.cols.index('Event_Info')})
self.logger.error('Validation and functionality for other structural variant events are not implemented '
'yet. Event_Info must be "Fusion"',
extra={'cause': self.cols.index('Event_Info')})

def onComplete(self):
"""Perform final validations based on the data parsed."""
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Copyright (c) 2018 The Hyve B.V.
* This code is licensed under the GNU Affero General Public License (AGPL),
* version 3, or (at your option) any later version.
*/

/*
* This file is part of cBioPortal.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
* @author Sander Tan
*/

package org.mskcc.cbio.portal.scripts;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mskcc.cbio.portal.dao.*;
import org.mskcc.cbio.portal.model.*;
import org.mskcc.cbio.portal.util.*;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.transaction.TransactionConfiguration;
import org.springframework.transaction.annotation.Transactional;
import static org.junit.Assert.*;
import java.io.*;
import java.sql.*;

/**
 * Test class to test functionality of ImportStructuralVariantData.
 *
 * <p>Imports a structural variant data file into the genetic profile
 * {@code study_tcga_pub_structural_variants} and checks selected fields of the
 * rows that end up in the {@code structural_variant} table.
 */
@SuppressWarnings("deprecation")
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
@TransactionConfiguration(transactionManager = "transactionManager", defaultRollback = true)
@Transactional
public class TestImportStructuralVariantData {
    /** Internal ID of the cancer study with stable ID {@code study_tcga_pub}; set in {@link #setUp()}. */
    int studyId;
    /** Internal ID of the structural variant genetic profile; set in {@link #setUp()}. */
    int geneticProfileId;

    /**
     * Extracts StructuralVariant record from ResultSet.
     *
     * <p>Reads the columns of the row the cursor is currently positioned on; the
     * cursor itself is not advanced.
     *
     * @param rs result set positioned on a {@code structural_variant} row
     * @return StructuralVariant record populated from the current row
     * @throws SQLException if a column cannot be read from the result set
     * @throws DaoException declared for callers; not thrown by the code visible here
     */
    private StructuralVariant extractStructuralVariant(ResultSet rs) throws SQLException, DaoException {
        StructuralVariant structuralVariant = new StructuralVariant();
        structuralVariant.setGeneticProfileId(rs.getInt("GENETIC_PROFILE_ID"));
        structuralVariant.setSampleIdInternal(rs.getInt("SAMPLE_ID"));
        structuralVariant.setSite1EntrezGeneId(rs.getLong("SITE1_ENTREZ_GENE_ID"));
        structuralVariant.setSite1EnsemblTranscriptId(rs.getString("SITE1_ENSEMBL_TRANSCRIPT_ID"));
        structuralVariant.setSite1Exon(rs.getInt("SITE1_EXON"));
        structuralVariant.setSite1Chromosome(rs.getString("SITE1_CHROMOSOME"));
        structuralVariant.setSite1Position(rs.getInt("SITE1_POSITION"));
        structuralVariant.setSite1Description(rs.getString("SITE1_DESCRIPTION"));
        structuralVariant.setSite2EntrezGeneId(rs.getLong("SITE2_ENTREZ_GENE_ID"));
        structuralVariant.setSite2EnsemblTranscriptId(rs.getString("SITE2_ENSEMBL_TRANSCRIPT_ID"));
        structuralVariant.setSite2Exon(rs.getInt("SITE2_EXON"));
        structuralVariant.setSite2Chromosome(rs.getString("SITE2_CHROMOSOME"));
        structuralVariant.setSite2Position(rs.getInt("SITE2_POSITION"));
        structuralVariant.setSite2Description(rs.getString("SITE2_DESCRIPTION"));
        structuralVariant.setSite2EffectOnFrame(rs.getString("SITE2_EFFECT_ON_FRAME"));
        structuralVariant.setNcbiBuild(rs.getString("NCBI_BUILD"));
        structuralVariant.setDnaSupport(rs.getString("DNA_SUPPORT"));
        structuralVariant.setRnaSupport(rs.getString("RNA_SUPPORT"));
        structuralVariant.setNormalReadCount(rs.getInt("NORMAL_READ_COUNT"));
        structuralVariant.setTumorReadCount(rs.getInt("TUMOR_READ_COUNT"));
        structuralVariant.setNormalVariantCount(rs.getInt("NORMAL_VARIANT_COUNT"));
        structuralVariant.setTumorVariantCount(rs.getInt("TUMOR_VARIANT_COUNT"));
        structuralVariant.setNormalPairedEndReadCount(rs.getInt("NORMAL_PAIRED_END_READ_COUNT"));
        structuralVariant.setTumorPairedEndReadCount(rs.getInt("TUMOR_PAIRED_END_READ_COUNT"));
        structuralVariant.setNormalSplitReadCount(rs.getInt("NORMAL_SPLIT_READ_COUNT"));
        structuralVariant.setTumorSplitReadCount(rs.getInt("TUMOR_SPLIT_READ_COUNT"));
        structuralVariant.setAnnotation(rs.getString("ANNOTATION"));
        structuralVariant.setBreakpointType(rs.getString("BREAKPOINT_TYPE"));
        structuralVariant.setCenter(rs.getString("CENTER"));
        structuralVariant.setConnectionType(rs.getString("CONNECTION_TYPE"));
        structuralVariant.setEventInfo(rs.getString("EVENT_INFO"));
        structuralVariant.setVariantClass(rs.getString("CLASS"));
        structuralVariant.setLength(rs.getInt("LENGTH"));
        structuralVariant.setComments(rs.getString("COMMENTS"));
        structuralVariant.setExternalAnnotation(rs.getString("EXTERNAL_ANNOTATION"));
        structuralVariant.setDriverFilter(rs.getString("DRIVER_FILTER"));
        structuralVariant.setDriverFilterAnn(rs.getString("DRIVER_FILTER_ANNOTATION"));
        structuralVariant.setDriverTiersFilter(rs.getString("DRIVER_TIERS_FILTER"));
        structuralVariant.setDriverTiersFilterAnn(rs.getString("DRIVER_TIERS_FILTER_ANNOTATION"));
        return structuralVariant;
    }

    /**
     * Resolves the internal IDs of the test study and of its structural variant
     * genetic profile from their stable IDs.
     *
     * @throws DaoException if either lookup fails
     */
    @Before
    public void setUp() throws DaoException {
        studyId = DaoCancerStudy.getCancerStudyByStableId("study_tcga_pub").getInternalId();
        geneticProfileId = DaoGeneticProfile.getGeneticProfileByStableId("study_tcga_pub_structural_variants").getGeneticProfileId();
    }

    /**
     * Imports the structural variant test file and verifies one field on each of
     * the first two rows read back from the {@code structural_variant} table.
     *
     * @throws DaoException if the import fails or a SQL error occurs while reading back
     * @throws IOException if the test data file cannot be read
     */
    @Test
    public void testImportStructuralVariantData() throws DaoException, IOException {
        ProgressMonitor.setConsoleMode(false);

        // Load test structural variants
        File file = new File("src/test/resources/data_structural_variants.txt");
        ImportStructuralVariantData importer = new ImportStructuralVariantData(file, geneticProfileId, null);
        importer.importData();
        MySQLbulkLoader.flushAll();

        // Retrieve all imported structural variants
        Connection con = null;
        PreparedStatement pstmt = null;
        ResultSet rs = null;
        try {
            // NOTE(review): DaoGeneset.class is only passed as the requester token here;
            // it looks like a copy-paste leftover - confirm whether this test class should be used.
            con = JdbcUtil.getDbConnection(DaoGeneset.class);
            // NOTE(review): there is no ORDER BY, so "first" and "second" below rely on the
            // database returning rows in import order - confirm this is guaranteed.
            pstmt = con.prepareStatement("SELECT * FROM structural_variant");
            rs = pstmt.executeQuery();

            // Test first structural variant entry.
            // Assert that the row exists so a short import fails with a clear message
            // instead of an SQLException raised while reading a missing row.
            assertTrue("Expected a first structural variant row to be imported", rs.next());
            StructuralVariant structuralVariant = extractStructuralVariant(rs);
            assertEquals("KIAA1549-BRAF.K16B10.COSF509_2", structuralVariant.getSite2Description());

            // Test second structural variant entry
            assertTrue("Expected a second structural variant row to be imported", rs.next());
            structuralVariant = extractStructuralVariant(rs);
            assertEquals("ENST00000318522", structuralVariant.getSite1EnsemblTranscriptId());
        } catch (SQLException e) {
            throw new DaoException(e);
        } finally {
            // Always release the JDBC resources, including when an assertion fails.
            JdbcUtil.closeAll(DaoGeneset.class, con, pstmt, rs);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand Down Expand Up @@ -83,6 +84,8 @@
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.transaction.TransactionConfiguration;
import org.springframework.transaction.annotation.Transactional;
import org.cbioportal.model.StructuralVariant;
import org.cbioportal.service.StructuralVariantService;

/**
* Integration test using the same data that is used by validation system test
Expand Down Expand Up @@ -182,10 +185,26 @@ public void testLoadStudyEs0() throws Throwable {
.getGeneticProfiles(geneticProfileStableIds);
assertEquals(geneticProfiles.size(), 0);

// ===== Check CNA data ========
//===== Check STRUCTURAL VARIANT data ========
// 45 structural variant events are imported, using 31 unique genes, using 39 samples
// Not all 31 genes have to be queried. BRAF is fused to many of the test genes.
List<Integer> entrezGeneIds = new ArrayList<Integer>(Arrays.asList(57670, 673, 8031, 5979, 27436, 238, 7113, 2078, 1956, 238, 5774, 2115, 7273));

// Add samples and molecular profile IDs
List<String> sampleIds = new ArrayList<String>(Arrays.asList("TCGA-A2-A04P-01", "TCGA-A1-A0SB-01", "TCGA-A1-A0SB-01", "TCGA-A2-A04P-01", "TCGA-A2-A04P-01", "TCGA-A1-A0SK-01", "TCGA-A2-A0CM-01", "TCGA-AR-A1AR-01", "TCGA-B6-A0WX-01", "TCGA-BH-A1F0-01", "TCGA-B6-A0I6-01", "TCGA-BH-A18V-01", "TCGA-BH-A18Q-01", "TCGA-BH-A18K-01", "TCGA-BH-A0HL-01", "TCGA-BH-A0E0-01", "TCGA-BH-A0RX-01", "TCGA-A7-A13D-01", "TCGA-BH-A0E6-01", "TCGA-AO-A0J4-01", "TCGA-A7-A0CE-01", "TCGA-A7-A13E-01", "TCGA-A7-A0DA-01", "TCGA-D8-A142-01", "TCGA-D8-A143-01", "TCGA-AQ-A04J-01", "TCGA-BH-A0HN-01", "TCGA-A2-A0T0-01", "TCGA-A2-A0YE-01", "TCGA-A2-A0YJ-01", "TCGA-A2-A0D0-01", "TCGA-A2-A04U-01", "TCGA-AO-A0J6-01", "TCGA-A2-A0YM-01", "TCGA-A2-A0D2-01", "TCGA-BH-A0B3-01", "TCGA-A2-A04Q-01", "TCGA-A2-A0SX-01", "TCGA-AO-A0JL-01"));
// geneticProfileStableIds = new ArrayList<String>(Arrays.asList("study_es_0_structural_variants", "study_es_0_structural_variants"));
geneticProfileStableIds = Collections.nCopies(sampleIds.size(), "study_es_0_structural_variants");

StructuralVariantService structuralVariantService = applicationContext.getBean(StructuralVariantService.class);
List<StructuralVariant> structuralVariants = structuralVariantService.fetchStructuralVariants(geneticProfileStableIds, entrezGeneIds, sampleIds);

// Check if all 45 structural variants are imported
assertEquals(45, structuralVariants.size());

//===== Check CNA data ========
geneticProfileStableIds = new ArrayList<String>();
geneticProfileStableIds.add("study_es_0_gistic");
List<String> hugoGeneSymbols = new ArrayList<String>(Arrays.asList("ACAP3","AGRN","ATAD3A","ATAD3B","ATAD3C","AURKAIP1","ERCC5"));
ArrayList<String> hugoGeneSymbols = new ArrayList<String>(Arrays.asList("ACAP3","AGRN","ATAD3A","ATAD3B","ATAD3C","AURKAIP1","ERCC5"));
List<DBProfileData> cnaProfileData = apiService.getGeneticProfileData(geneticProfileStableIds, hugoGeneSymbols, null, null);
//there is data for 7 genes x 788 samples:
assertEquals(7*788, cnaProfileData.size());
Expand Down
6 changes: 6 additions & 0 deletions core/src/test/resources/seed_mini.sql
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYP
INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE');
SET @max_entity_id = (Select MAX(ID) from genetic_entity);
INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND","LENGTH") VALUES (@max_entity_id,238,'ALK','protein-coding','2p23.2-p23.1',6990);
INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE');
SET @max_entity_id = (Select MAX(ID) from genetic_entity);
INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND","LENGTH") VALUES (@max_entity_id,2115,'ETV1','protein-coding','7p21.2',9616);
INSERT INTO "genetic_entity" ("ENTITY_TYPE") VALUES ('GENE');
SET @max_entity_id = (Select MAX(ID) from genetic_entity);
INSERT INTO "gene" ("GENETIC_ENTITY_ID","ENTREZ_GENE_ID","HUGO_GENE_SYMBOL","TYPE","CYTOBAND","LENGTH") VALUES (@max_entity_id,7273,'TTN','protein-coding','2q31.2',118976);

-- cna_event
INSERT INTO "cna_event" ("CNA_EVENT_ID","ENTREZ_GENE_ID","ALTERATION") VALUES (20093,207,-2);
Expand Down
25 changes: 23 additions & 2 deletions core/src/test/scripts/test_data/api_json_system_tests/genes.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[
{"_comment": "genes for data_mutations_extended.txt",
{"_comment": "genes for data_mutations_extended.txt",
"entrez_gene_id":"3983", "hugo_gene_symbol":"ABLIM1"},
{"entrez_gene_id":"80070", "hugo_gene_symbol":"ADAMTS20"},
{"entrez_gene_id":"253559", "hugo_gene_symbol":"CADM2"},
Expand Down Expand Up @@ -83,6 +83,27 @@
{"entrez_gene_id":"2078", "hugo_gene_symbol":"ERG"},
{"entrez_gene_id":"1956", "hugo_gene_symbol":"EGFR"},
{"entrez_gene_id":"5774", "hugo_gene_symbol":"PTPN3"},
{"entrez_gene_id":"2115", "hugo_gene_symbol":"ETV1"},
{"entrez_gene_id":"7273", "hugo_gene_symbol":"TTN"},
{"entrez_gene_id":"9867", "hugo_gene_symbol":"PJA2"},
{"entrez_gene_id":"54810", "hugo_gene_symbol":"GIPC2"},
{"entrez_gene_id":"114883", "hugo_gene_symbol":"OSBPL9"},
{"entrez_gene_id":"155435", "hugo_gene_symbol":"RBM33"},
{"entrez_gene_id":"55755", "hugo_gene_symbol":"CDK5RAP2"},
{"entrez_gene_id":"23513", "hugo_gene_symbol":"SCRIB"},
{"entrez_gene_id":"5577", "hugo_gene_symbol":"PRKAR2B"},
{"entrez_gene_id":"5575", "hugo_gene_symbol":"PRKAR1B"},
{"entrez_gene_id":"57157", "hugo_gene_symbol":"PHTF2"},
{"entrez_gene_id":"7756", "hugo_gene_symbol":"ZNF207"},
{"entrez_gene_id":"11011", "hugo_gene_symbol":"TLK2"},
{"entrez_gene_id":"64759", "hugo_gene_symbol":"TNS3"},
{"entrez_gene_id":"9709", "hugo_gene_symbol":"HERPUD1"},
{"entrez_gene_id":"55750", "hugo_gene_symbol":"AGK"},
{"entrez_gene_id":"9715", "hugo_gene_symbol":"FAM131B"},
{"entrez_gene_id":"116988", "hugo_gene_symbol":"AGAP3"},
{"entrez_gene_id":"8454", "hugo_gene_symbol":"CUL1"},
{"entrez_gene_id":"27044", "hugo_gene_symbol":"SND1"},
{"entrez_gene_id":"23608", "hugo_gene_symbol":"MKRN1"},

{"_comment": "genes for mrna data data_expression_median",
"entrez_gene_id":"90993", "hugo_gene_symbol":"CREB3L1"},
Expand All @@ -101,4 +122,4 @@
{"entrez_gene_id":"24145", "hugo_gene_symbol":"PANX1"},
{"entrez_gene_id":"283234", "hugo_gene_symbol":"CCDC88B"}

]
]
3 changes: 1 addition & 2 deletions core/src/test/scripts/test_data/data_cna_duplicate_gene.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ ACAP3 116983 0 -1 0 -1 2
AGRN 375790 0 -1 0 -1 2
ATAD3A 55210 0 -1 NA -1 2
ATAD3B 83858 0 -1 0 -1 2
CENTB5 0 -1 0 -1 2
CENTB5 0 -1 spam -1 3
AURKAIP1 54998 0 -1 0 -1 2
B3GALT6 126792 0 -1 0 -1 2
B3GALT6 126792 0 -1 0 -1 2
Loading

0 comments on commit 623f454

Please sign in to comment.