From 05e0c156839b3db53350ff691857d33525cab07d Mon Sep 17 00:00:00 2001 From: Jonn Smith Date: Tue, 30 Oct 2018 17:46:47 -0400 Subject: [PATCH] # This is a combination of 2 commits. # This is the 1st commit message: Updated data source inputs to accept NIO paths for backing files. You can now specify a URL in the backing-file fields of the configuration files for Funcotator data sources, and the backing files will be read by the FuncotationDataSourceFactories. This effectively enables the use of data sources in the cloud, or a mix of local and cloud-based data sources, through a config file change. This update will enable gnomAD annotations (once the data sources are updated to point at the gnomAD files on Google Cloud). Added cloud data sources for testing. Minor refactoring of LocatableXsvFuncotationFactory: each instance now supports only one file instead of multiple files. Fixes #5348 # This is the commit message #2: Added more cloud data sources. The new cloud data set contains local data sources and a pointer to the gnomAD Google Cloud bucket. 
--- .../testing/getGencodeGenesForVcfVariants.sh | 2 +- .../getGencodeSequencesForVcfVariants.sh | 2 +- scripts/funcotator/testing/testFuncotator.sh | 20 +- .../hellbender/engine/FeatureDataSource.java | 16 +- .../hellbender/engine/FeatureInput.java | 7 + .../hellbender/engine/FeatureManager.java | 8 +- .../hellbender/engine/GATKTool.java | 2 +- .../hellbender/exceptions/UserException.java | 2 +- .../AnnotatedIntervalCodec.java | 25 +- .../tools/funcotator/Funcotator.java | 5 + .../dataSources/DataSourceUtils.java | 112 ++++----- .../gencode/GencodeFuncotationFactory.java | 65 ++++- .../vcf/VcfFuncotationFactory.java | 4 +- .../xsv/LocatableXsvFuncotationFactory.java | 69 +++--- .../XsvLocatableTableCodec.java | 222 ++++++++++++------ .../hellbender/utils/gcs/BucketUtils.java | 3 +- .../annotated_region_default.config | 6 +- .../engine/FeatureInputTestTools.java | 21 ++ .../funcotator/FuncotatorIntegrationTest.java | 62 ++++- .../funcotator/FuncotatorTestConstants.java | 17 +- ...ocatableXsvFuncotationFactoryUnitTest.java | 95 +++----- .../XsvLocatableTableCodecUnitTest.java | 13 +- .../funcotator_dataSources_cloud/MANIFEST.txt | 3 + .../achilles/hg19/achilles.config | 3 + .../achilles/hg38/achilles.config | 3 + .../gencode/hg19/gencode.config | 3 + .../gencode/hg38/gencode.config | 3 + .../gnomAD/hg19/gnomAD.config | 3 + .../oreganno/hg19/oreganno.config | 3 + .../oreganno/hg38/oreganno.config | 3 + .../achilles/hg19/achilles.config | 3 + .../hg19/achilles_lineage_results.import.txt | 3 + .../achilles/hg38 | 1 + ...ncerGeneCensus_Table_1_full_2012-03-15.txt | 3 + .../hg19/cancer_gene_census.config | 3 + .../cancer_gene_census/hg38 | 1 + .../clinvar/hg19/clinvar_hgmd.config | 3 + .../clinvar/hg19/clinvar_hgmd.tsv | 3 + .../clinvar/hg19/clinvar_hgmd.tsv.idx | 3 + .../cosmic/hg19/CosmicTest.db | 3 + .../cosmic/hg19/cosmic.config | 3 + .../cosmic_fusion/hg19/cosmic_fusion.config | 3 + .../cosmic_fusion/hg19/cosmic_fusion.tsv | 3 + 
.../cosmic_fusion/hg38/cosmic_fusion.config | 3 + .../cosmic_fusion/hg38/cosmic_fusion.tsv | 3 + .../cosmic_tissue/hg19/cosmic_tissue.config | 3 + .../cosmic_tissue/hg19/cosmic_tissue.tsv | 3 + .../cosmic_tissue/hg38/cosmic_tissue.config | 3 + .../cosmic_tissue/hg38/cosmic_tissue.tsv | 3 + .../dbSnp/hg19/dbSNP.config | 3 + .../hg19/dbSnp.regressionTestSet.hg19.vcf.gz | 3 + .../dbSnp.regressionTestSet.hg19.vcf.gz.tbi | 3 + .../dbSnp/hg38/dbSNP.config | 3 + .../hg38/dbSnp.regressionTestSet.hg38.vcf.gz | 3 + .../dbSnp.regressionTestSet.hg38.vcf.gz.tbi | 3 + .../hg19/dnaRepairGenes.20171221T103938.csv | 3 + .../hg19/dnaRepairGenes.config | 3 + .../dna_repair_genes/hg38 | 1 + .../hg19/Familial_Cancer_Genes.no_dupes.tsv | 3 + .../familial/hg19/familial.config | 3 + .../familial/hg38 | 1 + .../gencode/hg19/gencode.config | 3 + .../gencode.v19.regressionTestVariantSet.gtf | 3 + ...ncode.v19.regressionTestVariantSet.gtf.idx | 3 + ...gressionTestVariantSet.pc_transcripts.dict | 3 + ...regressionTestVariantSet.pc_transcripts.fa | 3 + ...essionTestVariantSet.pc_transcripts.fa.fai | 3 + .../gencode/hg38/gencode.config | 3 + .../gencode.v28.regressionTestVariantSet.gtf | 3 + ...ncode.v28.regressionTestVariantSet.gtf.idx | 3 + ...gressionTestVariantSet.pc_transcripts.dict | 3 + ...regressionTestVariantSet.pc_transcripts.fa | 3 + ...essionTestVariantSet.pc_transcripts.fa.fai | 3 + .../gencode_xhgnc/hg19/gencode_xhgnc.config | 3 + .../hg19/gencode_xhgnc_v75_37.hg19.tsv | 3 + .../gencode_xhgnc/hg38/gencode_xhgnc.config | 3 + .../hg38/gencode_xhgnc_v90_38.hg38.tsv | 3 + .../hg19/gencode_xrefseq.config | 3 + .../hg19/gencode_xrefseq_v75_37.tsv | 3 + .../hg38/gencode_xrefseq.config | 3 + .../hg38/gencode_xrefseq_v90_38.tsv | 3 + .../gnomAD/hg19/gnomAD.config | 3 + .../hgnc/hg19/hgnc.config | 3 + .../hgnc/hg19/hgnc_download_Nov302017.tsv | 3 + .../hgnc/hg38 | 1 + .../simple_uniprot/hg19/simple_uniprot.config | 3 + .../hg19/simple_uniprot_Dec012014.tsv | 3 + .../simple_uniprot/hg38 | 1 + 
.../template.config | 3 + .../hellbender/tools/funcotator/maf.config | 6 +- .../funcotator/xsv_locatable_test.config | 49 +++- .../funcotator/xsv_locatable_test2.config | 49 +++- .../funcotator/xsv_locatable_test3.config | 51 +++- 93 files changed, 856 insertions(+), 266 deletions(-) create mode 100644 src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt create mode 120000 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config create mode 120000 
src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz create 
mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config create mode 120000 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config create mode 120000 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict create mode 100644 
src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config create mode 100644 
src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config create mode 100644 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv create mode 120000 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv create mode 120000 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 create mode 100755 src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config diff --git a/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh b/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh index 16e91787c72..26f3f515aa3 100755 --- a/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh +++ b/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh @@ -20,7 +20,7 @@ ############################################################################### #Setup variables for the script: -UNALIASED_SCRIPT_NAME=$( readlink "${BASH_SOURCE[0]}" || echo "${BASH_SOURCE[0]}" ) +UNALIASED_SCRIPT_NAME=$( python -c "import os;print(os.path.realpath(\"${BASH_SOURCE[0]}\"))" ) SCRIPTDIR="$( cd "$( dirname 
"${UNALIASED_SCRIPT_NAME}" )" && pwd )" SCRIPTNAME=$( echo $0 | sed 's#.*/##g' ) MINARGS=2 diff --git a/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh b/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh index 444171cc18a..a2e65cec8f2 100755 --- a/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh +++ b/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh @@ -20,7 +20,7 @@ ############################################################################### #Setup variables for the script: -UNALIASED_SCRIPT_NAME=$( readlink "${BASH_SOURCE[0]}" || echo "${BASH_SOURCE[0]}" ) +UNALIASED_SCRIPT_NAME=$( python -c "import os;print(os.path.realpath(\"${BASH_SOURCE[0]}\"))" ) SCRIPTDIR="$( cd "$( dirname "${UNALIASED_SCRIPT_NAME}" )" && pwd )" SCRIPTNAME=$( echo $0 | sed 's#.*/##g' ) MINARGS=2 diff --git a/scripts/funcotator/testing/testFuncotator.sh b/scripts/funcotator/testing/testFuncotator.sh index a4cf51e65c8..0c92257a11c 100755 --- a/scripts/funcotator/testing/testFuncotator.sh +++ b/scripts/funcotator/testing/testFuncotator.sh @@ -61,7 +61,7 @@ HG38=/Users/jonn/Development/references/Homo_sapiens_assembly38.fasta function simpleUsage() { - echo -e "Usage: $SCRIPTNAME [-c] [-u] [-t] [-19|-38] [-MAF|-VCF] [-AOU]" + echo -e "Usage: $SCRIPTNAME [-c] [-cloud] [-u] [-t] [-19|-38] [-MAF|-VCF] [-AOU]" echo -e "Build and run Funcotator." 
} @@ -87,6 +87,7 @@ function usage() echo -e " -38 run with hg38 data sources/reference/input file" echo -e " -MAF create MAF output" echo -e " -VCF create VCF output (default)" + echo -e " -cloud use cloud data sources" echo -e " -AOU use the All of Us/Clinical Pipeline data sources" echo -e " -M REF_VER REFERENCE INPUT DATA_SOURCES run in MANUAL mode, providing all necessary input" echo -e " REF_VER - a string for the reference version" @@ -146,7 +147,8 @@ trap at_exit EXIT function assertInputFilesExist() { assertFileExists ${INPUT} assertFileExists ${REF} - assertDirectoryExists ${DATA_SOURCES_PATH} + + [[ ! -d $DATA_SOURCES_PATH ]] && error "Warning: Data sources may not exist ${DATA_SOURCES_PATH}" && error "Ignore this if data sources directory is in the cloud." } ################################################################################ @@ -176,6 +178,9 @@ while [ $# -gt 0 ] ; do -AOU) useAOUDataSources=true ;; + -cloud) + useCloudDataSources=true + ;; -t) doRunLargeTests=true ;; @@ -267,7 +272,7 @@ if [[ $r -eq 0 ]] && ${doRunLargeTests} ; then INPUT=/Users/jonn/Development/NON_PUBLIC/0816201804HC0_R01C01.vcf #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSet1.vcf #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSet2.vcf - INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestHg19Large.vcf + #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestHg19Large.vcf #INPUT=/Users/jonn/Development/gatk/hg38_trio_liftoverb37.vcf #INPUT=/Users/jonn/Development/gatk/tmp.vcf #INPUT=/Users/jonn/Development/data_to_run/problem_samples/splice_site_should_not_be_splice_site/error_case.vcf @@ -279,18 +284,23 @@ if [[ $r -eq 0 ]] && ${doRunLargeTests} ; then else INPUT=/Users/jonn/Development/FUNCOTATOR_LARGE_TEST_INPUTS/hg38_trio.vcf 
#INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSetHG38.vcf + #INPUT=/Users/jonn/Development/tmp/cohort24_23_seg.subset.vcf REF=$HG38 fi # Use the AOU data sources if we need them: - $useAOUDataSources && DATA_SOURCES_PATH=/Users/jonn/Development/funcotator_dataSources.vAoU3 + $useAOUDataSources && echo "Using AOU data sources." && DATA_SOURCES_PATH=/Users/jonn/Development/funcotator_dataSources.vAoU3 + + # Use cloud data sources only if -cloud was given (an unset flag must count as false, not as an empty command that succeeds): + ${useCloudDataSources:-false} && echo "Using cloud data sources." && DATA_SOURCES_PATH=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/ + #$useCloudDataSources && echo "Using cloud data sources." && DATA_SOURCES_PATH=gs://hellbender/test/resources/large/funcotatorDataSourceCollection/funcotator_dataSources_cloud/ OUT_FORMAT_LOWER=$( echo "${OUT_FORMAT}" | tr 'A-Z' 'a-z' ) OUT_FILE_NAME=FUNCOTATOR_OUT.${OUT_FORMAT_LOWER} assertInputFilesExist - ${GATKDIR}/gatk Funcotator \ + time ${GATKDIR}/gatk Funcotator \ -V ${INPUT} \ -O ${OUT_FILE_NAME} \ -R ${REF} \ diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index e2d2da1863c..b32519e978e 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -20,7 +20,6 @@ import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.nio.SeekableByteChannelPrefetcher; -import static org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.*; import java.io.File; import java.io.IOException; @@ -32,6 +31,8 @@ import java.util.Optional; import java.util.function.Function; +import static org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.createExportConfiguration; + /** * 
Enables traversals and queries over sources of Features, which are metadata associated with a location * on the genome in a format supported by our file parsing framework, Tribble. Examples of Features are @@ -276,6 +277,9 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook this.queryLookaheadBases = queryLookaheadBases; } + final void printCacheStats() { + queryCache.printCacheStatistics( getName() ); + } @SuppressWarnings("unchecked") private static FeatureReader getFeatureReader(final FeatureInput featureInput, final Class targetFeatureType, @@ -332,17 +336,19 @@ private static FeatureReader getFeatureReader(final Featu private static AbstractFeatureReader getTribbleFeatureReader(final FeatureInput featureInput, final FeatureCodec codec, final Function cloudWrapper, final Function cloudIndexWrapper) { Utils.nonNull(codec); try { - final String absolutePath = IOUtils.getPath(featureInput.getFeaturePath()).toAbsolutePath().toUri().toString(); + // Must get the path to the data file from the codec here: + final String absoluteRawPath = IOUtils.getPath(featureInput.getFeaturePath()).toAbsolutePath().toUri().toString(); + final String absoluteProcessedPath = IOUtils.getPath(codec.getPathToDataFile(featureInput.getFeaturePath())).toAbsolutePath().toUri().toString(); // Instruct the reader factory to not require an index. We will require one ourselves as soon as // a query by interval is attempted. 
final boolean requireIndex = false; // Only apply the wrappers if the feature input is on Google Cloud Storage - if (BucketUtils.isCloudStorageUrl(absolutePath)) { - return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, cloudWrapper, cloudIndexWrapper); + if (BucketUtils.isCloudStorageUrl(absoluteProcessedPath)) { + return AbstractFeatureReader.getFeatureReader(absoluteRawPath, null, codec, requireIndex, cloudWrapper, cloudIndexWrapper); } else { - return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, Function.identity(), Function.identity()); + return AbstractFeatureReader.getFeatureReader(absoluteRawPath, null, codec, requireIndex, Function.identity(), Function.identity()); } } catch (final TribbleException e) { throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java index 57083255684..f303f334d98 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java @@ -291,6 +291,13 @@ public String getFeaturePath() { return featureFile; } + /** + * @return The key/value {@link Map} as supplied to create the data in this {@link FeatureInput}. 
+ */ + public Map getKeyValueMap() { + return keyValueMap; + } + /** * FeatureInputs will be hashed by the engine, so make an effort to produce a reasonable hash code * diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java index d44c7769db7..b50a78aca2b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java @@ -210,6 +210,12 @@ private void initializeFeatureSources( final int featureQueryLookahead, final Co } } + @SuppressWarnings({"unchecked", "rawtypes"}) + public void dumpAllFeatureCacheStats() { + for ( final FeatureDataSource f : featureSources.values() ) { + f.printCacheStats(); + } + } /** * Add the feature data source to the given feature input. @@ -450,7 +456,7 @@ private FeatureDataSource lookupDataSource( final Feature public static FeatureCodec getCodecForFile( final Path featurePath, final Class featureType ) { // Make sure Path exists/is readable if ( ! 
Files.isReadable(featurePath) ) { - throw new UserException.CouldNotReadInputFile(featurePath); + throw new UserException.CouldNotReadInputFile(featurePath.toUri().toString()); } // Gather all discovered codecs that claim to be able to decode the given file according to their diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 2c1adabad34..d9bbaf35918 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -148,7 +148,7 @@ public abstract class GATKTool extends CommandLineProgram { /** * Our source of Feature data (null if no source of Features was provided) */ - FeatureManager features; + public FeatureManager features; /** * diff --git a/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java b/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java index 0ac52076aed..7e9cea29d35 100644 --- a/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java +++ b/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java @@ -384,7 +384,7 @@ public static final class NoSuitableCodecs extends UserException { private static final long serialVersionUID = 0L; public NoSuitableCodecs(final Path file) { - super("Cannot read " + file + " because no suitable codecs found"); + super("Cannot read " + file.toUri().toString() + " because no suitable codecs found"); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java index 5f2c34dce0b..12bb4e25fad 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java @@ -5,8 +5,10 @@ import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.readers.LineIterator; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvLocatableTableCodec; @@ -38,6 +40,7 @@ public class AnnotatedIntervalCodec extends AsciiFeatureCodec public static final String START_COL_COMMENT = "_StartHeader="; public static final String END_COL_COMMENT = "_EndHeader="; + private Path configFilePath; private XsvLocatableTableCodec xsvLocatableTableCodec; private AnnotatedIntervalHeader header; @@ -46,9 +49,10 @@ public AnnotatedIntervalCodec() { xsvLocatableTableCodec = new XsvLocatableTableCodec(); } - public AnnotatedIntervalCodec(final Path overrideConfigFile) { + public AnnotatedIntervalCodec(final Path configFilePath) { super(AnnotatedInterval.class); - xsvLocatableTableCodec = new XsvLocatableTableCodec(overrideConfigFile); + this.configFilePath = configFilePath; + xsvLocatableTableCodec = new XsvLocatableTableCodec(configFilePath); } @Override @@ -78,8 +82,8 @@ public AnnotatedIntervalHeader readActualHeader(final LineIterator reader) { } @Override - public boolean canDecode(final String path) { - return (path.endsWith(".seg") || path.endsWith(".maf") || path.endsWith(".maf.annotated")) && xsvLocatableTableCodec.canDecodeMinusExtensionChecks(path); + public boolean canDecode(final String pathString) { + return (pathString.endsWith(".seg") || pathString.endsWith(".maf") || pathString.endsWith(".maf.annotated")) && 
xsvLocatableTableCodec.canDecodeFileChecks(configFilePath.toUri().toString(), pathString); } /** @@ -98,10 +102,15 @@ public static AnnotatedIntervalHeader createHeaderForWriter(final Path outputCon Utils.nonNull(outputConfigFile); //TODO: Change this so that it outputs the first in the list. - final Properties headerNameProperties = XsvLocatableTableCodec.getAndValidateConfigFileContents(outputConfigFile); - final String contigColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(XsvLocatableTableCodec.CONFIG_FILE_CONTIG_COLUMN_KEY)); - final String startColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(XsvLocatableTableCodec.CONFIG_FILE_START_COLUMN_KEY)); - final String endColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(XsvLocatableTableCodec.CONFIG_FILE_END_COLUMN_KEY)); + final Pair validityAndPropertiesPair = XsvLocatableTableCodec.getAndValidateConfigFileContentsOnPath(outputConfigFile, true); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties headerNameProperties = validityAndPropertiesPair.getRight(); + if ( !isValid ) { + throw new UserException.BadInput("Error: invalid configuration file given: " + outputConfigFile.toUri().toString()); + } + final String contigColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN)); + final String startColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN)); + final String endColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN)); XsvLocatableTableCodec.validateLocatableColumnName(contigColumnName); XsvLocatableTableCodec.validateLocatableColumnName(startColumnName); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java 
b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java index 9a1f7e07ef4..b96cbcc2230 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java @@ -227,11 +227,13 @@ public boolean requiresReference() { @Override public void onTraversalStart() { + logger.info("Validating Sequence Dictionaries..."); if (seqValidationArguments.performSequenceDictionaryValidation()) { // Ensure that the reference dictionary is a superset of the variant dictionary: checkReferenceDictionaryIsSupersetOfVariantDictionary(); } + logger.info("Processing user transcripts/defaults/overrides..."); // Next set up our transcript list: final Set finalUserTranscriptIdSet = FuncotatorEngine.processTranscriptList(funcotatorArgs.userTranscriptIdSet); @@ -242,11 +244,13 @@ public void onTraversalStart() { // Get the header for our variants: final VCFHeader vcfHeader = getHeaderForVariants(); + logger.info("Initializing data sources..."); // Initialize all of our data sources: // Sort data sources to make them process in the same order each time: funcotatorArgs.dataSourceDirectories.sort(Comparator.naturalOrder()); final Map configData = DataSourceUtils.getAndValidateDataSourcesFromPaths(funcotatorArgs.referenceVersion, funcotatorArgs.dataSourceDirectories); + logger.info("Finalizing data sources (this step can be long if data sources are cloud-based)..."); // Create the data sources from the input: // This will also create and register the FeatureInputs (created by the Data Sources) // with the GATK Engine, so we do not have to plumb them in after the fact. @@ -260,6 +264,7 @@ public void onTraversalStart() { new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize) ); + logger.info("Initializing Funcotator Engine..."); // Create our engine to do our work and drive this Funcotation train! 
funcotatorEngine = new FuncotatorEngine( funcotatorArgs, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java index 76494bdce7b..e26e79c8be8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java @@ -264,7 +264,7 @@ public static List createDataSourceFuncotationFact final FeatureInput featureInput; switch ( FuncotatorArgumentDefinitions.DataSourceType.getEnum(stringType) ) { case LOCATABLE_XSV: - featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, XsvTableFeature.class); + featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, XsvTableFeature.class, true); funcotationFactory = DataSourceUtils.createLocatableXsvDataSource(path, properties, annotationOverridesMap, featureInput); break; case SIMPLE_XSV: @@ -274,16 +274,16 @@ public static List createDataSourceFuncotationFact funcotationFactory = DataSourceUtils.createCosmicDataSource(path, properties, annotationOverridesMap); break; case GENCODE: - featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class); + featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false); funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet, featureInput, flankSettings); break; case VCF: - featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class); + featureInput = createAndRegisterFeatureInputs(path, 
properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false); funcotationFactory = DataSourceUtils.createVcfDataSource(path, properties, annotationOverridesMap, featureInput); break; default: - throw new GATKException("Unknown type of DataSourceFuncotationFactory encountered: " + stringType); + throw new GATKException("Unknown type of DataSourceFuncotationFactory encountered: " + stringType ); } // Add in our factory: @@ -294,41 +294,25 @@ public static List createDataSourceFuncotationFact return dataSourceFactories; } - private static FeatureInput createAndRegisterFeatureInputs(final Path dataSourceFile, + private static FeatureInput createAndRegisterFeatureInputs(final Path configFilePath, final Properties dataSourceProperties, final GATKTool funcotatorToolInstance, final int lookaheadFeatureCachingInBp, - final Class featureType) { - Utils.nonNull(dataSourceFile); + final Class featureType, + final boolean useConfigFilePath) { + Utils.nonNull(configFilePath); Utils.nonNull(dataSourceProperties); - final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME); - final String sourceFile = dataSourceFile.resolveSibling(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE)).toString(); + final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME); + final String sourceFile = useConfigFilePath + ? configFilePath.toUri().toString() + : resolveFilePathStringFromKnownPath( dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), configFilePath ).toUri().toString(); // Get feature inputs by creating them with the tool instance itself. // This has the side effect of registering the FeatureInputs with the engine, so that they can be later queried. return funcotatorToolInstance.addFeatureInputsAfterInitialization(sourceFile, name, featureType, lookaheadFeatureCachingInBp); } - /** - * Create {@link FeatureInput} FOR TESTING ONLY. 
- * @param dataSourceFile - * @param dataSourceProperties - * @return - */ - private static FeatureInput createFeatureInputsForTesting(final Path dataSourceFile, - final Properties dataSourceProperties) { - - Utils.nonNull(dataSourceFile); - Utils.nonNull(dataSourceProperties); - - final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME); - final String sourceFile = dataSourceFile.resolveSibling(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE)).toString(); - - // Get feature inputs by creating them with the funcotator tool instance itself: - return new FeatureInput<>(sourceFile, name, Collections.emptyMap()); - } - /** * Create a {@link LocatableXsvFuncotationFactory} from filesystem resources and field overrides. * @param dataSourceFile {@link Path} to the data source file. Must not be {@code null}. @@ -359,13 +343,7 @@ private static LocatableXsvFuncotationFactory createLocatableXsvDataSource(final // Set the supported fields by the LocatableXsvFuncotationFactory: locatableXsvFuncotationFactory.setSupportedFuncotationFields( - new ArrayList<>( - Collections.singletonList( - dataSourceFile.resolveSibling( - IOUtils.getPath( dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE) ) - ) - ) - ) + resolveFilePathStringFromKnownPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), dataSourceFile) ); return locatableXsvFuncotationFactory; @@ -389,7 +367,7 @@ private static SimpleKeyXsvFuncotationFactory createSimpleXsvDataSource(final Pa // Create our SimpleKeyXsvFuncotationFactory: return new SimpleKeyXsvFuncotationFactory( dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME), - dataSourceFile.resolveSibling(IOUtils.getPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE))), + resolveFilePathStringFromKnownPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), dataSourceFile), dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_VERSION), 
dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_XSV_DELIMITER), Integer.valueOf(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_XSV_KEY_COLUMN)), @@ -417,7 +395,7 @@ private static CosmicFuncotationFactory createCosmicDataSource(final Path dataSo final String version = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_VERSION); return new CosmicFuncotationFactory( - dataSourceFile.resolveSibling(IOUtils.getPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE))), + resolveFilePathStringFromKnownPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), dataSourceFile), annotationOverridesMap, version ); @@ -456,7 +434,7 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data // Create our gencode factory: return new GencodeFuncotationFactory( - dataSourceFile.resolveSibling(fastaPath), + resolveFilePathStringFromKnownPath( fastaPath, dataSourceFile ), version, name, transcriptSelectionMode, @@ -493,7 +471,7 @@ private static VcfFuncotationFactory createVcfDataSource(final Path dataSourceFi return new VcfFuncotationFactory( name, version, - dataSourceFile.resolveSibling(srcFile).toAbsolutePath(), + resolveFilePathStringFromKnownPath(srcFile, dataSourceFile), annotationOverridesMap, featureInput ); @@ -690,7 +668,7 @@ private static void assertConfigFilePropertiesAreValid(final Properties configFi assertConfigPropertiesContainsKey(CONFIG_FILE_FIELD_NAME_TYPE, configFileProperties, configFilePath); // Validate our source file: - assertPathFilePropertiesField( configFileProperties, CONFIG_FILE_FIELD_NAME_SRC_FILE, configFilePath); + assertPathFilePropertiesField(configFileProperties, CONFIG_FILE_FIELD_NAME_SRC_FILE, configFilePath); // Validate our type: final String stringType = configFileProperties.getProperty(CONFIG_FILE_FIELD_NAME_TYPE); @@ -751,25 +729,53 @@ public static void assertBooleanPropertiesField(final Properties props, final St } } + /** + * Resolves the path string to a full 
path object using the given knownPath as a sibling file. + * Sibling file will only be used if it is determined that the given path string is a relative path on the default file system. + * @param filePathString {@link String} containing a file path to resolve. + * @param knownPath {@link Path} of a potential sibling file system entry. + * @return A {@link Path} object resolved to point to the given {@code filePathString}. + */ + public static Path resolveFilePathStringFromKnownPath(final String filePathString, final Path knownPath ) { + + final Path rawFilePath = IOUtils.getPath(filePathString); + + final Path absoluteFilePath; + if ( rawFilePath.isAbsolute() || (!rawFilePath.getFileSystem().equals(FileSystems.getDefault()))) { + // Absolute path or different file system. + // No need to resolve anything. + absoluteFilePath = rawFilePath; + } + else { + // If the path is not absolute, assume we must resolve it with our config file path: + absoluteFilePath = knownPath.resolveSibling(filePathString); + logger.info("Resolved local data source file path: " + rawFilePath.toUri().toString() + " -> " + absoluteFilePath.toUri().toString()); + } + return absoluteFilePath; + } + /** * Asserts that the given {@code field} is contained in the given {@code props} and is a file path. - * @param props {@link Properties} corresponding to the given {@code filePath} in which to check for the validity of {@code field}. + * @param props {@link Properties} corresponding to the given {@code configFilePath} in which to check for the validity of {@code field}. * @param field {@link String} name of the field, the existence and correct type of which will be confirmed in {@code props}. - * @param filePath {@link Path} to config file. For output purposes only. + * @param configFilePath {@link Path} to config file. Used to resolve relative file paths and in error messages.
*/ - public static void assertPathFilePropertiesField(final Properties props, final String field, final Path filePath) { - final Path sourceFilePath = filePath.resolveSibling(props.getProperty(field)); - if ( !Files.exists(sourceFilePath) ) { - throw new UserException.BadInput("ERROR in config file: " + filePath.toUri().toString() + - " - " + field + " does not exist: " + sourceFilePath); + public static void assertPathFilePropertiesField(final Properties props, final String field, final Path configFilePath) { + + final String filePathString = props.getProperty(field); + final Path absoluteFilePath = resolveFilePathStringFromKnownPath(filePathString, configFilePath); + + if ( !Files.exists(absoluteFilePath) ) { + throw new UserException.BadInput("ERROR in config file: " + configFilePath.toUri().toString() + + " - " + field + " does not exist: " + absoluteFilePath); } - else if ( !Files.isRegularFile(sourceFilePath) ) { - throw new UserException.BadInput("ERROR in config file: " + filePath.toUri().toString() + - " - " + field + " is not a regular file: " + sourceFilePath); + else if ( !Files.isRegularFile(absoluteFilePath) ) { + throw new UserException.BadInput("ERROR in config file: " + configFilePath.toUri().toString() + + " - " + field + " is not a regular file: " + absoluteFilePath); } - else if ( !Files.isReadable(sourceFilePath) ) { - throw new UserException.BadInput("ERROR in config file: " + filePath.toUri().toString() + - " - " + field + " is not readable: " + sourceFilePath); + else if ( !Files.isReadable(absoluteFilePath) ) { + throw new UserException.BadInput("ERROR in config file: " + configFilePath.toUri().toString() + + " - " + field + " is not readable: " + absoluteFilePath); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java index 86187bfff84..1d5479e3dd6 
100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java @@ -23,10 +23,15 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.gencode.*; +import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.nio.NioFileCopierWithProgressMeter; import org.broadinstitute.hellbender.utils.param.ParamUtils; import org.broadinstitute.hellbender.utils.read.ReadUtils; +import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import java.io.File; +import java.nio.file.FileSystems; import java.nio.file.Path; import java.util.*; import java.util.regex.Matcher; @@ -56,6 +61,9 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { /** Standard Logger. */ protected static final Logger logger = LogManager.getLogger(GencodeFuncotationFactory.class); + private static final String LOCAL_GENCODE_TRANSCRIPT_TMP_DIR_PREFIX = "localGencodeTranscriptFastaFolder"; + private static final String LOCAL_GENCODE_TRANSCRIPT_FILE_BASE_NAME = "gencodeTranscriptFastaFile"; + /** * The window around splice sites to mark variants as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE}. */ @@ -194,7 +202,7 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { /** * Creates a {@link GencodeFuncotationFactory} with the 5'/3' flank sizes both set to 0. * - * @param gencodeTranscriptFastaFile {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. 
+ * @param gencodeTranscriptFastaFilePath {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. * @param version The version {@link String} of Gencode from which {@link Funcotation}s will be made. * @param name A {@link String} containing the name of this {@link GencodeFuncotationFactory}. * @param transcriptSelectionMode The {@link TranscriptSelectionMode} by which representative/verbose transcripts will be chosen for overlapping variants. @@ -202,20 +210,20 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { * @param annotationOverrides A {@link LinkedHashMap} containing user-specified overrides for specific {@link Funcotation}s. * @param mainFeatureInput The backing {@link FeatureInput} for this {@link GencodeFuncotationFactory}, from which all {@link Funcotation}s will be created. */ - public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, + public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, final String version, final String name, final TranscriptSelectionMode transcriptSelectionMode, final Set userRequestedTranscripts, final LinkedHashMap annotationOverrides, final FeatureInput mainFeatureInput) { - this(gencodeTranscriptFastaFile, version, name, transcriptSelectionMode, userRequestedTranscripts, annotationOverrides, mainFeatureInput, new FlankSettings(0, 0)); + this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts, annotationOverrides, mainFeatureInput, new FlankSettings(0, 0)); } /** * Create a {@link GencodeFuncotationFactory}. * - * @param gencodeTranscriptFastaFile {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. + * @param gencodeTranscriptFastaFilePath {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source.
* @param version The version {@link String} of Gencode from which {@link Funcotation}s will be made. * @param name A {@link String} containing the name of this {@link GencodeFuncotationFactory}. * @param transcriptSelectionMode The {@link TranscriptSelectionMode} by which representative/verbose transcripts will be chosen for overlapping variants. @@ -224,7 +232,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, * @param mainFeatureInput The backing {@link FeatureInput} for this {@link GencodeFuncotationFactory}, from which all {@link Funcotation}s will be created. * @param flankSettings Settings object containing our 5'/3' flank sizes */ - public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, + public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, final String version, final String name, final TranscriptSelectionMode transcriptSelectionMode, @@ -235,10 +243,12 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, super(mainFeatureInput); + // Set up our local transcript fasta file. 
+ // We must localize it (if not on disk) to make read times fast enough to be manageable: + gencodeTranscriptFastaFile = localizeGencodeTranscriptFastaFile( gencodeTranscriptFastaFilePath ); this.flankSettings = flankSettings; - this.gencodeTranscriptFastaFile = gencodeTranscriptFastaFile; - + // Initialize our transcript data source and ID map: transcriptFastaReferenceDataSource = ReferenceDataSource.of(gencodeTranscriptFastaFile); transcriptIdMap = createTranscriptIdMap(transcriptFastaReferenceDataSource); @@ -261,6 +271,47 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, initializeAnnotationOverrides( annotationOverrides ); } + private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) { + + // Is the path local or in the cloud: + if ( gencodeTranscriptFastaFilePath.getFileSystem().equals(FileSystems.getDefault()) ) { + // local path, just return it: + return gencodeTranscriptFastaFilePath; + } + + // Not a local path! We must localize it! 
+ + // Get the remote paths for the index and dictionary files: + final Path remoteGencodeTranscriptFastaIndexFilePath = IOUtils.getPath( ReferenceUtils.getFastaIndexFileName(gencodeTranscriptFastaFilePath.toUri().toString()) ); + final Path remoteGencodeTranscriptFastaSequenceDictionaryFilePath = IOUtils.getPath( ReferenceUtils.getFastaDictionaryFileName(gencodeTranscriptFastaFilePath.toUri().toString()) ); + + // Create a place for the files: + final File tmpDir = IOUtils.createTempDir(LOCAL_GENCODE_TRANSCRIPT_TMP_DIR_PREFIX); + tmpDir.deleteOnExit(); + final Path tmpDirPath = tmpDir.toPath(); + + // Create paths to the fasta, fasta index, and the sequence dictionary: + final Path localGencodeTranscriptFastaFilePath = tmpDirPath.resolve(LOCAL_GENCODE_TRANSCRIPT_FILE_BASE_NAME + ".fa"); + final Path localGencodeTranscriptFastaIndexFilePath = IOUtils.getPath( ReferenceUtils.getFastaIndexFileName(localGencodeTranscriptFastaFilePath.toUri().toString()) ); + final Path localGencodeTranscriptFastaSequenceDictionaryFilePath = IOUtils.getPath( ReferenceUtils.getFastaDictionaryFileName(localGencodeTranscriptFastaFilePath.toUri().toString()) ); + + // Copy the files to our local machine: + logger.info("Localizing Gencode transcript FASTA file for faster lookup times..."); + + // Copy FASTA: + NioFileCopierWithProgressMeter.create(gencodeTranscriptFastaFilePath, localGencodeTranscriptFastaFilePath, true).initiateCopy(); + + // Copy Index: + NioFileCopierWithProgressMeter.create(remoteGencodeTranscriptFastaIndexFilePath, localGencodeTranscriptFastaIndexFilePath, true).initiateCopy(); + + // Copy Sequence Dictionary: + NioFileCopierWithProgressMeter.create(remoteGencodeTranscriptFastaSequenceDictionaryFilePath, localGencodeTranscriptFastaSequenceDictionaryFilePath, true).initiateCopy(); + + + // Bye Bye! 
+ return localGencodeTranscriptFastaFilePath; + } + //================================================================================================================== // Override Methods: diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java index 0a60388964e..0e245c630a7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java @@ -138,7 +138,7 @@ public VcfFuncotationFactory(final String name, private FuncotationMetadata createFuncotationMetadata(final Path sourceFilePath) { // Read the VCF to just get the header - try ( final FeatureDataSource vcfReader = new FeatureDataSource<>(sourceFilePath.toString()) ) { + try ( final FeatureDataSource vcfReader = new FeatureDataSource<>(sourceFilePath.toUri().toString()) ) { final Object header = vcfReader.getHeader(); if ( ! (header instanceof VCFHeader) ) { throw new IllegalArgumentException(sourceFilePath + " does not have a valid VCF header"); @@ -433,7 +433,7 @@ private TableFuncotation createDefaultFuncotation(final Allele altAllele) { * Populates {@link VcfFuncotationFactory#supportedFieldNames} and {@link VcfFuncotationFactory#supportedFieldNamesAndDefaults}. 
*/ private void populateSupportedFieldNamesFromVcfFile() { - final VCFFileReader reader = new VCFFileReader(sourceFilePath.toFile()); + final VCFFileReader reader = new VCFFileReader(sourceFilePath); final VCFHeader header = reader.getFileHeader(); final List infoLineKeys = new ArrayList<>(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java index 1b9580bb894..49b84586d3a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java @@ -15,6 +15,7 @@ import org.broadinstitute.hellbender.tools.funcotator.FuncotatorArgumentDefinitions; import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; +import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvLocatableTableCodec; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature; @@ -25,7 +26,7 @@ import java.util.*; /** - * Factory for creating {@link TableFuncotation}s by handling `Separated Value` files with arbitrary delimiters + * Factory for creating {@link TableFuncotation}s by handling `Separated Value` files with arbitrary delimiters * (e.g. CSV/TSV files) which contain data that are locatable (i.e. {@link org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature}). * * This is a high-level object that interfaces with the internals of {@link org.broadinstitute.hellbender.tools.funcotator.Funcotator}. 
@@ -52,20 +53,20 @@ public class LocatableXsvFuncotationFactory extends DataSourceFuncotationFactory /** * {@link LinkedHashSet} of the names of all fields supported by this {@link LocatableXsvFuncotationFactory}. - * Set by {@link #setSupportedFuncotationFields(List)}. + * Set by {@link #setSupportedFuncotationFields(Path)}. */ private LinkedHashSet supportedFieldNames = null; /** * {@link List} of the names of all fields supported by this {@link LocatableXsvFuncotationFactory}. - * Set by {@link #setSupportedFuncotationFields(List)}. + * Set by {@link #setSupportedFuncotationFields(Path)}. */ private List supportedFieldNameList = null; /** * {@link List} of empty {@link String}s of the same length as {@link #supportedFieldNames}. * Cached for faster output. - * Set by {@link #setSupportedFuncotationFields(List)}. + * Set by {@link #setSupportedFuncotationFields(Path)}. */ private List emptyFieldList = null; @@ -90,7 +91,6 @@ public LocatableXsvFuncotationFactory(final String name, final String version, f this.annotationOverrideMap = new LinkedHashMap<>(annotationOverridesMap); } - //================================================================================================================== // Override Methods: @@ -199,41 +199,54 @@ private List createDefaultFuncotationsOnVariantHelper( final Varian /** * Set the field names that this {@link LocatableXsvFuncotationFactory} can create. * Does so by reading the headers of backing data files for this {@link LocatableXsvFuncotationFactory}. - * @param inputDataFilePaths {@link List} to backing data files from which annotations can be made for this {@link LocatableXsvFuncotationFactory}. + * @param inputDataFilePath {@link Path} to a backing data file from which annotations can be made for this {@link LocatableXsvFuncotationFactory}. Must not be {@code null}. 
*/ - public void setSupportedFuncotationFields(final List inputDataFilePaths) { + public void setSupportedFuncotationFields(final Path inputDataFilePath) { + + Utils.nonNull(inputDataFilePath); if ( supportedFieldNames == null ) { synchronized ( this ) { if ( supportedFieldNames == null ) { - // Approximate starting size: - supportedFieldNames = new LinkedHashSet<>(inputDataFilePaths.size() * 10); - - for ( final Path dataPath : inputDataFilePaths ) { + // Approximate / arbitrary starting size: + supportedFieldNames = new LinkedHashSet<>(10); + + // Set up a codec here to read the config file. + // We have to call canDecode to set up the internal state of the XsvLocatableTableCodec: + final XsvLocatableTableCodec codec = new XsvLocatableTableCodec(); + try { + if ( !codec.canDecode(mainSourceFileAsFeatureInput.getFeaturePath()) ) { + // This should never happen because we have already validated this config file by the time we + // reach here: + throw new GATKException.ShouldNeverReachHereException("Could not decode from data file: " + mainSourceFileAsFeatureInput.getFeaturePath()); + } + } + catch ( final NullPointerException ex ) { + // This should never happen because we have already validated this config file by the time we + // reach here: + throw new GATKException.ShouldNeverReachHereException("Could not decode from data file! 
Has not been set yet!"); + } - final XsvLocatableTableCodec codec = new XsvLocatableTableCodec(); - List header = null; + // Get the info from our path: + final List columnNames; + try (final InputStream fileInputStream = Files.newInputStream(inputDataFilePath)) { - if (codec.canDecode(dataPath.toString())) { - try (final InputStream fileInputStream = Files.newInputStream(dataPath)) { + final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); + codec.readActualHeader(lineReaderIterator); + columnNames = codec.getHeaderWithoutLocationColumns(); - final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); - codec.readActualHeader(lineReaderIterator); - header = codec.getHeaderWithoutLocationColumns(); + } catch (final IOException ioe) { + throw new UserException.BadInput("Could not read header from data file: " + inputDataFilePath.toUri().toString(), ioe); + } - } catch (final IOException ioe) { - throw new UserException.BadInput("Could not read header from data file: " + dataPath.toUri().toString(), ioe); - } - } + // Make sure we actually read the header: + if ( columnNames == null ) { + throw new UserException.MalformedFile("Could not decode from data file: " + inputDataFilePath.toUri().toString()); + } - // Make sure we actually read the header: - if ( header == null ) { - throw new UserException.MalformedFile("Could not decode from data file: " + dataPath.toUri().toString()); - } + supportedFieldNames.addAll(columnNames); - supportedFieldNames.addAll(header); - } // Initialize our field name lists: initializeFieldNameLists(); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java index 25f498a7b14..c1c17ea8656 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java @@ -11,10 +11,12 @@ import htsjdk.tribble.readers.LineIterator; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.io.IOUtils; @@ -69,14 +71,8 @@ public final class XsvLocatableTableCodec extends AsciiFeatureCodec header; @@ -142,10 +141,9 @@ public XsvLocatableTableCodec() { super(XsvTableFeature.class); } - /** Constructor for when a configuration file is specified instead of using a sibling config file. - * + /** + * Constructor for when a configuration file is specified. * This cannot be used with auto decoding. - * * @param overrideConfigFile {@link Path} to the file to use as a configuration file for the given file. */ public XsvLocatableTableCodec(final Path overrideConfigFile) { @@ -158,36 +156,65 @@ public XsvLocatableTableCodec(final Path overrideConfigFile) { @Override public boolean canDecode(final String path) { + Utils.nonNull(path); // seg files are handled by a different codec. This check has to be done, since seg files will return true in // this codec and the AnnotatedIntervalCodec. 
- return !path.endsWith(".seg") && canDecodeMinusExtensionChecks(path); + return path.endsWith(".config") && canDecodeFileChecks(path); + } + + @Override + public String getPathToDataFile( final String path ) { + return backingDataFilePath.toUri().toString(); } /** - * Minus checking the file extension, can this class decode the given path. + * Checks the content of the given config file to see if it can be decoded. + * The config file can be independent of the backing data source file. + * + * NOTE: To reiterate, this takes a CONFIG file, not the actual data file to be read. * * TODO: This method should be inside an abstract superclass. {@link XsvLocatableTableCodec} and {@link org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCodec} should inherit. See https://github.com/broadinstitute/gatk/issues/4580 * - * @param path File to check. Never {@code null} + * @param configFilePathString {@link String} containing the path to the configuration file to check. Never {@code null} * @return true if the file can be decoded. False otherwise. */ - public boolean canDecodeMinusExtensionChecks(final String path) { - Utils.nonNull(path); + public boolean canDecodeFileChecks(final String configFilePathString) { + Utils.nonNull(configFilePathString); - // Get the paths to our file and the config file: - final Path inputFilePath = IOUtils.getPath(path); - final Path configFilePath = (overrideConfigFile != null ? - overrideConfigFile : getConfigFilePath(inputFilePath)); + // Get the path to our config file: + final Path configFilePath = (overrideConfigFile != null ? 
overrideConfigFile : IOUtils.getPath(configFilePathString)); + + // Make sure we can read the config file: + if ( !validateInputFileCanBeRead(configFilePath) ) { + return false; + } + + // Make sure our config file contains the information we need: + final Pair validityAndPropertiesPair = getAndValidateConfigFileContentsOnPath(configFilePath, true); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties configProperties = validityAndPropertiesPair.getRight(); + + if ( !isValid ) { + return false; + } + + // Get the backing data file path: + final String inputFilePathString = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE); + + // Resolve the input file path to a real path: + final Path dataFilePath = DataSourceUtils.resolveFilePathStringFromKnownPath( inputFilePathString, configFilePath ); // Check that our files are good for eating... I mean reading... - if ( validateInputDataFile(inputFilePath) && validateInputDataFile(configFilePath) ) { + if ( validateInputFileCanBeRead(dataFilePath) ) { + + backingDataFilePath = dataFilePath; // auto-determine the preamble format - preambleLineStart = determinePreambleLineStart(inputFilePath); + preambleLineStart = determinePreambleLineStart(backingDataFilePath); // Get our metadata and set up our internals so we can read from this file: - readMetadataFromConfigFile(configFilePath); + populateMetaDataFromConfigProperties(configProperties); return true; } else { @@ -195,6 +222,57 @@ public boolean canDecodeMinusExtensionChecks(final String path) { } } + /** + * Checks the content of the given config file and backing data file to see if they can be decoded. + * + * NOTE: To reiterate, this takes a CONFIG file, not the actual data file to be read. + * + * TODO: This method should be inside an abstract superclass. {@link XsvLocatableTableCodec} and {@link org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCodec} should inherit. 
See https://github.com/broadinstitute/gatk/issues/4580 + * + * @param configFilePathString {@link String} containing the path to the configuration file to check. Never {@code null}. + * @param dataFilePathString {@link String} containing the path to the backing data file to check. Never {@code null}. + * @return true if the file can be decoded. False otherwise. + */ + public boolean canDecodeFileChecks(final String configFilePathString, final String dataFilePathString) { + Utils.nonNull(configFilePathString); + Utils.nonNull(dataFilePathString); + + // Get the path to our config file: + final Path configFilePath = (overrideConfigFile != null ? overrideConfigFile : IOUtils.getPath(configFilePathString)); + + // Get the path to our data file: + final Path dataFilePath = IOUtils.getPath(dataFilePathString); + + // Make sure we can read the config file: + if ( !validateInputFileCanBeRead(configFilePath) ) { + return false; + } + + // Make sure our config file contains the information we need: + final Pair validityAndPropertiesPair = getAndValidateConfigFileContentsOnPath(configFilePath, false); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties configProperties = validityAndPropertiesPair.getRight(); + + if ( !isValid ) { + return false; + } + + // Make sure we can read the data file: + if ( !validateInputFileCanBeRead(dataFilePath) ) { + return false; + } + + // Resolve the input file path to a real path: + backingDataFilePath = dataFilePath; + + // auto-determine the preamble format + preambleLineStart = determinePreambleLineStart(backingDataFilePath); + + // Get our metadata and set up our internals so we can read from this file: + populateMetaDataFromConfigProperties(configProperties); + return true; + } + @Override public XsvTableFeature decode(final String s) { @@ -297,6 +375,43 @@ String determineFinalColumn(final String rawInputListOrIndex) { : determinePrefixForHeader() + determineColumnNameToUse(rawInputListOrIndex); } + /** + * 
Get the properties from the given {@code configFilePath}, validate that all required properties are present, + * and return the property map. + * @param configFilePath {@link Path} to the configuration file. + * @param errorOnMissingConfigKey If {@code true} will log an error message when the given {@code key} is not contained in {@code configProperties}. + * @return The {@link Properties} as contained in the given {@code configFilePath}. + */ + public static Pair getAndValidateConfigFileContentsOnPath(final Path configFilePath, + final boolean errorOnMissingConfigKey) { + + Utils.nonNull(configFilePath); + + boolean isValid = true; + + // Read in the contents of the config file: + final Properties configProperties = new Properties(); + try ( final InputStream inputStream = Files.newInputStream(configFilePath, StandardOpenOption.READ) ) { + configProperties.load(inputStream); + } + catch (final Exception ex) { + throw new UserException.BadInput("Unable to read from XSV config file: " + configFilePath.toUri().toString(), ex); + } + + // Validate that it has the correct keys: + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_VERSION, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_ORIGIN_LOCATION, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_PREPROCESSING_SCRIPT, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, 
DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_XSV_DELIMITER, configFilePath, errorOnMissingConfigKey) && isValid; + isValid = configPropertiesContainsKey(configProperties, DataSourceUtils.CONFIG_FILE_FIELD_NAME_NAME, configFilePath, errorOnMissingConfigKey) && isValid; + + return Pair.of(isValid, configProperties); + } + private List getRawHeaders() { assertHeaderInitialized(); return header.stream().map(h -> getHeaderWithoutPrefix(h)).collect(Collectors.toList()); @@ -343,38 +458,6 @@ private String determinePrefixForHeader() { return (StringUtils.isEmpty(dataSourceName) ? "" : dataSourceName + "_"); } - //================================================================================================================== - // Static Methods: - - /** - * Get the properties from the given {@code configFilePath}, validate that all required properties are present, - * and return the property map. - * @param configFilePath {@link Path} to the configuration file. - * @return The {@link Properties} as contained in the given {@code configFilePath}. 
- */ - public static Properties getAndValidateConfigFileContents(final Path configFilePath) { - - Utils.nonNull(configFilePath); - - // Read in the contents of the config file: - final Properties configFileContents = new Properties(); - try ( final InputStream inputStream = Files.newInputStream(configFilePath, StandardOpenOption.READ) ) { - configFileContents.load(inputStream); - } - catch (final Exception ex) { - throw new UserException.BadInput("Unable to read from XSV config file: " + configFilePath.toUri().toString(), ex); - } - - // Validate that it has the right keys: - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_CONTIG_COLUMN_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_START_COLUMN_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_END_COLUMN_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_DELIMITER_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_DATA_SOURCE_NAME_KEY, configFilePath); - - return configFileContents; - } - private boolean isPreambleLine(final String line) { return line.startsWith(preambleLineStart); } @@ -396,11 +479,20 @@ public static Path getConfigFilePath(final Path inputFilePath) { * @param configProperties The {@link Properties} in which to look for the given key. * @param key The value to find in the given {@link Properties}. * @param configFilePath The {@link Path} for the config file from which {@link Properties} were derived. Used for printing output only. + * @param errorOnMissingKey If {@code true} will log an error message when the given {@code key} is not contained in {@code configProperties}. 
*/ - private static void assertConfigPropertiesContainsKey(final Properties configProperties, final String key, final Path configFilePath) { + private static boolean configPropertiesContainsKey(final Properties configProperties, final String key, final Path configFilePath, final boolean errorOnMissingKey) { if ( !configProperties.stringPropertyNames().contains(key) ) { - throw new UserException.BadInput("Config file for datasource (" + configFilePath.toUri().toString() + ") does not contain required key: " + key); + final String logMessage = "Config file for datasource (" + configFilePath.toUri().toString() + ") does not contain required key: " + key; + if (errorOnMissingKey) { + logger.error( logMessage ); + } + else { + logger.warn( logMessage ); + } + return false; } + return true; } //================================================================================================================== @@ -411,26 +503,24 @@ private static void assertConfigPropertiesContainsKey(final Properties configPro * @param filePath The {@link Path} to the data file to validate. * @return {@code true} if the given {@code filePath} is valid; {@code false} otherwise. */ - private boolean validateInputDataFile(final Path filePath) { + private boolean validateInputFileCanBeRead(final Path filePath) { return Files.exists(filePath) && Files.isReadable(filePath) && !Files.isDirectory(filePath); } /** - * Reads the metadata required for parsing from the given {@code configFilePath}. - * @param configFilePath {@link Path} to the configuration file from which to read in and setup metadata values. + * Populates the metadata required for parsing from the given {@code configProperties}. + * @param configProperties {@link Properties} containing configuration information for this {@link XsvLocatableTableCodec}. 
*/ - private void readMetadataFromConfigFile(final Path configFilePath) { - - final Properties configProperties = getAndValidateConfigFileContents(configFilePath); + private void populateMetaDataFromConfigProperties(final Properties configProperties) { // Get the properties and remove the leading/trailing whitespace if there is any: - inputContigColumn = configProperties.getProperty(CONFIG_FILE_CONTIG_COLUMN_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); - inputStartColumn = configProperties.getProperty(CONFIG_FILE_START_COLUMN_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); - inputEndColumn = configProperties.getProperty(CONFIG_FILE_END_COLUMN_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); - dataSourceName = configProperties.getProperty(CONFIG_FILE_DATA_SOURCE_NAME_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + inputContigColumn = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + inputStartColumn = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + inputEndColumn = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + dataSourceName = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_NAME).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); // Get the delimiter - we do NOT remove whitespace here on purpose: - delimiter = configProperties.getProperty(CONFIG_FILE_DELIMITER_KEY); + delimiter = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_XSV_DELIMITER); // Process delimiter just in case it is a tab escape character: if ( delimiter.equals("\\t") ) { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java index c29a27120b0..982720f12f8 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java @@ -264,8 +264,7 @@ public static String randomRemotePath(String stagingLocation, String prefix, Str */ public static boolean fileExists(String path) { final boolean MAYBE = false; - try { - InputStream inputStream = openFile(path); + try (InputStream inputStream = openFile(path)) { int ignored = inputStream.read(); } catch (UserException.CouldNotReadInputFile notthere) { // file isn't there diff --git a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config index 905ffae4c9d..a33ae21f5a0 100644 --- a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config +++ b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config @@ -2,4 +2,8 @@ contig_column = CONTIG,contig,Chromosome,chrom,chromosome,Chrom,seqname,seqnames start_column = START,start,Start,Start_Position,start_position,chromStart,segment_start,Start_position,target_start,Position,position,pos,POS,segment_start end_column = END,end,End,End_Position,end_position,chromEnd,segment_end,End_position,target_end,stop,Stop,Position,position,pos,POS,segment_end xsv_delimiter = \t -name = \ No newline at end of file +name = +src_file = +version = +origin_location = +preprocessing_script = diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java new file mode 100644 index 00000000000..bc5457c9afe --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java @@ -0,0 +1,21 @@ +package 
org.broadinstitute.hellbender.engine; + +import htsjdk.tribble.Feature; + +/** + * Test utilities involving {@link FeatureInput}s. + * Created by jonn on 11/7/18. + */ +public class FeatureInputTestTools { + + /** + * Create a feature input based on an input path and a name. + * @param path A {@link String} containing the path to the backing data file for the resulting {@link FeatureInput}. + * @param name A {@link String} containing the name of the feature input type. + * @return A {@link FeatureInput} for the given {@code path} and {@code name}. + */ + public static FeatureInput createFeatureInput(final String path, final String name) { + return new FeatureInput<>(path, name); + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java index d6c03257b1d..065dc4e0235 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java @@ -20,6 +20,7 @@ import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedInterval; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.SimpleKeyXsvFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.mafOutput.CustomMafFuncotationCreator; @@ -395,43 +396,64 @@ public Object[][] provideForLargeDataValidationTest() { "0816201804HC0_R01C01.vcf", b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, - GERMLINE_DATASOURCES_FOLDER + 
GERMLINE_DATASOURCES_FOLDER, + true + }, + { + "0816201804HC0_R01C01.vcf", + b37Reference, + FuncotatorTestConstants.REFERENCE_VERSION_HG19, + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_LOCAL_CLOUD_FOLDER, + false + }, + { + "0816201804HC0_R01C01.vcf", + b37Reference, + FuncotatorTestConstants.REFERENCE_VERSION_HG19, + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_REMOTE_CLOUD_FOLDER, + false }, { "hg38_test_variants.vcf", hg38Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG38, - LARGE_DATASOURCES_FOLDER + LARGE_DATASOURCES_FOLDER, + true }, { "hg38_trio.vcf", hg38Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG38, - LARGE_DATASOURCES_FOLDER + LARGE_DATASOURCES_FOLDER, + true }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_DATA_SET_1, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_DATA_SET_2, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, - FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG38, hg38Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG38, - FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_LARGE_DATA_SET, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, - FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, }; } @@ -486,7 +508,8 @@ private void validateFuncotationsOnVcf(final Iterable vcfIterabl public void largeDataValidationTest(final String inputVcfName, final String referencePath, final String referenceVersion, - final String dataSourcesPath) throws IOException { 
+ final String dataSourcesPath, + final boolean isDsEnvironmentPath) throws IOException { // Get our main test folder path from our environment: final String testFolderInputPath = getFuncotatorLargeDataValidationTestInputPath(); @@ -496,6 +519,14 @@ public void largeDataValidationTest(final String inputVcfName, final String outFileBaseName = inputVcfName + ".funcotator"; + final String dataSourcesPathString; + if (isDsEnvironmentPath) { + dataSourcesPathString = getFuncotatorLargeDataValidationTestInputPath() + dataSourcesPath; + } + else { + dataSourcesPathString = dataSourcesPath; + } + for (final FuncotatorArgumentDefinitions.OutputFormatType outFormat : FuncotatorArgumentDefinitions.OutputFormatType.values()) { startTime = System.nanoTime(); @@ -511,7 +542,7 @@ public void largeDataValidationTest(final String inputVcfName, testFolderInputPath + inputVcfName, outputFile, referencePath, - getFuncotatorLargeDataValidationTestInputPath() + dataSourcesPath, + dataSourcesPathString, referenceVersion, outFormat, true); @@ -930,11 +961,16 @@ final Object[][] provideForMafVcfConcordance() { private void createConfigFileForMAF(final File mafConfigFile) { try ( final PrintWriter printWriter = new PrintWriter(mafConfigFile) ) { - printWriter.println("contig_column = " + MafOutputRendererConstants.FieldName_Chromosome); - printWriter.println("start_column = " + MafOutputRendererConstants.FieldName_Start_Position); - printWriter.println("end_column = " + MafOutputRendererConstants.FieldName_End_Position); - printWriter.println("xsv_delimiter = \\t"); - printWriter.println("name = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN + " = " + MafOutputRendererConstants.FieldName_Chromosome); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN + " = " + MafOutputRendererConstants.FieldName_Start_Position); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN + " = " + 
MafOutputRendererConstants.FieldName_End_Position); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_XSV_DELIMITER + " = \\t"); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_NAME + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_VERSION + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_ORIGIN_LOCATION + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_PREPROCESSING_SCRIPT + " = "); + } catch (final FileNotFoundException ex) { throw new GATKException("Could not create the tmp config file to test maf/vcf concorance: " + mafConfigFile.toURI().toString(), ex); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java index 358b54f809c..bc332f0afe3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java @@ -32,7 +32,13 @@ public class FuncotatorTestConstants { * there will be a bit of manual work as well (I did not have the will or the time to automate everything - Jonn Smith): * GATK_DEVELOPMENT_TOP_DIRECTORY/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh */ - public static final String FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources" + File.separator; + public static final String FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources" + File.separator; + /** Local folder containing data sources that point to the cloud. 
*/ + public static final String FUNCOTATOR_DATA_SOURCES_LOCAL_CLOUD_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources_cloud" + File.separator; + /** Local folder containing local data sources and one that points to gnomAD on the cloud. */ + public static final String FUNCOTATOR_DATA_SOURCES_LOCAL_CLOUD_GNOMAD_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources_cloud_gnomad" + File.separator; + /** Cloud-based folder containing data sources that point to the cloud. */ + public static final String FUNCOTATOR_DATA_SOURCES_REMOTE_CLOUD_FOLDER = "gs://hellbender/test/resources/large/funcotatorDataSourceCollection/funcotator_dataSources_cloud" + File.separator; public static final String DUMMY_DATA_SOURCES_TAR_GZ = FUNCOTATOR_LARGE_FILES_DIR + "dummyDataSources.tar.gz"; public static final String DUMMY_DATA_SOURCES_TAR_GZ_SHA256_FILE = FUNCOTATOR_LARGE_FILES_DIR + "dummyDataSources.sha256"; @@ -55,9 +61,12 @@ public class FuncotatorTestConstants { public static final String XSV_CSV_PIK3CA_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_PIK3CA.csv"; public static final String XSV_CSV_MUC16_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_MUC16.csv"; - public static final String XSV_LOCATABLE_TEST_FILE1_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test.csv"; - public static final String XSV_LOCATABLE_TEST_FILE2_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test2.csv"; - public static final String XSV_LOCATABLE_TEST_FILE3_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test3.tsv"; + public static final String XSV_LOCATABLE_TEST_FILE1_DATA_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test.csv"; + public static final String XSV_LOCATABLE_TEST_FILE1_CONFIG_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test.config"; + public static final String XSV_LOCATABLE_TEST_FILE2_DATA_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test2.csv"; + public static final String XSV_LOCATABLE_TEST_FILE2_CONFIG_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test2.config"; + public static final String 
XSV_LOCATABLE_TEST_FILE3_DATA_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test3.tsv"; + public static final String XSV_LOCATABLE_TEST_FILE3_CONFIG_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test3.config"; public static final String COSMIC_TEST_DB = FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + "cosmic" + File.separator + "hg19" + File.separator + "CosmicTest.db"; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java index 1d3434f6b1f..1702ca0ce2e 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java @@ -6,6 +6,8 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import org.apache.commons.io.FilenameUtils; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.engine.FeatureInput; +import org.broadinstitute.hellbender.engine.FeatureInputTestTools; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.ReferenceDataSource; import org.broadinstitute.hellbender.exceptions.GATKException; @@ -19,6 +21,7 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvLocatableTableCodec; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -28,7 +31,6 @@ import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; 
@@ -57,33 +59,6 @@ public class LocatableXsvFuncotationFactoryUnitTest extends GATKBaseTest { //================================================================================================================== // Helper Data Types: - private static class DummyTestFeature implements Feature { - - private final String contig; - private final int start; - private final int stop; - - public DummyTestFeature(final String contig, final int start, final int stop) { - this.contig = contig; - this.start = start; - this.stop = stop; - } - - @Override - public String getContig() { - return contig; - } - - @Override - public int getStart() { - return start; - } - - @Override - public int getEnd() { - return stop; - } - } //================================================================================================================== // Helper Methods: @@ -249,29 +224,23 @@ private Object[][] provideForTestCreateFuncotations() { @DataProvider private Object[][] provideForTestSetSupportedFuncotationFields() { return new Object[][] { - // Empty list of data files: - {Collections.emptyList(), new LinkedHashSet<>()}, // One Valid XSV (csv) Locatable Data File: { - Collections.singletonList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_PATH)), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_DATA_PATH), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_CONFIG_PATH), new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond")) }, - // One Valid XSV (tsv) Locatable Data File: - { - Collections.singletonList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_PATH)), - new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond")) - }, - // Two Valid XSV Locatable Data Files: + // One Valid XSV (csv) Locatable Data File: { - 
Arrays.asList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_PATH), Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_PATH)), - new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond", - "SECOND_XSV_NAME_Car_Maker", "SECOND_XSV_NAME_Tire_Maker", "SECOND_XSV_NAME_Parent_Company")) + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_DATA_PATH), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_CONFIG_PATH), + new LinkedHashSet<>(Arrays.asList("SECOND_XSV_NAME_Car_Maker", "SECOND_XSV_NAME_Tire_Maker", "SECOND_XSV_NAME_Parent_Company")) }, - // Three Valid XSV Locatable Data Files: + // One Valid XSV (tsv) Locatable Data File: { - Arrays.asList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_PATH), Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_PATH), Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_PATH)), - new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond", - "SECOND_XSV_NAME_Car_Maker", "SECOND_XSV_NAME_Tire_Maker", "SECOND_XSV_NAME_Parent_Company")) + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_DATA_PATH), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_CONFIG_PATH), + new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond")) }, }; } @@ -302,18 +271,20 @@ public void testCreateFuncotations(final VariantContext variant, // Create a temporary file for the "backing data" which will only contain the header: final Path headerBackingDataFilePath = createTempPath("headerBackingDataFile", "csv"); + final Path configFilePath; try { Files.write(headerBackingDataFilePath, ("CONTIG,START,END," + reportableFuncotationFieldNames.stream().collect(Collectors.joining(","))).getBytes()); // Create a temporary file for 
the config file that points to the temporary file for the backing data: - createTemporaryConfigFile(headerBackingDataFilePath); + configFilePath = createTemporaryConfigFile(headerBackingDataFilePath); } catch (final IOException ex) { throw new GATKException("Could not write to temp file for testing: " + headerBackingDataFilePath.toUri(), ex); } - final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(defaultDataSourceName, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), null); - locatableXsvFuncotationFactory.setSupportedFuncotationFields(new ArrayList<>(Collections.singletonList(headerBackingDataFilePath))); + final FeatureInput featureInput = FeatureInputTestTools.createFeatureInput( configFilePath.toUri().toString(), defaultDataSourceName ); + final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(defaultDataSourceName, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), featureInput); + locatableXsvFuncotationFactory.setSupportedFuncotationFields(headerBackingDataFilePath); Assert.assertEquals( locatableXsvFuncotationFactory.createFuncotationsOnVariant( variant, referenceContext, featureList ), @@ -327,11 +298,14 @@ public void testCreateFuncotations(final VariantContext variant, } @Test(dataProvider = "provideForTestSetSupportedFuncotationFields") - public void testSetSupportedFuncotationFields(final List dataFilePaths, + public void testSetSupportedFuncotationFields(final Path dataFilePath, + final Path configFilePath, final LinkedHashSet expected) { - final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(LocatableXsvFuncotationFactory.DEFAULT_NAME, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), null); - locatableXsvFuncotationFactory.setSupportedFuncotationFields(dataFilePaths); + final FeatureInput featureInput = 
FeatureInputTestTools.createFeatureInput(configFilePath.toUri().toString(), defaultDataSourceName); + final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(LocatableXsvFuncotationFactory.DEFAULT_NAME, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), featureInput); + + locatableXsvFuncotationFactory.setSupportedFuncotationFields(dataFilePath); Assert.assertEquals( locatableXsvFuncotationFactory.getSupportedFuncotationFields(), @@ -345,8 +319,11 @@ public void testGetSupportedFuncotationFields() { locatableXsvFuncotationFactory.getSupportedFuncotationFields(); } - private void createTemporaryConfigFile(final Path backingDataSourcePath) throws IOException { + private Path createTemporaryConfigFile(final Path backingDataSourcePath) throws IOException { + return createTemporaryConfigFile(backingDataSourcePath, ","); + } + private Path createTemporaryConfigFile(final Path backingDataSourcePath, final String delimiter) throws IOException { // Config file must be next to backingDataSourcePath, and have the same base name, with the .config extension: final String backingDataSourceFileName = backingDataSourcePath.toFile().getName(); final String configFileBaseName = FilenameUtils.removeExtension(backingDataSourceFileName); @@ -372,13 +349,13 @@ private void createTemporaryConfigFile(final Path backingDataSourcePath) throws writer.println(""); writer.println("# Required field for GENCODE files."); writer.println("# Path to the FASTA file from which to load the sequences for GENCODE transcripts:"); - writer.println(" gencode_fasta_path ="); + writer.println("gencode_fasta_path ="); writer.println(""); writer.println("# Required field for simpleXSV files."); writer.println("# Valid values:"); writer.println("# GENE_NAME"); writer.println("# TRANSCRIPT_ID"); - writer.println(" xsv_key = "); + writer.println("xsv_key = "); writer.println(""); writer.println("# Required field for simpleXSV files."); 
writer.println("# The 0-based index of the column containing the key on which to match"); @@ -386,26 +363,28 @@ private void createTemporaryConfigFile(final Path backingDataSourcePath) throws writer.println(""); writer.println("# Required field for simpleXSV AND locatableXSV files."); writer.println("# The delimiter by which to split the XSV file into columns."); - writer.println(" xsv_delimiter = ,"); + writer.println("xsv_delimiter = " + delimiter); writer.println(""); writer.println("# Required field for simpleXSV files."); writer.println("# Whether to permissively match the number of columns in the header and data rows"); writer.println("# Valid values:"); writer.println("# true"); writer.println("# false"); - writer.println(" xsv_permissive_cols = "); + writer.println("xsv_permissive_cols = "); writer.println(""); writer.println("# Required field for locatableXSV files."); writer.println("# The 0-based index of the column containing the contig for each row"); - writer.println(" contig_column = 0 "); + writer.println("contig_column = 0 "); writer.println(""); writer.println("# Required field for locatableXSV files."); writer.println("# The 0-based index of the column containing the start position for each row"); - writer.println(" start_column = 1 "); + writer.println("start_column = 1 "); writer.println(""); writer.println("# Required field for locatableXSV files."); writer.println("# The 0-based index of the column containing the end position for each row"); - writer.println(" end_column = 2"); + writer.println("end_column = 2"); } + + return configPath; } } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java index 6a32fc422f6..dde9297b315 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java @@ -3,6 +3,8 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.tribble.readers.AsciiLineReader; import htsjdk.tribble.readers.AsciiLineReaderIterator; +import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; @@ -280,10 +282,19 @@ public void testGetConfigFilePath(final String filePath, final Path expected) { // getAndValidateConfigFileContents @Test(dataProvider = "provideForTestGetAndValidateConfigFileContents") public void testGetAndValidateConfigFileContents(final Path configFilePath, final Properties expected) { - final Properties properties = XsvLocatableTableCodec.getAndValidateConfigFileContents(configFilePath); + final Pair validityAndPropertiesPair = XsvLocatableTableCodec.getAndValidateConfigFileContentsOnPath(configFilePath, false); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties properties = validityAndPropertiesPair.getRight(); + + Assert.assertEquals( isValid, true ); Assert.assertEquals(properties, expected); } + @Test + public void testvalidateAndReadPreambleFromDataFile() { + throw new NotImplementedException("FIXME"); + } + @Test public void testRenderSamFileHeaderFromNoPreamble() { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt new file mode 100644 index 00000000000..a5d8a3f4d51 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8927a3a6d7ee7e88cad9c53b629d6935d1d21c7cb4192de3efcd584fb00bcb5c +size 134 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config new file mode 100755 index 00000000000..b8cb3fd58ad --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e2b2287f413c3e2e29f8e3b141364aea73c69366d24f587a5f95590479efee +size 1638 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config new file mode 100755 index 00000000000..b8cb3fd58ad --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e2b2287f413c3e2e29f8e3b141364aea73c69366d24f587a5f95590479efee +size 1638 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config new file mode 100644 index 00000000000..60ea24e9a80 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85bcafc5971713c72b8e6e00a6ead025d9b5cb843981efb55795953561bb341 +size 1903 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config new file mode 100644 index 00000000000..ea98e97e2bb --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ccffb3789c983b21b7a4deac3846ba01e1dfdfc920665abbe8f4614e37e02d +size 1904 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config new file mode 100644 index 00000000000..edc4f58afb9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afadb719be43a2e25316cbda1d342711e5c7643aea0cc42324d1ff9720dc9d63 +size 1719 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config new file mode 100755 index 00000000000..7664555adb6 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e06ee6b97cc7a3a22b8079039b738710cf09c432696956bc6743e4ae0e9fc04 +size 1754 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config new file mode 100755 index 00000000000..54d817d517b --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5817cc52bd85d5aaafa7b5182273b711cfef726f4707f6e2cc7c4e5d3b452cb6 +size 1754 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config new file mode 100755 index 00000000000..0f4485649d1 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b180dfb0e124158e4f635f73f7fd6e38667b550aef29cdc3d95fe38e1016afed +size 1592 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt new file mode 100755 index 00000000000..fb1b9261983 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9646f97f2309f1c25022c2df1e886ca2d518d80ba5aa6e2945542ad1b7bef635 +size 63089 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt new file mode 100755 index 00000000000..56a4e792d10 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:614aa76ebdeccdcac3930056c576255654aac6ca072b41b4e0c7b6a92586ea77 +size 59836 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config new file mode 100755 index 00000000000..a26585a23c3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a923a6ef39c1b0ba458df4ba23468648a0cdd4051c8a31d0bade136538d13dba +size 1605 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config new file mode 100755 index 00000000000..3a68dfc279b --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc481041857d8eca45094f8113373cd8bd52886039c8d1e63ba437935e2644e4 +size 1593 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv new file mode 100644 index 00000000000..77faa1b6557 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b471771ca94ebad8cf36809c7ca3b9db74d60b4fe76eddbeebb28493f6a92c +size 3106205 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx new file mode 100644 index 00000000000..7065462ea2f --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a9ae0670204af9ca985a764d2f28cc40041bd8949de394fbad223beb1ff3801 +size 10135843 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db new file mode 100644 index 00000000000..9231c0664b9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb5c0581fa1155956448433762e234a506ada1e03ed1719808d30ea30d8678f +size 176128 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config new file mode 100755 index 00000000000..36f4c550b8a --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cde65ff9369789f407da94e9ab70f7bfb40a4f2d06dbacd652ea9bbadd1d331 +size 1629 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config new file mode 100755 index 00000000000..fb7b2413392 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e4a2256b8d4adef2a39481132527a74985e613a9887c9c4c760c68d926f09f +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv new file mode 100644 index 00000000000..03df2528fa8 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38425fb04d7a3baa7315fd6c93e59a54a012fdfe3694bfed500457cd96f97cf9 +size 211308 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config new file mode 100755 index 00000000000..900fc5cd277 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8178ff8a9feee459a3c3562103e2345135091c9a857812b1eaf2a42b355b8a8c +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv new file mode 100644 index 00000000000..03df2528fa8 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38425fb04d7a3baa7315fd6c93e59a54a012fdfe3694bfed500457cd96f97cf9 +size 211308 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config new file mode 100755 index 00000000000..0ce00e894a3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce23f882b58eed016200f27e3936bf8760ff63ed983b9df6c3a038b197db7e88 +size 1662 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv new file mode 100644 index 00000000000..574da93b4df --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dcd55433453c500453efdfa7f8d555a0e48e00881bf31e997443c5423ef2298 +size 2331268 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config new file mode 100755 index 00000000000..e20d37c2e6b --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5833bdf72148afaec4b193ec478e9c95c1cac841652fbe69096e96d21c171c73 +size 1662 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv new file mode 100644 index 00000000000..475ef3bdad4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfb78a918c321b70b93961c47002f36fe34fcd82590a9a551cb8d585cfc3e81 +size 2269588 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config new file mode 100644 index 00000000000..c937ad318c3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5a616eaae716118c8d077a36dcefc1ba2ead19eebb38d84c63e692694e02b8 +size 1740 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz new file mode 100644 index 00000000000..4f363874872 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f7de0f377a77dfb68f6efd508b52a4949c27ff509eb440211a2c57569b2f63 +size 30293528 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi new file mode 100644 index 00000000000..64c179a311e --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8e18795653074cf54b163cd27f11004b372c3ad7bb5456a7b6048f1bb746aa +size 18769 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config new file mode 100644 index 00000000000..8e598bd310d --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0ccf83a697ef6e8368791d6be855bab3a6e750fd8a460da529a96625ed4bd6 +size 1740 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz new file mode 100644 index 00000000000..498869b31c4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0451833249f65aa3da8392d6eb92036e90f498ce9f5a6f55fb8e316bd694d18 +size 1963469 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi new file mode 100644 index 00000000000..117808dd212 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c514fcfc79cec032ba9bc9801652c41f69fc20da02975150eca1ffe0ef78c542 +size 2276 
diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv new file mode 100644 index 00000000000..1d233dbe46e --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428b27f4e2a8010a515e4e6642371548ca8dc0d550b7759442bd5cf500e9de77 +size 9650 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config new file mode 100755 index 00000000000..e85472f7f0c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfdbe126b7989dcafcede731276e7c9ace3744f82a85de55b90b0119608e2a67 +size 1694 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv new file mode 100755 index 00000000000..e60b5f49069 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8215ec2926e38de6594b30adeda17698a0c26ef67aa9f40b0eccc4fc2cc2b41 +size 43496 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config new file mode 100755 index 00000000000..cbb15cc4bd9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b0ece241594e554015aac0324bb3efe81d11b120009bd1d73ad98ae5565553 +size 1607 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config new file mode 100755 index 00000000000..0960de2f558 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90f80d36fa98d3f7721478b7d3f54d0c1439af5ef43a30c1f9469adc4291c0e +size 1751 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf new file mode 100644 index 00000000000..e184399e8f4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0710dbeef221a115583eab64868519f2aca02bc4148bb3ed76f1afd3046ac21f +size 51415620 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx new file mode 100644 index 00000000000..d408be70fea --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea2c637c03561cedfc663c2b6be1edfa61f7c64cbcfd19da8232b3235e1eace +size 21663 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict new file mode 100644 index 00000000000..f832bbca089 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e31e8b23f306feeec4a7148a5026ab13fcf998b0cfb6c2fdf2dcba155f1397 +size 1324009 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa new file mode 100644 index 00000000000..8980366bc81 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb89fec0cd29e2a1705be30696a179f6a4d06e78eff2e41c09fb469023e9b67 +size 9532749 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai new file mode 100644 index 00000000000..0e09d0cc8e8 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb405f4aa02e61385350b095ce23246c248e99f2c852f885b377a99f2f524c0 +size 584862 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config new file mode 100755 index 00000000000..a41677ce738 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364d9872e9b6ed83df0287e0157785b826c229070906638915b3a9da45e56e4e +size 1751 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf new file mode 100644 
index 00000000000..7971c95ccf4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3bcc4aa67a61b654a8df5a32237b52fb09f1a7f5efd15f1bc1824919003b8ae +size 285039 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx new file mode 100644 index 00000000000..7c7d4459433 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8121fd06e322f8de75c87cf80d32861c1b69cf307cc334d3f7b31813b429a17 +size 466 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict new file mode 100644 index 00000000000..cfdfb3daa48 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b93074618a47bb22af29a2889244c3c46a789c8ff39e44f03c03c3e39dae4d +size 6382 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa new file mode 100644 index 00000000000..c967633b1c2 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f733d007767f24eb83a4cffc1249ba44a73557e26a9262ad9ace311bd5404959 +size 60536 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai new file mode 100644 index 00000000000..237d3b33414 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ab896e07ba749af55952c3e7427dc4bf93e6dc250cd155f155f591e2d1f0440 +size 2811 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config new file mode 100755 index 00000000000..5e7f00ffdc5 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2186e0172291c105deb0f6f5b5c56411cd1adccb1520a21301a9f560d18840aa +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv new file mode 100644 index 00000000000..8e516ab12c7 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:6a595a1a2b26426334f97a0b81c2a5869888271c1968038177498d3f2904ec2b +size 21521419 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config new file mode 100755 index 00000000000..7ce822c33ea --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0bd76a1397520e3ea7a07ae2e009af378493e2184581d0dc4cf5279ea27bc8 +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv new file mode 100644 index 00000000000..ed4123df4e3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:847ff8a6ffbb33365e6ac4b3256924873102e5adc3a75ff43d9d7fa422c7dd95 +size 20948186 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config new file mode 100755 index 00000000000..cad5f853222 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da8f13f064f0ff7ba2be960eb1fa32cbae050ee2467334441d73a5e2ba70345 +size 1646 diff --git 
a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv new file mode 100644 index 00000000000..5122b33f680 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546a04ca46574dddd12b5293d7b1e14732e7518e60122430c9a782d8bc71d3b8 +size 2268385 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config new file mode 100755 index 00000000000..da5ff5476a4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8df0c12dfcb4d3d7bc4a21358b67e12053e1f11e900b7c11ae8307a03634f07 +size 1646 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv new file mode 100644 index 00000000000..04bc2532560 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6b2dae5d73858dd89b029e6544a4b4681319a853209e227f0d64d7b3c05350 +size 3897642 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config new file mode 100644 
index 00000000000..edc4f58afb9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afadb719be43a2e25316cbda1d342711e5c7643aea0cc42324d1ff9720dc9d63 +size 1719 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config new file mode 100755 index 00000000000..217846f61e1 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c9c1d90f2d4269c8ace638a5724e426e66812a0409726d018be0bd305cc729 +size 1609 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv new file mode 100644 index 00000000000..cac248e8f8a --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c21b9ebd23b1a65b0994f8e20e1516b6926dfdc185f28aff8689bdf52689c2f +size 12648820 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config new file mode 100755 index 00000000000..4fc3dca5c09 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670f8f8192e88603902fff6d31678f2eab0fdbf63305bf943d18070b80a7752b +size 1625 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv new file mode 100755 index 00000000000..c9ff0b066b5 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d082f1c506f166e8e1f5380e3594b875d79d3703eabce3caf663b56b2b0be64 +size 9675839 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config new file mode 100755 index 00000000000..f5ac131f8e5 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa293cf1d831bf414074a0e9713bbec437bba1b65172c2cc0fbba1683804834 +size 1557 diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config index 862f93552bd..6ac0839b452 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config @@ -2,4 +2,8 @@ contig_column = Chromosome start_column = Start_Position end_column = End_Position xsv_delimiter = \t -name = \ No newline at end of file +name = +src_file = +version = +origin_location = +preprocessing_script = diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config index c347ab6a75f..4970d1f8e93 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config @@ -1,5 +1,50 @@ +name = XSV_LOCATABLE_TEST_NAME +version = TEST +src_file = xsv_locatable_test.csv +origin_location = LocatableXsvFuncotationFactoryUnitTest.java +preprocessing_script = + +# Supported types: +# simpleXSV -- Arbitrary separated value table (e.g. CSV), keyed off Gene Name OR Transcript ID +# locatableXSV -- Arbitrary separated value table (e.g. CSV), keyed off a genome location +# gencode -- Custom datasource class for GENCODE +# cosmic -- Custom datasource class for COSMIC +# vcf -- Custom datasource class for Variant Call Format (VCF) files +type = locatableXSV + +# Required field for GENCODE files. +# Path to the FASTA file from which to load the sequences for GENCODE transcripts: +gencode_fasta_path = + +# Required field for simpleXSV files. +# Valid values: +# GENE_NAME +# TRANSCRIPT_ID +xsv_key = + +# Required field for simpleXSV files. 
+# The 0-based index of the column containing the key on which to match +xsv_key_column = + +# Required field for simpleXSV AND locatableXSV files. +# The delimiter by which to split the XSV file into columns. +xsv_delimiter = , + +# Required field for simpleXSV files. +# Whether to permissively match the number of columns in the header and data rows +# Valid values: +# true +# false +xsv_permissive_cols = + +# Required field for locatableXSV files. +# The 0-based index of the column containing the contig for each row contig_column = 1 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the start position for each row start_column = 3 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the end position for each row end_column = 4 -xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config index 42ff6703e18..9bedd0a205f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config @@ -1,5 +1,50 @@ +name = SECOND_XSV_NAME +version = TEST +src_file = xsv_locatable_test2.csv +origin_location = LocatableXsvFuncotationFactoryUnitTest.java +preprocessing_script = + +# Supported types: +# simpleXSV -- Arbitrary separated value table (e.g. CSV), keyed off Gene Name OR Transcript ID +# locatableXSV -- Arbitrary separated value table (e.g. CSV), keyed off a genome location +# gencode -- Custom datasource class for GENCODE +# cosmic -- Custom datasource class for COSMIC +# vcf -- Custom datasource class for Variant Call Format (VCF) files +type = locatableXSV + +# Required field for GENCODE files. 
+# Path to the FASTA file from which to load the sequences for GENCODE transcripts: +gencode_fasta_path = + +# Required field for simpleXSV files. +# Valid values: +# GENE_NAME +# TRANSCRIPT_ID +xsv_key = + +# Required field for simpleXSV files. +# The 0-based index of the column containing the key on which to match +xsv_key_column = + +# Required field for simpleXSV AND locatableXSV files. +# The delimiter by which to split the XSV file into columns. +xsv_delimiter = , + +# Required field for simpleXSV files. +# Whether to permissively match the number of columns in the header and data rows +# Valid values: +# true +# false +xsv_permissive_cols = + +# Required field for locatableXSV files. +# The 0-based index of the column containing the contig for each row contig_column = 1 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the start position for each row start_column = 2 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the end position for each row end_column = 4 -xsv_delimiter = , -name = SECOND_XSV_NAME \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config index bbc2245d0fa..2121f2762f8 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config @@ -1,5 +1,50 @@ -contig_column = 1 +name = XSV_LOCATABLE_TEST_NAME +version = TEST +src_file = xsv_locatable_test3.tsv +origin_location = LocatableXsvFuncotationFactoryUnitTest.java +preprocessing_script = + +# Supported types: +# simpleXSV -- Arbitrary separated value table (e.g. CSV), keyed off Gene Name OR Transcript ID +# locatableXSV -- Arbitrary separated value table (e.g. 
CSV), keyed off a genome location +# gencode -- Custom datasource class for GENCODE +# cosmic -- Custom datasource class for COSMIC +# vcf -- Custom datasource class for Variant Call Format (VCF) files +type = locatableXSV + +# Required field for GENCODE files. +# Path to the FASTA file from which to load the sequences for GENCODE transcripts: +gencode_fasta_path = + +# Required field for simpleXSV files. +# Valid values: +# GENE_NAME +# TRANSCRIPT_ID +xsv_key = + +# Required field for simpleXSV files. +# The 0-based index of the column containing the key on which to match +xsv_key_column = + +# Required field for simpleXSV AND locatableXSV files. +# The delimiter by which to split the XSV file into columns. +xsv_delimiter = \t + +# Required field for simpleXSV files. +# Whether to permissively match the number of columns in the header and data rows +# Valid values: +# true +# false +xsv_permissive_cols = + +# Required field for locatableXSV files. +# The 0-based index of the column containing the contig for each row +contig_column = 1 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the start position for each row start_column = 3 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the end position for each row end_column = 4 -xsv_delimiter = \t -name = XSV_LOCATABLE_TEST_NAME