diff --git a/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh b/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh index 16e91787c72..26f3f515aa3 100755 --- a/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh +++ b/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh @@ -20,7 +20,7 @@ ############################################################################### #Setup variables for the script: -UNALIASED_SCRIPT_NAME=$( readlink "${BASH_SOURCE[0]}" || echo "${BASH_SOURCE[0]}" ) +UNALIASED_SCRIPT_NAME=$( python -c 'import os, sys; print(os.path.realpath(sys.argv[1]))' "${BASH_SOURCE[0]}" || echo "${BASH_SOURCE[0]}" ) SCRIPTDIR="$( cd "$( dirname "${UNALIASED_SCRIPT_NAME}" )" && pwd )" SCRIPTNAME=$( echo $0 | sed 's#.*/##g' ) MINARGS=2 diff --git a/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh b/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh index 444171cc18a..a2e65cec8f2 100755 --- a/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh +++ b/scripts/funcotator/testing/getGencodeSequencesForVcfVariants.sh @@ -20,7 +20,7 @@ ############################################################################### #Setup variables for the script: -UNALIASED_SCRIPT_NAME=$( readlink "${BASH_SOURCE[0]}" || echo "${BASH_SOURCE[0]}" ) +UNALIASED_SCRIPT_NAME=$( python -c 'import os, sys; print(os.path.realpath(sys.argv[1]))' "${BASH_SOURCE[0]}" || echo "${BASH_SOURCE[0]}" ) SCRIPTDIR="$( cd "$( dirname "${UNALIASED_SCRIPT_NAME}" )" && pwd )" SCRIPTNAME=$( echo $0 | sed 's#.*/##g' ) MINARGS=2 diff --git a/scripts/funcotator/testing/testFuncotator.sh b/scripts/funcotator/testing/testFuncotator.sh index a4cf51e65c8..05df5afe209 100755 --- a/scripts/funcotator/testing/testFuncotator.sh +++ b/scripts/funcotator/testing/testFuncotator.sh @@ -47,6 +47,7 @@ doClean=false REF_VER=hg19 OUT_FORMAT=VCF useAOUDataSources=false +useCloudDataSources=false MANUAL_MODE=false @@ -61,7 +62,7 @@ HG38=/Users/jonn/Development/references/Homo_sapiens_assembly38.fasta function simpleUsage() { - echo -e "Usage: 
$SCRIPTNAME [-c] [-u] [-t] [-19|-38] [-MAF|-VCF] [-AOU]" + echo -e "Usage: $SCRIPTNAME [-c] [-cloud] [-u] [-t] [-19|-38] [-MAF|-VCF] [-AOU]" echo -e "Build and run Funcotator." } @@ -71,29 +72,30 @@ function usage() simpleUsage echo -e "Can clean, run tests, and run large file tests." echo -e "" - echo -e "MUST be run from the GATK development directory." + echo -e "MUST be run from the GATK development directory." echo -e "" - echo -e "Will by default (with no options) build GATK/Funcotator." + echo -e "Will by default (with no options) build GATK/Funcotator." echo -e "For large file tests, defaults to hg19 tests with VCF output." echo -e "" echo -e "The following options are available:" echo -e " -c clean GATK/Funcotator" echo -e " -u run all tests in the Funcotator Package." - echo -e " (org.broadinstitute.hellbender.tools.funcotator)" + echo -e " (org.broadinstitute.hellbender.tools.funcotator)" echo -e " -t run Funcotator on a large data file" echo -e " (internally configured)" echo -e " -19 run with hg19 data sources/reference/input file" - echo -e " (default)" + echo -e " (default)" echo -e " -38 run with hg38 data sources/reference/input file" - echo -e " -MAF create MAF output" - echo -e " -VCF create VCF output (default)" - echo -e " -AOU use the All of Us/Clinical Pipeline data sources" - echo -e " -M REF_VER REFERENCE INPUT DATA_SOURCES run in MANUAL mode, providing all necessary input" - echo -e " REF_VER - a string for the reference version" - echo -e " REFERENCE - reference FASTA file" - echo -e " INPUT - input VCF file" - echo -e " DATA_SOURCES - path to FUNCOTATOR data sources folder" - echo -e "" + echo -e " -MAF create MAF output" + echo -e " -VCF create VCF output (default)" + echo -e " -cloud use cloud data sources" + echo -e " -AOU use the All of Us/Clinical Pipeline data sources" + echo -e " -M REF_VER REFERENCE INPUT DATA_SOURCES run in MANUAL mode, providing all necessary input" + echo -e " REF_VER - a string for the reference version" + 
echo -e " REFERENCE - reference FASTA file" + echo -e " INPUT - input VCF file" + echo -e " DATA_SOURCES - path to FUNCOTATOR data sources folder" + echo -e "" echo -e "Return values:" echo -e " 0 NORMAL" echo -e " 1 TOO MANY ARGUMENTS" @@ -130,11 +132,11 @@ function at_exit() } function assertFileExists() { - [[ ! -f $1 ]] && error "Error: File does not exist: $1" && exit 3 + [[ ! -f $1 ]] && error "Error: File does not exist: $1" && exit 3 } function assertDirectoryExists() { - [[ ! -d $1 ]] && error "Error: Directory does not exist: $1" && exit 4 + [[ ! -d $1 ]] && error "Error: Directory does not exist: $1" && exit 4 } ################################################################################ @@ -146,7 +148,8 @@ trap at_exit EXIT function assertInputFilesExist() { assertFileExists ${INPUT} assertFileExists ${REF} - assertDirectoryExists ${DATA_SOURCES_PATH} + + [[ ! -d $DATA_SOURCES_PATH ]] && error "Warning: Data sources may not exist ${DATA_SOURCES_PATH}" && error "Ignore this if data sources directory is in the cloud." } ################################################################################ @@ -155,49 +158,52 @@ function assertInputFilesExist() { while [ $# -gt 0 ] ; do case "$1" in - -c) - doClean=true - ;; - -u) - doUnitTests=true - ;; - -19) - REF_VER=hg19 - ;; - -38) - REF_VER=hg38 - ;; + -c) + doClean=true + ;; + -u) + doUnitTests=true + ;; + -19) + REF_VER=hg19 + ;; + -38) + REF_VER=hg38 + ;; -VCF) - OUT_FORMAT=VCF - ;; + OUT_FORMAT=VCF + ;; -MAF) - OUT_FORMAT=MAF - ;; - -AOU) - useAOUDataSources=true - ;; - -t) - doRunLargeTests=true - ;; - -M) - shift - REF_VER=$1 - shift - REF=$1 - shift - INPUT=$1 - shift - DATA_SOURCES_PATH=$1 - MANUAL_MODE=true - # Validate our args: - if [[ ${#REF} -eq 0 ]] || [[ ${#INPUT} -eq 0 ]] || [[ ${#DATA_SOURCES_PATH} -eq 0 ]] ; then - error "Error: For manual mode you must specify a reference version, reference fasta, input file, and data sources directory." 
&& exit 5 - fi - ;; - --help) - usage - exit 0 - ;; + OUT_FORMAT=MAF + ;; + -AOU) + useAOUDataSources=true + ;; + -cloud) + useCloudDataSources=true + ;; + -t) + doRunLargeTests=true + ;; + -M) + shift + REF_VER=$1 + shift + REF=$1 + shift + INPUT=$1 + shift + DATA_SOURCES_PATH=$1 + MANUAL_MODE=true + # Validate our args: + if [[ ${#REF} -eq 0 ]] || [[ ${#INPUT} -eq 0 ]] || [[ ${#DATA_SOURCES_PATH} -eq 0 ]] ; then + error "Error: For manual mode you must specify a reference version, reference fasta, input file, and data sources directory." && exit 5 + fi + ;; + --help) + usage + exit 0 + ;; *) ;; esac @@ -210,35 +216,41 @@ done r=1 if ${doClean} ; then - ${GATKDIR}/gradlew clean compileJava compileTestJava installDist - r=$? + ${GATKDIR}/gradlew clean compileJava compileTestJava installDist + r=$? else - ${GATKDIR}/gradlew compileJava compileTestJava installDist - r=$? + ${GATKDIR}/gradlew compileJava compileTestJava installDist + r=$? fi if [[ $r -eq 0 ]] && ${doUnitTests} ; then - echo "################################################################################" - echo "## Running Unit Tests... " - ${GATKDIR}/gradlew test --tests org.broadinstitute.hellbender.tools.funcotator* --stacktrace - r=$? + echo "################################################################################" + echo "## Running Unit Tests... " + ${GATKDIR}/gradlew test \ + --tests org.broadinstitute.hellbender.tools.funcotator* \ + --tests org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable* \ + --tests org.broadinstitute.hellbender.utils.codecs.gencode* \ + --tests org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.SimpleAnnotatedIntervalWriterUnitTest* \ + --tests org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollectionUnitTest* \ + --stacktrace + r=$? 
fi ################################################################################ if [[ $r -eq 0 ]] && $MANUAL_MODE ; then - echo "################################################################################" - echo "## Running MANUAL Test... " - echo - echo "########################################" - echo "## Using Reference: ${REF_VER} ##" - echo "########################################" + echo "################################################################################" + echo "## Running MANUAL Test... " + echo + echo "########################################" + echo "## Using Reference: ${REF_VER} ##" + echo "########################################" OUT_FORMAT_LOWER=$( echo "${OUT_FORMAT}" | tr 'A-Z' 'a-z' ) OUT_FILE_NAME=FUNCOTATOR_OUT.${OUT_FORMAT_LOWER} - assertInputFilesExist + assertInputFilesExist ${GATKDIR}/gatk Funcotator \ -V ${INPUT} \ @@ -250,56 +262,62 @@ if [[ $r -eq 0 ]] && $MANUAL_MODE ; then --output-file-format ${OUT_FORMAT} -- --java-options '-DGATK_STACKTRACE_ON_USER_EXCEPTION=true' r=$? - exit $r + exit $r fi if [[ $r -eq 0 ]] && ${doRunLargeTests} ; then - - echo "################################################################################" - echo "## Running Large Tests... 
" - echo - echo - echo "########################################" - echo "## Using Reference: ${REF_VER} ##" - echo "########################################" - - if [[ "${REF_VER}" == "hg19" ]] ; then - INPUT=/Users/jonn/Development/NON_PUBLIC/0816201804HC0_R01C01.vcf - #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSet1.vcf - #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSet2.vcf - INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestHg19Large.vcf - #INPUT=/Users/jonn/Development/gatk/hg38_trio_liftoverb37.vcf - #INPUT=/Users/jonn/Development/gatk/tmp.vcf - #INPUT=/Users/jonn/Development/data_to_run/problem_samples/splice_site_should_not_be_splice_site/error_case.vcf - - #HG19=/Users/jonn/Development/references/ucsc.hg19.fasta - #HG19=/Users/jonn/Development/references/ucsc.hg19.fasta - #HG19=/Users/jonn/Development/references/GRCh37.p13.genome.fasta - REF=$HG19 - else - INPUT=/Users/jonn/Development/FUNCOTATOR_LARGE_TEST_INPUTS/hg38_trio.vcf - #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSetHG38.vcf - REF=$HG38 - fi - - # Use the AOU data sources if we need them: - $useAOUDataSources && DATA_SOURCES_PATH=/Users/jonn/Development/funcotator_dataSources.vAoU3 - - OUT_FORMAT_LOWER=$( echo "${OUT_FORMAT}" | tr 'A-Z' 'a-z' ) - OUT_FILE_NAME=FUNCOTATOR_OUT.${OUT_FORMAT_LOWER} - - assertInputFilesExist - - ${GATKDIR}/gatk Funcotator \ - -V ${INPUT} \ - -O ${OUT_FILE_NAME} \ - -R ${REF} \ - --verbosity DEBUG \ - --data-sources-path ${DATA_SOURCES_PATH} \ - --ref-version ${REF_VER} \ - --output-file-format ${OUT_FORMAT} -- --java-options '-DGATK_STACKTRACE_ON_USER_EXCEPTION=true' - - r=$? + + echo "################################################################################" + echo "## Running Large Tests... 
" + echo + echo + echo "########################################" + echo "## Using Reference: ${REF_VER} ##" + echo "########################################" + + if [[ "${REF_VER}" == "hg19" ]] ; then + INPUT=/Users/jonn/Development/NON_PUBLIC/0816201804HC0_R01C01.vcf + #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSet1.vcf + #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSet2.vcf + #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestHg19Large.vcf + #INPUT=/Users/jonn/Development/gatk/hg38_trio_liftoverb37.vcf + #INPUT=/Users/jonn/Development/gatk/tmp.vcf + #INPUT=/Users/jonn/Development/data_to_run/problem_samples/splice_site_should_not_be_splice_site/error_case.vcf + + #HG19=/Users/jonn/Development/references/ucsc.hg19.fasta + #HG19=/Users/jonn/Development/references/ucsc.hg19.fasta + #HG19=/Users/jonn/Development/references/GRCh37.p13.genome.fasta + REF=$HG19 + else + INPUT=/Users/jonn/Development/FUNCOTATOR_LARGE_TEST_INPUTS/hg38_trio.vcf + #INPUT=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/regressionTestVariantSetHG38.vcf + #INPUT=/Users/jonn/Development/tmp/cohort24_23_seg.subset.vcf + #INPUT=/Users/jonn/Development/gatk/tmp.38.vcf + REF=$HG38 + fi + + # Use the AOU data sources if we need them: + $useAOUDataSources && echo "Using AOU data sources." && DATA_SOURCES_PATH=/Users/jonn/Development/funcotator_dataSources.vAoU3 + + # Use cloud data sources if we need them: + $useCloudDataSources && echo "Using cloud data sources." && DATA_SOURCES_PATH=/Users/jonn/Development/gatk/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/ + #$useCloudDataSources && echo "Using cloud data sources." 
&& DATA_SOURCES_PATH=gs://hellbender/test/resources/large/funcotatorDataSourceCollection/funcotator_dataSources_cloud/ + + OUT_FORMAT_LOWER=$( echo "${OUT_FORMAT}" | tr 'A-Z' 'a-z' ) + OUT_FILE_NAME=FUNCOTATOR_OUT.${OUT_FORMAT_LOWER} + + assertInputFilesExist + + time ${GATKDIR}/gatk Funcotator \ + -V ${INPUT} \ + -O ${OUT_FILE_NAME} \ + -R ${REF} \ + --verbosity DEBUG \ + --data-sources-path ${DATA_SOURCES_PATH} \ + --ref-version ${REF_VER} \ + --output-file-format ${OUT_FORMAT} -- --java-options '-DGATK_STACKTRACE_ON_USER_EXCEPTION=true' + + r=$? fi exit $r diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index e2d2da1863c..b32519e978e 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -20,7 +20,6 @@ import org.broadinstitute.hellbender.utils.gcs.BucketUtils; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.nio.SeekableByteChannelPrefetcher; -import static org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.*; import java.io.File; import java.io.IOException; @@ -32,6 +31,8 @@ import java.util.Optional; import java.util.function.Function; +import static org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBUtils.createExportConfiguration; + /** * Enables traversals and queries over sources of Features, which are metadata associated with a location * on the genome in a format supported by our file parsing framework, Tribble. 
Examples of Features are @@ -276,6 +277,9 @@ public FeatureDataSource(final FeatureInput featureInput, final int queryLook this.queryLookaheadBases = queryLookaheadBases; } + final void printCacheStats() { + queryCache.printCacheStatistics( getName() ); + } @SuppressWarnings("unchecked") private static FeatureReader getFeatureReader(final FeatureInput featureInput, final Class targetFeatureType, @@ -332,17 +336,19 @@ private static FeatureReader getFeatureReader(final Featu private static AbstractFeatureReader getTribbleFeatureReader(final FeatureInput featureInput, final FeatureCodec codec, final Function cloudWrapper, final Function cloudIndexWrapper) { Utils.nonNull(codec); try { - final String absolutePath = IOUtils.getPath(featureInput.getFeaturePath()).toAbsolutePath().toUri().toString(); + // Must get the path to the data file from the codec here: + final String absoluteRawPath = IOUtils.getPath(featureInput.getFeaturePath()).toAbsolutePath().toUri().toString(); + final String absoluteProcessedPath = IOUtils.getPath(codec.getPathToDataFile(featureInput.getFeaturePath())).toAbsolutePath().toUri().toString(); // Instruct the reader factory to not require an index. We will require one ourselves as soon as // a query by interval is attempted. 
final boolean requireIndex = false; // Only apply the wrappers if the feature input is on Google Cloud Storage - if (BucketUtils.isCloudStorageUrl(absolutePath)) { - return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, cloudWrapper, cloudIndexWrapper); + if (BucketUtils.isCloudStorageUrl(absoluteProcessedPath)) { + return AbstractFeatureReader.getFeatureReader(absoluteRawPath, null, codec, requireIndex, cloudWrapper, cloudIndexWrapper); } else { - return AbstractFeatureReader.getFeatureReader(absolutePath, null, codec, requireIndex, Function.identity(), Function.identity()); + return AbstractFeatureReader.getFeatureReader(absoluteRawPath, null, codec, requireIndex, Function.identity(), Function.identity()); } } catch (final TribbleException e) { throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java index 57083255684..f303f334d98 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java @@ -291,6 +291,13 @@ public String getFeaturePath() { return featureFile; } + /** + * @return The key/value {@link Map} as supplied to create the data in this {@link FeatureInput}. 
+ */ + public Map getKeyValueMap() { + return keyValueMap; + } + /** * FeatureInputs will be hashed by the engine, so make an effort to produce a reasonable hash code * diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java index d44c7769db7..b50a78aca2b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java @@ -210,6 +210,12 @@ private void initializeFeatureSources( final int featureQueryLookahead, final Co } } + @SuppressWarnings({"unchecked", "rawtypes"}) + public void dumpAllFeatureCacheStats() { + for ( final FeatureDataSource f : featureSources.values() ) { + f.printCacheStats(); + } + } /** * Add the feature data source to the given feature input. @@ -450,7 +456,7 @@ private FeatureDataSource lookupDataSource( final Feature public static FeatureCodec getCodecForFile( final Path featurePath, final Class featureType ) { // Make sure Path exists/is readable if ( ! 
Files.isReadable(featurePath) ) { - throw new UserException.CouldNotReadInputFile(featurePath); + throw new UserException.CouldNotReadInputFile(featurePath.toUri().toString()); } // Gather all discovered codecs that claim to be able to decode the given file according to their diff --git a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java index 2c1adabad34..d9bbaf35918 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/GATKTool.java @@ -148,7 +148,7 @@ public abstract class GATKTool extends CommandLineProgram { /** * Our source of Feature data (null if no source of Features was provided) */ - FeatureManager features; + public FeatureManager features; /** * diff --git a/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java b/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java index 0ac52076aed..7e9cea29d35 100644 --- a/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java +++ b/src/main/java/org/broadinstitute/hellbender/exceptions/UserException.java @@ -384,7 +384,7 @@ public static final class NoSuitableCodecs extends UserException { private static final long serialVersionUID = 0L; public NoSuitableCodecs(final Path file) { - super("Cannot read " + file + " because no suitable codecs found"); + super("Cannot read " + file.toUri().toString() + " because no suitable codecs found"); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java index 5f2c34dce0b..12bb4e25fad 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCodec.java @@ -5,8 +5,10 @@ import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.readers.LineIterator; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvLocatableTableCodec; @@ -38,6 +40,7 @@ public class AnnotatedIntervalCodec extends AsciiFeatureCodec public static final String START_COL_COMMENT = "_StartHeader="; public static final String END_COL_COMMENT = "_EndHeader="; + private Path configFilePath; private XsvLocatableTableCodec xsvLocatableTableCodec; private AnnotatedIntervalHeader header; @@ -46,9 +49,10 @@ public AnnotatedIntervalCodec() { xsvLocatableTableCodec = new XsvLocatableTableCodec(); } - public AnnotatedIntervalCodec(final Path overrideConfigFile) { + public AnnotatedIntervalCodec(final Path configFilePath) { super(AnnotatedInterval.class); - xsvLocatableTableCodec = new XsvLocatableTableCodec(overrideConfigFile); + this.configFilePath = configFilePath; + xsvLocatableTableCodec = new XsvLocatableTableCodec(configFilePath); } @Override @@ -78,8 +82,8 @@ public AnnotatedIntervalHeader readActualHeader(final LineIterator reader) { } @Override - public boolean canDecode(final String path) { - return (path.endsWith(".seg") || path.endsWith(".maf") || path.endsWith(".maf.annotated")) && xsvLocatableTableCodec.canDecodeMinusExtensionChecks(path); + public boolean canDecode(final String pathString) { + return (pathString.endsWith(".seg") || pathString.endsWith(".maf") || pathString.endsWith(".maf.annotated")) && 
xsvLocatableTableCodec.canDecodeFileChecks(configFilePath.toUri().toString(), pathString); } /** @@ -98,10 +102,15 @@ public static AnnotatedIntervalHeader createHeaderForWriter(final Path outputCon Utils.nonNull(outputConfigFile); //TODO: Change this so that it outputs the first in the list. - final Properties headerNameProperties = XsvLocatableTableCodec.getAndValidateConfigFileContents(outputConfigFile); - final String contigColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(XsvLocatableTableCodec.CONFIG_FILE_CONTIG_COLUMN_KEY)); - final String startColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(XsvLocatableTableCodec.CONFIG_FILE_START_COLUMN_KEY)); - final String endColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(XsvLocatableTableCodec.CONFIG_FILE_END_COLUMN_KEY)); + final Pair validityAndPropertiesPair = XsvLocatableTableCodec.getAndValidateConfigFileContentsOnPath(outputConfigFile, true); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties headerNameProperties = validityAndPropertiesPair.getRight(); + if ( !isValid ) { + throw new UserException.BadInput("Error: invalid configuration file given: " + outputConfigFile.toUri().toString()); + } + final String contigColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN)); + final String startColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN)); + final String endColumnName = determineOutputColumnFromList(headerNameProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN)); XsvLocatableTableCodec.validateLocatableColumnName(contigColumnName); XsvLocatableTableCodec.validateLocatableColumnName(startColumnName); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCollection.java 
b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCollection.java index d58136dccb4..1659d6d0dc5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/AnnotatedIntervalCollection.java @@ -103,7 +103,7 @@ public static AnnotatedIntervalCollection create(final Path input, final Path in final AnnotatedIntervalCodec codec = new AnnotatedIntervalCodec(inputConfigFile); final List regions = new ArrayList<>(); - + if (codec.canDecode(input.toUri().toString())) { try (final FeatureReader reader = AbstractFeatureReader.getFeatureReader(input.toUri().toString(), codec, false)){ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java index 9a1f7e07ef4..b96cbcc2230 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java @@ -227,11 +227,13 @@ public boolean requiresReference() { @Override public void onTraversalStart() { + logger.info("Validating Sequence Dictionaries..."); if (seqValidationArguments.performSequenceDictionaryValidation()) { // Ensure that the reference dictionary is a superset of the variant dictionary: checkReferenceDictionaryIsSupersetOfVariantDictionary(); } + logger.info("Processing user transcripts/defaults/overrides..."); // Next set up our transcript list: final Set finalUserTranscriptIdSet = FuncotatorEngine.processTranscriptList(funcotatorArgs.userTranscriptIdSet); @@ -242,11 +244,13 @@ public void onTraversalStart() { // Get the header for our variants: final VCFHeader vcfHeader = getHeaderForVariants(); + logger.info("Initializing data sources..."); // Initialize all of our data sources: // Sort data 
sources to make them process in the same order each time: funcotatorArgs.dataSourceDirectories.sort(Comparator.naturalOrder()); final Map configData = DataSourceUtils.getAndValidateDataSourcesFromPaths(funcotatorArgs.referenceVersion, funcotatorArgs.dataSourceDirectories); + logger.info("Finalizing data sources (this step can be long if data sources are cloud-based)..."); // Create the data sources from the input: // This will also create and register the FeatureInputs (created by the Data Sources) // with the GATK Engine, so we do not have to plumb them in after the fact. @@ -260,6 +264,7 @@ public void onTraversalStart() { new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize) ); + logger.info("Initializing Funcotator Engine..."); // Create our engine to do our work and drive this Funcotation train! funcotatorEngine = new FuncotatorEngine( funcotatorArgs, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java index 76494bdce7b..e26e79c8be8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java @@ -264,7 +264,7 @@ public static List createDataSourceFuncotationFact final FeatureInput featureInput; switch ( FuncotatorArgumentDefinitions.DataSourceType.getEnum(stringType) ) { case LOCATABLE_XSV: - featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, XsvTableFeature.class); + featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, XsvTableFeature.class, true); funcotationFactory = DataSourceUtils.createLocatableXsvDataSource(path, properties, annotationOverridesMap, featureInput); break; case SIMPLE_XSV: @@ -274,16 +274,16 @@ 
public static List createDataSourceFuncotationFact funcotationFactory = DataSourceUtils.createCosmicDataSource(path, properties, annotationOverridesMap); break; case GENCODE: - featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class); + featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false); funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode, userTranscriptIdSet, featureInput, flankSettings); break; case VCF: - featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class); + featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false); funcotationFactory = DataSourceUtils.createVcfDataSource(path, properties, annotationOverridesMap, featureInput); break; default: - throw new GATKException("Unknown type of DataSourceFuncotationFactory encountered: " + stringType); + throw new GATKException("Unknown type of DataSourceFuncotationFactory encountered: " + stringType ); } // Add in our factory: @@ -294,41 +294,25 @@ public static List createDataSourceFuncotationFact return dataSourceFactories; } - private static FeatureInput createAndRegisterFeatureInputs(final Path dataSourceFile, + private static FeatureInput createAndRegisterFeatureInputs(final Path configFilePath, final Properties dataSourceProperties, final GATKTool funcotatorToolInstance, final int lookaheadFeatureCachingInBp, - final Class featureType) { - Utils.nonNull(dataSourceFile); + final Class featureType, + final boolean useConfigFilePath) { + Utils.nonNull(configFilePath); Utils.nonNull(dataSourceProperties); - final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME); - final String sourceFile = 
dataSourceFile.resolveSibling(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE)).toString(); + final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME); + final String sourceFile = useConfigFilePath + ? configFilePath.toUri().toString() + : resolveFilePathStringFromKnownPath( dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), configFilePath ).toUri().toString(); // Get feature inputs by creating them with the tool instance itself. // This has the side effect of registering the FeatureInputs with the engine, so that they can be later queried. return funcotatorToolInstance.addFeatureInputsAfterInitialization(sourceFile, name, featureType, lookaheadFeatureCachingInBp); } - /** - * Create {@link FeatureInput} FOR TESTING ONLY. - * @param dataSourceFile - * @param dataSourceProperties - * @return - */ - private static FeatureInput createFeatureInputsForTesting(final Path dataSourceFile, - final Properties dataSourceProperties) { - - Utils.nonNull(dataSourceFile); - Utils.nonNull(dataSourceProperties); - - final String name = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME); - final String sourceFile = dataSourceFile.resolveSibling(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE)).toString(); - - // Get feature inputs by creating them with the funcotator tool instance itself: - return new FeatureInput<>(sourceFile, name, Collections.emptyMap()); - } - /** * Create a {@link LocatableXsvFuncotationFactory} from filesystem resources and field overrides. * @param dataSourceFile {@link Path} to the data source file. Must not be {@code null}. 
@@ -359,13 +343,7 @@ private static LocatableXsvFuncotationFactory createLocatableXsvDataSource(final // Set the supported fields by the LocatableXsvFuncotationFactory: locatableXsvFuncotationFactory.setSupportedFuncotationFields( - new ArrayList<>( - Collections.singletonList( - dataSourceFile.resolveSibling( - IOUtils.getPath( dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE) ) - ) - ) - ) + resolveFilePathStringFromKnownPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), dataSourceFile) ); return locatableXsvFuncotationFactory; @@ -389,7 +367,7 @@ private static SimpleKeyXsvFuncotationFactory createSimpleXsvDataSource(final Pa // Create our SimpleKeyXsvFuncotationFactory: return new SimpleKeyXsvFuncotationFactory( dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME), - dataSourceFile.resolveSibling(IOUtils.getPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE))), + resolveFilePathStringFromKnownPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), dataSourceFile), dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_VERSION), dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_XSV_DELIMITER), Integer.valueOf(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_XSV_KEY_COLUMN)), @@ -417,7 +395,7 @@ private static CosmicFuncotationFactory createCosmicDataSource(final Path dataSo final String version = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_VERSION); return new CosmicFuncotationFactory( - dataSourceFile.resolveSibling(IOUtils.getPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE))), + resolveFilePathStringFromKnownPath(dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE), dataSourceFile), annotationOverridesMap, version ); @@ -456,7 +434,7 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data // Create our gencode factory: return new GencodeFuncotationFactory( - 
dataSourceFile.resolveSibling(fastaPath), + resolveFilePathStringFromKnownPath( fastaPath, dataSourceFile ), version, name, transcriptSelectionMode, @@ -493,7 +471,7 @@ private static VcfFuncotationFactory createVcfDataSource(final Path dataSourceFi return new VcfFuncotationFactory( name, version, - dataSourceFile.resolveSibling(srcFile).toAbsolutePath(), + resolveFilePathStringFromKnownPath(srcFile, dataSourceFile), annotationOverridesMap, featureInput ); @@ -690,7 +668,7 @@ private static void assertConfigFilePropertiesAreValid(final Properties configFi assertConfigPropertiesContainsKey(CONFIG_FILE_FIELD_NAME_TYPE, configFileProperties, configFilePath); // Validate our source file: - assertPathFilePropertiesField( configFileProperties, CONFIG_FILE_FIELD_NAME_SRC_FILE, configFilePath); + assertPathFilePropertiesField(configFileProperties, CONFIG_FILE_FIELD_NAME_SRC_FILE, configFilePath); // Validate our type: final String stringType = configFileProperties.getProperty(CONFIG_FILE_FIELD_NAME_TYPE); @@ -751,25 +729,53 @@ public static void assertBooleanPropertiesField(final Properties props, final St } } + /** + * Resolves the path string to a full path object using the given knownPath as a sibling file. + * The sibling file will only be used if the given path string is a relative (non-absolute) path on the default file system. + * @param filePathString {@link String} containing a file path to resolve. + * @param knownPath {@link Path} of a potential sibling file system entry. + * @return A {@link Path} object resolved to point to the given {@code filePathString}. + */ + public static Path resolveFilePathStringFromKnownPath(final String filePathString, final Path knownPath ) { + + final Path rawFilePath = IOUtils.getPath(filePathString); + + final Path absoluteFilePath; + if ( rawFilePath.isAbsolute() || (!rawFilePath.getFileSystem().equals(FileSystems.getDefault()))) { + // Absolute path or different file system. + // No need to resolve anything.
+ absoluteFilePath = rawFilePath; + } + else { + // If the path is not absolute, assume we must resolve it with our config file path: + absoluteFilePath = knownPath.resolveSibling(filePathString); + logger.info("Resolved local data source file path: " + rawFilePath.toUri().toString() + " -> " + absoluteFilePath.toUri().toString()); + } + return absoluteFilePath; + } + /** * Asserts that the given {@code field} is contained in the given {@code props} and is a file path. - * @param props {@link Properties} corresponding to the given {@code filePath} in which to check for the validity of {@code field}. + * @param props {@link Properties} corresponding to the given {@code configFilePath} in which to check for the validity of {@code field}. * @param field {@link String} name of the field, the existence and correct type of which will be confirmed in {@code props}. - * @param filePath {@link Path} to config file. For output purposes only. + * @param configFilePath {@link Path} to config file. For output purposes only. 
*/ - public static void assertPathFilePropertiesField(final Properties props, final String field, final Path filePath) { - final Path sourceFilePath = filePath.resolveSibling(props.getProperty(field)); - if ( !Files.exists(sourceFilePath) ) { - throw new UserException.BadInput("ERROR in config file: " + filePath.toUri().toString() + - " - " + field + " does not exist: " + sourceFilePath); + public static void assertPathFilePropertiesField(final Properties props, final String field, final Path configFilePath) { + + final String filePathString = props.getProperty(field); + final Path absoluteFilePath = resolveFilePathStringFromKnownPath(filePathString, configFilePath); + + if ( !Files.exists(absoluteFilePath) ) { + throw new UserException.BadInput("ERROR in config file: " + configFilePath.toUri().toString() + + " - " + field + " does not exist: " + absoluteFilePath); } - else if ( !Files.isRegularFile(sourceFilePath) ) { - throw new UserException.BadInput("ERROR in config file: " + filePath.toUri().toString() + - " - " + field + " is not a regular file: " + sourceFilePath); + else if ( !Files.isRegularFile(absoluteFilePath) ) { + throw new UserException.BadInput("ERROR in config file: " + configFilePath.toUri().toString() + + " - " + field + " is not a regular file: " + absoluteFilePath); } - else if ( !Files.isReadable(sourceFilePath) ) { - throw new UserException.BadInput("ERROR in config file: " + filePath.toUri().toString() + - " - " + field + " is not readable: " + sourceFilePath); + else if ( !Files.isReadable(absoluteFilePath) ) { + throw new UserException.BadInput("ERROR in config file: " + configFilePath.toUri().toString() + + " - " + field + " is not readable: " + absoluteFilePath); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java index 86187bfff84..2457e22a090 
100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java @@ -23,10 +23,15 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.gencode.*; +import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.nio.NioFileCopierWithProgressMeter; import org.broadinstitute.hellbender.utils.param.ParamUtils; import org.broadinstitute.hellbender.utils.read.ReadUtils; +import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; +import java.io.File; +import java.nio.file.FileSystems; import java.nio.file.Path; import java.util.*; import java.util.regex.Matcher; @@ -56,6 +61,9 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { /** Standard Logger. */ protected static final Logger logger = LogManager.getLogger(GencodeFuncotationFactory.class); + private static final String LOCAL_GENCODE_TRANSCRIPT_TMP_DIR_PREFIX = "localGencodeTranscriptFastaFolder"; + private static final String LOCAL_GENCODE_TRANSCRIPT_FILE_BASE_NAME = "gencodeTranscriptFastaFile"; + /** * The window around splice sites to mark variants as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE}. */ @@ -194,7 +202,7 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { /** * Creates a {@link GencodeFuncotationFactory} with the 5'/3' flank sizes both set to 0. * - * @param gencodeTranscriptFastaFile {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. 
+ * @param gencodeTranscriptFastaFilePath {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. * @param version The version {@link String} of Gencode from which {@link Funcotation}s will be made. * @param name A {@link String} containing the name of this {@link GencodeFuncotationFactory}. * @param transcriptSelectionMode The {@link TranscriptSelectionMode} by which representative/verbose transcripts will be chosen for overlapping variants. @@ -202,20 +210,20 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { * @param annotationOverrides A {@link LinkedHashMap} containing user-specified overrides for specific {@link Funcotation}s. * @param mainFeatureInput The backing {@link FeatureInput} for this {@link GencodeFuncotationFactory}, from which all {@link Funcotation}s will be created. */ - public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, + public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, final String version, final String name, final TranscriptSelectionMode transcriptSelectionMode, final Set userRequestedTranscripts, final LinkedHashMap annotationOverrides, final FeatureInput mainFeatureInput) { - this(gencodeTranscriptFastaFile, version, name, transcriptSelectionMode, userRequestedTranscripts, annotationOverrides, mainFeatureInput, new FlankSettings(0, 0)); + this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts, annotationOverrides, mainFeatureInput, new FlankSettings(0, 0)); } /** * Create a {@link GencodeFuncotationFactory}. * - * @param gencodeTranscriptFastaFile {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. + * @param gencodeTranscriptFastaFilePath {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. 
* @param version The version {@link String} of Gencode from which {@link Funcotation}s will be made. * @param name A {@link String} containing the name of this {@link GencodeFuncotationFactory}. * @param transcriptSelectionMode The {@link TranscriptSelectionMode} by which representative/verbose transcripts will be chosen for overlapping variants. @@ -224,7 +232,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, * @param mainFeatureInput The backing {@link FeatureInput} for this {@link GencodeFuncotationFactory}, from which all {@link Funcotation}s will be created. * @param flankSettings Settings object containing our 5'/3' flank sizes */ - public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, + public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, final String version, final String name, final TranscriptSelectionMode transcriptSelectionMode, @@ -235,10 +243,12 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, super(mainFeatureInput); + // Set up our local transcript fasta file. 
+ // We must localize it (if not on disk) to make read times fast enough to be manageable: + gencodeTranscriptFastaFile = localizeGencodeTranscriptFastaFile( gencodeTranscriptFastaFilePath ); this.flankSettings = flankSettings; - this.gencodeTranscriptFastaFile = gencodeTranscriptFastaFile; - + // Initialize our transcript data source and ID map: transcriptFastaReferenceDataSource = ReferenceDataSource.of(gencodeTranscriptFastaFile); transcriptIdMap = createTranscriptIdMap(transcriptFastaReferenceDataSource); @@ -261,6 +271,46 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFile, initializeAnnotationOverrides( annotationOverrides ); } + private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) { + + // Is the path local or in the cloud: + if ( gencodeTranscriptFastaFilePath.getFileSystem().equals(FileSystems.getDefault()) ) { + // local path, just return it: + return gencodeTranscriptFastaFilePath; + } + + // Not a local path! We must localize it! 
+ + // Get the remote paths for the index and dictionary files: + final Path remoteGencodeTranscriptFastaIndexFilePath = IOUtils.getPath( ReferenceUtils.getFastaIndexFileName(gencodeTranscriptFastaFilePath.toUri().toString()) ); + final Path remoteGencodeTranscriptFastaSequenceDictionaryFilePath = IOUtils.getPath( ReferenceUtils.getFastaDictionaryFileName(gencodeTranscriptFastaFilePath.toUri().toString()) ); + + // Create a place for the files: + final File tmpDir = IOUtils.createTempDir(LOCAL_GENCODE_TRANSCRIPT_TMP_DIR_PREFIX); + tmpDir.deleteOnExit(); + final Path tmpDirPath = tmpDir.toPath(); + + // Create paths to the fasta, fasta index, and the sequence dictionary: + final Path localGencodeTranscriptFastaFilePath = tmpDirPath.resolve(LOCAL_GENCODE_TRANSCRIPT_FILE_BASE_NAME + ".fa"); + final Path localGencodeTranscriptFastaIndexFilePath = IOUtils.getPath( ReferenceUtils.getFastaIndexFileName(localGencodeTranscriptFastaFilePath.toUri().toString()) ); + final Path localGencodeTranscriptFastaSequenceDictionaryFilePath = IOUtils.getPath( ReferenceUtils.getFastaDictionaryFileName(localGencodeTranscriptFastaFilePath.toUri().toString()) ); + + // Copy the files to our local machine: + logger.info("Localizing Gencode transcript FASTA file for faster lookup times..."); + + // Copy FASTA: + NioFileCopierWithProgressMeter.create(gencodeTranscriptFastaFilePath, localGencodeTranscriptFastaFilePath, true).initiateCopy(); + + // Copy Index: + NioFileCopierWithProgressMeter.create(remoteGencodeTranscriptFastaIndexFilePath, localGencodeTranscriptFastaIndexFilePath, true).initiateCopy(); + + // Copy Sequence Dictionary: + NioFileCopierWithProgressMeter.create(remoteGencodeTranscriptFastaSequenceDictionaryFilePath, localGencodeTranscriptFastaSequenceDictionaryFilePath, true).initiateCopy(); + + // Bye Bye! 
+ return localGencodeTranscriptFastaFilePath; + } + //================================================================================================================== // Override Methods: diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java index 0a60388964e..0e245c630a7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java @@ -138,7 +138,7 @@ public VcfFuncotationFactory(final String name, private FuncotationMetadata createFuncotationMetadata(final Path sourceFilePath) { // Read the VCF to just get the header - try ( final FeatureDataSource vcfReader = new FeatureDataSource<>(sourceFilePath.toString()) ) { + try ( final FeatureDataSource vcfReader = new FeatureDataSource<>(sourceFilePath.toUri().toString()) ) { final Object header = vcfReader.getHeader(); if ( ! (header instanceof VCFHeader) ) { throw new IllegalArgumentException(sourceFilePath + " does not have a valid VCF header"); @@ -433,7 +433,7 @@ private TableFuncotation createDefaultFuncotation(final Allele altAllele) { * Populates {@link VcfFuncotationFactory#supportedFieldNames} and {@link VcfFuncotationFactory#supportedFieldNamesAndDefaults}. 
*/ private void populateSupportedFieldNamesFromVcfFile() { - final VCFFileReader reader = new VCFFileReader(sourceFilePath.toFile()); + final VCFFileReader reader = new VCFFileReader(sourceFilePath); final VCFHeader header = reader.getFileHeader(); final List infoLineKeys = new ArrayList<>(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java index 1b9580bb894..49b84586d3a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactory.java @@ -15,6 +15,7 @@ import org.broadinstitute.hellbender.tools.funcotator.FuncotatorArgumentDefinitions; import org.broadinstitute.hellbender.tools.funcotator.dataSources.TableFuncotation; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; +import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvLocatableTableCodec; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature; @@ -25,7 +26,7 @@ import java.util.*; /** - * Factory for creating {@link TableFuncotation}s by handling `Separated Value` files with arbitrary delimiters + * Factory for creating {@link TableFuncotation}s by handling `Separated Value` files with arbitrary delimiters * (e.g. CSV/TSV files) which contain data that are locatable (i.e. {@link org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature}). * * This is a high-level object that interfaces with the internals of {@link org.broadinstitute.hellbender.tools.funcotator.Funcotator}. 
@@ -52,20 +53,20 @@ public class LocatableXsvFuncotationFactory extends DataSourceFuncotationFactory /** * {@link LinkedHashSet} of the names of all fields supported by this {@link LocatableXsvFuncotationFactory}. - * Set by {@link #setSupportedFuncotationFields(List)}. + * Set by {@link #setSupportedFuncotationFields(Path)}. */ private LinkedHashSet supportedFieldNames = null; /** * {@link List} of the names of all fields supported by this {@link LocatableXsvFuncotationFactory}. - * Set by {@link #setSupportedFuncotationFields(List)}. + * Set by {@link #setSupportedFuncotationFields(Path)}. */ private List supportedFieldNameList = null; /** * {@link List} of empty {@link String}s of the same length as {@link #supportedFieldNames}. * Cached for faster output. - * Set by {@link #setSupportedFuncotationFields(List)}. + * Set by {@link #setSupportedFuncotationFields(Path)}. */ private List emptyFieldList = null; @@ -90,7 +91,6 @@ public LocatableXsvFuncotationFactory(final String name, final String version, f this.annotationOverrideMap = new LinkedHashMap<>(annotationOverridesMap); } - //================================================================================================================== // Override Methods: @@ -199,41 +199,54 @@ private List createDefaultFuncotationsOnVariantHelper( final Varian /** * Set the field names that this {@link LocatableXsvFuncotationFactory} can create. * Does so by reading the headers of backing data files for this {@link LocatableXsvFuncotationFactory}. - * @param inputDataFilePaths {@link List} to backing data files from which annotations can be made for this {@link LocatableXsvFuncotationFactory}. + * @param inputDataFilePath {@link Path} to a backing data file from which annotations can be made for this {@link LocatableXsvFuncotationFactory}. Must not be {@code null}. 
*/ - public void setSupportedFuncotationFields(final List inputDataFilePaths) { + public void setSupportedFuncotationFields(final Path inputDataFilePath) { + + Utils.nonNull(inputDataFilePath); if ( supportedFieldNames == null ) { synchronized ( this ) { if ( supportedFieldNames == null ) { - // Approximate starting size: - supportedFieldNames = new LinkedHashSet<>(inputDataFilePaths.size() * 10); - - for ( final Path dataPath : inputDataFilePaths ) { + // Approximate / arbitrary starting size: + supportedFieldNames = new LinkedHashSet<>(10); + + // Set up a codec here to read the config file. + // We have to call canDecode to set up the internal state of the XsvLocatableTableCodec: + final XsvLocatableTableCodec codec = new XsvLocatableTableCodec(); + try { + if ( !codec.canDecode(mainSourceFileAsFeatureInput.getFeaturePath()) ) { + // This should never happen because we have already validated this config file by the time we + // reach here: + throw new GATKException.ShouldNeverReachHereException("Could not decode from data file: " + mainSourceFileAsFeatureInput.getFeaturePath()); + } + } + catch ( final NullPointerException ex ) { + // This should never happen because we have already validated this config file by the time we + // reach here: + throw new GATKException.ShouldNeverReachHereException("Could not decode from data file! 
Has not been set yet!"); + } - final XsvLocatableTableCodec codec = new XsvLocatableTableCodec(); - List header = null; + // Get the info from our path: + final List columnNames; + try (final InputStream fileInputStream = Files.newInputStream(inputDataFilePath)) { - if (codec.canDecode(dataPath.toString())) { - try (final InputStream fileInputStream = Files.newInputStream(dataPath)) { + final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); + codec.readActualHeader(lineReaderIterator); + columnNames = codec.getHeaderWithoutLocationColumns(); - final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); - codec.readActualHeader(lineReaderIterator); - header = codec.getHeaderWithoutLocationColumns(); + } catch (final IOException ioe) { + throw new UserException.BadInput("Could not read header from data file: " + inputDataFilePath.toUri().toString(), ioe); + } - } catch (final IOException ioe) { - throw new UserException.BadInput("Could not read header from data file: " + dataPath.toUri().toString(), ioe); - } - } + // Make sure we actually read the header: + if ( columnNames == null ) { + throw new UserException.MalformedFile("Could not decode from data file: " + inputDataFilePath.toUri().toString()); + } - // Make sure we actually read the header: - if ( header == null ) { - throw new UserException.MalformedFile("Could not decode from data file: " + dataPath.toUri().toString()); - } + supportedFieldNames.addAll(columnNames); - supportedFieldNames.addAll(header); - } // Initialize our field name lists: initializeFieldNameLists(); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java index 25f498a7b14..8ed5a6088bd 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodec.java @@ -11,10 +11,12 @@ import htsjdk.tribble.readers.LineIterator; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.io.IOUtils; @@ -69,14 +71,8 @@ public final class XsvLocatableTableCodec extends AsciiFeatureCodec header; @@ -142,10 +141,9 @@ public XsvLocatableTableCodec() { super(XsvTableFeature.class); } - /** Constructor for when a configuration file is specified instead of using a sibling config file. - * + /** + * Constructor for when a configuration file is specified. * This cannot be used with auto decoding. - * * @param overrideConfigFile {@link Path} to the file to use as a configuration file for the given file. */ public XsvLocatableTableCodec(final Path overrideConfigFile) { @@ -158,36 +156,65 @@ public XsvLocatableTableCodec(final Path overrideConfigFile) { @Override public boolean canDecode(final String path) { + Utils.nonNull(path); // seg files are handled by a different codec. This check has to be done, since seg files will return true in // this codec and the AnnotatedIntervalCodec. 
- return !path.endsWith(".seg") && canDecodeMinusExtensionChecks(path); + return path.endsWith(".config") && canDecodeFileChecks(path); + } + + @Override + public String getPathToDataFile( final String path ) { + return backingDataFilePath.toUri().toString(); } /** - * Minus checking the file extension, can this class decode the given path. + * Checks the content of the given config file to see if it can be decoded. + * The config file can be independent of the backing data source file. + * + * NOTE: To reiterate, this takes a CONFIG file, not the actual data file to be read. * * TODO: This method should be inside an abstract superclass. {@link XsvLocatableTableCodec} and {@link org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCodec} should inherit. See https://github.com/broadinstitute/gatk/issues/4580 * - * @param path File to check. Never {@code null} + * @param configFilePathString {@link String} containing the path to the configuration file to check. Never {@code null} * @return true if the file can be decoded. False otherwise. */ - public boolean canDecodeMinusExtensionChecks(final String path) { - Utils.nonNull(path); + public boolean canDecodeFileChecks(final String configFilePathString) { + Utils.nonNull(configFilePathString); - // Get the paths to our file and the config file: - final Path inputFilePath = IOUtils.getPath(path); - final Path configFilePath = (overrideConfigFile != null ? - overrideConfigFile : getConfigFilePath(inputFilePath)); + // Get the path to our config file: + final Path configFilePath = (overrideConfigFile != null ? 
overrideConfigFile : IOUtils.getPath(configFilePathString)); + + // Make sure we can read the config file: + if ( !validateInputFileCanBeRead(configFilePath) ) { + return false; + } + + // Make sure our config file contains the information we need: + final Pair validityAndPropertiesPair = getAndValidateConfigFileContentsOnPath(configFilePath, true); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties configProperties = validityAndPropertiesPair.getRight(); + + if ( !isValid ) { + return false; + } + + // Get the backing data file path: + final String inputFilePathString = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE); + + // Resolve the input file path to a real path: + final Path dataFilePath = DataSourceUtils.resolveFilePathStringFromKnownPath( inputFilePathString, configFilePath ); // Check that our files are good for eating... I mean reading... - if ( validateInputDataFile(inputFilePath) && validateInputDataFile(configFilePath) ) { + if ( validateInputFileCanBeRead(dataFilePath) ) { + + backingDataFilePath = dataFilePath; // auto-determine the preamble format - preambleLineStart = determinePreambleLineStart(inputFilePath); + preambleLineStart = determinePreambleLineStart(backingDataFilePath); // Get our metadata and set up our internals so we can read from this file: - readMetadataFromConfigFile(configFilePath); + populateMetaDataFromConfigProperties(configProperties); return true; } else { @@ -195,6 +222,57 @@ public boolean canDecodeMinusExtensionChecks(final String path) { } } + /** + * Checks the content of the given config file and backing data file to see if they can be decoded. + * + * NOTE: To reiterate, this takes a CONFIG file, not the actual data file to be read. + * + * TODO: This method should be inside an abstract superclass. {@link XsvLocatableTableCodec} and {@link org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCodec} should inherit. 
See https://github.com/broadinstitute/gatk/issues/4580 + * + * @param configFilePathString {@link String} containing the path to the configuration file to check. Never {@code null}. + * @param dataFilePathString {@link String} containing the path to the backing data file to check. Never {@code null}. + * @return true if the file can be decoded. False otherwise. + */ + public boolean canDecodeFileChecks(final String configFilePathString, final String dataFilePathString) { + Utils.nonNull(configFilePathString); + Utils.nonNull(dataFilePathString); + + // Get the path to our config file: + final Path configFilePath = (overrideConfigFile != null ? overrideConfigFile : IOUtils.getPath(configFilePathString)); + + // Get the path to our data file: + final Path dataFilePath = IOUtils.getPath(dataFilePathString); + + // Make sure we can read the config file: + if ( !validateInputFileCanBeRead(configFilePath) ) { + return false; + } + + // Make sure our config file contains the information we need: + final Pair validityAndPropertiesPair = getAndValidateConfigFileContentsOnPath(configFilePath, false); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties configProperties = validityAndPropertiesPair.getRight(); + + if ( !isValid ) { + return false; + } + + // Make sure we can read the data file: + if ( !validateInputFileCanBeRead(dataFilePath) ) { + return false; + } + + // The data file path was given explicitly, so use it as-is (no resolution against the config file): + backingDataFilePath = dataFilePath; + + // auto-determine the preamble format + preambleLineStart = determinePreambleLineStart(backingDataFilePath); + + // Get our metadata and set up our internals so we can read from this file: + populateMetaDataFromConfigProperties(configProperties); + return true; + } + @Override public XsvTableFeature decode(final String s) { @@ -297,6 +375,46 @@ String determineFinalColumn(final String rawInputListOrIndex) { : determinePrefixForHeader() + determineColumnNameToUse(rawInputListOrIndex); } + /** + *
Get the properties from the given {@code configFilePath}, check whether all required properties are present, + * and return the result of that check together with the properties. + * @param configFilePath {@link Path} to the configuration file. + * @param errorOnMissingConfigKey If {@code true}, a missing required key is logged as an error; otherwise it is logged as a warning. + * @return A {@link Pair} whose left value is {@code true} if all required keys are present, and whose right value is the {@link Properties} read from the given {@code configFilePath}. + */ + public static Pair getAndValidateConfigFileContentsOnPath(final Path configFilePath, + final boolean errorOnMissingConfigKey) { + + Utils.nonNull(configFilePath); + + boolean isValid = true; + + // Read in the contents of the config file: + final Properties configProperties = new Properties(); + try ( final InputStream inputStream = Files.newInputStream(configFilePath, StandardOpenOption.READ) ) { + configProperties.load(inputStream); + } + catch (final Exception ex) { + throw new UserException.BadInput("Unable to read from XSV config file: " + configFilePath.toUri().toString(), ex); + } + + // Validate that it has the correct keys: + isValid = Stream.of( + DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_VERSION, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_ORIGIN_LOCATION, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_PREPROCESSING_SCRIPT, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_XSV_DELIMITER, + DataSourceUtils.CONFIG_FILE_FIELD_NAME_NAME) + .map( key -> configPropertiesContainsKey(configProperties, key, configFilePath, errorOnMissingConfigKey)) + .allMatch( result -> result ); + + return Pair.of(isValid, configProperties); + } + private List getRawHeaders() { assertHeaderInitialized(); return header.stream().map(h -> getHeaderWithoutPrefix(h)).collect(Collectors.toList()); @@ -343,38 +461,6 @@ private String
determinePrefixForHeader() { return (StringUtils.isEmpty(dataSourceName) ? "" : dataSourceName + "_"); } - //================================================================================================================== - // Static Methods: - - /** - * Get the properties from the given {@code configFilePath}, validate that all required properties are present, - * and return the property map. - * @param configFilePath {@link Path} to the configuration file. - * @return The {@link Properties} as contained in the given {@code configFilePath}. - */ - public static Properties getAndValidateConfigFileContents(final Path configFilePath) { - - Utils.nonNull(configFilePath); - - // Read in the contents of the config file: - final Properties configFileContents = new Properties(); - try ( final InputStream inputStream = Files.newInputStream(configFilePath, StandardOpenOption.READ) ) { - configFileContents.load(inputStream); - } - catch (final Exception ex) { - throw new UserException.BadInput("Unable to read from XSV config file: " + configFilePath.toUri().toString(), ex); - } - - // Validate that it has the right keys: - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_CONTIG_COLUMN_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_START_COLUMN_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_END_COLUMN_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_DELIMITER_KEY, configFilePath); - assertConfigPropertiesContainsKey(configFileContents, CONFIG_FILE_DATA_SOURCE_NAME_KEY, configFilePath); - - return configFileContents; - } - private boolean isPreambleLine(final String line) { return line.startsWith(preambleLineStart); } @@ -396,11 +482,20 @@ public static Path getConfigFilePath(final Path inputFilePath) { * @param configProperties The {@link Properties} in which to look for the given key. 
* @param key The value to find in the given {@link Properties}. * @param configFilePath The {@link Path} for the config file from which {@link Properties} were derived. Used for printing output only. + * @param errorOnMissingKey If {@code true} will log an error message when the given {@code key} is not contained in {@code configProperties}. */ - private static void assertConfigPropertiesContainsKey(final Properties configProperties, final String key, final Path configFilePath) { + private static boolean configPropertiesContainsKey(final Properties configProperties, final String key, final Path configFilePath, final boolean errorOnMissingKey) { if ( !configProperties.stringPropertyNames().contains(key) ) { - throw new UserException.BadInput("Config file for datasource (" + configFilePath.toUri().toString() + ") does not contain required key: " + key); + final String logMessage = "Config file for datasource (" + configFilePath.toUri().toString() + ") does not contain required key: " + key; + if (errorOnMissingKey) { + logger.error( logMessage ); + } + else { + logger.warn( logMessage ); + } + return false; } + return true; } //================================================================================================================== @@ -411,26 +506,24 @@ private static void assertConfigPropertiesContainsKey(final Properties configPro * @param filePath The {@link Path} to the data file to validate. * @return {@code true} if the given {@code filePath} is valid; {@code false} otherwise. */ - private boolean validateInputDataFile(final Path filePath) { + private boolean validateInputFileCanBeRead(final Path filePath) { return Files.exists(filePath) && Files.isReadable(filePath) && !Files.isDirectory(filePath); } /** - * Reads the metadata required for parsing from the given {@code configFilePath}. - * @param configFilePath {@link Path} to the configuration file from which to read in and setup metadata values. 
+ * Populates the metadata required for parsing from the given {@code configProperties}. + * @param configProperties {@link Properties} containing configuration information for this {@link XsvLocatableTableCodec}. */ - private void readMetadataFromConfigFile(final Path configFilePath) { - - final Properties configProperties = getAndValidateConfigFileContents(configFilePath); + private void populateMetaDataFromConfigProperties(final Properties configProperties) { // Get the properties and remove the leading/trailing whitespace if there is any: - inputContigColumn = configProperties.getProperty(CONFIG_FILE_CONTIG_COLUMN_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); - inputStartColumn = configProperties.getProperty(CONFIG_FILE_START_COLUMN_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); - inputEndColumn = configProperties.getProperty(CONFIG_FILE_END_COLUMN_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); - dataSourceName = configProperties.getProperty(CONFIG_FILE_DATA_SOURCE_NAME_KEY).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + inputContigColumn = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + inputStartColumn = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + inputEndColumn = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); + dataSourceName = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_NAME).replaceAll("^\\s+", "").replaceAll("\\s+$", ""); // Get the delimiter - we do NOT remove whitespace here on purpose: - delimiter = configProperties.getProperty(CONFIG_FILE_DELIMITER_KEY); + delimiter = configProperties.getProperty(DataSourceUtils.CONFIG_FILE_FIELD_NAME_XSV_DELIMITER); // Process delimiter just in case it is a tab escape character: if ( delimiter.equals("\\t") ) { diff 
--git a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java index c29a27120b0..982720f12f8 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gcs/BucketUtils.java @@ -264,8 +264,7 @@ public static String randomRemotePath(String stagingLocation, String prefix, Str */ public static boolean fileExists(String path) { final boolean MAYBE = false; - try { - InputStream inputStream = openFile(path); + try (InputStream inputStream = openFile(path)) { int ignored = inputStream.read(); } catch (UserException.CouldNotReadInputFile notthere) { // file isn't there diff --git a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config index 905ffae4c9d..a33ae21f5a0 100644 --- a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config +++ b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated_region_default.config @@ -2,4 +2,8 @@ contig_column = CONTIG,contig,Chromosome,chrom,chromosome,Chrom,seqname,seqnames start_column = START,start,Start,Start_Position,start_position,chromStart,segment_start,Start_position,target_start,Position,position,pos,POS,segment_start end_column = END,end,End,End_Position,end_position,chromEnd,segment_end,End_position,target_end,stop,Stop,Position,position,pos,POS,segment_end xsv_delimiter = \t -name = \ No newline at end of file +name = +src_file = +version = +origin_location = +preprocessing_script = diff --git a/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java b/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java new file 
mode 100644 index 00000000000..bc5457c9afe --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/engine/FeatureInputTestTools.java @@ -0,0 +1,21 @@ +package org.broadinstitute.hellbender.engine; + +import htsjdk.tribble.Feature; + +/** + * Test utilities involving {@link FeatureInput}s. + * Created by jonn on 11/7/18. + */ +public class FeatureInputTestTools { + + /** + * Create a feature input based on an input path and a name. + * @param path A {@link String} containing the path to the backing data file for the resulting {@link FeatureInput}. + * @param name A {@link String} containing the name of the feature input type. + * @return A {@link FeatureInput} for the given {@code path} and {@code name}. + */ + public static FeatureInput createFeatureInput(final String path, final String name) { + return new FeatureInput<>(path, name); + } + +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java index d6c03257b1d..065dc4e0235 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java @@ -20,6 +20,7 @@ import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedInterval; import org.broadinstitute.hellbender.tools.copynumber.utils.annotatedinterval.AnnotatedIntervalCollection; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.DataSourceUtils; import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; import org.broadinstitute.hellbender.tools.funcotator.dataSources.xsv.SimpleKeyXsvFuncotationFactory; import org.broadinstitute.hellbender.tools.funcotator.mafOutput.CustomMafFuncotationCreator; @@ -395,43 +396,64 @@ public 
Object[][] provideForLargeDataValidationTest() { "0816201804HC0_R01C01.vcf", b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, - GERMLINE_DATASOURCES_FOLDER + GERMLINE_DATASOURCES_FOLDER, + true + }, + { + "0816201804HC0_R01C01.vcf", + b37Reference, + FuncotatorTestConstants.REFERENCE_VERSION_HG19, + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_LOCAL_CLOUD_FOLDER, + false + }, + { + "0816201804HC0_R01C01.vcf", + b37Reference, + FuncotatorTestConstants.REFERENCE_VERSION_HG19, + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_REMOTE_CLOUD_FOLDER, + false }, { "hg38_test_variants.vcf", hg38Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG38, - LARGE_DATASOURCES_FOLDER + LARGE_DATASOURCES_FOLDER, + true }, { "hg38_trio.vcf", hg38Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG38, - LARGE_DATASOURCES_FOLDER + LARGE_DATASOURCES_FOLDER, + true }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_DATA_SET_1, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_DATA_SET_2, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, - FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG38, hg38Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG38, - FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, { FuncotatorTestConstants.NON_TRIVIAL_DATA_VALIDATION_TEST_HG19_LARGE_DATA_SET, b37Reference, FuncotatorTestConstants.REFERENCE_VERSION_HG19, - FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + FuncotatorTestConstants.FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER, + false }, }; } @@ -486,7 +508,8 @@ private void validateFuncotationsOnVcf(final Iterable vcfIterabl 
public void largeDataValidationTest(final String inputVcfName, final String referencePath, final String referenceVersion, - final String dataSourcesPath) throws IOException { + final String dataSourcesPath, + final boolean isDsEnvironmentPath) throws IOException { // Get our main test folder path from our environment: final String testFolderInputPath = getFuncotatorLargeDataValidationTestInputPath(); @@ -496,6 +519,14 @@ public void largeDataValidationTest(final String inputVcfName, final String outFileBaseName = inputVcfName + ".funcotator"; + final String dataSourcesPathString; + if (isDsEnvironmentPath) { + dataSourcesPathString = getFuncotatorLargeDataValidationTestInputPath() + dataSourcesPath; + } + else { + dataSourcesPathString = dataSourcesPath; + } + for (final FuncotatorArgumentDefinitions.OutputFormatType outFormat : FuncotatorArgumentDefinitions.OutputFormatType.values()) { startTime = System.nanoTime(); @@ -511,7 +542,7 @@ public void largeDataValidationTest(final String inputVcfName, testFolderInputPath + inputVcfName, outputFile, referencePath, - getFuncotatorLargeDataValidationTestInputPath() + dataSourcesPath, + dataSourcesPathString, referenceVersion, outFormat, true); @@ -930,11 +961,16 @@ final Object[][] provideForMafVcfConcordance() { private void createConfigFileForMAF(final File mafConfigFile) { try ( final PrintWriter printWriter = new PrintWriter(mafConfigFile) ) { - printWriter.println("contig_column = " + MafOutputRendererConstants.FieldName_Chromosome); - printWriter.println("start_column = " + MafOutputRendererConstants.FieldName_Start_Position); - printWriter.println("end_column = " + MafOutputRendererConstants.FieldName_End_Position); - printWriter.println("xsv_delimiter = \\t"); - printWriter.println("name = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_CONTIG_COLUMN + " = " + MafOutputRendererConstants.FieldName_Chromosome); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_START_COLUMN + " = " + 
MafOutputRendererConstants.FieldName_Start_Position); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_END_COLUMN + " = " + MafOutputRendererConstants.FieldName_End_Position); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_XSV_DELIMITER + " = \\t"); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_NAME + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_SRC_FILE + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_VERSION + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_ORIGIN_LOCATION + " = "); + printWriter.println(DataSourceUtils.CONFIG_FILE_FIELD_NAME_PREPROCESSING_SCRIPT + " = "); + } catch (final FileNotFoundException ex) { throw new GATKException("Could not create the tmp config file to test maf/vcf concorance: " + mafConfigFile.toURI().toString(), ex); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java index 358b54f809c..bc332f0afe3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java @@ -32,7 +32,13 @@ public class FuncotatorTestConstants { * there will be a bit of manual work as well (I did not have the will or the time to automate everything - Jonn Smith): * GATK_DEVELOPMENT_TOP_DIRECTORY/scripts/funcotator/testing/getGencodeGenesForVcfVariants.sh */ - public static final String FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources" + File.separator; + public static final String FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources" + File.separator; + /** Local folder containing data sources that point to the cloud. 
*/ + public static final String FUNCOTATOR_DATA_SOURCES_LOCAL_CLOUD_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources_cloud" + File.separator; + /** Local folder containing local data sources and one that points to gnomAD on the cloud. */ + public static final String FUNCOTATOR_DATA_SOURCES_LOCAL_CLOUD_GNOMAD_FOLDER = FUNCOTATOR_LARGE_FILES_DIR + "funcotator_dataSources_cloud_gnomad" + File.separator; + /** Cloud-based folder containing data sources that point to the cloud. */ + public static final String FUNCOTATOR_DATA_SOURCES_REMOTE_CLOUD_FOLDER = "gs://hellbender/test/resources/large/funcotatorDataSourceCollection/funcotator_dataSources_cloud" + File.separator; public static final String DUMMY_DATA_SOURCES_TAR_GZ = FUNCOTATOR_LARGE_FILES_DIR + "dummyDataSources.tar.gz"; public static final String DUMMY_DATA_SOURCES_TAR_GZ_SHA256_FILE = FUNCOTATOR_LARGE_FILES_DIR + "dummyDataSources.sha256"; @@ -55,9 +61,12 @@ public class FuncotatorTestConstants { public static final String XSV_CSV_PIK3CA_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_PIK3CA.csv"; public static final String XSV_CSV_MUC16_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_MUC16.csv"; - public static final String XSV_LOCATABLE_TEST_FILE1_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test.csv"; - public static final String XSV_LOCATABLE_TEST_FILE2_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test2.csv"; - public static final String XSV_LOCATABLE_TEST_FILE3_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test3.tsv"; + public static final String XSV_LOCATABLE_TEST_FILE1_DATA_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test.csv"; + public static final String XSV_LOCATABLE_TEST_FILE1_CONFIG_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test.config"; + public static final String XSV_LOCATABLE_TEST_FILE2_DATA_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test2.csv"; + public static final String XSV_LOCATABLE_TEST_FILE2_CONFIG_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test2.config"; + public static final String 
XSV_LOCATABLE_TEST_FILE3_DATA_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test3.tsv"; + public static final String XSV_LOCATABLE_TEST_FILE3_CONFIG_PATH = FUNCOTATOR_TEST_DIR + "xsv_locatable_test3.config"; public static final String COSMIC_TEST_DB = FUNCOTATOR_DATA_SOURCES_MAIN_FOLDER + "cosmic" + File.separator + "hg19" + File.separator + "CosmicTest.db"; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java index 1d3434f6b1f..1702ca0ce2e 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/LocatableXsvFuncotationFactoryUnitTest.java @@ -6,6 +6,8 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import org.apache.commons.io.FilenameUtils; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.engine.FeatureInput; +import org.broadinstitute.hellbender.engine.FeatureInputTestTools; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.ReferenceDataSource; import org.broadinstitute.hellbender.exceptions.GATKException; @@ -19,6 +21,7 @@ import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvLocatableTableCodec; import org.broadinstitute.hellbender.utils.codecs.xsvLocatableTable.XsvTableFeature; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -28,7 +31,6 @@ import java.io.PrintWriter; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; 
@@ -57,33 +59,6 @@ public class LocatableXsvFuncotationFactoryUnitTest extends GATKBaseTest { //================================================================================================================== // Helper Data Types: - private static class DummyTestFeature implements Feature { - - private final String contig; - private final int start; - private final int stop; - - public DummyTestFeature(final String contig, final int start, final int stop) { - this.contig = contig; - this.start = start; - this.stop = stop; - } - - @Override - public String getContig() { - return contig; - } - - @Override - public int getStart() { - return start; - } - - @Override - public int getEnd() { - return stop; - } - } //================================================================================================================== // Helper Methods: @@ -249,29 +224,23 @@ private Object[][] provideForTestCreateFuncotations() { @DataProvider private Object[][] provideForTestSetSupportedFuncotationFields() { return new Object[][] { - // Empty list of data files: - {Collections.emptyList(), new LinkedHashSet<>()}, // One Valid XSV (csv) Locatable Data File: { - Collections.singletonList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_PATH)), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_DATA_PATH), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_CONFIG_PATH), new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond")) }, - // One Valid XSV (tsv) Locatable Data File: - { - Collections.singletonList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_PATH)), - new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond")) - }, - // Two Valid XSV Locatable Data Files: + // One Valid XSV (csv) Locatable Data File: { - 
Arrays.asList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_PATH), Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_PATH)), - new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond", - "SECOND_XSV_NAME_Car_Maker", "SECOND_XSV_NAME_Tire_Maker", "SECOND_XSV_NAME_Parent_Company")) + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_DATA_PATH), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_CONFIG_PATH), + new LinkedHashSet<>(Arrays.asList("SECOND_XSV_NAME_Car_Maker", "SECOND_XSV_NAME_Tire_Maker", "SECOND_XSV_NAME_Parent_Company")) }, - // Three Valid XSV Locatable Data Files: + // One Valid XSV (tsv) Locatable Data File: { - Arrays.asList(Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE1_PATH), Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE2_PATH), Paths.get(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_PATH)), - new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond", - "SECOND_XSV_NAME_Car_Maker", "SECOND_XSV_NAME_Tire_Maker", "SECOND_XSV_NAME_Parent_Company")) + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_DATA_PATH), + IOUtils.getPath(FuncotatorTestConstants.XSV_LOCATABLE_TEST_FILE3_CONFIG_PATH), + new LinkedHashSet<>(Arrays.asList("XSV_LOCATABLE_TEST_NAME_Villain", "XSV_LOCATABLE_TEST_NAME_test_val", "XSV_LOCATABLE_TEST_NAME_Bond")) }, }; } @@ -302,18 +271,20 @@ public void testCreateFuncotations(final VariantContext variant, // Create a temporary file for the "backing data" which will only contain the header: final Path headerBackingDataFilePath = createTempPath("headerBackingDataFile", "csv"); + final Path configFilePath; try { Files.write(headerBackingDataFilePath, ("CONTIG,START,END," + reportableFuncotationFieldNames.stream().collect(Collectors.joining(","))).getBytes()); // Create a temporary file for 
the config file that points to the temporary file for the backing data: - createTemporaryConfigFile(headerBackingDataFilePath); + configFilePath = createTemporaryConfigFile(headerBackingDataFilePath); } catch (final IOException ex) { throw new GATKException("Could not write to temp file for testing: " + headerBackingDataFilePath.toUri(), ex); } - final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(defaultDataSourceName, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), null); - locatableXsvFuncotationFactory.setSupportedFuncotationFields(new ArrayList<>(Collections.singletonList(headerBackingDataFilePath))); + final FeatureInput featureInput = FeatureInputTestTools.createFeatureInput( configFilePath.toUri().toString(), defaultDataSourceName ); + final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(defaultDataSourceName, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), featureInput); + locatableXsvFuncotationFactory.setSupportedFuncotationFields(headerBackingDataFilePath); Assert.assertEquals( locatableXsvFuncotationFactory.createFuncotationsOnVariant( variant, referenceContext, featureList ), @@ -327,11 +298,14 @@ public void testCreateFuncotations(final VariantContext variant, } @Test(dataProvider = "provideForTestSetSupportedFuncotationFields") - public void testSetSupportedFuncotationFields(final List dataFilePaths, + public void testSetSupportedFuncotationFields(final Path dataFilePath, + final Path configFilePath, final LinkedHashSet expected) { - final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(LocatableXsvFuncotationFactory.DEFAULT_NAME, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), null); - locatableXsvFuncotationFactory.setSupportedFuncotationFields(dataFilePaths); + final FeatureInput featureInput = 
FeatureInputTestTools.createFeatureInput(configFilePath.toUri().toString(), defaultDataSourceName); + final LocatableXsvFuncotationFactory locatableXsvFuncotationFactory = new LocatableXsvFuncotationFactory(LocatableXsvFuncotationFactory.DEFAULT_NAME, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), featureInput); + + locatableXsvFuncotationFactory.setSupportedFuncotationFields(dataFilePath); Assert.assertEquals( locatableXsvFuncotationFactory.getSupportedFuncotationFields(), @@ -345,8 +319,11 @@ public void testGetSupportedFuncotationFields() { locatableXsvFuncotationFactory.getSupportedFuncotationFields(); } - private void createTemporaryConfigFile(final Path backingDataSourcePath) throws IOException { + private Path createTemporaryConfigFile(final Path backingDataSourcePath) throws IOException { + return createTemporaryConfigFile(backingDataSourcePath, ","); + } + private Path createTemporaryConfigFile(final Path backingDataSourcePath, final String delimiter) throws IOException { // Config file must be next to backingDataSourcePath, and have the same base name, with the .config extension: final String backingDataSourceFileName = backingDataSourcePath.toFile().getName(); final String configFileBaseName = FilenameUtils.removeExtension(backingDataSourceFileName); @@ -372,13 +349,13 @@ private void createTemporaryConfigFile(final Path backingDataSourcePath) throws writer.println(""); writer.println("# Required field for GENCODE files."); writer.println("# Path to the FASTA file from which to load the sequences for GENCODE transcripts:"); - writer.println(" gencode_fasta_path ="); + writer.println("gencode_fasta_path ="); writer.println(""); writer.println("# Required field for simpleXSV files."); writer.println("# Valid values:"); writer.println("# GENE_NAME"); writer.println("# TRANSCRIPT_ID"); - writer.println(" xsv_key = "); + writer.println("xsv_key = "); writer.println(""); writer.println("# Required field for simpleXSV files."); 
writer.println("# The 0-based index of the column containing the key on which to match"); @@ -386,26 +363,28 @@ private void createTemporaryConfigFile(final Path backingDataSourcePath) throws writer.println(""); writer.println("# Required field for simpleXSV AND locatableXSV files."); writer.println("# The delimiter by which to split the XSV file into columns."); - writer.println(" xsv_delimiter = ,"); + writer.println("xsv_delimiter = " + delimiter); writer.println(""); writer.println("# Required field for simpleXSV files."); writer.println("# Whether to permissively match the number of columns in the header and data rows"); writer.println("# Valid values:"); writer.println("# true"); writer.println("# false"); - writer.println(" xsv_permissive_cols = "); + writer.println("xsv_permissive_cols = "); writer.println(""); writer.println("# Required field for locatableXSV files."); writer.println("# The 0-based index of the column containing the contig for each row"); - writer.println(" contig_column = 0 "); + writer.println("contig_column = 0 "); writer.println(""); writer.println("# Required field for locatableXSV files."); writer.println("# The 0-based index of the column containing the start position for each row"); - writer.println(" start_column = 1 "); + writer.println("start_column = 1 "); writer.println(""); writer.println("# Required field for locatableXSV files."); writer.println("# The 0-based index of the column containing the end position for each row"); - writer.println(" end_column = 2"); + writer.println("end_column = 2"); } + + return configPath; } } diff --git a/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java index 6a32fc422f6..3397d29ce68 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java @@ -3,6 +3,7 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.tribble.readers.AsciiLineReader; import htsjdk.tribble.readers.AsciiLineReaderIterator; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; @@ -29,14 +30,14 @@ public class XsvLocatableTableCodecUnitTest extends GATKBaseTest { // Private Static Members: private static final String TEST_RESOURCE_DIR = publicTestDir + "org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable" + File.separator; - private static final String TEST_FILE1 = TEST_RESOURCE_DIR + "xsv_locatable_test.csv"; - private static final String TEST_FILE2 = TEST_RESOURCE_DIR + "xsv_locatable_test2.tsv"; + private static final String TEST_CONFIG_FILE1 = TEST_RESOURCE_DIR + "xsv_locatable_test.config"; + private static final String TEST_CONFIG_FILE2 = TEST_RESOURCE_DIR + "xsv_locatable_test2.config"; - private static final String TEST_FILE_MIXED_ENCODING = TEST_RESOURCE_DIR + "xsv_locatable_test_mixed_encodings.csv"; + private static final String TEST_FILE_MIXED_ENCODING = TEST_RESOURCE_DIR + "xsv_locatable_test_mixed_encodings.config"; /** Uses column names, instead of index */ - private static final String TEST_FILE3 = TEST_RESOURCE_DIR + "xsv_locatable_test3.csv"; - private static final String TEST_FILE4 = TEST_RESOURCE_DIR + "xsv_locatable_test4.csv"; + private static final String TEST_CONFIG_FILE3 = TEST_RESOURCE_DIR + "xsv_locatable_test3.config"; + private static final String TEST_CONFIG_FILE4 = TEST_RESOURCE_DIR + "xsv_locatable_test4.config"; private static final String TEST_FILE_NO_CONFIG = TEST_RESOURCE_DIR + "xsv_locatable_test_no_config.csv"; // Preambles of SAMFileHeaders or just plain ol' comments @@ -73,10 +74,10 @@ public class 
XsvLocatableTableCodecUnitTest extends GATKBaseTest { @DataProvider private Object[][] provideForTestCanDecode() { return new Object[][] { - { TEST_FILE1, true }, - { TEST_FILE2, true }, + { TEST_CONFIG_FILE1, true }, + { TEST_CONFIG_FILE2, true }, { TEST_FILE_NO_CONFIG, false }, - { TEST_FILE3, true }, + { TEST_CONFIG_FILE3, true }, }; } @@ -92,25 +93,25 @@ private Object[][] provideForTestDecodeCharsetFailure() { private Object[][] provideForTestDecode() { return new Object[][] { - { TEST_FILE1, + { TEST_CONFIG_FILE1, Arrays.asList( new XsvTableFeature(1, 3, 4, file1Headers, file1Line1, "XSV_LOCATABLE_TEST_NAME"), new XsvTableFeature(1, 3, 4, file1Headers, file1Line2, "XSV_LOCATABLE_TEST_NAME") ) }, - { TEST_FILE2, + { TEST_CONFIG_FILE2, Arrays.asList( new XsvTableFeature(1, 2, 4, file2Headers, file2Line1, "SECOND_XSV_NAME"), new XsvTableFeature(1, 2, 4, file2Headers, file2Line2, "SECOND_XSV_NAME") ) }, - { TEST_FILE3, + { TEST_CONFIG_FILE3, Arrays.asList( new XsvTableFeature(1, 3, 4, file1Headers, file1Line1, "XSV_LOCATABLE_TEST_NAME"), new XsvTableFeature(1, 3, 4, file1Headers, file1Line2, "XSV_LOCATABLE_TEST_NAME") ) }, - { TEST_FILE4, + { TEST_CONFIG_FILE4, Arrays.asList( new XsvTableFeature(1, 3, 4, file1Headers, file1Line1, "XSV_LOCATABLE_TEST_NAME"), new XsvTableFeature(1, 3, 4, file1Headers, file1Line2, "XSV_LOCATABLE_TEST_NAME") @@ -124,16 +125,16 @@ private Object[][] provideForTestDecode() { @DataProvider private Object[][] provideForTestReadActualHeader() { return new Object[][] { - { TEST_FILE1, file1Headers }, - { TEST_FILE2, file2Headers }, + { TEST_CONFIG_FILE1, file1Headers }, + { TEST_CONFIG_FILE2, file2Headers }, }; } @DataProvider private Object[][] provideForTestGetConfigFilePath() { return new Object[][] { - { TEST_FILE1, IOUtils.getPath(TEST_RESOURCE_DIR + "xsv_locatable_test.config") }, - { TEST_FILE2, IOUtils.getPath(TEST_RESOURCE_DIR + "xsv_locatable_test2.config") }, + { TEST_CONFIG_FILE1, IOUtils.getPath(TEST_RESOURCE_DIR + 
"xsv_locatable_test.config") }, + { TEST_CONFIG_FILE2, IOUtils.getPath(TEST_RESOURCE_DIR + "xsv_locatable_test2.config") }, }; } @@ -146,13 +147,22 @@ private Object[][] provideForTestGetAndValidateConfigFileContents() { configFile1Properties.put("end_column", "4"); configFile1Properties.put("xsv_delimiter", ","); configFile1Properties.put("name", "XSV_LOCATABLE_TEST_NAME"); - + configFile1Properties.put("src_file", "xsv_locatable_test.csv"); + configFile1Properties.put("version", "TESTING"); + configFile1Properties.put("origin_location", "GATK Github Test Area"); + configFile1Properties.put("preprocessing_script", "NA"); + + final Properties configFile2Properties = new Properties(); configFile2Properties.put("contig_column", "1"); configFile2Properties.put("start_column", "2"); configFile2Properties.put("end_column", "4"); configFile2Properties.put("xsv_delimiter", "\t"); configFile2Properties.put("name", "SECOND_XSV_NAME"); + configFile2Properties.put("src_file", "xsv_locatable_test2.tsv"); + configFile2Properties.put("version", "TESTING"); + configFile2Properties.put("origin_location", "GATK Github Test Area"); + configFile2Properties.put("preprocessing_script", "NA"); return new Object[][] { { @@ -166,6 +176,17 @@ private Object[][] provideForTestGetAndValidateConfigFileContents() { }; } + @DataProvider + private Object[][] provideForTestCanDecodeFileChecks() { + return new Object[][] { + { TEST_CONFIG_FILE2, true }, + { TEST_FILE_SAMFILEHEADER_CONFIG_MULTIPLE_COLUMNS, true }, + { TEST_RESOURCE_DIR + "NON_EXISTENT_FILE", false }, + { TEST_RESOURCE_DIR + "xsv_locatable_test_fails_decode_checks.config", false }, + { TEST_RESOURCE_DIR + "xsv_locatable_test_fails_decode_checks2.config", false }, + }; + } + //================================================================================================================== // Tests: @@ -179,7 +200,10 @@ public void testCanDecode(final String filePath, final boolean expected) { private void testDecodeHelper(final 
String filePath, final List expected) { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); if (xsvLocatableTableCodec.canDecode(filePath)) { - try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) { + + final Path pathToDataFile = IOUtils.getPath(xsvLocatableTableCodec.getPathToDataFile(null)); + + try ( final FileInputStream fileInputStream = new FileInputStream(pathToDataFile.toFile())) { // Lots of scaffolding to do reading here: final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); @@ -197,10 +221,10 @@ private void testDecodeHelper(final String filePath, final List Assert.assertEquals(output, expected); } catch ( final FileNotFoundException ex ) { - throw new GATKException("Error - could not find test file: " + filePath, ex); + throw new GATKException("Error - could not find test file: " + pathToDataFile.toUri().toString(), ex); } catch ( final IOException ex ) { - throw new GATKException("Error - IO problem with file " + filePath, ex); + throw new GATKException("Error - IO problem with file " + pathToDataFile.toUri().toString(), ex); } } else { @@ -214,7 +238,10 @@ private void testDecodeHelper(final String filePath, final List public void testDecodeCharsetFailure(final String filePath ) { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); if (xsvLocatableTableCodec.canDecode(filePath)) { - try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) { + + final Path pathToDataFile = IOUtils.getPath(xsvLocatableTableCodec.getPathToDataFile(null)); + + try ( final FileInputStream fileInputStream = new FileInputStream(pathToDataFile.toFile())) { // Lots of scaffolding to do reading here: final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); @@ -228,10 +255,10 @@ public void testDecodeCharsetFailure(final String filePath ) { } } 
catch ( final FileNotFoundException ex ) { - throw new GATKException("Error - could not find test file: " + filePath, ex); + throw new GATKException("Error - could not find test file: " + pathToDataFile.toUri().toString(), ex); } catch ( final IOException ex ) { - throw new GATKException("Error - IO problem with file " + filePath, ex); + throw new GATKException("Error - IO problem with file " + pathToDataFile.toUri().toString(), ex); } } else { @@ -247,11 +274,14 @@ public void testDecode(final String filePath, final List expect // readActualHeader @Test(dataProvider = "provideForTestReadActualHeader") - public void testReadActualHeader(final String filePath, final List expected) { + public void testReadActualHeader(final String configFilePath, final List expected) { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); - if (xsvLocatableTableCodec.canDecode(filePath)) { - try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) { + if (xsvLocatableTableCodec.canDecode(configFilePath)) { + + final Path pathToDataFile = IOUtils.getPath( xsvLocatableTableCodec.getPathToDataFile(null) ); + + try ( final FileInputStream fileInputStream = new FileInputStream(pathToDataFile.toFile())) { // Lots of scaffolding to do reading here: final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); @@ -260,10 +290,10 @@ public void testReadActualHeader(final String filePath, final List expec Assert.assertEquals(xsvLocatableTableCodec.readActualHeader(lineReaderIterator), expected); } catch ( final FileNotFoundException ex ) { - throw new GATKException("Error - could not find test file: " + filePath, ex); + throw new GATKException("Error - could not find test file: " + pathToDataFile.toUri().toString(), ex); } catch ( final IOException ex ) { - throw new GATKException("Error - IO problem with file " + filePath, ex); + throw new GATKException("Error - IO problem with file " + 
pathToDataFile.toUri().toString(), ex); } } else { @@ -280,15 +310,25 @@ public void testGetConfigFilePath(final String filePath, final Path expected) { // getAndValidateConfigFileContents @Test(dataProvider = "provideForTestGetAndValidateConfigFileContents") public void testGetAndValidateConfigFileContents(final Path configFilePath, final Properties expected) { - final Properties properties = XsvLocatableTableCodec.getAndValidateConfigFileContents(configFilePath); + final Pair validityAndPropertiesPair = XsvLocatableTableCodec.getAndValidateConfigFileContentsOnPath(configFilePath, false); + final boolean isValid = validityAndPropertiesPair.getLeft(); + final Properties properties = validityAndPropertiesPair.getRight(); + + Assert.assertEquals( isValid, true ); Assert.assertEquals(properties, expected); } + @Test(dataProvider = "provideForTestCanDecodeFileChecks") + public void testCanDecodeFileChecks( final String configFilePathString, final boolean expected ) { + final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); + Assert.assertEquals( xsvLocatableTableCodec.canDecodeFileChecks(configFilePathString), expected ); + } + @Test public void testRenderSamFileHeaderFromNoPreamble() { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); - final String filePath = TEST_FILE3; - readHeaderOnly(xsvLocatableTableCodec, filePath); + final String configFilePath = TEST_CONFIG_FILE3; + readHeaderOnly(xsvLocatableTableCodec, configFilePath); final SAMFileHeader emptyHeader = xsvLocatableTableCodec.renderSamFileHeader(); @@ -297,20 +337,22 @@ public void testRenderSamFileHeaderFromNoPreamble() { Assert.assertEquals(emptyHeader.getSequenceDictionary().size(), 0); } - private List readHeaderOnly(final XsvLocatableTableCodec xsvLocatableTableCodec, final String filePath) { + private List readHeaderOnly(final XsvLocatableTableCodec xsvLocatableTableCodec, final String configFilePath) { List header = null; - if 
(xsvLocatableTableCodec.canDecode(filePath)) { + if (xsvLocatableTableCodec.canDecode(configFilePath)) { + + final Path pathToConfigFile = IOUtils.getPath( xsvLocatableTableCodec.getPathToDataFile(null) ); - try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) { + try ( final FileInputStream fileInputStream = new FileInputStream(pathToConfigFile.toFile())) { final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream)); header = xsvLocatableTableCodec.readActualHeader(lineReaderIterator); } catch ( final FileNotFoundException ex ) { - throw new GATKException("Error - could not find test file: " + filePath, ex); + throw new GATKException("Error - could not find test file: " + pathToConfigFile.toUri().toString(), ex); } catch ( final IOException ex ) { - throw new GATKException("Error - IO problem with file " + filePath, ex); + throw new GATKException("Error - IO problem with file " + pathToConfigFile.toUri().toString(), ex); } } @@ -320,7 +362,7 @@ private List readHeaderOnly(final XsvLocatableTableCodec xsvLocatableTab @Test public void testRenderSamFileHeaderFromSamFileHeaderPreamble() { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(); - final String filePath = TEST_FILE_SAMFILEHEADER; + final String filePath = TEST_FILE_SAMFILEHEADER_CONFIG; Assert.assertNotNull(readHeaderOnly(xsvLocatableTableCodec, filePath), "Header could not be decoded, but it should have been okay."); final SAMFileHeader populatedHeader = xsvLocatableTableCodec.renderSamFileHeader(); @@ -345,7 +387,7 @@ public void testFalseCanDecodeFromMixedPreambles() { public void testTrueCanDecodeFromMissingColumn() { // Can decode should be true, but the parsing of the header should fail. 
final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(Paths.get(TEST_FILE_SAMFILEHEADER_CONFIG_ERROR_NOTHING_FOUND)); - Assert.assertTrue(xsvLocatableTableCodec.canDecode(TEST_FILE_SAMFILEHEADER)); + Assert.assertTrue(xsvLocatableTableCodec.canDecode(TEST_FILE_SAMFILEHEADER_CONFIG_ERROR_NOTHING_FOUND)); final List header = readHeader(xsvLocatableTableCodec, TEST_FILE_SAMFILEHEADER); } @@ -366,7 +408,7 @@ public Object[][] provideSimpleTestsWithMultipleColumnsInConfig() { @Test(dataProvider = "simpleTestsWithMultipleColumnsInConfig") public void testDecodeMultipleChoiceHeaders(final String configFile, final String xsvFile, final List locatableCols) { final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(Paths.get(configFile)); - Assert.assertTrue(xsvLocatableTableCodec.canDecode(xsvFile)); + Assert.assertTrue(xsvLocatableTableCodec.canDecode(configFile)); final List header = readHeader(xsvLocatableTableCodec, xsvFile); Assert.assertNotNull(header); Assert.assertEquals(xsvLocatableTableCodec.getContigColumn(), locatableCols.get(0)); @@ -397,11 +439,13 @@ public Object[][] provideContigNameErrors() { }; } - @Test(expectedExceptions = UserException.BadInput.class, expectedExceptionsMessageRegExp = ".*is the same as start or end column.*", dataProvider = "contigNameErrors") + @Test(expectedExceptions = UserException.BadInput.class, + expectedExceptionsMessageRegExp = ".*is the same as start or end column.*", + dataProvider = "contigNameErrors") public void testBadContigColumnNames(final String configFile, final String xsvFile) { // Failure should happen when trying to get the header. 
final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(Paths.get(configFile)); - Assert.assertTrue(xsvLocatableTableCodec.canDecode(xsvFile)); + Assert.assertTrue(xsvLocatableTableCodec.canDecode(configFile)); final List header = readHeader(xsvLocatableTableCodec, xsvFile); } @@ -409,7 +453,7 @@ public void testBadContigColumnNames(final String configFile, final String xsvFi public void testNotFoundInList() { // Failure should happen when trying to get the header. final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec(Paths.get(TEST_FILE_SAMFILEHEADER_CONFIG_ERROR_NOTHING_FOUND)); - Assert.assertTrue(xsvLocatableTableCodec.canDecode(TEST_FILE_SAMFILEHEADER)); + Assert.assertTrue(xsvLocatableTableCodec.canDecode(TEST_FILE_SAMFILEHEADER_CONFIG_ERROR_NOTHING_FOUND)); final List header = readHeader(xsvLocatableTableCodec, TEST_FILE_SAMFILEHEADER); } } diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt new file mode 100644 index 00000000000..a5d8a3f4d51 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/MANIFEST.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8927a3a6d7ee7e88cad9c53b629d6935d1d21c7cb4192de3efcd584fb00bcb5c +size 134 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config new file mode 100755 index 00000000000..b8cb3fd58ad --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg19/achilles.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e2b2287f413c3e2e29f8e3b141364aea73c69366d24f587a5f95590479efee +size 1638 diff --git 
a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config new file mode 100755 index 00000000000..b8cb3fd58ad --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/achilles/hg38/achilles.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0e2b2287f413c3e2e29f8e3b141364aea73c69366d24f587a5f95590479efee +size 1638 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config new file mode 100644 index 00000000000..60ea24e9a80 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg19/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85bcafc5971713c72b8e6e00a6ead025d9b5cb843981efb55795953561bb341 +size 1903 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config new file mode 100644 index 00000000000..ea98e97e2bb --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gencode/hg38/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ccffb3789c983b21b7a4deac3846ba01e1dfdfc920665abbe8f4614e37e02d +size 1904 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config new file mode 100644 index 00000000000..edc4f58afb9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/gnomAD/hg19/gnomAD.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:afadb719be43a2e25316cbda1d342711e5c7643aea0cc42324d1ff9720dc9d63 +size 1719 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config new file mode 100755 index 00000000000..7664555adb6 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg19/oreganno.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e06ee6b97cc7a3a22b8079039b738710cf09c432696956bc6743e4ae0e9fc04 +size 1754 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config new file mode 100755 index 00000000000..54d817d517b --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud/oreganno/hg38/oreganno.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5817cc52bd85d5aaafa7b5182273b711cfef726f4707f6e2cc7c4e5d3b452cb6 +size 1754 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config new file mode 100755 index 00000000000..0f4485649d1 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b180dfb0e124158e4f635f73f7fd6e38667b550aef29cdc3d95fe38e1016afed +size 1592 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt new file mode 100755 index 00000000000..fb1b9261983 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg19/achilles_lineage_results.import.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9646f97f2309f1c25022c2df1e886ca2d518d80ba5aa6e2945542ad1b7bef635 +size 63089 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/achilles/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt new file mode 100755 index 00000000000..56a4e792d10 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/CancerGeneCensus_Table_1_full_2012-03-15.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:614aa76ebdeccdcac3930056c576255654aac6ca072b41b4e0c7b6a92586ea77 +size 59836 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config new file mode 100755 index 00000000000..a26585a23c3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg19/cancer_gene_census.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a923a6ef39c1b0ba458df4ba23468648a0cdd4051c8a31d0bade136538d13dba +size 1605 diff --git 
a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cancer_gene_census/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config new file mode 100755 index 00000000000..3a68dfc279b --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc481041857d8eca45094f8113373cd8bd52886039c8d1e63ba437935e2644e4 +size 1593 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv new file mode 100644 index 00000000000..77faa1b6557 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b471771ca94ebad8cf36809c7ca3b9db74d60b4fe76eddbeebb28493f6a92c +size 3106205 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx new file mode 100644 index 00000000000..7065462ea2f --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/clinvar/hg19/clinvar_hgmd.tsv.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7a9ae0670204af9ca985a764d2f28cc40041bd8949de394fbad223beb1ff3801 +size 10135843 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db new file mode 100644 index 00000000000..9231c0664b9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/CosmicTest.db @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb5c0581fa1155956448433762e234a506ada1e03ed1719808d30ea30d8678f +size 176128 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config new file mode 100755 index 00000000000..36f4c550b8a --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic/hg19/cosmic.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cde65ff9369789f407da94e9ab70f7bfb40a4f2d06dbacd652ea9bbadd1d331 +size 1629 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config new file mode 100755 index 00000000000..fb7b2413392 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e4a2256b8d4adef2a39481132527a74985e613a9887c9c4c760c68d926f09f +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv new file mode 100644 index 
00000000000..03df2528fa8 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg19/cosmic_fusion.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38425fb04d7a3baa7315fd6c93e59a54a012fdfe3694bfed500457cd96f97cf9 +size 211308 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config new file mode 100755 index 00000000000..900fc5cd277 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8178ff8a9feee459a3c3562103e2345135091c9a857812b1eaf2a42b355b8a8c +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv new file mode 100644 index 00000000000..03df2528fa8 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_fusion/hg38/cosmic_fusion.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38425fb04d7a3baa7315fd6c93e59a54a012fdfe3694bfed500457cd96f97cf9 +size 211308 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config new file mode 100755 index 00000000000..0ce00e894a3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce23f882b58eed016200f27e3936bf8760ff63ed983b9df6c3a038b197db7e88 
+size 1662 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv new file mode 100644 index 00000000000..574da93b4df --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg19/cosmic_tissue.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dcd55433453c500453efdfa7f8d555a0e48e00881bf31e997443c5423ef2298 +size 2331268 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config new file mode 100755 index 00000000000..e20d37c2e6b --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5833bdf72148afaec4b193ec478e9c95c1cac841652fbe69096e96d21c171c73 +size 1662 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv new file mode 100644 index 00000000000..475ef3bdad4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/cosmic_tissue/hg38/cosmic_tissue.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdfb78a918c321b70b93961c47002f36fe34fcd82590a9a551cb8d585cfc3e81 +size 2269588 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config new file mode 100644 index 00000000000..c937ad318c3 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSNP.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5a616eaae716118c8d077a36dcefc1ba2ead19eebb38d84c63e692694e02b8 +size 1740 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz new file mode 100644 index 00000000000..4f363874872 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f7de0f377a77dfb68f6efd508b52a4949c27ff509eb440211a2c57569b2f63 +size 30293528 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi new file mode 100644 index 00000000000..64c179a311e --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg19/dbSnp.regressionTestSet.hg19.vcf.gz.tbi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8e18795653074cf54b163cd27f11004b372c3ad7bb5456a7b6048f1bb746aa +size 18769 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config new file mode 100644 index 00000000000..8e598bd310d --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSNP.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0ccf83a697ef6e8368791d6be855bab3a6e750fd8a460da529a96625ed4bd6 +size 1740 diff --git 
a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz new file mode 100644 index 00000000000..498869b31c4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0451833249f65aa3da8392d6eb92036e90f498ce9f5a6f55fb8e316bd694d18 +size 1963469 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi new file mode 100644 index 00000000000..117808dd212 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dbSnp/hg38/dbSnp.regressionTestSet.hg38.vcf.gz.tbi @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c514fcfc79cec032ba9bc9801652c41f69fc20da02975150eca1ffe0ef78c542 +size 2276 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv new file mode 100644 index 00000000000..1d233dbe46e --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.20171221T103938.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428b27f4e2a8010a515e4e6642371548ca8dc0d550b7759442bd5cf500e9de77 +size 9650 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config new file mode 100755 index 00000000000..e85472f7f0c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg19/dnaRepairGenes.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfdbe126b7989dcafcede731276e7c9ace3744f82a85de55b90b0119608e2a67 +size 1694 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/dna_repair_genes/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv new file mode 100755 index 00000000000..e60b5f49069 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/Familial_Cancer_Genes.no_dupes.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8215ec2926e38de6594b30adeda17698a0c26ef67aa9f40b0eccc4fc2cc2b41 +size 43496 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config new file mode 100755 index 00000000000..cbb15cc4bd9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg19/familial.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b0ece241594e554015aac0324bb3efe81d11b120009bd1d73ad98ae5565553 
+size 1607 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/familial/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config new file mode 100755 index 00000000000..0960de2f558 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90f80d36fa98d3f7721478b7d3f54d0c1439af5ef43a30c1f9469adc4291c0e +size 1751 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf new file mode 100644 index 00000000000..e184399e8f4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0710dbeef221a115583eab64868519f2aca02bc4148bb3ed76f1afd3046ac21f +size 51415620 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx new file mode 100644 index 00000000000..d408be70fea --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.gtf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea2c637c03561cedfc663c2b6be1edfa61f7c64cbcfd19da8232b3235e1eace +size 21663 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict new file mode 100644 index 00000000000..f832bbca089 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.dict @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e31e8b23f306feeec4a7148a5026ab13fcf998b0cfb6c2fdf2dcba155f1397 +size 1324009 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa new file mode 100644 index 00000000000..8980366bc81 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb89fec0cd29e2a1705be30696a179f6a4d06e78eff2e41c09fb469023e9b67 +size 9532749 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai new file mode 100644 index 00000000000..0e09d0cc8e8 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg19/gencode.v19.regressionTestVariantSet.pc_transcripts.fa.fai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb405f4aa02e61385350b095ce23246c248e99f2c852f885b377a99f2f524c0 +size 584862 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config new file mode 100755 index 00000000000..a41677ce738 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364d9872e9b6ed83df0287e0157785b826c229070906638915b3a9da45e56e4e +size 1751 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf new file mode 100644 index 00000000000..7971c95ccf4 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3bcc4aa67a61b654a8df5a32237b52fb09f1a7f5efd15f1bc1824919003b8ae +size 285039 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx new file mode 100644 index 00000000000..7c7d4459433 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.gtf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e8121fd06e322f8de75c87cf80d32861c1b69cf307cc334d3f7b31813b429a17 +size 466 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict new file mode 100644 index 00000000000..cfdfb3daa48 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.dict @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b93074618a47bb22af29a2889244c3c46a789c8ff39e44f03c03c3e39dae4d +size 6382 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa new file mode 100644 index 00000000000..c967633b1c2 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f733d007767f24eb83a4cffc1249ba44a73557e26a9262ad9ace311bd5404959 +size 60536 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai new file mode 100644 index 00000000000..237d3b33414 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode/hg38/gencode.v28.regressionTestVariantSet.pc_transcripts.fa.fai @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3ab896e07ba749af55952c3e7427dc4bf93e6dc250cd155f155f591e2d1f0440 +size 2811 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config new file mode 100755 index 00000000000..5e7f00ffdc5 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2186e0172291c105deb0f6f5b5c56411cd1adccb1520a21301a9f560d18840aa +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv new file mode 100644 index 00000000000..8e516ab12c7 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg19/gencode_xhgnc_v75_37.hg19.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a595a1a2b26426334f97a0b81c2a5869888271c1968038177498d3f2904ec2b +size 21521419 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config new file mode 100755 index 00000000000..7ce822c33ea --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0bd76a1397520e3ea7a07ae2e009af378493e2184581d0dc4cf5279ea27bc8 +size 1645 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv new file mode 100644 index 00000000000..ed4123df4e3 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xhgnc/hg38/gencode_xhgnc_v90_38.hg38.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:847ff8a6ffbb33365e6ac4b3256924873102e5adc3a75ff43d9d7fa422c7dd95 +size 20948186 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config new file mode 100755 index 00000000000..cad5f853222 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da8f13f064f0ff7ba2be960eb1fa32cbae050ee2467334441d73a5e2ba70345 +size 1646 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv new file mode 100644 index 00000000000..5122b33f680 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg19/gencode_xrefseq_v75_37.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546a04ca46574dddd12b5293d7b1e14732e7518e60122430c9a782d8bc71d3b8 +size 2268385 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config new file mode 100755 index 00000000000..da5ff5476a4 --- /dev/null +++ 
b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8df0c12dfcb4d3d7bc4a21358b67e12053e1f11e900b7c11ae8307a03634f07 +size 1646 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv new file mode 100644 index 00000000000..04bc2532560 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gencode_xrefseq/hg38/gencode_xrefseq_v90_38.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6b2dae5d73858dd89b029e6544a4b4681319a853209e227f0d64d7b3c05350 +size 3897642 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config new file mode 100644 index 00000000000..edc4f58afb9 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/gnomAD/hg19/gnomAD.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afadb719be43a2e25316cbda1d342711e5c7643aea0cc42324d1ff9720dc9d63 +size 1719 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config new file mode 100755 index 00000000000..217846f61e1 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c9c1d90f2d4269c8ace638a5724e426e66812a0409726d018be0bd305cc729 +size 1609 diff --git 
a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv new file mode 100644 index 00000000000..cac248e8f8a --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg19/hgnc_download_Nov302017.tsv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c21b9ebd23b1a65b0994f8e20e1516b6926dfdc185f28aff8689bdf52689c2f +size 12648820 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/hgnc/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config new file mode 100755 index 00000000000..4fc3dca5c09 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670f8f8192e88603902fff6d31678f2eab0fdbf63305bf943d18070b80a7752b +size 1625 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv new file mode 100755 index 00000000000..c9ff0b066b5 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg19/simple_uniprot_Dec012014.tsv @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:0d082f1c506f166e8e1f5380e3594b875d79d3703eabce3caf663b56b2b0be64 +size 9675839 diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 new file mode 120000 index 00000000000..3f78ab9361c --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/simple_uniprot/hg38 @@ -0,0 +1 @@ +hg19 \ No newline at end of file diff --git a/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config new file mode 100755 index 00000000000..f5ac131f8e5 --- /dev/null +++ b/src/test/resources/large/funcotator/funcotator_dataSources_cloud_gnomad/template.config @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa293cf1d831bf414074a0e9713bbec437bba1b65172c2cc0fbba1683804834 +size 1557 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-index-cols.config b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-index-cols.config index f1fdbc028ea..ccc6eeaab02 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-index-cols.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-index-cols.config @@ -2,4 +2,8 @@ contig_column = 0 start_column = 1 end_column = 2 xsv_delimiter = \t -name = \ No newline at end of file +name = +version = 0.1 +src_file = annotated-interval-many-columns.seg +origin_location = gatk4 github +preprocessing_script = NA diff --git 
a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-named-cols.config b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-named-cols.config index 18c0b73dbf0..f794724ce62 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-named-cols.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/annotated-interval-collection-named-cols.config @@ -2,4 +2,8 @@ contig_column = CONTIG start_column = START end_column = END xsv_delimiter = \t -name = \ No newline at end of file +name = +version = 0.1 +src_file = annotated-interval-many-columns.seg +origin_location = gatk4 github +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/old-header.config b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/old-header.config index b3d6bda81a3..1f27803bee9 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/old-header.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/copynumber/utils/annotatedinterval/old-header.config @@ -2,4 +2,8 @@ contig_column = Chromosome start_column = Start end_column = End xsv_delimiter = \t -name = \ No newline at end of file +name = +version = 0.1 +src_file = simple-annotated-interval-writer-replacement-header-comments.seg +origin_location = gatk4 github +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config index 862f93552bd..6ac0839b452 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config +++ 
b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/maf.config @@ -2,4 +2,8 @@ contig_column = Chromosome start_column = Start_Position end_column = End_Position xsv_delimiter = \t -name = \ No newline at end of file +name = +src_file = +version = +origin_location = +preprocessing_script = diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config index c347ab6a75f..4970d1f8e93 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test.config @@ -1,5 +1,50 @@ +name = XSV_LOCATABLE_TEST_NAME +version = TEST +src_file = xsv_locatable_test.csv +origin_location = LocatableXsvFuncotationFactoryUnitTest.java +preprocessing_script = + +# Supported types: +# simpleXSV -- Arbitrary separated value table (e.g. CSV), keyed off Gene Name OR Transcript ID +# locatableXSV -- Arbitrary separated value table (e.g. CSV), keyed off a genome location +# gencode -- Custom datasource class for GENCODE +# cosmic -- Custom datasource class for COSMIC +# vcf -- Custom datasource class for Variant Call Format (VCF) files +type = locatableXSV + +# Required field for GENCODE files. +# Path to the FASTA file from which to load the sequences for GENCODE transcripts: +gencode_fasta_path = + +# Required field for simpleXSV files. +# Valid values: +# GENE_NAME +# TRANSCRIPT_ID +xsv_key = + +# Required field for simpleXSV files. +# The 0-based index of the column containing the key on which to match +xsv_key_column = + +# Required field for simpleXSV AND locatableXSV files. +# The delimiter by which to split the XSV file into columns. +xsv_delimiter = , + +# Required field for simpleXSV files. 
+# Whether to permissively match the number of columns in the header and data rows +# Valid values: +# true +# false +xsv_permissive_cols = + +# Required field for locatableXSV files. +# The 0-based index of the column containing the contig for each row contig_column = 1 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the start position for each row start_column = 3 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the end position for each row end_column = 4 -xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config index 42ff6703e18..9bedd0a205f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test2.config @@ -1,5 +1,50 @@ +name = SECOND_XSV_NAME +version = TEST +src_file = xsv_locatable_test2.csv +origin_location = LocatableXsvFuncotationFactoryUnitTest.java +preprocessing_script = + +# Supported types: +# simpleXSV -- Arbitrary separated value table (e.g. CSV), keyed off Gene Name OR Transcript ID +# locatableXSV -- Arbitrary separated value table (e.g. CSV), keyed off a genome location +# gencode -- Custom datasource class for GENCODE +# cosmic -- Custom datasource class for COSMIC +# vcf -- Custom datasource class for Variant Call Format (VCF) files +type = locatableXSV + +# Required field for GENCODE files. +# Path to the FASTA file from which to load the sequences for GENCODE transcripts: +gencode_fasta_path = + +# Required field for simpleXSV files. +# Valid values: +# GENE_NAME +# TRANSCRIPT_ID +xsv_key = + +# Required field for simpleXSV files. 
+# The 0-based index of the column containing the key on which to match +xsv_key_column = + +# Required field for simpleXSV AND locatableXSV files. +# The delimiter by which to split the XSV file into columns. +xsv_delimiter = , + +# Required field for simpleXSV files. +# Whether to permissively match the number of columns in the header and data rows +# Valid values: +# true +# false +xsv_permissive_cols = + +# Required field for locatableXSV files. +# The 0-based index of the column containing the contig for each row contig_column = 1 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the start position for each row start_column = 2 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the end position for each row end_column = 4 -xsv_delimiter = , -name = SECOND_XSV_NAME \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config index bbc2245d0fa..2121f2762f8 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/xsv_locatable_test3.config @@ -1,5 +1,50 @@ -contig_column = 1 +name = XSV_LOCATABLE_TEST_NAME +version = TEST +src_file = xsv_locatable_test3.tsv +origin_location = LocatableXsvFuncotationFactoryUnitTest.java +preprocessing_script = + +# Supported types: +# simpleXSV -- Arbitrary separated value table (e.g. CSV), keyed off Gene Name OR Transcript ID +# locatableXSV -- Arbitrary separated value table (e.g. CSV), keyed off a genome location +# gencode -- Custom datasource class for GENCODE +# cosmic -- Custom datasource class for COSMIC +# vcf -- Custom datasource class for Variant Call Format (VCF) files +type = locatableXSV + +# Required field for GENCODE files. 
+# Path to the FASTA file from which to load the sequences for GENCODE transcripts: +gencode_fasta_path = + +# Required field for simpleXSV files. +# Valid values: +# GENE_NAME +# TRANSCRIPT_ID +xsv_key = + +# Required field for simpleXSV files. +# The 0-based index of the column containing the key on which to match +xsv_key_column = + +# Required field for simpleXSV AND locatableXSV files. +# The delimiter by which to split the XSV file into columns. +xsv_delimiter = \t + +# Required field for simpleXSV files. +# Whether to permissively match the number of columns in the header and data rows +# Valid values: +# true +# false +xsv_permissive_cols = + +# Required field for locatableXSV files. +# The 0-based index of the column containing the contig for each row +contig_column = 1 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the start position for each row start_column = 3 + +# Required field for locatableXSV files. +# The 0-based index of the column containing the end position for each row end_column = 4 -xsv_delimiter = \t -name = XSV_LOCATABLE_TEST_NAME diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test.config index c347ab6a75f..4144b8795d5 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test.config @@ -2,4 +2,8 @@ contig_column = 1 start_column = 3 end_column = 4 xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file +name = XSV_LOCATABLE_TEST_NAME +src_file = xsv_locatable_test.csv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git 
a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test2.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test2.config index f0336d4514c..69113222e2f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test2.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test2.config @@ -2,4 +2,8 @@ contig_column = 1 start_column = 2 end_column = 4 xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +src_file = xsv_locatable_test2.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test3.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test3.config index 03f89c46d30..6dc21575ed6 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test3.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test3.config @@ -2,4 +2,8 @@ contig_column = chr start_column = start end_column = end xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file +name = XSV_LOCATABLE_TEST_NAME +src_file = xsv_locatable_test3.csv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test4.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test4.config index 02985002e82..25dce88ee94 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test4.config +++ 
b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test4.config @@ -2,4 +2,8 @@ contig_column = Chromosome,chr,CONTIG start_column = start,START end_column = end,END xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file +name = XSV_LOCATABLE_TEST_NAME +src_file = xsv_locatable_test4.csv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_error_mixed_preamble.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_error_mixed_preamble.config index f0336d4514c..120f31e8636 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_error_mixed_preamble.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_error_mixed_preamble.config @@ -2,4 +2,8 @@ contig_column = 1 start_column = 2 end_column = 4 xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +src_file = xsv_locatable_test_error_mixed_preamble.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_fails_decode_checks.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_fails_decode_checks.config new file mode 100644 index 00000000000..11aea8139c8 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_fails_decode_checks.config @@ -0,0 +1,9 @@ +contig_column = Chromosome,chr,CONTIG +start_column = start,START +end_column = end,END +xsv_delimiter = , +name = XSV_LOCATABLE_TEST_NAME +src_file = DOES_NOT_EXIST_FILE.iaofe +version = TESTING 
+origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_fails_decode_checks2.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_fails_decode_checks2.config new file mode 100644 index 00000000000..f6c4ce57802 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_fails_decode_checks2.config @@ -0,0 +1,8 @@ +contig_column = Chromosome,chr,CONTIG +start_column = start,START +end_column = end,END +xsv_delimiter = , +name = XSV_LOCATABLE_TEST_NAME +src_file = DOES_NOT_EXIST_FILE.iaofe +version = TESTING +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config index c347ab6a75f..1a0e36f073b 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config @@ -1,5 +1,9 @@ -contig_column = 1 -start_column = 3 -end_column = 4 +contig_column = Chromosome,chr,CONTIG +start_column = start,START +end_column = end,END xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file +name = XSV_LOCATABLE_TEST_NAME +src_file = xsv_locatable_test_mixed_encodings.csv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend.config index 
a1e7477da9f..d45e847e015 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend.config @@ -2,4 +2,8 @@ contig_column = Chromsome,Contig,chr,CONTIG start_column = pos,Position,start end_column = pos,Position,end xsv_delimiter = , -name = XSV_LOCATABLE_TEST_NAME \ No newline at end of file +name = XSV_LOCATABLE_TEST_NAME +src_file = xsv_locatable_test_same_startend.csv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend_no_name.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend_no_name.config index 5ee941b1617..b0129097d2f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend_no_name.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_same_startend_no_name.config @@ -2,4 +2,8 @@ contig_column = Chromsome,Contig,chr,CONTIG start_column = pos,Position,start end_column = pos,Position,end xsv_delimiter = , -name = \ No newline at end of file +name = +src_file = xsv_locatable_test_same_startend.csv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader.config index f0336d4514c..82d7b0dc77e 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader.config +++ 
b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader.config @@ -2,4 +2,8 @@ contig_column = 1 start_column = 2 end_column = 4 xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +src_file = xsv_locatable_test_samfileheader.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_end.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_end.config index 328645754c1..1f4e564f6f2 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_end.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_end.config @@ -2,4 +2,9 @@ contig_column = end,CONTIG,chr,chrom,seqName,Chromosome,contig,chromosome,Chrom, start_column = START,start,start_position,Start_position,Start_Position,position,target_start end_column = END,end,end_position,End_position,End_Position,position,target_stop xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +# Doesn't matter as long as the file exists: +src_file = xsv_locatable_test_samfileheader.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_start.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_start.config index a6bee32ed56..cfb941734d8 100644 --- 
a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_start.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_contig_equals_start.config @@ -2,4 +2,9 @@ contig_column = start,CONTIG,chr,chrom,seqName,Chromosome,contig,chromosome,Chro start_column = START,start,start_position,Start_position,Start_Position,position,target_start end_column = END,end,end_position,End_position,End_Position,position,target_stop xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +# Doesn't matter as long as the file exists: +src_file = xsv_locatable_test_samfileheader.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_nothing_found.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_nothing_found.config index 8c5ed23f10f..7c1576403c4 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_nothing_found.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_error_nothing_found.config @@ -2,4 +2,8 @@ contig_column = CONTIG,chr,chrom,seqName,Chromosome,contig,chromosome,Chrom,CHRO start_column = START,start_position,Start_position,Start_Position,position,target_start end_column = END,end,end_position,End_position,End_Position,position,target_stop xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +src_file = xsv_locatable_test_samfileheader.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git 
a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns.config index 1b5f0480b71..0cd1ab4ea91 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns.config @@ -2,4 +2,8 @@ contig_column = CONTIG,chr,chrom,seqName,Chromosome,contig,chromosome,Chrom,CHRO start_column = START,start,start_position,Start_position,Start_Position,position,target_start end_column = END,end,end_position,End_position,End_Position,position,target_stop xsv_delimiter = \t -name = SECOND_XSV_NAME \ No newline at end of file +name = SECOND_XSV_NAME +src_file = xsv_locatable_test_samfileheader.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns_no_name.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns_no_name.config index f7f5b7d5e3a..5c1be04701e 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns_no_name.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_multiple_columns_no_name.config @@ -2,4 +2,8 @@ contig_column = CONTIG,chr,chrom,seqName,Chromosome,contig,chromosome,Chrom,CHRO start_column = START,start,start_position,Start_position,Start_Position,position,target_start end_column = 
END,end,end_position,End_position,End_Position,position,target_stop xsv_delimiter = \t -name = \ No newline at end of file +name = +src_file = xsv_locatable_test_samfileheader.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_no_name.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_no_name.config index 15e19e1514a..0f2fe380f42 100644 --- a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_no_name.config +++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_samfileheader_no_name.config @@ -2,4 +2,8 @@ contig_column = 1 start_column = 2 end_column = 4 xsv_delimiter = \t -name = \ No newline at end of file +name = +src_file = xsv_locatable_test_samfileheader_no_name.tsv +version = TESTING +origin_location = GATK Github Test Area +preprocessing_script = NA