From ea6ae8273573ebf5c024cc6b54a10a39243f5865 Mon Sep 17 00:00:00 2001 From: samuelklee Date: Fri, 22 Sep 2023 13:01:19 -0400 Subject: [PATCH] Performed a round of ablation on new annotation-based filtering tools. (#8131) * Performed a round of ablation on new annotation-based filtering tools. * Removed Javadoc tags unsupported by Barclay in VETS tool documentation and fixed other minor documentation issues. --- .../run_vcf_site_level_filtering_wdl.sh | 4 - .../vcf_site_level_filtering_pos_neg.json | 19 -- .../JointVcfFiltering.wdl | 17 +- .../scalable/ExtractVariantAnnotations.java | 69 ++-- .../LabeledVariantAnnotationsWalker.java | 26 +- .../scalable/ScoreVariantAnnotations.java | 78 ++--- .../TrainVariantAnnotationsModel.java | 311 +++++------------- .../data/LabeledVariantAnnotationsDatum.java | 4 +- .../modeling/BGMMVariantAnnotationsModel.java | 1 + ...ava => PythonVariantAnnotationsModel.java} | 16 +- ...va => PythonVariantAnnotationsScorer.java} | 6 +- .../modeling/VariantAnnotationsModel.java | 3 +- .../modeling/VariantAnnotationsScorer.java | 17 - ...ractVariantAnnotationsIntegrationTest.java | 11 - ...coreVariantAnnotationsIntegrationTest.java | 10 +- ...ariantAnnotationsModelIntegrationTest.java | 111 ++----- ...in.snpIndel.posNeg.IF.score.snp.annot.hdf5 | 3 - ...n.snpIndel.posNeg.IF.score.snp.scores.hdf5 | 3 - ...sUn.train.snpIndel.posNeg.IF.score.snp.vcf | 3 - ...train.snpIndel.posNeg.IF.score.snp.vcf.idx | 3 - ...pIndel.posNeg.IF.score.snpIndel.annot.hdf5 | 3 - ...Indel.posNeg.IF.score.snpIndel.scores.hdf5 | 3 - ...rain.snpIndel.posNeg.IF.score.snpIndel.vcf | 3 - ....snpIndel.posNeg.IF.score.snpIndel.vcf.idx | 3 - ...n.snpIndel.posOnly.IF.score.snp.annot.hdf5 | 3 + ....snpIndel.posOnly.IF.score.snp.scores.hdf5 | 3 + ...Un.train.snpIndel.posOnly.IF.score.snp.vcf | 3 + ...rain.snpIndel.posOnly.IF.score.snp.vcf.idx | 3 + ...Indel.posOnly.IF.score.snpIndel.annot.hdf5 | 3 + ...ndel.posOnly.IF.score.snpIndel.scores.hdf5 | 3 + 
...ain.snpIndel.posOnly.IF.score.snpIndel.vcf | 3 + ...snpIndel.posOnly.IF.score.snpIndel.vcf.idx | 3 + ...in.snpIndel.posNeg.IF.score.snp.annot.hdf5 | 3 - ...n.snpIndel.posNeg.IF.score.snp.scores.hdf5 | 3 - ...sUn.train.snpIndel.posNeg.IF.score.snp.vcf | 3 - ...train.snpIndel.posNeg.IF.score.snp.vcf.idx | 3 - ...pIndel.posNeg.IF.score.snpIndel.annot.hdf5 | 3 - ...Indel.posNeg.IF.score.snpIndel.scores.hdf5 | 3 - ...rain.snpIndel.posNeg.IF.score.snpIndel.vcf | 3 - ....snpIndel.posNeg.IF.score.snpIndel.vcf.idx | 3 - ...n.snpIndel.posOnly.IF.score.snp.annot.hdf5 | 3 + ....snpIndel.posOnly.IF.score.snp.scores.hdf5 | 3 + ...Un.train.snpIndel.posOnly.IF.score.snp.vcf | 3 + ...rain.snpIndel.posOnly.IF.score.snp.vcf.idx | 3 + ...Indel.posOnly.IF.score.snpIndel.annot.hdf5 | 3 + ...ndel.posOnly.IF.score.snpIndel.scores.hdf5 | 3 + ...ain.snpIndel.posOnly.IF.score.snpIndel.vcf | 3 + ...snpIndel.posOnly.IF.score.snpIndel.vcf.idx | 3 + ...n.snp.posNeg.IF.snp.calibrationScores.hdf5 | 3 - ...rain.snp.posNeg.IF.snp.negative.scorer.pkl | 3 - ...l.posUn.train.snp.posNeg.IF.snp.scorer.pkl | 3 - ...rain.snp.posNeg.IF.snp.trainingScores.hdf5 | 3 - ...ain.snp.posNeg.IF.snp.unlabeledScores.hdf5 | 3 - ...IFDifferentSeed.snp.calibrationScores.hdf5 | 3 - ...eg.IFDifferentSeed.snp.negative.scorer.pkl | 3 - ....snp.posNeg.IFDifferentSeed.snp.scorer.pkl | 3 - ...eg.IFDifferentSeed.snp.trainingScores.hdf5 | 3 - ...g.IFDifferentSeed.snp.unlabeledScores.hdf5 | 3 - ...del.posNeg.IF.indel.calibrationScores.hdf5 | 3 - ...pIndel.posNeg.IF.indel.negative.scorer.pkl | 3 - ....train.snpIndel.posNeg.IF.indel.scorer.pkl | 3 - ...pIndel.posNeg.IF.indel.trainingScores.hdf5 | 3 - ...Indel.posNeg.IF.indel.unlabeledScores.hdf5 | 3 - ...Indel.posNeg.IF.snp.calibrationScores.hdf5 | 3 - ...snpIndel.posNeg.IF.snp.negative.scorer.pkl | 3 - ...Un.train.snpIndel.posNeg.IF.snp.scorer.pkl | 3 - ...snpIndel.posNeg.IF.snp.trainingScores.hdf5 | 3 - ...npIndel.posNeg.IF.snp.unlabeledScores.hdf5 | 3 - 
...DifferentSeed.indel.calibrationScores.hdf5 | 3 - ....IFDifferentSeed.indel.negative.scorer.pkl | 3 - ...el.posNeg.IFDifferentSeed.indel.scorer.pkl | 3 - ....IFDifferentSeed.indel.trainingScores.hdf5 | 3 - ...IFDifferentSeed.indel.unlabeledScores.hdf5 | 3 - ...IFDifferentSeed.snp.calibrationScores.hdf5 | 3 - ...eg.IFDifferentSeed.snp.negative.scorer.pkl | 3 - ...ndel.posNeg.IFDifferentSeed.snp.scorer.pkl | 3 - ...eg.IFDifferentSeed.snp.trainingScores.hdf5 | 3 - ...g.IFDifferentSeed.snp.unlabeledScores.hdf5 | 3 - ...n.snp.posNeg.IF.snp.calibrationScores.hdf5 | 3 - ...rain.snp.posNeg.IF.snp.negative.scorer.pkl | 3 - ...l.posUn.train.snp.posNeg.IF.snp.scorer.pkl | 3 - ...rain.snp.posNeg.IF.snp.trainingScores.hdf5 | 3 - ...ain.snp.posNeg.IF.snp.unlabeledScores.hdf5 | 3 - ...IFDifferentSeed.snp.calibrationScores.hdf5 | 3 - ...eg.IFDifferentSeed.snp.negative.scorer.pkl | 3 - ....snp.posNeg.IFDifferentSeed.snp.scorer.pkl | 3 - ...eg.IFDifferentSeed.snp.trainingScores.hdf5 | 3 - ...g.IFDifferentSeed.snp.unlabeledScores.hdf5 | 3 - ...del.posNeg.IF.indel.calibrationScores.hdf5 | 3 - ...pIndel.posNeg.IF.indel.negative.scorer.pkl | 3 - ....train.snpIndel.posNeg.IF.indel.scorer.pkl | 3 - ...pIndel.posNeg.IF.indel.trainingScores.hdf5 | 3 - ...Indel.posNeg.IF.indel.unlabeledScores.hdf5 | 3 - ...Indel.posNeg.IF.snp.calibrationScores.hdf5 | 3 - ...snpIndel.posNeg.IF.snp.negative.scorer.pkl | 3 - ...Un.train.snpIndel.posNeg.IF.snp.scorer.pkl | 3 - ...snpIndel.posNeg.IF.snp.trainingScores.hdf5 | 3 - ...npIndel.posNeg.IF.snp.unlabeledScores.hdf5 | 3 - ...DifferentSeed.indel.calibrationScores.hdf5 | 3 - ....IFDifferentSeed.indel.negative.scorer.pkl | 3 - ...el.posNeg.IFDifferentSeed.indel.scorer.pkl | 3 - ....IFDifferentSeed.indel.trainingScores.hdf5 | 3 - ...IFDifferentSeed.indel.unlabeledScores.hdf5 | 3 - ...IFDifferentSeed.snp.calibrationScores.hdf5 | 3 - ...eg.IFDifferentSeed.snp.negative.scorer.pkl | 3 - ...ndel.posNeg.IFDifferentSeed.snp.scorer.pkl | 3 - 
...eg.IFDifferentSeed.snp.trainingScores.hdf5 | 3 - ...g.IFDifferentSeed.snp.unlabeledScores.hdf5 | 3 - 108 files changed, 269 insertions(+), 710 deletions(-) delete mode 100644 scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_pos_neg.json rename src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/{PythonSklearnVariantAnnotationsModel.java => PythonVariantAnnotationsModel.java} (81%) rename src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/{PythonSklearnVariantAnnotationsScorer.java => PythonVariantAnnotationsScorer.java} (92%) delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf create mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf create mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 delete mode 100644 
src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 
delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 delete mode 100644 src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 diff --git a/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh b/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh index f25ad6bb191..27d5e522510 100644 --- a/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh +++ b/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh @@ -28,7 +28,6 @@ fi echo "Docker build done ==========" sed -r "s/__GATK_DOCKER__/broadinstitute\/gatk\:$HASH_TO_USE/g" $CROMWELL_TEST_DIR/vcf_site_level_filtering.json >$WORKING_DIR/vcf_site_level_filtering_mod.json -sed -r "s/__GATK_DOCKER__/broadinstitute\/gatk\:$HASH_TO_USE/g" $CROMWELL_TEST_DIR/vcf_site_level_filtering_pos_neg.json >$WORKING_DIR/vcf_site_level_filtering_pos_neg_mod.json echo "Running Filtering WDL through cromwell" @@ -41,6 +40,3 @@ done FIN cat $WORKING_DIR/vcf_site_level_filtering_mod.json java -jar $CROMWELL_JAR run 
$WDL_DIR/JointVcfFiltering.wdl -i $WORKING_DIR/vcf_site_level_filtering_mod.json - -cat $WORKING_DIR/vcf_site_level_filtering_pos_neg_mod.json -java -jar $CROMWELL_JAR run $WDL_DIR/JointVcfFiltering.wdl -i $WORKING_DIR/vcf_site_level_filtering_pos_neg_mod.json diff --git a/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_pos_neg.json b/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_pos_neg.json deleted file mode 100644 index ee2d116e1d4..00000000000 --- a/scripts/vcf_site_level_filtering_cromwell_tests/vcf_site_level_filtering_pos_neg.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "JointVcfFiltering.gatk_docker": "__GATK_DOCKER__", - "JointVcfFiltering.input_vcfs": [ - "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.chr21.avg.vcf.gz", - "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.chr22.avg.vcf.gz", - "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.empty.avg.vcf.gz"], - "JointVcfFiltering.input_vcf_idxs": [ - "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.chr21.avg.vcf.gz.tbi", - "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.chr22.avg.vcf.gz.tbi", - "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.empty.avg.vcf.gz.tbi"], - "JointVcfFiltering.sites_only_vcf": "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.chr21_chr22.sites_only.vcf.gz", - "JointVcfFiltering.sites_only_vcf_idx": "/home/runner/work/gatk/gatk/src/test/resources/large/filteringJointVcf/test_10_samples.chr21_chr22.sites_only.vcf.gz.tbi", - "JointVcfFiltering.annotations": ["ReadPosRankSum", "FS", "SOR", "QD"], - "JointVcfFiltering.output_prefix": "test_10_samples", - "JointVcfFiltering.resource_args": "--resource:hapmap,training=true,calibration=true 
gs://gcp-public-data--broad-references/hg38/v0/hapmap_3.3.hg38.vcf.gz --resource:omni,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/1000G_omni2.5.hg38.vcf.gz --resource:1000G,training=true gs://gcp-public-data--broad-references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz --resource:mills,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", - "JointVcfFiltering.extract_extra_args": "-L chr21 --maximum-number-of-unlabeled-variants 10000000", - "JointVcfFiltering.train_extra_args": "--calibration-sensitivity-threshold 0.95", - "JointVcfFiltering.score_extra_args": "--snp-calibration-sensitivity-threshold 0.95 --indel-calibration-sensitivity-threshold 0.95" -} \ No newline at end of file diff --git a/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl b/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl index 55b5fa1c390..7b265c45c54 100644 --- a/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl +++ b/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl @@ -29,8 +29,9 @@ workflow JointVcfFiltering { String resource_args String? model_backend - File? python_script + File? training_python_script File? hyperparameters_json + File? scoring_python_script String? extract_extra_args String? train_extra_args @@ -55,9 +56,9 @@ workflow JointVcfFiltering { model_backend: "(Optional) Model backend to be used by TrainVariantAnnotationsModel. See GATK documentation for this tool." python_script: "(Optional) Python script specifying custom model backend to be used by TrainVariantAnnotationsModel. See GATK documentation for this tool." hyperparameters_json: "(Optional) JSON file specifying model hyperparameters to be used by TrainVariantAnnotationsModel. See GATK documentation for this tool." - extract_extra_args: "(Optional) Catch-all string to provide additional arguments for ExtractVariantAnnotations. 
This can include intervals (as string arguments or non-localized files), variant-type modes, arguments for enabling positive-negative training, etc. The \"do-not-gzip-vcf-output\" argument is not supported by this workflow. See GATK documentation for this tool." - train_extra_args: "(Optional) Catch-all string to provide additional arguments for TrainVariantAnnotationsModel. This can include variant-type modes, arguments for enabling positive-negative training, etc. See GATK documentation for this tool." - score_extra_args: "(Optional) Catch-all string to provide additional arguments for ScoreVariantAnnotations. This can include intervals (as string arguments or non-localized files), variant-type modes, arguments for enabling positive-negative training and hard filtering, etc. The \"do-not-gzip-vcf-output\" argument is not supported by this workflow. See GATK documentation for this tool." + extract_extra_args: "(Optional) Catch-all string to provide additional arguments for ExtractVariantAnnotations. This can include intervals (as string arguments or non-localized files), variant-type modes, arguments for enabling positive-unlabeled learning, etc. The \"do-not-gzip-vcf-output\" argument is not supported by this workflow. See GATK documentation for this tool." + train_extra_args: "(Optional) Catch-all string to provide additional arguments for TrainVariantAnnotationsModel. This can include variant-type modes, arguments for enabling positive-unlabeled learning, etc. See GATK documentation for this tool." + score_extra_args: "(Optional) Catch-all string to provide additional arguments for ScoreVariantAnnotations. This can include intervals (as string arguments or non-localized files), variant-type modes, arguments for enabling positive-unlabeled learning and hard filtering, etc. The \"do-not-gzip-vcf-output\" argument is not supported by this workflow. See GATK documentation for this tool." 
} call ExtractVariantAnnotations { @@ -79,7 +80,7 @@ workflow JointVcfFiltering { annotations_hdf5 = ExtractVariantAnnotations.annotations_hdf5, unlabeled_annotations_hdf5 = ExtractVariantAnnotations.unlabeled_annotations_hdf5, model_backend = model_backend, - python_script = python_script, + python_script = training_python_script, hyperparameters_json = hyperparameters_json, output_prefix = output_prefix, extra_args = train_extra_args, @@ -101,6 +102,8 @@ workflow JointVcfFiltering { extracted_vcf_idx = ExtractVariantAnnotations.extracted_vcf_idx, model_prefix = output_prefix, model_files = TrainVariantAnnotationsModel.model_files, + model_backend = model_backend, + python_script = scoring_python_script, extra_args = score_extra_args, gatk_docker = gatk_docker, gatk_override = gatk_override, @@ -251,6 +254,8 @@ task ScoreVariantAnnotations { File extracted_vcf_idx String model_prefix Array[File] model_files + String? model_backend + File? python_script String? extra_args File? monitoring_script @@ -287,6 +292,8 @@ task ScoreVariantAnnotations { ~{resource_args} \ --resource:extracted,extracted=true ~{extracted_vcf} \ --model-prefix model-files/~{model_prefix}.train \ + ~{"--model-backend " + model_backend} \ + ~{"--python-script " + python_script} \ ~{extra_args} } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java index 0213ff0b97d..0611af6d420 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java @@ -31,7 +31,7 @@ * Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files. * *

- * This tool is intended to be used as the first step in a variant-filtering workflow that supersedes the + * This tool is primarily intended to be used as the first step in a variant-filtering workflow that supersedes the * {@link VariantRecalibrator} workflow. This tool extracts site-level annotations, labels, and other relevant metadata * from variant sites (or alleles, in allele-specific mode) that are or are not present in specified labeled * resource VCFs (e.g., training or calibration VCFs). Input sites that are present in the resources are considered @@ -65,7 +65,7 @@ *

@@ -158,9 +157,9 @@ * *

* Extract annotations from training/calibration SNP/INDEL sites, producing the outputs - * 1) {@code extract.annot.hdf5}, 2) {@code extract.vcf.gz}, and 3) {@code extract.vcf.gz.tbi}. + * 1) extract.annot.hdf5, 2) extract.vcf.gz, and 3) extract.vcf.gz.tbi. * The HDF5 file can then be provided to {@link TrainVariantAnnotationsModel} - * to train a model using a positive-only approach. Note that the {@value MODE_LONG_NAME} arguments are made + * to train a model using a positive-only approach. Note that the "--mode" arguments are made * explicit here, although both SNP and INDEL modes are selected by default. * *

@@ -182,11 +181,10 @@
  * 

* Extract annotations from both training/calibration SNP/INDEL sites and a random sample of * 1000000 unlabeled (i.e., non-training/calibration) sites, producing the outputs - * 1) {@code extract.annot.hdf5}, 2) {@code extract.unlabeled.annot.hdf5}, 3) {@code extract.vcf.gz}, - * and 4) {@code extract.vcf.gz.tbi}. The HDF5 files can then be provided to {@link TrainVariantAnnotationsModel} - * to train a model using a positive-negative approach (similar to that used in {@link VariantRecalibrator}). - * Note that the {@value MODE_LONG_NAME} arguments are made explicit here, although both SNP and INDEL modes are - * selected by default. + * 1) extract.annot.hdf5, 2) extract.unlabeled.annot.hdf5, 3) extract.vcf.gz, + * and 4) extract.vcf.gz.tbi. The HDF5 files can then be provided to {@link TrainVariantAnnotationsModel} + * to train a model using a positive-unlabeled approach. Note that the "--mode" arguments + * are made explicit here, although both SNP and INDEL modes are selected by default. * *

  *     gatk ExtractVariantAnnotations \
@@ -200,17 +198,23 @@
  *          --mode INDEL \
  *          --resource:indel-training,training=true indel-training.vcf \
  *          --resource:indel-calibration,calibration=true indel-calibration.vcf \
- *          --maximum-number-of-unlableled-variants 1000000
+ *          --maximum-number-of-unlabeled-variants 1000000 \
  *          -O extract
  * 
*

* *

+ * Note that separate SNP and INDEL resources are shown in the above examples purely for demonstration purposes, + * as are separate training and calibration resources. However, it may be desirable to specify combined + * resource(s); e.g., "--resource:snp-and-indel-resource,training=true,calibration=true snp-and-indel-resource.vcf". + *

+ * + *

* In the (atypical) event that resource VCFs are unavailable, one can still extract annotations from a random sample of - * unlabeled sites, producing the outputs 1) {@code extract.unlabeled.annot.hdf5}, - * 2) {@code extract.vcf.gz} (which will contain no records), and 3) {@code extract.vcf.gz.tbi}. + * unlabeled sites, producing the outputs 1) extract.unlabeled.annot.hdf5, + * 2) extract.vcf.gz (which will contain no records), and 3) extract.vcf.gz.tbi. * This random sample cannot be used by {@link TrainVariantAnnotationsModel}, but may still be useful for - * exploratory analyses. Note that the {@value MODE_LONG_NAME} arguments are made explicit here, although both + * exploratory analyses. Note that the "--mode" arguments are made explicit here, although both * SNP and INDEL modes are selected by default. * *

@@ -221,12 +225,20 @@
  *          -A annotation_N \
  *          --mode SNP \
  *          --mode INDEL \
- *          --maximum-number-of-unlableled-variants 1000000
+ *          --maximum-number-of-unlabeled-variants 1000000 \
  *          -O extract
  * 
*

* - * DEVELOPER NOTE: See documentation in {@link LabeledVariantAnnotationsWalker}. + *

+ * Alternatively, if resource VCFs are unavailable, one might want to specify the input VCF itself as a resource + * and extract annotations for the input variants (or a subset thereof). Again, this may be useful for + * exploratory analyses. + *

+ * + *

+ * DEVELOPER NOTE: See documentation in {@link LabeledVariantAnnotationsWalker}. + *

* * @author Samuel Lee <slee@broadinstitute.org> */ @@ -249,11 +261,10 @@ public final class ExtractVariantAnnotations extends LabeledVariantAnnotationsWa doc = "Maximum number of unlabeled variants to extract. " + "If greater than zero, reservoir sampling will be used to randomly sample this number " + "of sites from input sites that are not present in the specified resources. " + - "Choice of this number should be guided by considerations for training the negative model in " + + "Choice of this number should be guided by considerations for training the model in " + "TrainVariantAnnotationsModel; users may wish to choose a number that is comparable to the " + "expected size of the labeled training set or that is compatible with available memory resources. " + - "Note that in allele-specific mode (--" + LabeledVariantAnnotationsWalker.USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME + - " true), this argument limits the number of variant records, rather than the number of alleles.", + "Note that in allele-specific mode, this argument limits the number of variant records, rather than the number of alleles.", minValue = 0) private int maximumNumberOfUnlabeledVariants = 0; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java index 32a72c0f5f0..108fe4bccbd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java @@ -9,6 +9,7 @@ import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; +import htsjdk.variant.vcf.VCFHeaderLineCount; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; import 
org.apache.commons.collections4.ListUtils; @@ -34,6 +35,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -87,7 +89,6 @@ public abstract class LabeledVariantAnnotationsWalker extends MultiplePassVariantWalker { public static final String MODE_LONG_NAME = "mode"; - public static final String USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME = "use-allele-specific-annotations"; public static final String IGNORE_FILTER_LONG_NAME = "ignore-filter"; public static final String IGNORE_ALL_FILTERS_LONG_NAME = "ignore-all-filters"; public static final String DO_NOT_TRUST_ALL_POLYMORPHIC_LONG_NAME = "do-not-trust-all-polymorphic"; @@ -129,12 +130,6 @@ enum ResourceMatchingStrategy { minElements = 1) private List variantTypesToExtractList = new ArrayList<>(Arrays.asList(VariantType.SNP, VariantType.INDEL)); - @Argument( - fullName = USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME, - doc = "If true, use the allele-specific versions of the specified annotations.", - optional = true) - boolean useASAnnotations = false; - @Argument( fullName = IGNORE_FILTER_LONG_NAME, doc = "Ignore the specified filter(s) in the input VCF.", @@ -159,13 +154,13 @@ enum ResourceMatchingStrategy { @Argument( fullName = RESOURCE_MATCHING_STRATEGY_LONG_NAME, doc = "The strategy to use for determining whether an input variant is present in a resource " + - "in non-allele-specific mode (--" + USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME + " false). " + + "in non-allele-specific mode. " + "START_POSITION: Start positions of input and resource variants must match. " + "START_POSITION_AND_GIVEN_REPRESENTATION: The intersection of the sets of input and resource alleles " + "(in their given representations) must also be non-empty. 
" + "START_POSITION_AND_MINIMAL_REPRESENTATION: The intersection of the sets of input and resource alleles " + "(after converting alleles to their minimal representations) must also be non-empty. " + - "This argument has no effect in allele-specific mode (--" + USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME + " true), " + + "This argument has no effect in allele-specific mode, " + "in which the minimal representations of the input and resource alleles must match.", optional = true) private ResourceMatchingStrategy resourceMatchingStrategy = ResourceMatchingStrategy.START_POSITION; @@ -186,6 +181,7 @@ enum ResourceMatchingStrategy { private final Set ignoreInputFilterSet = new TreeSet<>(); Set variantTypesToExtract; TreeSet resourceLabels = new TreeSet<>(); + boolean useASAnnotations; File outputAnnotationsFile; VariantContextWriter vcfWriter; @@ -222,9 +218,11 @@ public void onTraversalStart() { LabeledVariantAnnotationsData.SNP_LABEL)); } + useASAnnotations = isAlleleSpecificAnnotationRequested(); + if (useASAnnotations && resourceMatchingStrategy != ResourceMatchingStrategy.START_POSITION_AND_MINIMAL_REPRESENTATION) { - logger.warn(String.format("The %s argument is ignored when %s is set to true. The START_POSITION_AND_MINIMAL_REPRESENTATION strategy will be used.", - RESOURCE_MATCHING_STRATEGY_LONG_NAME, USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME)); + logger.warn(String.format("The %s argument is ignored when allele-specific annotations are requested. 
The START_POSITION_AND_MINIMAL_REPRESENTATION strategy will be used.", + RESOURCE_MATCHING_STRATEGY_LONG_NAME)); resourceMatchingStrategy = ResourceMatchingStrategy.START_POSITION_AND_MINIMAL_REPRESENTATION; } @@ -251,6 +249,12 @@ public Object onTraversalSuccess() { return null; } + private boolean isAlleleSpecificAnnotationRequested() { + final Set distinctAnnotationNames = new LinkedHashSet<>(annotationNames); + final VCFHeader inputHeader = getHeaderForVariants(); + return distinctAnnotationNames.stream().anyMatch(a -> inputHeader.getInfoHeaderLine(a).getCountType() == VCFHeaderLineCount.A); + } + static void addExtractedVariantToData(final LabeledVariantAnnotationsData data, final VariantContext variant, final List, VariantType, TreeSet>> metadata) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java index 522ac434403..94a69f89881 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java @@ -25,7 +25,7 @@ import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsScorer; -import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsScorer; +import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonVariantAnnotationsScorer; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModel; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModelBackend; import 
org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsScorer; @@ -52,7 +52,7 @@ * Scores variant calls in a VCF file based on site-level annotations using a previously trained model. * *

- * This tool is intended to be used as the last step in a variant-filtering workflow that supersedes the + * This tool is primarily intended to be used as the last step in a variant-filtering workflow that supersedes the * {@link VariantRecalibrator} workflow. Using a previously trained model produced by {@link TrainVariantAnnotationsModel}, * this tool assigns a score to each call (with a lower score indicating that a call is more likely to be an artifact). * Each score can also be converted to a corresponding sensitivity with respect to a calibration set, if the latter is available. @@ -64,7 +64,7 @@ * Note that annotations and metadata are collected in memory during traversal until they are written to HDF5 files * upon completion of the traversal. Memory and disk requirements thus roughly scale linearly with both the number * of sites scored and the number of annotations. For large callsets, this tool may be run in parallel over separate - * genomic shards using the {@value StandardArgumentDefinitions#INTERVALS_LONG_NAME} argument as usual. + * genomic shards using the "--intervals/-L" argument as usual. *

* *

@@ -78,7 +78,7 @@ *

    *
  • * Input VCF file. Site-level annotations will be extracted from the contained variants (or alleles, - * if the {@value USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME} argument is specified). + * if at least one allele-specific annotation with "Number=A" is specified). *
  • *
  • * Annotations to use for scoring. These should be identical to those used in the {@link ExtractVariantAnnotations} @@ -97,7 +97,7 @@ * (Optional) Model backend. This should be identical to that specified in {@link TrainVariantAnnotationsModel}. * The default Python IsolationForest implementation requires either the GATK Python environment * or that certain Python packages (argparse, h5py, numpy, sklearn, and dill) are otherwise available. - * A custom backend can also be specified in conjunction with the {@value PYTHON_SCRIPT_LONG_NAME} argument. + * A custom backend can also be specified in conjunction with the "--python-script" argument. *
  • *
  • * (Optional) Resource VCF file(s). See the corresponding documentation in {@link ExtractVariantAnnotations}. @@ -121,34 +121,33 @@ * *
      *
    • - * Scored VCF file and index. The VCF will not be gzipped if the {@value DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME} + * Scored VCF file and index. The VCF will not be gzipped if the "--do-not-gzip-vcf-output" * argument is set to true. The INFO field in each VCF record will be annotated with: * *

      - * 1) a score (with a key as given by the {@value SCORE_KEY_LONG_NAME} argument, - * which has a default value of {@value DEFAULT_SCORE_KEY}), + * 1) a score (with a key as given by the "--score-key" argument, which has a default value of "SCORE"), *

      *

      * 2) if resources are provided, flags corresponding to the labels (e.g., - * {@value LabeledVariantAnnotationsData#TRAINING_LABEL}, {@value LabeledVariantAnnotationsData#CALIBRATION_LABEL}, etc.) + * "training", "calibration", etc.) * of resources containing the record, *

      *

      - * 3) if the {@value SNP_KEY_LONG_NAME} argument (which has a default value of {@value DEFAULT_SNP_KEY}) + * 3) if the "--snp-key" argument (which has a default value of "snp") * is non-null, a flag corresponding to whether a site is treated as a SNP, *

      *

      - * 4) if {@value SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} and/or - * {@value INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} are provided, a filter (with name given by - * the {@value LOW_SCORE_FILTER_NAME_LONG_NAME} argument, which has a default value of - * {@value DEFAULT_LOW_SCORE_FILTER_NAME}) will be applied if a record has a calibration-set sensitivity + * 4) if "--snp-calibration-sensitivity-threshold" and/or + * "--indel-calibration-sensitivity-threshold" are provided, a filter (with name given by + * the "--low-score-filter-name" argument, which has a default value of + * "LOW_SCORE") will be applied if a record has a calibration-set sensitivity * falling above the appropriate threshold (i.e., if it has a score falling below the corresponding * score threshold). *

      *

      - * If {@value USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME} is true, the score, SNP flag, calibration sensitivity, - * and filter appropriate for the highest scoring allele are used; however, the resource labels for all alleles - * are applied. + * In allele-specific mode (i.e., when allele-specific annotations are requested), the score, SNP flag, + * calibration sensitivity, and filter appropriate for the highest scoring allele are used for any + * multiallelic records; however, the resource labels for all alleles are applied. *

      * *
    • @@ -162,7 +161,7 @@ * *
    • * (Optional) Scores HDF5 file (.scores.hdf5). Scores for all scored sites are stored in the - * HDF5 path {@value VariantAnnotationsScorer#SCORES_PATH}. Scores are given in the same order as records + * HDF5 path "/data/scores". Scores are given in the same order as records * in both the VCF and the annotations HDF5 file. This file will only be produced if the number of scored sites * is nonzero. *

      @@ -174,12 +173,12 @@ *

      * Score sites using a model (produced by {@link TrainVariantAnnotationsModel} using the default * {@link VariantAnnotationsModelBackend#PYTHON_IFOREST} model backend and contained in the directory - * {@code model_dir}), producing the outputs 1) {@code output.vcf.gz}, 2) {@code output.vcf.gz.tbi}, - * 3) {@code output.annot.hdf5}, and 4) {@code output.scores.hdf5}. Note that {@code extract.vcf.gz} is + * model_dir), producing the outputs 1) output.vcf.gz, 2) output.vcf.gz.tbi, + * 3) output.annot.hdf5, and 4) output.scores.hdf5. Note that extract.vcf.gz is * produced by {@link ExtractVariantAnnotations}. Records will be filtered according to the values provided to the - * {@value SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} and {@value INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} + * "--snp-calibration-sensitivity-threshold" and "--indel-calibration-sensitivity-threshold" * arguments; the values below are only meant to be illustrative and should be set as appropriate for a given analysis. - * Note that the {@value MODE_LONG_NAME} arguments are made explicit here, although both SNP and INDEL modes are + * Note that the "--mode" arguments are made explicit here, although both SNP and INDEL modes are * selected by default. * *

      @@ -204,9 +203,9 @@
        * 

      * One may chain together two runs of this tool to score SNPs and INDELs using different models * (note that SNP and INDEL models have "snp" and "indel" tags in their respective filenames, so these - * models can still be contained in the same {@code model_dir} directory). + * models can still be contained in the same model_dir directory). * This may have implications for mixed SNP/INDEL sites, especially if filters are applied; see also the - * {@value IGNORE_ALL_FILTERS_LONG_NAME} and {@value IGNORE_FILTER_LONG_NAME} arguments. + * "--ignore-all-filters" and "--ignore-filter" arguments. * *

        *     gatk ScoreVariantAnnotations \
      @@ -223,7 +222,7 @@
        *          -O intermediate-output
        *
        *     gatk ScoreVariantAnnotations \
      - *          -V intermediate-output.vcf \
      + *          -V intermediate-output.vcf.gz \
        *          -A indel_annotation_1 \
        *          ...
        *          -A indel_annotation_M \
      @@ -236,16 +235,23 @@
        *          -O output
        * 
      * + *

      + * Note that separate SNP and INDEL resources are shown in the above examples purely for demonstration purposes, + * as are separate training and calibration resources. However, it may be desirable to specify combined + * resource(s); e.g., "--resource:combined-resource,training=true,calibration=true combined-resource.vcf". + * Recall that this is also the case in {@link ExtractVariantAnnotations}. + *

      + * *

      Custom modeling/scoring backends (ADVANCED)

      * *

      * The primary scoring functionality performed by this tool is accomplished by a "scoring backend" * whose fundamental contract is to take an input annotation matrix and to output corresponding scores, * with both input and output given as HDF5 files. Rather than using one of the available, implemented backends, - * advanced users may provide their own backend via the {@value PYTHON_SCRIPT_LONG_NAME} argument. + * advanced users may provide their own backend via the "--python-script" argument. * See documentation in the modeling and scoring interfaces ({@link VariantAnnotationsModel} and * {@link VariantAnnotationsScorer}, respectively), as well as the default Python IsolationForest implementation at - * {@link PythonSklearnVariantAnnotationsScorer} and + * {@link PythonVariantAnnotationsScorer} and * src/main/resources/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py. *

      * @@ -485,15 +491,9 @@ protected void afterNthPass(final int n) { private VariantAnnotationsScorer deserializeScorerFromPklFiles(final VariantType variantType) { final String variantTypeTag = '.' + variantType.toString().toLowerCase(); final File scorerPklFile = new File( - modelPrefix + variantTypeTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX); - final File negativeScorerPklFile = new File( - modelPrefix + variantTypeTag + TrainVariantAnnotationsModel.NEGATIVE_TAG + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX); + modelPrefix + variantTypeTag + PythonVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX); return scorerPklFile.canRead() - ? negativeScorerPklFile.canRead() - ? VariantAnnotationsScorer.combinePositiveAndNegativeScorer( - new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, scorerPklFile), - new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, negativeScorerPklFile)) - : new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, scorerPklFile) + ? new PythonVariantAnnotationsScorer(pythonScriptFile, scorerPklFile) : null; } @@ -501,14 +501,8 @@ private VariantAnnotationsScorer deserializeScorerFromSerFiles(final VariantType final String variantTypeTag = '.' + variantType.toString().toLowerCase(); final File scorerSerFile = new File( modelPrefix + variantTypeTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX); - final File negativeScorerSerFile = new File( - modelPrefix + variantTypeTag + TrainVariantAnnotationsModel.NEGATIVE_TAG + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX); return scorerSerFile.canRead() - ? negativeScorerSerFile.canRead() - ? VariantAnnotationsScorer.combinePositiveAndNegativeScorer( - BGMMVariantAnnotationsScorer.deserialize(scorerSerFile), - BGMMVariantAnnotationsScorer.deserialize(negativeScorerSerFile)) - : BGMMVariantAnnotationsScorer.deserialize(scorerSerFile) + ? 
BGMMVariantAnnotationsScorer.deserialize(scorerSerFile) : null; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java index 6baf81c74eb..570cba33652 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java @@ -1,9 +1,7 @@ package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable; import com.google.common.collect.Streams; -import com.google.common.primitives.Doubles; import org.apache.commons.math3.stat.descriptive.moment.Variance; -import org.apache.commons.math3.stat.descriptive.rank.Percentile; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; @@ -17,8 +15,8 @@ import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsModel; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsScorer; -import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsModel; -import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsScorer; +import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonVariantAnnotationsModel; +import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonVariantAnnotationsScorer; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModel; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModelBackend; 
import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsScorer; @@ -31,7 +29,6 @@ import java.io.File; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -40,10 +37,10 @@ * Trains a model for scoring variant calls based on site-level annotations. * *

      - * This tool is intended to be used as the second step in a variant-filtering workflow that supersedes the + * This tool is primarily intended to be used as the second step in a variant-filtering workflow that supersedes the * {@link VariantRecalibrator} workflow. Given training (and optionally, calibration) sets of site-level annotations * produced by {@link ExtractVariantAnnotations}, this tool can be used to train a model for scoring variant - * calls. For each variant type (i.e., SNP or INDEL) specified using the {@value MODE_LONG_NAME} argument, the tool + * calls. For each variant type (i.e., SNP or INDEL) specified using the "--mode" argument, the tool * outputs files that are either: 1) serialized scorers, each of which persists to disk a function for computing * scores given subsequent annotations, or 2) HDF5 files containing a set of scores, each corresponding to training, * calibration, and unlabeled sets, as appropriate. @@ -59,29 +56,21 @@ *

      Modeling approaches

      * *

      - * This tool can perform modeling using either a positive-only approach or a positive-negative approach. + * This tool can perform modeling using either a positive-only approach or a positive-unlabeled approach. * In a positive-only approach, the annotation-space distribution of training sites is used to learn a * function for converting annotations for subsequent sites into a score; typically, higher scores correspond to - * regions of annotation space that are more densely populated by training sites. In contrast, a positive-negative - * approach attempts to additionally use unlabeled sites to better identify regions of annotation space that correspond - * to low scores against the original, positive-only model (with the assumption being that unlabeled sites are - * more likely to populate such regions than are training sites). A second, negative model can then be trained, - * and the resulting scores (which are presumably higher in regions of annotation space that are less densely - * populated by the original training sites) can be subtracted from the original scores to produce a final score. - * (Note that this positive-negative approach could be considered as a single iteration of a more general - * approach typically referred to as positive-unlabeled learning.) + * regions of annotation space that are more densely populated by training sites. In contrast, a positive-unlabeled + * approach attempts to additionally use unlabeled sites to better learn not only these regions of annotation space + * populated by training sites, but also those that are populated by sites that may be drawn from a different distribution. *

      * *

      * A positive-only approach is likely to perform well in cases where a sufficient number of reliable training sites * is available. In contrast, if 1) only a small number of reliable training sites is available, and/or * 2) the reliability of the training sites is questionable (e.g., the sites may be contaminated by - * a non-negigible number of sequencing artifacts), then a positive-negative approach may be beneficial. - * However, note that the positive-negative approach introduces an additional hyperparameter---the threshold - * that determines the selection of sites for training the negative model, controlled by the - * {@value CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} argument---which may require tuning. + * a non-negligible number of sequencing artifacts), then a positive-unlabeled approach may be beneficial. * Further note that although {@link VariantRecalibrator} (which this tool supplants) has typically been used to - * implement a positive-negative approach, a positive-only approach likely suffices in many use cases. + * implement a naive positive-unlabeled approach, a positive-only approach likely suffices in many use cases. *

      * *

      @@ -94,24 +83,9 @@ * generated using the positive model and output to a file.

    • *
    * - * Additionally, if a positive-negative approach has been specified (i.e., the {@value UNLABELED_ANNOTATIONS_HDF5_LONG_NAME} - * and {@value CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} arguments have been provided), - * and if both unlabeled and calibration sites of the variant type are available, then: - * - *
      - *
    • 4) The calibration scores generated from the positive model are used to convert the - * calibration-sensitivity threshold into a score threshold,
    • - *
    • 5) Training sites with scores below the score threshold are selected for training a negative model,
    • - *
    • 6) Scores for unlabeled sites are generated using the positive model and output to a file,
    • - *
    • 7) Unlabeled sites with scores below the score threshold are selected for training a negative model,
    • - *
    • 8) A negative model is trained using these selected training and unlabeled sites and is serialized to file,
    • - *
    • 9) Scores for calibration sites are generated using the positive-negative model and overwritten in the existing file.
    • - *
    - * - * Note that the positive-negative approach thus yields 1) scores for training and unlabeled sites generated from - * the positive model and 2) scores for calibration sites generated from the positive-negative model. This is opposed - * to generating scores from all sites from the positive-negative model, since these can simply be obtained from - * a downstream run of {@link ScoreVariantAnnotations}. + * In contrast, a positive-unlabeled approach may instead be specified by providing the + * "--unlabeled-annotations-hdf5" argument. Currently, this requires the use of a custom modeling backend; + * see below. *

    * *

    Modeling backends

    @@ -131,14 +105,14 @@ *

    * *

    - * This backend can be selected by specifying {@code PYTHON_IFOREST} to the {@value MODEL_BACKEND_LONG_NAME} argument + * This backend can be selected by specifying "--model-backend PYTHON_IFOREST" * and is also currently the the default backend. It is implemented by the script at * src/main/resources/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py, which * requires that the argparse, h5py, numpy, sklearn, and dill packages be present in the Python environment; users * may wish to simply use the provided GATK conda environment to ensure that the correct versions of all packages are available. * See the IsolationForest documentation here * as appropriate for the version of scikit-learn used in your Python environment. The hyperparameters documented - * there can be specified using the {@value HYPERPARAMETERS_JSON_LONG_NAME} argument; see + * there can be specified using the "--hyperparameters-json" argument; see * src/main/resources/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest-hyperparameters.json * for an example and the default values. *

    @@ -166,15 +140,13 @@ *
  • * Labeled-annotations HDF5 file (.annot.hdf5). Annotation data and metadata for labeled sites are stored in the * HDF5 directory structure given in the documentation for the {@link ExtractVariantAnnotations} tool. In typical - * usage, both the {@value LabeledVariantAnnotationsData#TRAINING_LABEL} and - * {@value LabeledVariantAnnotationsData#CALIBRATION_LABEL} labels would be available for non-empty sets of + * usage, both the "training" and "calibration" labels would be available for non-empty sets of * sites of the requested variant type. *
  • *
  • * (Optional) Unlabeled-annotations HDF5 file (.unlabeled.annot.hdf5). Annotation data and metadata for * unlabeled sites are stored in the HDF5 directory structure given in the documentation for the - * {@link ExtractVariantAnnotations} tool. If provided, a positive-negative modeling approach (similar to - * that used in {@link VariantRecalibrator} will be used. + * {@link ExtractVariantAnnotations} tool. If provided, a positive-unlabeled modeling approach will be used. *
  • *
  • * Variant types (i.e., SNP and/or INDEL) for which to train models. Logic for determining variant type was retained from @@ -185,7 +157,7 @@ *
  • *
  • * (Optional) Model backend. The Python isolation-forest backend is currently the default backend. - * A custom backend can also be specified in conjunction with the {@value PYTHON_SCRIPT_LONG_NAME} argument. + * A custom backend can also be specified in conjunction with the "--python-script" argument. *
  • *
  • * (Optional) Model hyperparameters JSON file. This file can be used to specify backend-specific @@ -205,10 +177,10 @@ *

    Outputs

    * *

    - * The following outputs are produced for each variant type specified by the {@value MODE_LONG_NAME} argument + * The following outputs are produced for each variant type specified by the "--mode" argument * and are delineated by type-specific tags in the filename of each output, which take the form of - * {@code {output-prefix}.{variant-type}.{file-suffix}}. For example, scores for the SNP calibration set - * will be output to the {@code {output-prefix}.snp.calibrationScores.hdf5} file. + * {output-prefix}.{variant-type}.{file-suffix}. For example, scores for the SNP calibration set + * will be output to the {output-prefix}.snp.calibrationScores.hdf5 file. *

    * *
      @@ -216,20 +188,11 @@ * Training-set positive-model scores HDF5 file (.trainingScores.hdf5). * *
    • - * Positive-model serialized scorer file. (.scorer.pkl for the default {@code PYTHON_IFOREST} model backend). - *
    • - *
    • - * (Optional) Unlabeled-set positive-model scores HDF5 file (.unlabeledScores.hdf5). This is only output - * if a positive-negative modeling approach is used. + * Positive-model serialized scorer file (.scorer.pkl for the default PYTHON_IFOREST model backend). *
    • *
    • * (Optional) Calibration-set scores HDF5 file (.calibrationScores.hdf5). This is only output if a calibration - * set is provided. If a positive-only modeling approach is used, scores will be generated from the positive model; - * if a positive-negative modeling approach is used, scores will be generated from the positive-negative model. - *
    • - *
    • - * (Optional) Negative-model serialized scorer file. (.negative.scorer.pkl for the default {@code PYTHON_IFOREST} model backend). - * This is only output if a positive-negative modeling approach is used. + * set is provided. *
    • *
    * @@ -240,7 +203,7 @@ * given an input labeled-annotations HDF5 file generated by {@link ExtractVariantAnnotations} that contains * labels for both training and calibration sets, producing the outputs 1) train.snp.scorer.pkl, * 2) train.snp.trainingScores.hdf5, and 3) train.snp.calibrationScores.hdf5, as well as analogous files - * for the INDEL model. Note that the {@value MODE_LONG_NAME} arguments are made explicit here, although both + * for the INDEL model. Note that the "--mode" arguments are made explicit here, although both * SNP and INDEL modes are selected by default. * *
    @@ -252,37 +215,17 @@
      * 
    *

    * - *

    - * Train SNP and INDEL models using the default Python IsolationForest model backend with a positive-negative approach - * (using a calibration-sensitivity threshold of 0.95 to select sites for training the negative model), - * given an input labeled-annotations HDF5 file that contains labels for both training and calibration sets - * and an input unlabeled-annotations HDF5 file (with both HDF5 files generated by {@link ExtractVariantAnnotations}), - * producing the outputs 1) train.snp.scorer.pkl, 2) train.snp.negative.scorer.pkl, 3) train.snp.trainingScores.hdf5, - * 4) train.snp.calibrationScores.hdf5, and 5) train.snp.unlabeledScores.hdf5, as well as analogous files - * for the INDEL model. Note that the {@value MODE_LONG_NAME} arguments are made explicit here, although both - * SNP and INDEL modes are selected by default. - * - *

    - *     gatk TrainVariantAnnotationsModel \
    - *          --annotations-hdf5 extract.annot.hdf5 \
    - *          --unlabeled-annotations-hdf5 extract.unlabeled.annot.hdf5 \
    - *          --mode SNP \
    - *          --mode INDEL \
    - *          --calibration-sensitivity-threshold 0.95 \
    - *          -O train
    - * 
    - *

    - * *

    Custom modeling/scoring backends (ADVANCED)

    * *

    * The primary modeling functionality performed by this tool is accomplished by a "modeling backend" * whose fundamental contract is to take an input HDF5 file containing an annotation matrix for sites of a - * single variant type (i.e., SNP or INDEL) and to output a serialized scorer for that variant type. + * single variant type (i.e., SNP or INDEL) (as well as an analogous HDF5 file for unlabeled sites, + * if a positive-unlabeled modeling approach has been specified) and to output a serialized scorer for that variant type. * Rather than using one of the available, implemented backends, advanced users may provide their own backend - * via the {@value PYTHON_SCRIPT_LONG_NAME} argument. See documentation in the modeling and scoring interfaces + * via the "--python-script" argument. See documentation in the modeling and scoring interfaces * ({@link VariantAnnotationsModel} and {@link VariantAnnotationsScorer}, respectively), as well as the default - * Python IsolationForest implementation at {@link PythonSklearnVariantAnnotationsModel} and + * Python IsolationForest implementation at {@link PythonVariantAnnotationsModel} and * src/main/resources/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py. *

    * @@ -311,7 +254,6 @@ public final class TrainVariantAnnotationsModel extends CommandLineProgram { public static final String MODEL_BACKEND_LONG_NAME = "model-backend"; public static final String PYTHON_SCRIPT_LONG_NAME = "python-script"; public static final String HYPERPARAMETERS_JSON_LONG_NAME = "hyperparameters-json"; - public static final String CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME = "calibration-sensitivity-threshold"; public static final String ISOLATION_FOREST_PYTHON_SCRIPT = "isolation-forest.py"; public static final String ISOLATION_FOREST_HYPERPARAMETERS_JSON = "isolation-forest-hyperparameters.json"; @@ -323,7 +265,6 @@ enum AvailableLabelsMode { public static final String TRAINING_SCORES_HDF5_SUFFIX = ".trainingScores.hdf5"; public static final String CALIBRATION_SCORES_HDF5_SUFFIX = ".calibrationScores.hdf5"; public static final String UNLABELED_SCORES_HDF5_SUFFIX = ".unlabeledScores.hdf5"; - public static final String NEGATIVE_TAG = ".negative"; @Argument( fullName = ANNOTATIONS_HDF5_LONG_NAME, @@ -333,8 +274,7 @@ enum AvailableLabelsMode { @Argument( fullName = UNLABELED_ANNOTATIONS_HDF5_LONG_NAME, doc = "HDF5 file containing annotations extracted with ExtractVariantAnnotations. " + - "If specified with " + CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME + ", " + - "a positive-unlabeled modeling approach will be used; otherwise, a positive-only modeling " + + "If specified, a positive-unlabeled modeling approach will be used; otherwise, a positive-only modeling " + "approach will be used.", optional = true) private File inputUnlabeledAnnotationsFile; @@ -368,20 +308,6 @@ enum AvailableLabelsMode { doc = "Output prefix.") private String outputPrefix; - @Argument( - fullName = CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME, - doc = "Calibration-sensitivity threshold that determines which sites will be used for training the negative model " + - "in the positive-unlabeled modeling approach. 
" + - "Increasing this will decrease the corresponding positive-model score threshold; sites with scores below this score " + - "threshold will be used for training the negative model. Thus, this parameter should typically be chosen to " + - "be close to 1, so that sites that score highly according to the positive model will not be used to train the negative model. " + - "The " + UNLABELED_ANNOTATIONS_HDF5_LONG_NAME + " argument must be specified in conjunction with this argument. " + - "If separate thresholds for SNP and INDEL models are desired, run the tool separately for each mode with its respective threshold.", - optional = true, - minValue = 0., - maxValue = 1.) - private Double calibrationSensitivityThreshold; - @Argument( fullName = MODE_LONG_NAME, doc = "Variant types for which to train models. Duplicate values will be ignored.", @@ -411,11 +337,7 @@ protected Object doWork() { private void validateArgumentsAndSetModes() { IOUtils.canReadFile(inputAnnotationsFile); - Utils.validateArg((inputUnlabeledAnnotationsFile == null) == (calibrationSensitivityThreshold == null), - "Unlabeled annotations and calibration-sensitivity threshold must both be unspecified (for positive-only model training) " + - "or specified (for positive-negative model training)."); - - availableLabelsMode = inputUnlabeledAnnotationsFile != null && calibrationSensitivityThreshold != null + availableLabelsMode = inputUnlabeledAnnotationsFile != null ? 
AvailableLabelsMode.POSITIVE_UNLABELED : AvailableLabelsMode.POSITIVE_ONLY; @@ -485,7 +407,29 @@ private void doModelingWorkForVariantType(final VariantType variantType) { logger.info(String.format("Training %s model with %d training sites x %d annotations %s...", variantTypeString, numTrainingAndVariantType, annotationNames.size(), annotationNames)); final File labeledTrainingAndVariantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isTrainingAndVariantType); - trainAndSerializeModel(labeledTrainingAndVariantTypeAnnotationsFile, outputPrefixTag); + + File unlabeledAndVariantTypeAnnotationsFile = null; + int numUnlabeledAndVariantType = 0; + if (availableLabelsMode == AvailableLabelsMode.POSITIVE_UNLABELED) { + final double[][] unlabeledAnnotations = LabeledVariantAnnotationsData.readAnnotations(inputUnlabeledAnnotationsFile); + final List unlabeledIsSNP = LabeledVariantAnnotationsData.readLabel(inputUnlabeledAnnotationsFile, "snp"); + final List isUnlabeledAndVariantType = variantType == VariantType.SNP ? 
unlabeledIsSNP : unlabeledIsSNP.stream().map(x -> !x).collect(Collectors.toList()); + + numUnlabeledAndVariantType = numPassingFilter(isUnlabeledAndVariantType); + + if (numUnlabeledAndVariantType > 0) { + logger.info(String.format("Training %s model with %d unlabeled sites x %d annotations %s...", + variantTypeString, numUnlabeledAndVariantType, annotationNames.size(), annotationNames)); + unlabeledAndVariantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile( + annotationNames, unlabeledAnnotations, isUnlabeledAndVariantType); + } else { + throw new UserException.BadInput(String.format("Attempted to train %s model, " + + "but no suitable unlabeled sites were found in the provided annotations.", variantTypeString)); + } + } + + trainAndSerializeModel(labeledTrainingAndVariantTypeAnnotationsFile, unlabeledAndVariantTypeAnnotationsFile, outputPrefixTag); + logger.info(String.format("%s model trained and serialized with output prefix \"%s\".", variantTypeString, outputPrefix + outputPrefixTag)); if (modelBackend == VariantAnnotationsModelBackend.JAVA_BGMM) { @@ -508,64 +452,10 @@ private void doModelingWorkForVariantType(final VariantType variantType) { logger.warn(String.format("No %s calibration sites were available.", variantTypeString)); } - // negative model - if (availableLabelsMode == AvailableLabelsMode.POSITIVE_UNLABELED) { - if (numLabeledCalibrationAndVariantType == 0) { - throw new UserException.BadInput(String.format("Attempted to train %s negative model, " + - "but no suitable calibration sites were found in the provided annotations.", variantTypeString)); - } - final double[][] unlabeledAnnotations = LabeledVariantAnnotationsData.readAnnotations(inputUnlabeledAnnotationsFile); - final List unlabeledIsSNP = LabeledVariantAnnotationsData.readLabel(inputUnlabeledAnnotationsFile, "snp"); - final List isUnlabeledVariantType = variantType == VariantType.SNP ? 
unlabeledIsSNP : unlabeledIsSNP.stream().map(x -> !x).collect(Collectors.toList()); - - final int numUnlabeledVariantType = numPassingFilter(isUnlabeledVariantType); - - if (numUnlabeledVariantType > 0) { - final File labeledCalibrationAndVariantTypeScoresFile = new File(outputPrefix + outputPrefixTag + CALIBRATION_SCORES_HDF5_SUFFIX); - final double[] labeledCalibrationAndVariantTypeScores = VariantAnnotationsScorer.readScores(labeledCalibrationAndVariantTypeScoresFile); - final double scoreThreshold = calibrationSensitivityThreshold == 1. // Percentile requires quantile > 0, so we treat this as a special case - ? Doubles.min(labeledCalibrationAndVariantTypeScores) - : new Percentile(100. * (1. - calibrationSensitivityThreshold)).evaluate(labeledCalibrationAndVariantTypeScores); - logger.info(String.format("Using %s score threshold of %.4f corresponding to specified calibration-sensitivity threshold of %.4f ...", - variantTypeString, scoreThreshold, calibrationSensitivityThreshold)); - - final double[] labeledTrainingAndVariantTypeScores = VariantAnnotationsScorer.readScores(labeledTrainingAndVariantTypeScoresFile); - final List isNegativeTrainingFromLabeledTrainingAndVariantType = Arrays.stream(labeledTrainingAndVariantTypeScores).boxed().map(s -> s < scoreThreshold).collect(Collectors.toList()); - - logger.info(String.format("Scoring %d unlabeled %s sites...", numUnlabeledVariantType, variantTypeString)); - final File unlabeledVariantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, unlabeledAnnotations, isUnlabeledVariantType); - final File unlabeledVariantTypeScoresFile = score(unlabeledVariantTypeAnnotationsFile, outputPrefixTag, UNLABELED_SCORES_HDF5_SUFFIX); - final double[] unlabeledVariantTypeScores = VariantAnnotationsScorer.readScores(unlabeledVariantTypeScoresFile); - final List isNegativeTrainingFromUnlabeledVariantType = Arrays.stream(unlabeledVariantTypeScores).boxed().map(s -> s < 
scoreThreshold).collect(Collectors.toList()); // length matches unlabeledAnnotationsFile - final int numNegativeTrainingFromUnlabeledVariantType = numPassingFilter(isNegativeTrainingFromUnlabeledVariantType); - logger.info(String.format("Selected %d unlabeled %s sites below score threshold of %.4f for negative-model training...", - numNegativeTrainingFromUnlabeledVariantType, variantTypeString, scoreThreshold)); - - final double[][] negativeTrainingAndVariantTypeAnnotations = concatenateLabeledAndUnlabeledNegativeTrainingData( - annotationNames, annotations, unlabeledAnnotations, isNegativeTrainingFromLabeledTrainingAndVariantType, isNegativeTrainingFromUnlabeledVariantType); - final int numNegativeTrainingAndVariantType = negativeTrainingAndVariantTypeAnnotations.length; - final List isNegativeTrainingAndVariantType = Collections.nCopies(numNegativeTrainingAndVariantType, true); - - if (numNegativeTrainingAndVariantType > 0) { - logger.info(String.format("Training %s negative model with %d negative-training sites x %d annotations %s...", - variantTypeString, numNegativeTrainingAndVariantType, annotationNames.size(), annotationNames)); - final File negativeTrainingAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile( - annotationNames, negativeTrainingAndVariantTypeAnnotations, isNegativeTrainingAndVariantType); - trainAndSerializeModel(negativeTrainingAnnotationsFile, outputPrefixTag + NEGATIVE_TAG); - logger.info(String.format("%s negative model trained and serialized with output prefix \"%s\".", variantTypeString, outputPrefix + outputPrefixTag + NEGATIVE_TAG)); - } else { - throw new UserException.BadInput(String.format("Attempted to train %s negative model, " + - "but no suitable sites with scores below the specified threshold were found in the provided annotations.", variantTypeString)); - } - - logger.info(String.format("Re-scoring %d %s calibration sites...", numLabeledCalibrationAndVariantType, variantTypeString)); - final File 
labeledCalibrationAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isLabeledCalibrationAndVariantType); - final File labeledCalibrationScoresFile = positiveNegativeScore(labeledCalibrationAnnotationsFile, outputPrefixTag, CALIBRATION_SCORES_HDF5_SUFFIX); - logger.info(String.format("Calibration scores written to %s.", labeledCalibrationScoresFile.getAbsolutePath())); - } else { - throw new UserException.BadInput(String.format("Attempted to train %s negative model, " + - "but no suitable unlabeled sites were found in the provided annotations.", variantTypeString)); - } + if (availableLabelsMode == AvailableLabelsMode.POSITIVE_UNLABELED && unlabeledAndVariantTypeAnnotationsFile != null) { + logger.info(String.format("Scoring %d %s unlabeled sites...", numUnlabeledAndVariantType, variantTypeString)); + final File unlabeledAndVariantTypeScoresFile = score(unlabeledAndVariantTypeAnnotationsFile, outputPrefixTag, UNLABELED_SCORES_HDF5_SUFFIX); + logger.info(String.format("%s unlabeled scores written to %s.", variantTypeString, unlabeledAndVariantTypeScoresFile.getAbsolutePath())); } } else { throw new UserException.BadInput(String.format("Attempted to train %s model, " + @@ -577,34 +467,41 @@ private static int numPassingFilter(final List isPassing) { return (int) isPassing.stream().filter(x -> x).count(); } + /** + * @param unlabeledAnnotationsFile if not {@code null}, use a positive-unlabeled approach + */ private void trainAndSerializeModel(final File trainingAnnotationsFile, + final File unlabeledAnnotationsFile, final String outputPrefixTag) { - readAndValidateTrainingAnnotations(trainingAnnotationsFile, outputPrefixTag); + readAndValidateAnnotations(trainingAnnotationsFile, outputPrefixTag); + if (unlabeledAnnotationsFile != null) { + readAndValidateAnnotations(unlabeledAnnotationsFile, outputPrefixTag); + } final VariantAnnotationsModel model; switch (modelBackend) { case JAVA_BGMM: model = new 
BGMMVariantAnnotationsModel(hyperparametersJSONFile); break; case PYTHON_IFOREST: - model = new PythonSklearnVariantAnnotationsModel(pythonScriptFile, hyperparametersJSONFile); + model = new PythonVariantAnnotationsModel(pythonScriptFile, hyperparametersJSONFile); break; case PYTHON_SCRIPT: - model = new PythonSklearnVariantAnnotationsModel(pythonScriptFile, hyperparametersJSONFile); + model = new PythonVariantAnnotationsModel(pythonScriptFile, hyperparametersJSONFile); break; default: throw new GATKException.ShouldNeverReachHereException("Unknown model mode."); } - model.trainAndSerialize(trainingAnnotationsFile, outputPrefix + outputPrefixTag); + model.trainAndSerialize(trainingAnnotationsFile, unlabeledAnnotationsFile, outputPrefix + outputPrefixTag); } /** * When training models on data that has been subset to a given variant type, * we FAIL if any annotation is completely missing and WARN if any annotation has zero variance. */ - private void readAndValidateTrainingAnnotations(final File trainingAnnotationsFile, - final String outputPrefixTag) { - final List annotationNames = LabeledVariantAnnotationsData.readAnnotationNames(trainingAnnotationsFile); - final double[][] annotations = LabeledVariantAnnotationsData.readAnnotations(trainingAnnotationsFile); + private void readAndValidateAnnotations(final File annotationsFile, + final String outputPrefixTag) { + final List annotationNames = LabeledVariantAnnotationsData.readAnnotationNames(annotationsFile); + final double[][] annotations = LabeledVariantAnnotationsData.readAnnotations(annotationsFile); // these checks are redundant, but we err on the side of robustness final int numAnnotationNames = annotationNames.size(); @@ -631,14 +528,8 @@ private void readAndValidateTrainingAnnotations(final File trainingAnnotationsFi if (!completelyMissingAnnotationNames.isEmpty()) { throw new UserException.BadInput( String.format("All values of the following annotations are missing in the training data for the %s model: %s. 
" + - "Consider repeating the extraction step with this annotation dropped. " + - "If this is a negative model and the amount of negative training data is small, " + - "perhaps also consider lowering the value of the %s argument so that more " + - "training data is considered, which may ultimately admit data with non-missing values for the annotation " + - "(although note that this will also have implications for the resulting model fit); " + - "alternatively, consider excluding the %s and %s arguments and running positive-only modeling.", - outputPrefix + outputPrefixTag, completelyMissingAnnotationNames, - CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME, UNLABELED_ANNOTATIONS_HDF5_LONG_NAME, CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME)); + "Consider repeating the extraction step without specifying these annotations. ", + outputPrefix + outputPrefixTag, completelyMissingAnnotationNames)); } } @@ -652,7 +543,7 @@ private File score(final File annotationsFile, break; case PYTHON_IFOREST: case PYTHON_SCRIPT: - scorer = new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX)); + scorer = new PythonVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + PythonVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX)); break; default: @@ -662,56 +553,4 @@ private File score(final File annotationsFile, scorer.score(annotationsFile, outputScoresFile); return outputScoresFile; } - - private File positiveNegativeScore(final File annotationsFile, - final String outputPrefixTag, - final String outputSuffix) { - final VariantAnnotationsScorer scorer; - switch (modelBackend) { - case JAVA_BGMM: - scorer = VariantAnnotationsScorer.combinePositiveAndNegativeScorer( - BGMMVariantAnnotationsScorer.deserialize(new File(outputPrefix + outputPrefixTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX)), - BGMMVariantAnnotationsScorer.deserialize(new 
File(outputPrefix + outputPrefixTag + NEGATIVE_TAG + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX))); - break; - case PYTHON_IFOREST: - case PYTHON_SCRIPT: - scorer = VariantAnnotationsScorer.combinePositiveAndNegativeScorer( - new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX)), - new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + NEGATIVE_TAG + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX))); - break; - default: - throw new GATKException.ShouldNeverReachHereException("Unknown model mode."); - } - final File outputScoresFile = new File(outputPrefix + outputPrefixTag + outputSuffix); - scorer.score(annotationsFile, outputScoresFile); - return outputScoresFile; - } - - private static double[][] concatenateLabeledAndUnlabeledNegativeTrainingData(final List annotationNames, - final double[][] annotations, - final double[][] unlabeledAnnotations, - final List isNegativeTrainingFromLabeledTrainingAndVariantType, - final List isNegativeTrainingFromUnlabeledVariantType) { - final double[][] negativeTrainingFromLabeledTrainingAndVariantTypeAnnotations; - if (numPassingFilter(isNegativeTrainingFromLabeledTrainingAndVariantType) > 0) { - final File negativeTrainingFromLabeledTrainingAndVariantTypeAnnotationsFile = - LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isNegativeTrainingFromLabeledTrainingAndVariantType); - negativeTrainingFromLabeledTrainingAndVariantTypeAnnotations = LabeledVariantAnnotationsData.readAnnotations(negativeTrainingFromLabeledTrainingAndVariantTypeAnnotationsFile); - } else { - negativeTrainingFromLabeledTrainingAndVariantTypeAnnotations = new double[0][]; - } - - final double[][] negativeTrainingFromUnlabeledVariantTypeAnnotations; - if (numPassingFilter(isNegativeTrainingFromUnlabeledVariantType) > 0) { - final File 
negativeTrainingFromUnlabeledVariantTypeAnnotationsFile = - LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, unlabeledAnnotations, isNegativeTrainingFromUnlabeledVariantType); - negativeTrainingFromUnlabeledVariantTypeAnnotations = LabeledVariantAnnotationsData.readAnnotations(negativeTrainingFromUnlabeledVariantTypeAnnotationsFile); - } else { - negativeTrainingFromUnlabeledVariantTypeAnnotations = new double[0][]; - } - - return Streams.concat( - Arrays.stream(negativeTrainingFromLabeledTrainingAndVariantTypeAnnotations), - Arrays.stream(negativeTrainingFromUnlabeledVariantTypeAnnotations)).toArray(double[][]::new); - } } \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java index 6b960d1042f..665777c1936 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java @@ -93,8 +93,8 @@ private static double decodeAnnotation(final VariantContext vc, value = vc.getAttributeAsDouble(annotationName, Double.NaN); } catch (final ClassCastException e) { throw new UserException(String.format("Could not extract annotation %s from variant context: %s. " + - "Ensure that %s is specified, if desired. 
Encountered exception: %s", - annotationName, vc, LabeledVariantAnnotationsWalker.USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME, e)); + "Encountered exception: %s", + annotationName, vc, e)); } } if (Double.isInfinite(value)) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/BGMMVariantAnnotationsModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/BGMMVariantAnnotationsModel.java index 14fedaa0a98..9f46d801869 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/BGMMVariantAnnotationsModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/BGMMVariantAnnotationsModel.java @@ -14,6 +14,7 @@ public BGMMVariantAnnotationsModel(final File hyperparametersJSONFile) { @Override public void trainAndSerialize(final File trainingAnnotationsFile, + final File unlabeledAnnotationsFile, final String outputPrefix) { throw new NotImplementedException("BGMM module will be implemented in separate PR."); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonSklearnVariantAnnotationsModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonVariantAnnotationsModel.java similarity index 81% rename from src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonSklearnVariantAnnotationsModel.java rename to src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonVariantAnnotationsModel.java index bbe082186a3..542f7b2cbcc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonSklearnVariantAnnotationsModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonVariantAnnotationsModel.java @@ -29,25 +29,26 @@ * * See 
src/main/resources/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py for an example implementation. */ -public final class PythonSklearnVariantAnnotationsModel implements VariantAnnotationsModel { +public final class PythonVariantAnnotationsModel implements VariantAnnotationsModel { private final File pythonScriptFile; private final File hyperparametersJSONFile; - public PythonSklearnVariantAnnotationsModel(final File pythonScriptFile, - final File hyperparametersJSONFile) { + public PythonVariantAnnotationsModel(final File pythonScriptFile, + final File hyperparametersJSONFile) { this.pythonScriptFile = pythonScriptFile; this.hyperparametersJSONFile = hyperparametersJSONFile; } @Override public void trainAndSerialize(final File trainingAnnotationsFile, + final File unlabeledAnnotationsFile, final String outputPrefix) { final PythonScriptExecutor executor = new PythonScriptExecutor(true); final ProcessOutput pythonProcessOutput = executor.executeScriptAndGetOutput( pythonScriptFile.getAbsolutePath(), null, - composePythonArguments(trainingAnnotationsFile, hyperparametersJSONFile, outputPrefix)); + composePythonArguments(trainingAnnotationsFile, unlabeledAnnotationsFile, hyperparametersJSONFile, outputPrefix)); if (pythonProcessOutput.getExitValue() != 0) { throw executor.getScriptException(executor.getExceptionMessageFromScriptError(pythonProcessOutput)); @@ -55,13 +56,18 @@ public void trainAndSerialize(final File trainingAnnotationsFile, } private static List composePythonArguments(final File annotationsFile, + final File unlabeledAnnotationsFile, final File hyperparametersJSONFile, final String outputPrefix) { try { - return new ArrayList<>(Arrays.asList( + final List arguments = new ArrayList<>(Arrays.asList( "--annotations_file=" + annotationsFile.getCanonicalPath(), "--hyperparameters_json_file=" + hyperparametersJSONFile.getCanonicalPath(), "--output_prefix=" + outputPrefix)); + if (unlabeledAnnotationsFile != null) { + 
arguments.add("--unlabeled_annotations_file=" + unlabeledAnnotationsFile.getCanonicalPath()); + } + return arguments; } catch (final IOException e) { throw new UserException.BadInput(String.format("Encountered exception resolving canonical file paths: %s", e)); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonSklearnVariantAnnotationsScorer.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonVariantAnnotationsScorer.java similarity index 92% rename from src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonSklearnVariantAnnotationsScorer.java rename to src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonVariantAnnotationsScorer.java index 51e4e9a4e9b..fb5e190305e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonSklearnVariantAnnotationsScorer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/PythonVariantAnnotationsScorer.java @@ -25,7 +25,7 @@ * * See src/main/resources/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py for an example implementation. 
*/ -public final class PythonSklearnVariantAnnotationsScorer implements VariantAnnotationsScorer, Serializable { +public final class PythonVariantAnnotationsScorer implements VariantAnnotationsScorer, Serializable { private static final long serialVersionUID = 1L; @@ -34,8 +34,8 @@ public final class PythonSklearnVariantAnnotationsScorer implements VariantAnnot private final File pythonScriptFile; private final File scorerPklFile; - public PythonSklearnVariantAnnotationsScorer(final File pythonScriptFile, - final File scorerPklFile) { + public PythonVariantAnnotationsScorer(final File pythonScriptFile, + final File scorerPklFile) { this.pythonScriptFile = pythonScriptFile; this.scorerPklFile = scorerPklFile; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsModel.java index ee2e899d0a8..cb3c5595e3f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsModel.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsModel.java @@ -38,9 +38,10 @@ public interface VariantAnnotationsModel { * 2) we assume the model does not care about the variant type. * TODO we could also pass additional labels to be used in training, * but all backends would have to likewise respect directory structure - * + * @param unlabeledAnnotationsFile Unlabeled annotations in HDF5 format, with the same structure and format as the above. May be {@code null}. 
* @param outputPrefix Path prefix for all output files */ void trainAndSerialize(final File trainingAnnotationsFile, + final File unlabeledAnnotationsFile, final String outputPrefix); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsScorer.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsScorer.java index c0550273c57..377bacecc5e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsScorer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/modeling/VariantAnnotationsScorer.java @@ -91,21 +91,4 @@ static void writeScores(final File outputFile, exception, outputFile.getAbsolutePath())); } } - - /** - * Yields a VQSR-style positive-negative scorer that returns the difference of the positive score and the negative score. - */ - static VariantAnnotationsScorer combinePositiveAndNegativeScorer(final VariantAnnotationsScorer positiveScorer, - final VariantAnnotationsScorer negativeScorer) { - return (inputAnnotationsFile, outputScoresFile) -> { - final File tempPositiveScoresFile = IOUtils.createTempFile("positive", "scores.hdf5"); - final File tempNegativeScoresFile = IOUtils.createTempFile("negative", "scores.hdf5"); - positiveScorer.score(inputAnnotationsFile, tempPositiveScoresFile); - final double[] positiveScores = VariantAnnotationsScorer.readScores(tempPositiveScoresFile); - negativeScorer.score(inputAnnotationsFile, tempNegativeScoresFile); - final double[] negativeScores = VariantAnnotationsScorer.readScores(tempNegativeScoresFile); - final double[] scores = IntStream.range(0, positiveScores.length).mapToDouble(i -> positiveScores[i] - negativeScores[i]).toArray(); - VariantAnnotationsScorer.writeScores(outputScoresFile, scores); - }; - } } diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotationsIntegrationTest.java index dd3f1202b7f..e3379e91928 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotationsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotationsIntegrationTest.java @@ -83,7 +83,6 @@ public void assertThatExpectedOutputUpdateToggleIsDisabled() { return argsBuilder; }; static final Function ADD_ALLELE_SPECIFIC_ANNOTATIONS = argsBuilder -> { - argsBuilder.addFlag(LabeledVariantAnnotationsWalker.USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME); ALLELE_SPECIFIC_ANNOTATIONS.forEach(a -> argsBuilder.add(StandardArgumentDefinitions.ANNOTATION_LONG_NAME, a)); return argsBuilder; }; @@ -230,16 +229,6 @@ public void testNoVariantsInInput() { Assert.assertTrue(new File(outputPrefix + ".vcf.idx").exists()); } - @Test(expectedExceptions = UserException.class) - public void testForgotToSpecifyUseAlleleSpecificAnnotationsFlag() { - final File outputDir = createTempDir("extract"); - final String outputPrefix = String.format("%s/test", outputDir); - final ArgumentsBuilder argsBuilder = ADD_SNP_MODE_AND_RESOURCES.apply(BASE_ARGUMENTS_BUILDER_SUPPLIER.get()); - ALLELE_SPECIFIC_ANNOTATIONS.forEach(a -> argsBuilder.add(StandardArgumentDefinitions.ANNOTATION_LONG_NAME, a)); - argsBuilder.addOutput(outputPrefix); - runCommandLine(argsBuilder); - } - @Test(expectedExceptions = UserException.class) public void testReservedSNPResourceLabel() { final File outputDir = createTempDir("extract"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotationsIntegrationTest.java index 
289821d0e54..1ff89982a9b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotationsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotationsIntegrationTest.java @@ -100,8 +100,8 @@ public void assertThatExpectedOutputUpdateToggleIsDisabled() { public Object[][] dataValidInputs() { final List>>> testConfigurations = Lists.cartesianProduct( Arrays.asList( - Pair.of("extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg", Function.identity()), - Pair.of("extract.AS.snpIndel.posUn.train.snpIndel.posNeg", Function.identity())), + Pair.of("extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly", Function.identity()), + Pair.of("extract.AS.snpIndel.posUn.train.snpIndel.posOnly", Function.identity())), Arrays.asList( Pair.of("IF.score", ab -> ADD_MODEL_BACKEND.apply(ab, VariantAnnotationsModelBackend.PYTHON_IFOREST)), // this and the following case give the same results, so they are given the same IF.score tag Pair.of("IF.score", ADD_ISOLATION_FOREST_PYTHON_SCRIPT @@ -113,7 +113,7 @@ public Object[][] dataValidInputs() { return testConfigurations.stream() .map(tagAndAddFunctionPairs -> new Object[]{ - tagAndAddFunctionPairs.stream().map(Pair::getLeft).collect(Collectors.joining(".")), // e.g., extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp + tagAndAddFunctionPairs.stream().map(Pair::getLeft).collect(Collectors.joining(".")), // e.g., extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp tagAndAddFunctionPairs.stream().map(Pair::getRight) // creates the corresponding ArgumentsBuilder .reduce(Function.identity(), Function::andThen) // by stringing together functions that add the .apply(BASE_ARGUMENTS_BUILDER_SUPPLIER.get())}) // appropriate arguments @@ -121,7 +121,7 @@ public Object[][] dataValidInputs() { } /** - * Checks expected outputs given a tag (e.g., "extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp") and arguments 
corresponding to the + * Checks expected outputs given a tag (e.g., "extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp") and arguments corresponding to the * Cartesian products generated in {@link #dataValidInputs}. * * We perform exact-match tests of any HDF5 files produced using h5diff, which is insensitive to timestamps within the file. @@ -136,7 +136,7 @@ public void testValidInputs(final String tag, argsBuilder.addOutput(outputPrefix); // add arguments for model prefix based on the - // train tag (the portion of the tag preceding ".score", e.g., extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF), + // train tag (the portion of the tag preceding ".score", e.g., extract.nonAS.snpIndel.posUn.train.snp.posOnly.IF), // which gives the basename for the model files final String trainTag = tag.split(".score")[0]; if (tag.contains("nonAS")) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModelIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModelIntegrationTest.java index 9082fe7a0ad..4f631c77bf2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModelIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModelIntegrationTest.java @@ -9,7 +9,7 @@ import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType; import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsScorer; -import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsScorer; +import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonVariantAnnotationsScorer; import 
org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModelBackend; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.io.Resource; @@ -69,10 +69,6 @@ public void assertThatExpectedOutputUpdateToggleIsDisabled() { argsBuilder.add(TrainVariantAnnotationsModel.UNLABELED_ANNOTATIONS_HDF5_LONG_NAME, unlabeledAnnotationsHDF5); return argsBuilder; }; - private static final BiFunction ADD_CALIBRATION_SENSITIVITY_THRESHOLD = (argsBuilder, calibrationSensitivityThreshold) -> { - argsBuilder.add(TrainVariantAnnotationsModel.CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME, calibrationSensitivityThreshold); - return argsBuilder; - }; private static final Function ADD_SNP_MODE = argsBuilder -> { argsBuilder.add(LabeledVariantAnnotationsWalker.MODE_LONG_NAME, VariantType.SNP); return argsBuilder; @@ -98,13 +94,12 @@ public void assertThatExpectedOutputUpdateToggleIsDisabled() { * Exact-match tests for (non-exhaustive) configurations given by the Cartesian product of the following options: * 1) non-allele-specific ("nonAS") vs. allele-specific ("AS") * 2) SNP-only ("snp") vs. SNP+INDEL ("snpIndel") (for both of these options, we use extracted annotations that contain both SNP and INDEL variants as input) - * 3) positive training with {extract-tag}.annot.hdf5 ("posOnly") vs. 
positive-negative training with {extract-tag}.annot.hdf5 and {extract-tag}.unlabeled.annot.hdf5 ("posNeg") - * 4) model backend - * 4a) Java Bayesian Gaussian Mixture Model (BGMM) backend TODO the BGMM has been reduced to a stub for this initial PR; subsequent PRs will cover the backend code and reconnect the stub - * 4b) default PYTHON_IFOREST with default hyperparameters ("IF") - * 4c) default PYTHON_IFOREST with non-default seed hyperparameter ("IFDifferentSeed") - * 4d) specified PYTHON_SCRIPT with non-default seed hyperparameter ("IFDifferentSeed"); we will simply use the same script as the default PYTHON_IFOREST backend, so this is just a test of the command-line interface - * We should expect 4c-d to give functionally identical results. + * 3) model backend + * 3a) Java Bayesian Gaussian Mixture Model (BGMM) backend TODO the BGMM has been reduced to a stub for this initial PR; subsequent PRs will cover the backend code and reconnect the stub + * 3b) default PYTHON_IFOREST with default hyperparameters ("IF") + * 3c) default PYTHON_IFOREST with non-default seed hyperparameter ("IFDifferentSeed") + * 3d) specified PYTHON_SCRIPT with non-default seed hyperparameter ("IFDifferentSeed"); we will simply use the same script as the default PYTHON_IFOREST backend, so this is just a test of the command-line interface + * We should expect 3c-d to give functionally identical results. 
*/ @DataProvider(name = "dataValidInputs") public Object[][] dataValidInputs() { @@ -115,9 +110,8 @@ public Object[][] dataValidInputs() { Arrays.asList( Pair.of("snp", ADD_SNP_MODE), Pair.of("snpIndel", ADD_SNP_MODE.andThen(ADD_INDEL_MODE))), - Arrays.asList( // we will consume the tag and add appropriate arguments for positive and positive-negative training below - Pair.of("posOnly", Function.identity()), - Pair.of("posNeg", Function.identity())), + Collections.singletonList( + Pair.of("posOnly", Function.identity())), Arrays.asList( Pair.of("IF", ab -> ADD_MODEL_BACKEND.apply(ab, VariantAnnotationsModelBackend.PYTHON_IFOREST)), Pair.of("IFDifferentSeed", ADD_ISOLATION_FOREST_HYPERPARAMETERS_JSON @@ -157,17 +151,7 @@ public void testValidInputs(final String tag, extractTag + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); final Function addPositiveAnnotations = ab -> ADD_ANNOTATIONS_HDF5.apply(ab, positiveAnnotationsHDF5); - if (tag.contains("posNeg")) { - final File unlabeledAnnotationsHDF5 = new File(INPUT_FROM_EXTRACT_EXPECTED_TEST_FILES_DIR, - extractTag + ExtractVariantAnnotations.UNLABELED_TAG + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); - final Function addUnlabeledAnnotations = ab -> - ADD_UNLABELED_ANNOTATIONS_HDF5.apply(ab, unlabeledAnnotationsHDF5); - final Function addCalibrationSensitivityThreshold = ab -> - ADD_CALIBRATION_SENSITIVITY_THRESHOLD.apply(ab, CALIBRATION_SENSITIVITY_THRESHOLD); - addPositiveAnnotations.andThen(addUnlabeledAnnotations).andThen(addCalibrationSensitivityThreshold).apply(argsBuilder); - } else { - addPositiveAnnotations.apply(argsBuilder); - } + addPositiveAnnotations.apply(argsBuilder); runCommandLine(argsBuilder); @@ -204,18 +188,10 @@ private static void assertExpectedOutputsForVariantType(final String tag, tagAndVariantType + TrainVariantAnnotationsModel.CALIBRATION_SCORES_HDF5_SUFFIX, outputPrefixAndVariantType + TrainVariantAnnotationsModel.CALIBRATION_SCORES_HDF5_SUFFIX)); - 
assertScorerExpectedOutputs(tagAndVariantType, outputPrefixAndVariantType, false); + assertScorerExpectedOutputs(tagAndVariantType, outputPrefixAndVariantType); - if (tag.contains("posNeg")) { - SystemCommandUtilsTest.runSystemCommand(String.format("h5diff %s/%s %s", - EXPECTED_TEST_FILES_DIR, - tagAndVariantType + TrainVariantAnnotationsModel.UNLABELED_SCORES_HDF5_SUFFIX, - outputPrefixAndVariantType + TrainVariantAnnotationsModel.UNLABELED_SCORES_HDF5_SUFFIX)); - assertScorerExpectedOutputs(tagAndVariantType, outputPrefixAndVariantType, true); - } else { + if (tag.contains("posOnly")) { Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.UNLABELED_SCORES_HDF5_SUFFIX).exists()); - Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.NEGATIVE_TAG + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX).exists()); - Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.NEGATIVE_TAG + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); } } @@ -226,9 +202,7 @@ private static void assertOutputsForVariantTypeDoNotExist(final String outputPre Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.CALIBRATION_SCORES_HDF5_SUFFIX).exists()); Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.UNLABELED_SCORES_HDF5_SUFFIX).exists()); Assert.assertFalse(new File(outputPrefixAndVariantType + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX).exists()); - Assert.assertFalse(new File(outputPrefixAndVariantType + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); - Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.NEGATIVE_TAG + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX).exists()); - Assert.assertFalse(new File(outputPrefixAndVariantType + TrainVariantAnnotationsModel.NEGATIVE_TAG + 
PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); + Assert.assertFalse(new File(outputPrefixAndVariantType + PythonVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); } /** @@ -237,15 +211,13 @@ private static void assertOutputsForVariantTypeDoNotExist(final String outputPre * coverage. */ private static void assertScorerExpectedOutputs(final String tagAndVariantType, - final String outputPrefixAndVariantType, - final boolean isNegative) { - final String positiveOrNegativeTag = isNegative ? ".negative" : ""; - final String scorerTag = outputPrefixAndVariantType + positiveOrNegativeTag; + final String outputPrefixAndVariantType) { + final String scorerTag = outputPrefixAndVariantType; if (tagAndVariantType.contains("BGMM")) { Assert.assertTrue(new File(scorerTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX).exists()); - Assert.assertFalse(new File(scorerTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); + Assert.assertFalse(new File(scorerTag + PythonVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); } else if (tagAndVariantType.contains("IF")) { - Assert.assertTrue(new File(scorerTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); + Assert.assertTrue(new File(scorerTag + PythonVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX).exists()); Assert.assertFalse(new File(scorerTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX).exists()); } else { Assert.fail("Unknown model-backend tag."); @@ -288,46 +260,6 @@ public void testSNPOnlyModelsFromSNPOnlyAndSNPPlusIndelAnnotationsAreIdentical() outputPrefixSNPPlusIndel + ".snp" + TrainVariantAnnotationsModel.CALIBRATION_SCORES_HDF5_SUFFIX)); } - @Test(expectedExceptions = IllegalArgumentException.class) - public void testUnlabeledAnnotationsSpecifiedWithoutCalibrationSensitivityThreshold() { - final File outputDir = createTempDir("train"); - final String outputPrefix = String.format("%s/test", outputDir); - 
final ArgumentsBuilder argsBuilder = BASE_ARGUMENTS_BUILDER_SUPPLIER.get(); - argsBuilder.addOutput(outputPrefix); - final String extractTag = "extract.nonAS.snpIndel.posUn"; - final File positiveAnnotationsHDF5 = new File(INPUT_FROM_EXTRACT_EXPECTED_TEST_FILES_DIR, - extractTag + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); - final Function addPositiveAnnotations = ab -> - ADD_ANNOTATIONS_HDF5.apply(ab, positiveAnnotationsHDF5); - final File unlabeledAnnotationsHDF5 = new File(INPUT_FROM_EXTRACT_EXPECTED_TEST_FILES_DIR, - extractTag + ExtractVariantAnnotations.UNLABELED_TAG + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); - final Function addUnlabeledAnnotations = ab -> - ADD_UNLABELED_ANNOTATIONS_HDF5.apply(ab, unlabeledAnnotationsHDF5); - addPositiveAnnotations - .andThen(addUnlabeledAnnotations) - .apply(argsBuilder); - runCommandLine(argsBuilder); - } - - @Test(expectedExceptions = IllegalArgumentException.class) - public void testCalibrationSensitivityThresholdSpecifiedWithoutUnlabeledAnnotations() { - final File outputDir = createTempDir("train"); - final String outputPrefix = String.format("%s/test", outputDir); - final ArgumentsBuilder argsBuilder = BASE_ARGUMENTS_BUILDER_SUPPLIER.get(); - argsBuilder.addOutput(outputPrefix); - final String extractTag = "extract.nonAS.snpIndel.posUn"; - final File positiveAnnotationsHDF5 = new File(INPUT_FROM_EXTRACT_EXPECTED_TEST_FILES_DIR, - extractTag + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); - final Function addPositiveAnnotations = ab -> - ADD_ANNOTATIONS_HDF5.apply(ab, positiveAnnotationsHDF5); - final Function addCalibrationSensitivityThreshold = ab -> - ADD_CALIBRATION_SENSITIVITY_THRESHOLD.apply(ab, CALIBRATION_SENSITIVITY_THRESHOLD); - addPositiveAnnotations - .andThen(addCalibrationSensitivityThreshold) - .apply(argsBuilder); - runCommandLine(argsBuilder); - } - @Test(expectedExceptions = IllegalArgumentException.class) // python environment is required to run tool 
public void testPositiveAndUnlabeledAnnotationNamesAreNotIdentical() { final File outputDir = createTempDir("train"); @@ -342,11 +274,8 @@ public void testPositiveAndUnlabeledAnnotationNamesAreNotIdentical() { "extract.AS.snpIndel.posUn" + ExtractVariantAnnotations.UNLABELED_TAG + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); // allele-specific final Function addUnlabeledAnnotations = ab -> ADD_UNLABELED_ANNOTATIONS_HDF5.apply(ab, unlabeledAnnotationsHDF5); - final Function addCalibrationSensitivityThreshold = ab -> - ADD_CALIBRATION_SENSITIVITY_THRESHOLD.apply(ab, CALIBRATION_SENSITIVITY_THRESHOLD); addPositiveAnnotations .andThen(addUnlabeledAnnotations) - .andThen(addCalibrationSensitivityThreshold) .apply(argsBuilder); runCommandLine(argsBuilder); } @@ -368,7 +297,8 @@ public void testPositiveAnnotationsOfSpecifiedVariantTypesNotPresent() { runCommandLine(argsBuilder); } - @Test(expectedExceptions = UserException.BadInput.class, groups = {"python"}) // python environment is required to run tool + // we will enable this once a positive-unlabeled backend is implemented + @Test(expectedExceptions = UserException.BadInput.class, groups = {"python"}, enabled = false) // python environment is required to run tool public void testUnlabeledAnnotationsOfSpecifiedVariantTypesNotPresent() { final File outputDir = createTempDir("train"); final String outputPrefix = String.format("%s/test", outputDir); @@ -382,12 +312,9 @@ public void testUnlabeledAnnotationsOfSpecifiedVariantTypesNotPresent() { "extract.nonAS.snp.posUn" + ExtractVariantAnnotations.UNLABELED_TAG + LabeledVariantAnnotationsWalker.ANNOTATIONS_HDF5_SUFFIX); // contains only SNPs, but SNP+INDEL is specified final Function addUnlabeledAnnotations = ab -> ADD_UNLABELED_ANNOTATIONS_HDF5.apply(ab, unlabeledAnnotationsHDF5); - final Function addCalibrationSensitivityThreshold = ab -> - ADD_CALIBRATION_SENSITIVITY_THRESHOLD.apply(ab, CALIBRATION_SENSITIVITY_THRESHOLD); ADD_SNP_MODE.andThen(ADD_INDEL_MODE) 
.andThen(addPositiveAnnotations) .andThen(addUnlabeledAnnotations) - .andThen(addCalibrationSensitivityThreshold) .apply(argsBuilder); runCommandLine(argsBuilder); } diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 deleted file mode 100644 index b75fc0d7d7f..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8d333bfb49c88d34c24250c6e31ae53b00bda9076c1106390ed10f5949de160 -size 736656 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 deleted file mode 100644 index fb9d835d194..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8801b51a901df9ef298359cba9e6764d76fa773c9c92c5e14727ef77f70d2beb -size 35136 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf deleted file mode 100644 index 1ddff59081d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6bfd450c342ce64428a70975bcdb9764e37154ec4463d008203fc63631391b14 -size 2227806 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx deleted file mode 100644 index d9579cdc497..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:29e2978e195940ad1c236267274871f91d51ac7b12e824aac03d56ffe2f946f1 -size 119222 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 deleted file mode 100644 index 5089c01571c..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:7777d249687060ee948557d6271916c597c87ee9f297e07d8f78a8451e405d86 -size 822288 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 deleted file mode 100644 index c87bb0f1a42..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb63a07115d848e929102cc23a77c735187126d2abfc3af7813a4fe4a77b612c -size 38440 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf deleted file mode 100644 index 6849cdd175a..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7dd1d93b67390c83f9569a641239eebcd01860673b41907ad05d19635fd60ab8 -size 2243854 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx deleted file mode 100644 index 
6af660bbdb2..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c967b19e7d946467c37f531d7cc6901e8e903ae59466000b58db08ff7edf611b -size 119227 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 new file mode 100644 index 00000000000..9f4e9f393ef --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a91bb19f59cb602656805cd7efb2bb302022686fe21aef03a51e7ea3c0aef0 +size 736656 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 new file mode 100644 index 00000000000..c9eee51c9c5 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f99391792c228f5f34bb83dc581d521e03e06f442140b8f3af3066dcb9592ce +size 35136 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf new file mode 100644 index 00000000000..d8dcbfe3b57 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca7f76315099e24dedee2de127d8c24f3d28f62d7569b617d56958eb0bfedd5 +size 2228705 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx new file mode 100644 index 00000000000..4d0b41d5596 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eefcac29c9ccf8760a6d3a9e55cb863c9e4ea804a1a78ba38db4c448cb41b20 +size 119223 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 new file mode 100644 index 00000000000..08faea04b52 --- /dev/null +++ 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4510202dd1e097698b2f862fafa8d076d2d8e11cf5908bf3883fe6f618d69fa7 +size 822288 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 new file mode 100644 index 00000000000..3d0cb431766 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bcf784e97396a9b14dc65912d043ad011dbe1c9c8d70e9eea1b45502eb7d7d +size 38440 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf new file mode 100644 index 00000000000..14ac057b300 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29c4e43204b5a655135c84fd7e4489d69ae542892f11f01d097a1fcd62a759ec +size 2244914 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx new file mode 100644 index 00000000000..c1a130a9f64 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.AS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb24eed9c59f80aae758dc2fb89130c782fbc07e14220da57d46936611d8c390 +size 119228 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 deleted file mode 100644 index fbf0990ee70..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.annot.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b25607c74d197a7116421014925ad4dcc10c326e561b193b1e2eb71152598369 -size 766368 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 deleted file mode 100644 index ee4850c9acb..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.scores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:afb44cc0b2f1c821d4b79f4c0145edc5fc662d06ce13239fd2077e1d1e045783 -size 34960 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf deleted file mode 100644 index e46bbcf2a15..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:993e2d40dea8558c001a7321a4bbe4804877b2de36c3a266416310446c915ccb -size 2226076 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx deleted file mode 100644 index 9be1548020d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snp.vcf.idx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a56007a28f971a86349be709cb2b5ce3821ef5f3ae19ff0f9dcd2841021a510 -size 119225 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 deleted file mode 100644 index 1378a5e61da..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.annot.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38fb5c443979d9468de740c26c1e3b2d8f27938c1ffb43ebf48ae1bef94196b3 -size 829672 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 deleted file mode 100644 index 58244d511a7..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.scores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3bb53ebfca7a737737a1d01ff541d414c3cef07d507b3e360d360079239d723a -size 37720 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf deleted file mode 100644 index 4af1921ce48..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05f85d264a457cdd81896bde03f51b2369343da5ade21b1c8df183a2b7e8f974 -size 2242450 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx deleted file mode 100644 index 34133ce42ad..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.score.snpIndel.vcf.idx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cba3840238dc7d6c7d85eeda892da51abdccf1e80c60b9030fa781da42d16b9f -size 119230 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 new file mode 100644 index 00000000000..711ae0e8e75 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.annot.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf123ed80e8c503ad4d2d1364be63f22d82379b32eb0c4f563f08cb4abbb2df +size 766368 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 new file mode 100644 index 00000000000..ca29220bbcc --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.scores.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8467b31a8916052b511adf002a877b36a0b5fb0e64d035022b337beabd437da +size 34960 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf new file mode 100644 index 00000000000..dd76708357f --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e40f44f8bbd173cb325a9e14aa3240ca5725296abbfc4fb005eb2bb14a2cdd +size 2227870 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx new file mode 100644 index 00000000000..e19112ffa42 --- /dev/null +++ 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snp.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdbec4dc5e2c9263da4b217e33fd7fbf4dac34f9f42821d3d8c69136b9131f62 +size 119226 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 new file mode 100644 index 00000000000..bf62479462f --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.annot.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6539c887b88be6d4b2ed0b03866d2d20054c0cbb24f777d27cc043bcbcc69055 +size 829672 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 new file mode 100644 index 00000000000..a65701f8e94 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.scores.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19295ce01f6d6a84b48c88e8a53c23d65036b34f04a1ac7f13760b803813aa4 +size 37720 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf new file mode 100644 index 00000000000..017e1ec2493 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ca4c5ab46abe321506c5b43db550257e55c441f0a52553aca3acb5fc78ab63 +size 2244522 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx new file mode 100644 index 00000000000..7fbb1578345 --- /dev/null +++ b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/score/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posOnly.IF.score.snpIndel.vcf.idx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95e34b18a698a7d4db138297b3f13c427d8b371d1c0e94f289f05ae240f438b +size 119231 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 deleted file mode 100644 index e6ffb5c84fa..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f64ff3525514a1b3da32c2ea87a22dc46b57382f09286400878f182f60e41f6e -size 4960 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl deleted file mode 100644 index d7516056325..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05dae24fc2ae3e88654a53d49d23cf0345a4a358af8cdb97881f4498df9c7d7e -size 354768 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl deleted file mode 100644 index dc18c6fa7e2..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c1fda6d8d6b4f200bfb7a2707ce544dd9a27ae483f1ca1d649e5dbddf330c69d -size 506091 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 deleted file mode 100644 index d30433b70d0..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:336e6546b3c9fddbd6134c3e9f6cd47b45a2c05aa52a65ffadeb0a6d041b00f8 -size 5984 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 deleted file mode 100644 index e5ffe1456ac..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2f758e56b80afa511ad8e743ff651b2807e6c741617a51e9f7815e52a55e8e7 -size 3184 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 deleted file mode 100644 index d2ae036241d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:2d665749610d4f9aa117dbbd098a4f9cf2fd139b679285fdf854c2e2656ca10c -size 4960 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl deleted file mode 100644 index d30a719af1d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff23ea334269b3cc502646a7d301d956876717e781502cf3ccca6a0b4a36268a -size 374750 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl deleted file mode 100644 index 348ebedf945..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be6d3e97dbe88caeead043c0264b7345589da11bf3e3842693bec9fccac9f802 -size 514532 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 deleted file mode 100644 index 9cbba14a079..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76b90080d91b94a79c4a19ce0b26c1f63b173fd32b5148aba88087dd6c3bd7ac -size 5984 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 deleted file mode 100644 index 36cc0a423c5..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94b85b2bc701c9e895ac089629e5da035eec4fd2bd482a4b0cc41e4a78ed8750 -size 3184 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 deleted file mode 100644 index 79c760c631f..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 +++ /dev/null 
@@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8891aa2ff9b4ec967bcf1322076005ab7c1a3e95af246c0489c337890b5b4475 -size 2664 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl deleted file mode 100644 index 5fdff35c06d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:934d96ba164a225333a5d5b736a2ecfe874833d3f356c2e003ee4053005055ed -size 127038 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl deleted file mode 100644 index 9f86fab9b13..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4a1937d6b3ea7cf030d154d5c9a6e7d2b14b3db38be2c934ccf06f48549eb3a8 -size 235812 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 deleted file mode 100644 index 6e60311572d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9bd8a9643b807bbc107c8edbcbf2b0d3112ab47c0ca90a8d37e6874bdb74f8cb -size 2880 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 deleted file mode 100644 index 4fca186b572..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:49c8737964fd8944aa7df315ca977256f97c51dfcf720a8fce729e466b90829b -size 2488 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 deleted file mode 100644 index 3f3047a05a4..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1 -oid sha256:2be28cb801165772e3cefc27eff812f87a392f53873da6379571ed6419ec4f33 -size 4960 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl deleted file mode 100644 index 7741e476134..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3461d5a5e306a17a133260557b1753acd467a1f6c344551af16eba1eae6bfc8e -size 354767 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl deleted file mode 100644 index 697a8dde554..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f5e2c57e426d79d87922e8dafbbf245380fc756e0f61b4dc935529527ab1e08e -size 506090 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 deleted file mode 
100644 index 5f20c187730..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8201352895fe4b1284886cd1b7a33516d25d7ba98a5f8e2f03c7437380cc823f -size 5984 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 deleted file mode 100644 index 6306b6a8478..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:45a1dfd68dc4617de6537ec92711d486c7033adaa51944fe6cbed824bee890ff -size 3184 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 deleted file mode 100644 index 59900e515e6..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbda044e552390c1766e74c0209c6328c708b8dc789c0d0f8da99ea30ace3fac -size 2664 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl deleted file mode 100644 index 9d8cdfe2ae8..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:318291edce230c68c1c8c8cd47326b64da3735f792ed501894c38123fcf5a738 -size 130238 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl deleted file mode 100644 index 6b3104e110c..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e80bc19607efaa536f9dee44ef5cca3ce2554a9d6c9b498694bd9de73f8da40d -size 240054 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 deleted file mode 100644 index 
9cd7f179d2a..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b769079f7729d6c691f64fb30a2a3225fb9e4c0dd0ab72a8847daa9ed69fd8d -size 2880 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 deleted file mode 100644 index 463e56dcfdf..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c73d66459b9ba13ccf739b6a09e68184ebaaa29f83451b51057ed9f6051beab2 -size 2488 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 deleted file mode 100644 index 60ffad8c4c3..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0aa00881b8362481f68fdfff6dc85ab708348b0f487c0d0da4f6d27a1a0ad81f -size 4960 diff 
--git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl deleted file mode 100644 index d30a719af1d..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff23ea334269b3cc502646a7d301d956876717e781502cf3ccca6a0b4a36268a -size 374750 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl deleted file mode 100644 index 348ebedf945..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be6d3e97dbe88caeead043c0264b7345589da11bf3e3842693bec9fccac9f802 -size 514532 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 deleted file mode 100644 index 
02d8c94608e..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4eac1941fe274f44029780339614d4197211a10a6cb4bdb8191d82bf7b3ca933 -size 5984 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 deleted file mode 100644 index 6de63dfaf4b..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.AS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d692b4f62830aa24b882be7f7dcf4c63e9266ed2ff6dfe516715a6ed47d523ec -size 3184 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 deleted file mode 100644 index 1af4242bbec..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7687aa701fe0fdb52d86a27ccc12a6cc8bb2b57906ea2335146396bfae47ea1b -size 4960 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl deleted file mode 100644 index f99de98d4fb..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d7d481a06e9c5d27b8a322712818731b508ecb09309cacff1c5f24df1077d975 -size 368366 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl deleted file mode 100644 index 21c88876f37..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f4f417b26d9478fb36ac5372f1634140a4114f09d207a434f8d582a5936e9b9 -size 556675 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 deleted file mode 100644 index 17e3ab4ef97..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:048b4690d9f1fdce1e3dbabf995e8e306965ea8e00c21a92129d67e0e3b8fb5c -size 5992 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 deleted file mode 100644 index 323cf93db29..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IF.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e4033c901c80e97d4668d38e40b6677ec68d5d6f960d498c90d6311c971d6ae -size 3168 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 deleted file mode 100644 index 9638837ac77..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff6eb7db94e1e78d2aa4e2bad08eb92e0df1ae88ef29e27f7f81362ac56e4faa -size 4960 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl deleted file mode 100644 index 02016de49a7..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:373207a645b1caa64fe15fd4fec77556c87c11f0f304b8e4920094a59cae89eb -size 359135 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl deleted file mode 100644 index ac1fe518303..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0510d85a2680139fbfb2d3c8f2d5fed8977af834bf82d09d9090a06fa8d454f4 -size 525312 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 deleted file mode 100644 index 6972268a95c..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:443ed54e4403ba517c2370e18f95200f9b9dc4648c914424344626bacab6c4f2 -size 5992 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 deleted file mode 100644 index 90420cf2917..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snp.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3cd7216a8a13adb6640b1e91d939600bc664202964b0cc74d76d8070c3422b75 -size 3168 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 deleted file mode 100644 index 4ae3f77a6dd..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:298e36701a70226f3720cd40cd6ca8f37404a807cbd193c23d764141f5594f36 -size 2664 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl deleted file mode 100644 index 8c7e18e918e..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ff46e688731825bb03a6c9504ed7847c998c129a1e13c507a14f7adb56d733ad -size 108247 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl deleted file mode 100644 index 2aba7f5c93a..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe5ca4414318a540488afbdea99293e0f67b02725839c13faa5b3ff39b959e7e -size 259163 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 deleted file mode 100644 index 004a5bdb157..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7243a9e90836489a979905a386811a688bd07968c115063351b77bf91c72efc3 -size 2880 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 deleted file mode 100644 index 9cc88998aef..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.indel.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:00393e27ad2c98fd4c36a003d7dcbc175d31d8346ace63e52b458df76a8d7457 -size 2496 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 deleted file mode 100644 index a52edeb5b40..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b3040e1926e7b59c623e2b930e16556d26dd91c4d586120f7db156f3f2f14fe -size 4960 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl deleted file mode 100644 index df2c423b7d9..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4cbb64dbf96f8cbc10908aed4c8ec2e3fcf01af7a3512c90e48aa743af3bf28 -size 368366 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl deleted file mode 100644 index 9192d59204c..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86e04d419ae8a211acbead86c467cd8c3578c3312f31fdc5600eaefd040ffc32 -size 556675 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 deleted file mode 100644 index 483bfe123ec..00000000000 --- 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8007c65df2a62f1b12cb2bd9d0818ea106edf2fc18610318e1e09def0f1bd77a -size 5992 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 deleted file mode 100644 index b7ab0c0c576..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IF.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6860775947699fe2df688d0c17321de501ef18c12c87dbb430221ba1c27e56b -size 3168 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 deleted file mode 100644 index 624a515e7f8..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:223ee064aed2de5a2e0c7a773b08d730666fa28b49181984dc8f16d07233b2ef -size 2664 diff --git 
a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl deleted file mode 100644 index 54aa186cf73..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0b377bad272e94bf11bca14ed7a7dd3c67296f347509d272b9538b695579199 -size 132823 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl deleted file mode 100644 index 1142e1f4599..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:426cec56e16eec10f47c824797c035772d1ebf3cf4f73972e8a541deca622cd3 -size 248813 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 deleted file 
mode 100644 index c91fcf083b5..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f80beea7f769595a56aa8af6d90335626fa5c93fe5f61761e998f59e774f3104 -size 2880 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 deleted file mode 100644 index 64fbf36622e..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.indel.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ba9bf669a916ee10750cf8cf78968af0372a5d272640be9fe3fba97fc4e6059 -size 2496 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 deleted file mode 100644 index 6a831bf1e51..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.calibrationScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:dce6eb6a9175112940d97573e3e99818a041f4c8311bd0d790b11bad7153cc90 -size 4960 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl deleted file mode 100644 index 02016de49a7..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.negative.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:373207a645b1caa64fe15fd4fec77556c87c11f0f304b8e4920094a59cae89eb -size 359135 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl deleted file mode 100644 index ac1fe518303..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.scorer.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0510d85a2680139fbfb2d3c8f2d5fed8977af834bf82d09d9090a06fa8d454f4 -size 525312 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 
b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 deleted file mode 100644 index 45b09bcdafc..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.trainingScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9885e1ace48f972b88d808fe9dcf31aae828b0f327909c3922305d48659c9516 -size 5992 diff --git a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 b/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 deleted file mode 100644 index 3baaddc00b8..00000000000 --- a/src/test/resources/large/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/train/expected/extract.nonAS.snpIndel.posUn.train.snpIndel.posNeg.IFDifferentSeed.snp.unlabeledScores.hdf5 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60dfec8ee105371c8b69b4c6790f6920c1f94adfad1bca58fa353a016224c3c4 -size 3168