diff --git a/build.gradle b/build.gradle
index 3af1061cad7..e2754b5a489 100644
--- a/build.gradle
+++ b/build.gradle
@@ -285,6 +285,7 @@ dependencies {
implementation 'org.apache.commons:commons-lang3:3.5'
implementation 'org.apache.commons:commons-math3:3.5'
+ implementation 'org.hipparchus:hipparchus-stat:2.0'
implementation 'org.apache.commons:commons-collections4:4.1'
implementation 'org.apache.commons:commons-vfs2:2.0'
implementation 'org.apache.commons:commons-configuration2:2.4'
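Note: the new hipparchus-stat dependency supplies statistics primitives for the pure-Java modeling code introduced in this change; the exact call sites are not shown in this hunk. As a minimal sketch of the library surface, assuming only the standard org.hipparchus.stat.descriptive API (the surrounding demo class is hypothetical):

```java
import org.hipparchus.stat.descriptive.DescriptiveStatistics;

// Hypothetical smoke test exercising the dependency added above.
public final class HipparchusStatDemo {
    public static void main(final String[] args) {
        final DescriptiveStatistics stats = new DescriptiveStatistics();
        for (final double x : new double[]{-2.1, 0.0, 1.5, 3.2}) {
            stats.addValue(x); // accumulate values one at a time
        }
        // Query summary statistics over the accumulated values.
        System.out.printf("mean=%.3f sd=%.3f median=%.3f%n",
                stats.getMean(), stats.getStandardDeviation(), stats.getPercentile(50.0));
    }
}
```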
diff --git a/scripts/gatkcondaenv.yml.template b/scripts/gatkcondaenv.yml.template
index 10467af3cdf..dbe29ed5a28 100644
--- a/scripts/gatkcondaenv.yml.template
+++ b/scripts/gatkcondaenv.yml.template
@@ -42,6 +42,7 @@ dependencies:
- conda-forge::matplotlib=3.2.1
- conda-forge::pandas=1.0.3
- conda-forge::typing_extensions=4.1.1 # see https://github.com/broadinstitute/gatk/issues/7800 and linked PRs
+- conda-forge::dill=0.3.4 # used for pickling lambdas in TrainVariantAnnotationsModel
# core R dependencies; these should only be used for plotting and do not take precedence over core python dependencies!
- r-base=3.6.2
diff --git a/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl b/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl
index 89e05abcb89..87b520aca0d 100644
--- a/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl
+++ b/scripts/vcf_site_level_filtering_wdl/JointVcfFiltering.wdl
@@ -192,8 +192,6 @@ task TrainVariantAnnotationModel {
command <<<
set -e
- conda install -y --name gatk dill
-
export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}
mode=$(echo "~{mode}" | awk '{print toupper($0)}')
@@ -245,8 +243,6 @@ task ScoreVariantAnnotations {
ln -s ~{sep=" . && ln -s " model_files} .
- conda install -y --name gatk dill
-
export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override}
gatk --java-options "-Xmx~{command_mem}m" \
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java
index 4706a1e1d7d..dbe972fa541 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java
@@ -68,7 +68,7 @@
* to TSV format. Using HDF5 files with {@link CreateReadCountPanelOfNormals}
* can decrease runtime, by reducing time spent on IO, so this is the default output format.
* The HDF5 format contains information in the paths defined in {@link HDF5SimpleCountCollection}. HDF5 files may be viewed using
- * hdfview or loaded in python using
+ * hdfview or loaded in Python using
* PyTables or h5py.
* The TSV format has a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in
* {@link SimpleCountCollection.SimpleCountTableColumn}, and the corresponding entry rows.
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java
index dbcc0cc1c4d..d4d6b8db9c0 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java
@@ -85,7 +85,7 @@
* Panel-of-normals file.
* This is an HDF5 file containing the panel data in the paths defined in {@link HDF5SVDReadCountPanelOfNormals}.
* HDF5 files may be viewed using hdfview
- * or loaded in python using PyTables or h5py.
+ * or loaded in Python using PyTables or h5py.
*
*
*
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/HDF5Utils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/HDF5Utils.java
index 8590e3476f2..870ce37b7dc 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/HDF5Utils.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/HDF5Utils.java
@@ -135,7 +135,7 @@ public static double[][] readChunkedDoubleMatrix(final HDF5File file,
* Given a large matrix, chunks the matrix into equally sized subsets of rows
* (plus a subset containing the remainder, if necessary) and writes these submatrices to indexed sub-paths
* to avoid a hard limit in Java HDF5 on the number of elements in a matrix given by
- * {@code MAX_NUM_VALUES_PER_HDF5_MATRIX}. The number of chunks is determined by {@code maxChunkSize},
+ * {@code MAX_NUMBER_OF_VALUES_PER_HDF5_MATRIX}. The number of chunks is determined by {@code maxChunkSize},
* which should be set appropriately for the desired number of columns.
*
* @param maxChunkSize The maximum number of values in each chunk. Decreasing this number will reduce
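To make the chunking arithmetic concrete: the number of rows per chunk follows from dividing maxChunkSize by the column count, and the chunk count is the ceiling of the row count over that. A self-contained sketch under those assumptions (the helper below is illustrative, not part of HDF5Utils):

```java
// Illustrative chunk-count calculation for a (numRows x numColumns) matrix,
// where each chunk may hold at most maxChunkSize values.
static int numChunks(final int numRows, final int numColumns, final int maxChunkSize) {
    final int numRowsPerChunk = maxChunkSize / numColumns;    // rows that fit in one chunk
    return (numRows + numRowsPerChunk - 1) / numRowsPerChunk; // ceiling division; leftover rows form a final chunk
}
// e.g., numChunks(1_000_000, 100, 16_777_216) == 6:
// five full chunks of 167,772 rows each, plus a remainder chunk of 161,140 rows.
```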
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/VariantRecalibrator.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/VariantRecalibrator.java
index f7148d043f1..2da7997a51c 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/VariantRecalibrator.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/VariantRecalibrator.java
@@ -10,6 +10,7 @@
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hdf5.HDF5File;
import org.broadinstitute.hellbender.cmdline.*;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
@@ -23,6 +24,8 @@
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.MultiVariantWalker;
+import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.utils.io.IOUtils;
import picard.cmdline.programgroups.VariantFilteringProgramGroup;
import org.broadinstitute.hellbender.utils.R.RScriptExecutor;
import org.broadinstitute.hellbender.utils.SimpleInterval;
@@ -41,6 +44,7 @@
import java.io.*;
import java.util.*;
+import java.util.stream.IntStream;
/**
* Build a recalibration model to score variant quality for filtering purposes
@@ -639,6 +643,10 @@ public Object onTraversalSuccess() {
for (int i = 1; i <= max_attempts; i++) {
try {
dataManager.setData(reduceSum);
+
+ final String rawAnnotationsOutput = output.toString().endsWith(".recal") ? output.toString().substring(0, output.toString().length() - ".recal".length()) : output.toString();
+ writeAnnotationsHDF5(new File(rawAnnotationsOutput + ".annot.raw.hdf5"));
+
dataManager.normalizeData(inputModel == null, annotationOrder); // Each data point is now (x - mean) / standard deviation
final GaussianMixtureModel goodModel;
@@ -678,6 +686,9 @@ public Object onTraversalSuccess() {
}
}
+ final String annotationsOutput = output.toString().endsWith(".recal") ? output.toString().substring(0, output.toString().length() - ".recal".length()) : output.toString();
+ writeAnnotationsHDF5(new File(annotationsOutput + ".annot.hdf5"));
+
dataManager.dropAggregateData(); // Don't need the aggregate data anymore so let's free up the memory
engine.evaluateData(dataManager.getData(), badModel, true);
@@ -686,6 +697,10 @@ public Object onTraversalSuccess() {
saveModelReport(report, outputModel);
}
+ final String modelOutput = output.toString().endsWith(".recal") ? output.toString().substring(0, output.toString().length() - ".recal".length()) : output.toString();
+ writeModelHDF5(new File(modelOutput + ".positive.hdf5"), goodModel);
+ writeModelHDF5(new File(modelOutput + ".negative.hdf5"), badModel);
+
engine.calculateWorstPerformingAnnotation(dataManager.getData(), goodModel, badModel);
@@ -1176,4 +1191,43 @@ private void createArrangeFunction( final PrintStream stream ) {
stream.println("}");
stream.println("}");
}
+
+ public void writeAnnotationsHDF5(final File file) {
+ try (final HDF5File hdf5File = new HDF5File(file, HDF5File.OpenMode.CREATE)) { // TODO allow appending
+ IOUtils.canReadFile(hdf5File.getFile());
+
+ hdf5File.makeStringArray("/data/annotation_names", dataManager.getAnnotationKeys().stream().toArray(String[]::new));
+ hdf5File.makeDoubleMatrix("/data/annotations", dataManager.getData().stream().map(vd -> vd.annotations).toArray(double[][]::new));
+ hdf5File.makeDoubleArray("/data/is_training", dataManager.getData().stream().mapToDouble(vd -> vd.atTrainingSite ? 1 : 0).toArray());
+ hdf5File.makeDoubleArray("/data/is_truth", dataManager.getData().stream().mapToDouble(vd -> vd.atTruthSite ? 1 : 0).toArray());
+ hdf5File.makeDoubleArray("/data/is_anti_training", dataManager.getData().stream().mapToDouble(vd -> vd.atAntiTrainingSite ? 1 : 0).toArray());
+
+ logger.info(String.format("Annotations written to %s.", file.getAbsolutePath()));
+ } catch (final RuntimeException exception) {
+ throw new GATKException(String.format("Exception encountered during writing of annotations (%s). Output file at %s may be in a bad state.",
+ exception, file.getAbsolutePath()));
+ }
+ }
+
+ public void writeModelHDF5(final File file,
+ final GaussianMixtureModel model) {
+ try (final HDF5File hdf5File = new HDF5File(file, HDF5File.OpenMode.CREATE)) { // TODO allow appending
+ IOUtils.canReadFile(hdf5File.getFile());
+
+ final int nComponents = model.getModelGaussians().size();
+ final int nFeatures = model.getNumAnnotations();
+ hdf5File.makeDouble("/vqsr/number_of_components", nComponents);
+ hdf5File.makeDouble("/vqsr/number_of_features", nComponents);
+ hdf5File.makeDoubleArray("/vqsr/weights", model.getModelGaussians().stream().mapToDouble(g -> Math.pow(10., (g.pMixtureLog10))).toArray());
+ IntStream.range(0, nComponents).forEach(
+ k -> hdf5File.makeDoubleArray("/vqsr/means/" + k, model.getModelGaussians().get(k).mu));
+ IntStream.range(0, nComponents).forEach(
+ k -> hdf5File.makeDoubleMatrix("vqsr/covariances/" + k, model.getModelGaussians().get(k).sigma.getArray()));
+
+ logger.info(String.format("VQSR model written to %s.", file.getAbsolutePath()));
+ } catch (final RuntimeException exception) {
+ throw new GATKException(String.format("Exception encountered during writing of VQSR model (%s). Output file at %s may be in a bad state.",
+ exception, file.getAbsolutePath()));
+ }
+ }
}
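For reference, the model files written by writeModelHDF5 above can be read back with the same org.broadinstitute.hdf5.HDF5File class. A minimal sketch, assuming its read methods (readDouble, readDoubleArray, readDoubleMatrix) mirror the make* methods used above and that the no-mode constructor opens read-only; the demo class itself is hypothetical:

```java
import java.io.File;
import org.broadinstitute.hdf5.HDF5File;

// Hypothetical reader for the /vqsr/* paths written by writeModelHDF5.
public final class PrintVqsrModelHDF5 {
    public static void main(final String[] args) {
        try (final HDF5File hdf5File = new HDF5File(new File(args[0]))) {
            final int nComponents = (int) hdf5File.readDouble("/vqsr/number_of_components");
            final double[] weights = hdf5File.readDoubleArray("/vqsr/weights");
            for (int k = 0; k < nComponents; k++) {
                final double[] mean = hdf5File.readDoubleArray("/vqsr/means/" + k);
                final double[][] covariance = hdf5File.readDoubleMatrix("/vqsr/covariances/" + k);
                System.out.printf("component %d: weight=%.6f, %d features, %dx%d covariance%n",
                        k, weights[k], mean.length, covariance.length, covariance[0].length);
            }
        }
    }
}
```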
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java
new file mode 100644
index 00000000000..48f73007767
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ExtractVariantAnnotations.java
@@ -0,0 +1,361 @@
+package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable;
+
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import org.apache.commons.lang3.tuple.Triple;
+import org.apache.commons.math3.random.RandomGenerator;
+import org.apache.commons.math3.random.RandomGeneratorFactory;
+import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.BetaFeature;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
+import org.broadinstitute.hellbender.engine.FeatureContext;
+import org.broadinstitute.hellbender.engine.ReadsContext;
+import org.broadinstitute.hellbender.engine.ReferenceContext;
+import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.VariantRecalibrator;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType;
+import picard.cmdline.programgroups.VariantFilteringProgramGroup;
+
+import java.io.File;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+/**
+ * Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files.
+ *
+ *
+ * This tool is intended to be used as the first step in a variant-filtering workflow that supersedes the
+ * {@link VariantRecalibrator} workflow. This tool extracts site-level annotations, labels, and other relevant metadata
+ * from variant sites (or alleles, in allele-specific mode) that are or are not present in specified labeled
+ * resource VCFs (e.g., training or calibration VCFs). Sites present in a resource are considered labeled; each site
+ * can have multiple labels. Sites absent from all resources are considered unlabeled and can be randomly downsampled using
+ * reservoir sampling; extraction of these is optional. The outputs of the tool are HDF5 files containing the
+ * extracted data for labeled and (optional) unlabeled variant sets, as well as a sites-only indexed VCF containing
+ * the labeled variants.
+ *
+ *
+ *
+ * The extracted sets can be provided as input to the {@link TrainVariantAnnotationsModel} tool
+ * to produce an annotation-based model for scoring variant calls. This model can in turn be provided
+ * along with a VCF file to the {@link ScoreVariantAnnotations} tool, which assigns a score to each call
+ * (with a lower score indicating that a call is more likely to be an artifact and should perhaps be filtered).
+ * Each score can also be converted to a corresponding sensitivity to a calibration set, if the latter is available.
+ *
+ *
+ *
+ * Note that annotations and metadata are collected in memory during traversal until they are written to HDF5 files
+ * upon completion of the traversal. Memory requirements thus roughly scale linearly with both the number of sites
+ * extracted and the number of annotations.
+ *
+ *
+ *
+ * Note that HDF5 files may be viewed using hdfview
+ * or loaded in Python using PyTables or h5py.
+ *
+ *
+ *
+ * <h3>Inputs</h3>
+ *
+ *
+ *
+ * Input VCF file. Site-level annotations will be extracted from the contained variants (or alleles,
+ * if the {@value USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME} argument is specified).
+ *
+ *
+ * Annotations to extract.
+ *
+ *
+ * Variant types (i.e., SNP and/or INDEL) to extract. Logic for determining variant type was retained from
+ * {@link VariantRecalibrator}; see {@link VariantType}. Extracting SNPs and INDELs separately in two runs of
+ * this tool can be useful if one wishes to extract different sets of annotations for each variant type,
+ * for example.
+ *
+ *
+ * (Optional) Resource VCF file(s). Each resource should be tagged with a label, which will be assigned to
+ * extracted sites that are present in the resource. In typical use, the {@value LabeledVariantAnnotationsData#TRAINING_LABEL}
+ * and {@value LabeledVariantAnnotationsData#CALIBRATION_LABEL} labels should be used to tag at least one resource
+ * apiece. The resulting sets of sites will be used for model training and conversion of scores to
+ * calibration-set sensitivity, respectively; the trustworthiness of the respective resources should be
+ * taken into account accordingly. The {@value LabeledVariantAnnotationsData#SNP_LABEL} label is
+ * reserved by the tool, as it is used to label sites determined to be SNPs, and thus it cannot be used to tag
+ * provided resources.
+ *
+ *
+ * (Optional) Maximum number of unlabeled variants (or alleles) to randomly sample with reservoir downsampling.
+ * If nonzero, annotations will also be extracted from unlabeled sites (i.e., those that are not present
+ * in the labeled resources).
+ *
+ *
+ * Output prefix.
+ * This is used as the basename for output files.
+ *
+ *
+ *
+ *
+ * <h3>Outputs</h3>
+ *
+ *
+ *
+ * (Optional) Labeled-annotations HDF5 file (.annot.hdf5). Annotation data and metadata for those sites that
+ * are present in labeled resources are stored in a chunked HDF5 directory structure.
+ *
+ *
+ * In this structure, each chunk is a double matrix, with dimensions given by (number of sites in the chunk) x (number of annotations).
+ * See the methods {@link HDF5Utils#writeChunkedDoubleMatrix} and {@link HDF5Utils#writeIntervals} for additional details.
+ * If {@value USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME} is specified, each record corresponds to an individual allele;
+ * otherwise, each record corresponds to a variant site, which may contain multiple alleles.
+ * Storage of alleles can be omitted using the {@value OMIT_ALLELES_IN_HDF5_LONG_NAME} argument, which will reduce
+ * the size of the file. This file will only be produced if resources are provided and the number of extracted
+ * labeled sites is nonzero.
+ *
+ *
+ *
+ *
+ * Labeled sites-only VCF file and index. The VCF will not be gzipped if the {@value DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME}
+ * argument is set to true. The VCF can be provided as a resource in subsequent runs of
+ * {@link ScoreVariantAnnotations} and used to indicate labeled sites that were extracted.
+ * This can be useful if the {@value StandardArgumentDefinitions#INTERVALS_LONG_NAME} argument was used to
+ * subset sites in training or calibration resources for extraction; this may occur when setting up
+ * training/validation/test splits, for example. Note that records for the random sample of unlabeled sites are
+ * currently not included in the VCF.
+ *
+ *
+ * (Optional) Unlabeled-annotations HDF5 file. This will have the same directory structure as in the
+ * labeled-annotations HDF5 file. However, note that records are currently written in the order they
+ * appear in the downsampling reservoir after random sampling, and hence, are not in genomic order.
+ * This file will only be produced if a nonzero value of the {@value MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME}
+ * argument is provided.
+ *
+ *
+ *
+ *
+ * <h3>Usage examples</h3>
+ *
+ *
+ * Extract annotations from training/calibration SNP/INDEL sites, producing the outputs
+ * 1) {@code extract.annot.hdf5}, 2) {@code extract.vcf.gz}, and 3) {@code extract.vcf.gz.tbi}.
+ * The HDF5 file can then be provided to {@link TrainVariantAnnotationsModel}
+ * to train a model using a positive-only approach.
+ *
+ *
+ * Extract annotations from both training/calibration SNP/INDEL sites and a random sample of
+ * 1000000 unlabeled (i.e., non-training/calibration) sites, producing the outputs
+ * 1) {@code extract.annot.hdf5}, 2) {@code extract.unlabeled.annot.hdf5}, 3) {@code extract.vcf.gz},
+ * and 4) {@code extract.vcf.gz.tbi}. The HDF5 files can then be provided to {@link TrainVariantAnnotationsModel}
+ * to train a model using a positive-negative approach (similar to that used in {@link VariantRecalibrator}).
+ *
+ *
+ * In the (atypical) event that resource VCFs are unavailable, one can still extract annotations from a random sample of
+ * unlabeled sites, producing the outputs 1) {@code extract.unlabeled.annot.hdf5},
+ * 2) {@code extract.vcf.gz} (which will contain no records), and 3) {@code extract.vcf.gz.tbi}.
+ * This random sample cannot be used by {@link TrainVariantAnnotationsModel}, but may still be useful for
+ * exploratory analyses.
+ *
+ *
+ *
+ *
+ * DEVELOPER NOTE: See documentation in {@link LabeledVariantAnnotationsWalker}.
+ *
+ * @author Samuel Lee <slee@broadinstitute.org>
+ */
+@CommandLineProgramProperties(
+ summary = "Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files.",
+ oneLineSummary = "Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files",
+ programGroup = VariantFilteringProgramGroup.class
+)
+@DocumentedFeature
+@BetaFeature
+public final class ExtractVariantAnnotations extends LabeledVariantAnnotationsWalker {
+
+ public static final String MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME = "maximum-number-of-unlabeled-variants";
+ public static final String RESERVOIR_SAMPLING_RANDOM_SEED_LONG_NAME = "reservoir-sampling-random-seed";
+
+ public static final String UNLABELED_TAG = ".unlabeled";
+
+ @Argument(
+ fullName = MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME,
+ doc = "Maximum number of unlabeled variants to extract. " +
+ "If greater than zero, reservoir sampling will be used to randomly sample this number " +
+ "of sites from input sites that are not present in the specified resources.",
+ minValue = 0)
+ private int maximumNumberOfUnlabeledVariants = 0;
+
+ @Argument(
+ fullName = RESERVOIR_SAMPLING_RANDOM_SEED_LONG_NAME,
+ doc = "Random seed to use for reservoir sampling of unlabeled variants.")
+ private int reservoirSamplingRandomSeed = 0;
+
+ private RandomGenerator rng;
+ private LabeledVariantAnnotationsData unlabeledDataReservoir; // will not be sorted in genomic order
+ private int unlabeledIndex = 0;
+
+ @Override
+ public void afterOnTraversalStart() {
+ if (!resourceLabels.contains(LabeledVariantAnnotationsData.TRAINING_LABEL)) {
+ logger.warn("No training set found! If you are using the downstream TrainVariantAnnotationsModel and ScoreVariantAnnotations tools, " +
+ "provide sets of known polymorphic loci marked with the training=true feature input tag. " +
+ "For example, --resource:hapmap,training=true hapmap.vcf");
+ }
+ if (!resourceLabels.contains(LabeledVariantAnnotationsData.CALIBRATION_LABEL)) {
+ logger.warn("No calibration set found! If you are using the downstream TrainVariantAnnotationsModel and ScoreVariantAnnotations tools " +
+ "and wish to convert scores to sensitivity to a calibration set of variants, " +
+ "provide sets of known polymorphic loci marked with the calibration=true feature input tag. " +
+ "For example, --resource:hapmap,calibration=true hapmap.vcf");
+ }
+
+ rng = RandomGeneratorFactory.createRandomGenerator(new Random(reservoirSamplingRandomSeed));
+ unlabeledDataReservoir = maximumNumberOfUnlabeledVariants == 0
+ ? null
+ : new LabeledVariantAnnotationsData(annotationNames, resourceLabels, useASAnnotations, maximumNumberOfUnlabeledVariants);
+ }
+
+ @Override
+ protected void nthPassApply(final VariantContext variant,
+ final ReadsContext readsContext,
+ final ReferenceContext referenceContext,
+ final FeatureContext featureContext,
+ final int n) {
+ if (n == 0) {
+ final List<Triple<List<Allele>, VariantType, TreeSet<String>>> metadata = extractVariantMetadata(
+ variant, featureContext, unlabeledDataReservoir != null);
+ final boolean isVariantExtracted = !metadata.isEmpty();
+ if (isVariantExtracted) {
+ final boolean isUnlabeled = metadata.stream().map(Triple::getRight).allMatch(Set::isEmpty);
+ if (!isUnlabeled) {
+ addExtractedVariantToData(data, variant, metadata);
+ writeExtractedVariantToVCF(variant, metadata);
+ } else {
+ // Algorithm R for reservoir sampling: https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
+ if (unlabeledIndex < maximumNumberOfUnlabeledVariants) {
+ addExtractedVariantToData(unlabeledDataReservoir, variant, metadata);
+ } else {
+ final int j = rng.nextInt(unlabeledIndex + 1);
+ if (j < maximumNumberOfUnlabeledVariants) {
+ setExtractedVariantInData(unlabeledDataReservoir, variant, metadata, j);
+ }
+ }
+ unlabeledIndex++;
+ }
+ }
+ }
+ }
+
+ @Override
+ protected void afterNthPass(final int n) {
+ if (n == 0) {
+ writeAnnotationsToHDF5();
+ data.clear();
+ if (unlabeledDataReservoir != null) {
+ writeUnlabeledAnnotationsToHDF5();
+ // TODO write extracted unlabeled variants to VCF, which can be used to mark extraction in scoring step
+ unlabeledDataReservoir.clear();
+ }
+ if (vcfWriter != null) {
+ vcfWriter.close();
+ }
+ }
+ }
+
+ @Override
+ public Object onTraversalSuccess() {
+
+ logger.info(String.format("%s complete.", getClass().getSimpleName()));
+
+ return null;
+ }
+
+ private static void setExtractedVariantInData(final LabeledVariantAnnotationsData data,
+ final VariantContext variant,
+ final List<Triple<List<Allele>, VariantType, TreeSet<String>>> metadata,
+ final int index) {
+ data.set(index, variant,
+ metadata.stream().map(Triple::getLeft).collect(Collectors.toList()),
+ metadata.stream().map(Triple::getMiddle).collect(Collectors.toList()),
+ metadata.stream().map(Triple::getRight).collect(Collectors.toList()));
+ }
+
+ private void writeUnlabeledAnnotationsToHDF5() {
+ final File outputUnlabeledAnnotationsFile = new File(outputPrefix + UNLABELED_TAG + ANNOTATIONS_HDF5_SUFFIX);
+ if (unlabeledDataReservoir.size() == 0) {
+ throw new GATKException("No unlabeled variants were present in the input VCF.");
+ }
+ for (final VariantType variantType : variantTypesToExtract) {
+ logger.info(String.format("Extracted unlabeled annotations for %d variants of type %s.",
+ unlabeledDataReservoir.getVariantTypeFlat().stream().mapToInt(t -> t == variantType ? 1 : 0).sum(), variantType));
+ }
+ logger.info(String.format("Extracted unlabeled annotations for %s total variants.", unlabeledDataReservoir.size()));
+
+ logger.info("Writing unlabeled annotations...");
+ // TODO coordinate sort
+ unlabeledDataReservoir.writeHDF5(outputUnlabeledAnnotationsFile, omitAllelesInHDF5);
+ logger.info(String.format("Unlabeled annotations and metadata written to %s.", outputUnlabeledAnnotationsFile.getAbsolutePath()));
+ }
+}
\ No newline at end of file
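The reservoir update in nthPassApply above is Algorithm R; here is a self-contained sketch of the same update, decoupled from the walker machinery (the Reservoir class is illustrative, not part of this change):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

// Illustrative Algorithm R reservoir sampler mirroring the update in nthPassApply.
final class Reservoir<T> {
    private final int capacity;
    private final Random rng;
    private final List<T> sample = new ArrayList<>();
    private int index = 0; // number of items offered so far

    Reservoir(final int capacity, final long seed) {
        this.capacity = capacity;
        this.rng = new Random(seed);
    }

    void add(final T item) {
        if (index < capacity) {
            sample.add(item);                     // fill the reservoir
        } else {
            final int j = rng.nextInt(index + 1); // uniform over [0, index], inclusive
            if (j < capacity) {
                sample.set(j, item);              // replace with probability capacity / (index + 1)
            }
        }
        index++;
    }

    List<T> sample() {
        return sample; // a uniform random subset of the items offered
    }
}
```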
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java
new file mode 100644
index 00000000000..128b3bcf1df
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.java
@@ -0,0 +1,382 @@
+package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+import org.apache.commons.collections4.ListUtils;
+import org.apache.commons.lang3.tuple.Triple;
+import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
+import org.broadinstitute.hellbender.engine.FeatureContext;
+import org.broadinstitute.hellbender.engine.FeatureInput;
+import org.broadinstitute.hellbender.engine.MultiplePassVariantWalker;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsScorer;
+import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
+import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
+import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
+import org.broadinstitute.hellbender.utils.variant.VcfUtils;
+import picard.cmdline.programgroups.VariantFilteringProgramGroup;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+/**
+ * Base walker for both {@link ExtractVariantAnnotations} and {@link ScoreVariantAnnotations},
+ * which enforces identical variant-extraction behavior in both tools via {@link #extractVariantMetadata}.
+ *
+ * This base implementation covers functionality for {@link ExtractVariantAnnotations}. Namely, it is a single-pass
+ * walker, performing the operations:
+ *
+ * - nthPassApply(n = 0)
+ * - if variant/alleles pass filters and variant-type/overlapping-resource checks, then:
+ * - add variant/alleles to a {@link LabeledVariantAnnotationsData} collection
+ * - write variant/alleles with labels appended to a sites-only VCF file
+ * - afterNthPass(n = 0)
+ * - write the resulting {@link LabeledVariantAnnotationsData} collection to an HDF5 file
+ *
+ * This results in the following output:
+ *
+ * - an HDF5 file, with the directory structure documented in {@link LabeledVariantAnnotationsData#writeHDF5};
+ * note that the matrix of annotations contains a single row per datum (i.e., per allele, in allele-specific mode,
+ * and per variant otherwise)
+ * - a sites-only VCF file, containing a single line per extracted variant, with labels appended
+ *
+ * In contrast, the {@link ScoreVariantAnnotations} implementation overrides methods to yield a two-pass walker,
+ * performing the operations:
+ *
+ * - nthPassApply(n = 0)
+ * - if variant/alleles pass filters and variant-type checks, then:
+ * - add variant/alleles to a {@link LabeledVariantAnnotationsData} collection
+ * - afterNthPass(n = 0)
+ * - write the resulting {@link LabeledVariantAnnotationsData} collection to an HDF5 file
+ * - pass this annotations HDF5 file to a {@link VariantAnnotationsScorer}, which generates and writes scores to an HDF5 file
+ * - read the scores back in and load them into an iterator
+ * - nthPassApply(n = 1)
+ * - if variant/alleles pass filters and variant-type checks (which are identical to the first pass), then:
+ * - draw the corresponding score (or scores, in allele-specific mode) from the iterator
+ * - write the variant (with all alleles, not just those extracted) with the score
+ * (or best score, in allele-specific mode) appended to a VCF file
+ * - else:
+ * - write an unprocessed copy of the variant to a VCF file
+ *
+ * This results in the following output:
+ *
+ * - an HDF5 file, as above
+ * - a VCF file, containing the input variants, with labels and scores appended for those passing variant-type checks TODO + calibration-sensitivity scores + filters applied?
+ */
+@CommandLineProgramProperties(
+ // TODO
+ summary = "",
+ oneLineSummary = "",
+ programGroup = VariantFilteringProgramGroup.class
+)
+@DocumentedFeature
+public abstract class LabeledVariantAnnotationsWalker extends MultiplePassVariantWalker {
+
+ public static final String MODE_LONG_NAME = "mode";
+ public static final String USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME = "use-allele-specific-annotations";
+ public static final String IGNORE_FILTER_LONG_NAME = "ignore-filter";
+ public static final String IGNORE_ALL_FILTERS_LONG_NAME = "ignore-all-filters";
+ public static final String DO_NOT_TRUST_ALL_POLYMORPHIC_LONG_NAME = "do-not-trust-all-polymorphic";
+ public static final String OMIT_ALLELES_IN_HDF5_LONG_NAME = "omit-alleles-in-hdf5";
+ public static final String DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME = "do-not-gzip-vcf-output";
+
+ public static final String ANNOTATIONS_HDF5_SUFFIX = ".annot.hdf5";
+
+ public static final String RESOURCE_LABEL_INFO_HEADER_LINE_FORMAT_STRING = "This site was labeled as %s according to resources";
+
+ @Argument(
+ fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
+ shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
+ doc = "Prefix for output filenames.")
+ String outputPrefix;
+
+ @Argument(
+ fullName = StandardArgumentDefinitions.RESOURCE_LONG_NAME,
+ doc = "Resource VCFs used to label extracted variants.",
+ optional = true)
+ private List<FeatureInput<VariantContext>> resources = new ArrayList<>(10);
+
+ @Argument(
+ fullName = StandardArgumentDefinitions.ANNOTATION_LONG_NAME,
+ shortName = StandardArgumentDefinitions.ANNOTATION_SHORT_NAME,
+ doc = "Names of the annotations to extract. Note that a requested annotation may in fact not be present " +
+ "at any extraction site; NaN missing values will be generated for such annotations.",
+ minElements = 1)
+ List<String> annotationNames = new ArrayList<>();
+
+ @Argument(
+ fullName = MODE_LONG_NAME,
+ doc = "Variant types to extract.",
+ minElements = 1)
+ private List<VariantType> variantTypesToExtractList = new ArrayList<>(Arrays.asList(VariantType.SNP, VariantType.INDEL));
+
+ @Argument(
+ fullName = USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME,
+ doc = "If true, use the allele-specific versions of the specified annotations.",
+ optional = true)
+ boolean useASAnnotations = false;
+
+ @Argument(
+ fullName = IGNORE_FILTER_LONG_NAME,
+ doc = "Ignore the specified filter(s) in the input VCF.",
+ optional = true)
+ private List<String> ignoreInputFilters = new ArrayList<>();
+
+ @Argument(
+ fullName = IGNORE_ALL_FILTERS_LONG_NAME,
+ doc = "If true, ignore all filters in the input VCF.",
+ optional = true)
+ private boolean ignoreAllFilters = false;
+
+ // TODO this is a perhaps vestigial argument inherited from VQSR; its impact and necessity could be reevaluated
+ @Argument(
+ fullName = DO_NOT_TRUST_ALL_POLYMORPHIC_LONG_NAME,
+ doc = "If true, do not trust that unfiltered records in the resources contain only polymorphic sites. " +
+ "This may increase runtime.",
+ optional = true)
+ private boolean doNotTrustAllPolymorphic = false;
+
+ @Argument(
+ fullName = OMIT_ALLELES_IN_HDF5_LONG_NAME,
+ doc = "If true, omit alleles in output HDF5 files in order to decrease file sizes.",
+ optional = true
+ )
+ boolean omitAllelesInHDF5 = false;
+
+ @Argument(
+ fullName = DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME,
+ doc = "If true, VCF output will not be compressed.",
+ optional = true
+ )
+ boolean doNotGZIPVCFOutput = false;
+
+ private final Set<String> ignoreInputFilterSet = new TreeSet<>();
+ Set<VariantType> variantTypesToExtract;
+ TreeSet<String> resourceLabels = new TreeSet<>();
+
+ File outputAnnotationsFile;
+ VariantContextWriter vcfWriter;
+
+ LabeledVariantAnnotationsData data;
+
+ @Override
+ public void onTraversalStart() {
+
+ ignoreInputFilterSet.addAll(ignoreInputFilters);
+
+ variantTypesToExtract = EnumSet.copyOf(variantTypesToExtractList);
+
+ outputAnnotationsFile = new File(outputPrefix + ANNOTATIONS_HDF5_SUFFIX);
+ final String vcfSuffix = doNotGZIPVCFOutput ? ".vcf" : ".vcf.gz";
+ final File outputVCFFile = new File(outputPrefix + vcfSuffix);
+
+ // TODO this validation method should perhaps be moved outside of the CNV code
+ CopyNumberArgumentValidationUtils.validateOutputFiles(outputAnnotationsFile, outputVCFFile);
+
+ for (final FeatureInput<VariantContext> resource : resources) {
+ final TreeSet<String> trackResourceLabels = resource.getTagAttributes().entrySet().stream()
+ .filter(e -> e.getValue().equals("true"))
+ .map(Map.Entry::getKey)
+ .sorted()
+ .collect(Collectors.toCollection(TreeSet::new));
+ resourceLabels.addAll(trackResourceLabels);
+ logger.info(String.format("Found %s track: labels = %s", resource.getName(), trackResourceLabels));
+ }
+ resourceLabels.forEach(String::intern);
+
+ if (resourceLabels.contains(LabeledVariantAnnotationsData.SNP_LABEL)) {
+ throw new UserException.BadInput(String.format("The resource label \"%s\" is reserved for labeling variant types.",
+ LabeledVariantAnnotationsData.SNP_LABEL));
+ }
+
+ data = new LabeledVariantAnnotationsData(annotationNames, resourceLabels, useASAnnotations);
+
+ vcfWriter = createVCFWriter(outputVCFFile);
+ vcfWriter.writeHeader(constructVCFHeader(data.getSortedLabels()));
+
+ afterOnTraversalStart(); // perform additional validation, set modes in child tools, etc.
+ }
+
+ public void afterOnTraversalStart() {
+ // override
+ }
+
+ @Override
+ protected int numberOfPasses() {
+ return 1;
+ }
+
+ @Override
+ public Object onTraversalSuccess() {
+ return null;
+ }
+
+ // TODO maybe clean up all this Triple and metadata business with a class?
+ static void addExtractedVariantToData(final LabeledVariantAnnotationsData data,
+ final VariantContext variant,
+ final List<Triple<List<Allele>, VariantType, TreeSet<String>>> metadata) {
+ data.add(variant,
+ metadata.stream().map(Triple::getLeft).collect(Collectors.toList()),
+ metadata.stream().map(Triple::getMiddle).collect(Collectors.toList()),
+ metadata.stream().map(Triple::getRight).collect(Collectors.toList()));
+ }
+
+ void writeExtractedVariantToVCF(final VariantContext variant,
+ final List<Triple<List<Allele>, VariantType, TreeSet<String>>> metadata) {
+ writeExtractedVariantToVCF(variant,
+ metadata.stream().map(Triple::getLeft).flatMap(List::stream).collect(Collectors.toList()),
+ metadata.stream().map(Triple::getRight).flatMap(Set::stream).collect(Collectors.toSet()));
+ }
+
+ void writeAnnotationsToHDF5() {
+ if (data.size() == 0) {
+ logger.warn("Found no input variants for extraction. This may be because the specified " +
+ "genomic region contains no input variants of the requested type(s) or, if extracting " +
+ "training labels, because none of the input variants were contained in the resource VCFs " +
+ "or no resource VCFs were provided. The annotations HDF5 file will not be generated.");
+ return;
+ }
+ for (final VariantType variantType : variantTypesToExtract) {
+ logger.info(String.format("Extracted annotations for %d variants of type %s.",
+ data.getVariantTypeFlat().stream().mapToInt(t -> t == variantType ? 1 : 0).sum(), variantType));
+ }
+ for (final String label : data.getSortedLabels()) {
+ logger.info(String.format("Extracted annotations for %d variants labeled as %s.",
+ data.isLabelFlat(label).stream().mapToInt(b -> b ? 1 : 0).sum(), label));
+ }
+ logger.info(String.format("Extracted annotations for %s total variants.", data.size()));
+
+ logger.info("Writing annotations...");
+ data.writeHDF5(outputAnnotationsFile, omitAllelesInHDF5);
+ logger.info(String.format("Annotations and metadata written to %s.", outputAnnotationsFile.getAbsolutePath()));
+ }
+
+ /**
+ * Writes a sites-only VCF containing the extracted variants and corresponding labels.
+ */
+ void writeExtractedVariantToVCF(final VariantContext vc,
+ final List<Allele> altAlleles,
+ final Set<String> labels) {
+ final List<Allele> alleles = ListUtils.union(Collections.singletonList(vc.getReference()), altAlleles);
+ final VariantContextBuilder builder = new VariantContextBuilder(
+ vc.getSource(), vc.getContig(), vc.getStart(), vc.getEnd(), alleles);
+ labels.forEach(l -> builder.attribute(l, true)); // labels should already be sorted as a TreeSet
+ vcfWriter.add(builder.make());
+ }
+
+ // modified from VQSR code
+ // TODO we're just writing a standard sites-only VCF here, maybe there's a nicer way to do this?
+ VCFHeader constructVCFHeader(final List<String> sortedLabels) {
+ Set<VCFHeaderLine> hInfo = getDefaultToolVCFHeaderLines();
+ hInfo.addAll(sortedLabels.stream()
+ .map(l -> new VCFInfoHeaderLine(l, 1, VCFHeaderLineType.Flag, String.format(RESOURCE_LABEL_INFO_HEADER_LINE_FORMAT_STRING, l)))
+ .collect(Collectors.toList()));
+ hInfo.add(GATKVCFHeaderLines.getFilterLine(VCFConstants.PASSES_FILTERS_v4));
+ final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
+ hInfo = VcfUtils.updateHeaderContigLines(hInfo, null, sequenceDictionary, true);
+ return new VCFHeader(hInfo);
+ }
+
+ /**
+ * Performs variant-filter and variant-type checks to determine variants/alleles suitable for extraction, and returns
+ * a corresponding list of metadata. This method should not be overridden, as it is intended to enforce identical
+ * variant-extraction behavior in all child tools. Logic here and below for filtering and determining variant type
+ * was retained from VQSR, but has been heavily refactored.
+ */
+ final List<Triple<List<Allele>, VariantType, TreeSet<String>>> extractVariantMetadata(final VariantContext vc,
+ final FeatureContext featureContext,
+ final boolean isExtractUnlabeled) {
+ // if variant is filtered, do not consume here
+ if (vc == null || !(ignoreAllFilters || vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()))) {
+ return Collections.emptyList();
+ }
+ if (!useASAnnotations) {
+ // in non-allele-specific mode, get a singleton list of the triple
+ // (list of alt alleles passing variant-type and overlapping-resource checks, variant type, set of labels)
+ final VariantType variantType = VariantType.getVariantType(vc);
+ if (variantTypesToExtract.contains(variantType)) {
+ final TreeSet<String> overlappingResourceLabels = findOverlappingResourceLabels(vc, null, null, featureContext);
+ if (isExtractUnlabeled || !overlappingResourceLabels.isEmpty()) {
+ return Collections.singletonList(Triple.of(vc.getAlternateAlleles(), variantType, overlappingResourceLabels));
+ }
+ }
+ } else {
+ // in allele-specific mode, get a list containing the triples
+ // (singleton list of alt allele, variant type, set of labels)
+ // corresponding to alt alleles that pass variant-type and overlapping-resource checks
+ return vc.getAlternateAlleles().stream()
+ .filter(a -> !GATKVCFConstants.isSpanningDeletion(a))
+ .filter(a -> variantTypesToExtract.contains(VariantType.getVariantType(vc, a)))
+ .map(a -> Triple.of(Collections.singletonList(a), VariantType.getVariantType(vc, a),
+ findOverlappingResourceLabels(vc, vc.getReference(), a, featureContext)))
+ .filter(t -> isExtractUnlabeled || !t.getRight().isEmpty())
+ .collect(Collectors.toList());
+ }
+ // if variant-type and overlapping-resource checks failed, return an empty list
+ return Collections.emptyList();
+ }
+
+ private TreeSet<String> findOverlappingResourceLabels(final VariantContext vc,
+ final Allele refAllele,
+ final Allele altAllele,
+ final FeatureContext featureContext) {
+ final TreeSet<String> overlappingResourceLabels = new TreeSet<>();
+ for (final FeatureInput<VariantContext> resource : resources) {
+ final List<VariantContext> resourceVCs = featureContext.getValues(resource, featureContext.getInterval().getStart());
+ for (final VariantContext resourceVC : resourceVCs) {
+ if (useASAnnotations && !doAllelesMatch(refAllele, altAllele, resourceVC)) {
+ continue;
+ }
+ if (isValidVariant(vc, resourceVC, !doNotTrustAllPolymorphic)) {
+ resource.getTagAttributes().entrySet().stream()
+ .filter(e -> e.getValue().equals("true"))
+ .map(Map.Entry::getKey)
+ .forEach(overlappingResourceLabels::add);
+ }
+ }
+ }
+ return overlappingResourceLabels;
+ }
+
+ private static boolean isValidVariant(final VariantContext vc,
+ final VariantContext resourceVC,
+ final boolean trustAllPolymorphic) {
+ return resourceVC != null && resourceVC.isNotFiltered() && resourceVC.isVariant() && VariantType.checkVariantType(vc, resourceVC) &&
+ (trustAllPolymorphic || !resourceVC.hasGenotypes() || resourceVC.isPolymorphicInSamples());
+ }
+
+ private static boolean doAllelesMatch(final Allele refAllele,
+ final Allele altAllele,
+ final VariantContext resourceVC) {
+ if (altAllele == null) {
+ return true;
+ }
+ try {
+ return GATKVariantContextUtils.isAlleleInList(refAllele, altAllele, resourceVC.getReference(), resourceVC.getAlternateAlleles());
+ } catch (final IllegalStateException e) {
+ throw new IllegalStateException("Reference allele mismatch at position " + resourceVC.getContig() + ':' + resourceVC.getStart() + " : ", e);
+ }
+ }
+}
\ No newline at end of file
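For orientation, the MultiplePassVariantWalker hooks used throughout this class (numberOfPasses, nthPassApply, afterNthPass) compose as in the stripped-down sketch below; the counting walker is illustrative only, and the command-line annotations required to register it as a tool are omitted:

```java
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.MultiplePassVariantWalker;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;

// Illustrative two-pass walker: pass 0 accumulates, pass 1 would consume per-record
// results, as ScoreVariantAnnotations does with its precomputed score iterator.
public final class TwoPassCountingWalker extends MultiplePassVariantWalker {
    private long count = 0;

    @Override
    protected int numberOfPasses() {
        return 2;
    }

    @Override
    protected void nthPassApply(final VariantContext variant,
                                final ReadsContext readsContext,
                                final ReferenceContext referenceContext,
                                final FeatureContext featureContext,
                                final int n) {
        if (n == 0) {
            count++; // first pass: accumulate (annotations, in the real tools)
        }
        // second pass (n == 1): draw precomputed results in the same traversal order
    }

    @Override
    protected void afterNthPass(final int n) {
        if (n == 0) {
            logger.info(String.format("First pass saw %d records.", count));
        }
    }
}
```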
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java
new file mode 100644
index 00000000000..33fefe62ad1
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/ScoreVariantAnnotations.java
@@ -0,0 +1,624 @@
+package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable;
+
+import com.google.common.primitives.Doubles;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import htsjdk.variant.vcf.VCFFilterHeaderLine;
+import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLine;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
+import org.apache.commons.lang3.tuple.Triple;
+import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.BetaFeature;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
+import org.broadinstitute.hellbender.engine.FeatureContext;
+import org.broadinstitute.hellbender.engine.ReadsContext;
+import org.broadinstitute.hellbender.engine.ReferenceContext;
+import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
+import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.VariantRecalibrator;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsScorer;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsScorer;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModel;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModelBackend;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsScorer;
+import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.io.IOUtils;
+import org.broadinstitute.hellbender.utils.io.Resource;
+import org.broadinstitute.hellbender.utils.python.PythonScriptExecutor;
+import picard.cmdline.programgroups.VariantFilteringProgramGroup;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/**
+ * Scores variant calls in a VCF file based on site-level annotations using a previously trained model.
+ *
+ *
+ * This tool is intended to be used as the last step in a variant-filtering workflow that supersedes the
+ * {@link VariantRecalibrator} workflow. Using a previously trained model produced by {@link TrainVariantAnnotationsModel},
+ * this tool assigns a score to each call (with a lower score indicating that a call is more likely to be an artifact).
+ * Each score can also be converted to a corresponding sensitivity to a calibration set, if the latter is available.
+ * Each VCF record can also be annotated with additional resource labels and/or hard filtered based on its
+ * calibration-set sensitivity, if desired.
+ *
+ *
+ *
+ * Note that annotations and metadata are collected in memory during traversal until they are written to HDF5 files
+ * upon completion of the traversal. Memory requirements thus roughly scale linearly with both the number of sites
+ * scored and the number of annotations. For large callsets, this tool may be run in parallel over separate
+ * genomic shards using the {@value StandardArgumentDefinitions#INTERVALS_LONG_NAME} argument as usual.
+ *
+ *
+ *
+ * Scores and annotations are also output to HDF5 files, which may be viewed using
+ * hdfview or loaded in Python using
+ * PyTables or h5py.
+ *
+ *
+ *
+ * <h3>Inputs</h3>
+ *
+ *
+ *
+ * Input VCF file. Site-level annotations will be extracted from the contained variants (or alleles,
+ * if the {@value USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME} argument is specified).
+ *
+ *
+ * Annotations to use for scoring. These should be identical to those used in the {@link ExtractVariantAnnotations}
+ * step to create the training set.
+ *
+ *
+ * Variant types (i.e., SNP and/or INDEL) to score. Logic for determining variant type was retained from
+ * {@link VariantRecalibrator}; see {@link VariantType}. To use different models for SNPs and INDELs
+ * (e.g., if it is desired to use different sets of annotations for each variant type), one can first run
+ * this tool to score SNPs and then again on the resulting output to score INDELs.
+ *
+ *
+ * Model prefix. This should denote the path of model files produced by {@link TrainVariantAnnotationsModel}.
+ *
+ *
+ * (Optional) Model backend. This should be identical to that specified in {@link TrainVariantAnnotationsModel}.
+ * The default Python IsolationForest implementation requires either the GATK Python environment
+ * or that certain Python packages (argparse, h5py, numpy, sklearn, and dill) are otherwise available.
+ * A custom backend can also be specified in conjunction with the {@value PYTHON_SCRIPT_LONG_NAME} argument.
+ *
+ *
+ * (Optional) Resource VCF file(s). See the corresponding documentation in {@link ExtractVariantAnnotations}.
+ * In typical usage, the same resource VCFs and tags provided to that tool should also be provided here.
+ * In addition, the sites-only VCF that is produced by that tool can also be provided here and used to
+ * mark those labeled sites that were extracted, which can be useful if these are a subset of the resource sites.
+ *
+ *
+ * (Optional) Calibration-set sensitivity thresholds for SNPs and INDELs. If the corresponding SNP or INDEL
+ * calibration-set scores are available in the provided model files, sites that have a calibration-set
+ * sensitivity falling above the corresponding threshold (i.e., a score falling below the corresponding
+ * score threshold) will have a filter applied.
+ *
+ *
+ * Output prefix.
+ * This is used as the basename for output files.
+ *
+ *
+ *
+ *
+ * <h3>Outputs</h3>
+ *
+ *
+ *
+ * Scored VCF file and index. The VCF will not be gzipped if the {@value DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME}
+ * argument is set to true. The INFO field in each VCF record will be annotated with:
+ *
+ *
+ * 1) a score (with a key as given by the {@value SCORE_KEY_LONG_NAME} argument,
+ * which has a default value of {@value DEFAULT_SCORE_KEY}),
+ *
+ *
+ * 2) if resources are provided, flags corresponding to the labels (e.g.,
+ * {@value LabeledVariantAnnotationsData#TRAINING_LABEL}, {@value LabeledVariantAnnotationsData#CALIBRATION_LABEL}, etc.)
+ * of resources containing the record,
+ *
+ *
+ * 3) if the {@value SNP_KEY_LONG_NAME} argument (which has a default value of {@value DEFAULT_SNP_KEY})
+ * is non-null, a flag corresponding to whether a site is treated as a SNP,
+ *
+ *
+ * 4) if {@value SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} and/or
+ * {@value INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} are provided, a filter (with name given by
+ * the {@value LOW_SCORE_FILTER_NAME_LONG_NAME} argument, which has a default value of
+ * {@value DEFAULT_LOW_SCORE_FILTER_NAME}) will be applied if a record has a calibration-set sensitivity
+ * falling above the appropriate threshold (i.e., if it has a score falling below the corresponding
+ * score threshold).
+ *
+ *
+ * If {@value USE_ALLELE_SPECIFIC_ANNOTATIONS_LONG_NAME} is true, the score, SNP flag, calibration sensitivity,
+ * and filter appropriate for the highest scoring allele are used; however, the resource labels for all alleles
+ * are applied.
+ *
+ *
+ *
+ *
+ * (Optional) Annotations HDF5 file (.annot.hdf5). Annotation data and metadata for all scored sites
+ * (labeled and unlabeled) are stored in the HDF5 directory structure given in the documentation for the
+ * {@link ExtractVariantAnnotations} tool. This file will only be produced if the number of scored sites
+ * is nonzero.
+ *
+ *
+ *
+ *
+ * (Optional) Scores HDF5 file (.scores.hdf5). Scores for all scored sites are stored in the
+ * HDF5 path {@value VariantAnnotationsScorer#SCORES_PATH}. Scores are given in the same order as records
+ * in both the VCF and the annotations HDF5 file. This file will only be produced if the number of scored sites
+ * is nonzero.
+ *
+ *
+ *
+ *
+ *
+ * <h3>Usage examples</h3>
+ *
+ *
+ * Score sites using a model (produced by {@link TrainVariantAnnotationsModel} using the default
+ * {@link VariantAnnotationsModelBackend#PYTHON_IFOREST} model backend and contained in the directory
+ * {@code model_dir}), producing the outputs 1) {@code output.vcf.gz}, 2) {@code output.vcf.gz.tbi},
+ * 3) {@code output.annot.hdf5}, and 4) {@code output.scores.hdf5}. Note that {@code extract.vcf.gz} is
+ * produced by {@link ExtractVariantAnnotations}. Records will be filtered according to the values provided to the
+ * {@value SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME} and {@value INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME}
+ * arguments; the values below are only meant to be illustrative and should be set as appropriate for a given analysis.
+ *
+ *
+ * One may chain together two runs of this tool to score SNPs and INDELs using different models
+ * (note that SNP and INDEL models have "snp" and "indel" tags in their respective filenames, so these
+ * models can still be contained in the same {@code model_dir} directory).
+ * This may have implications for mixed SNP/INDEL sites, especially if filters are applied; see also the
+ * {@value IGNORE_ALL_FILTERS_LONG_NAME} and {@value IGNORE_FILTER_LONG_NAME} arguments.
+ *
+ *
+ * The primary scoring functionality performed by this tool is accomplished by a "scoring backend"
+ * whose fundamental contract is to take an input annotation matrix and to output corresponding scores,
+ * with both input and output given as HDF5 files. Rather than using one of the available, implemented backends,
+ * advanced users may provide their own backend via the {@value PYTHON_SCRIPT_LONG_NAME} argument.
+ * See documentation in the modeling and scoring interfaces ({@link VariantAnnotationsModel} and
+ * {@link VariantAnnotationsScorer}, respectively), as well as the default Python IsolationForest implementation at
+ * org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py.
+ *
+ *
+ * DEVELOPER NOTE: See documentation in {@link LabeledVariantAnnotationsWalker}.
+ *
+ * @author Samuel Lee <slee@broadinstitute.org>
+ */
+@CommandLineProgramProperties(
+ summary = "Scores variant calls in a VCF file based on site-level annotations using a previously trained model.",
+ oneLineSummary = "Scores variant calls in a VCF file based on site-level annotations using a previously trained model",
+ programGroup = VariantFilteringProgramGroup.class
+)
+@DocumentedFeature
+@BetaFeature
+public class ScoreVariantAnnotations extends LabeledVariantAnnotationsWalker {
+
+ public static final String MODEL_PREFIX_LONG_NAME = "model-prefix";
+ public static final String MODEL_BACKEND_LONG_NAME = TrainVariantAnnotationsModel.MODEL_BACKEND_LONG_NAME;
+ public static final String PYTHON_SCRIPT_LONG_NAME = "python-script";
+ public static final String SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME = "snp-calibration-sensitivity-threshold";
+ public static final String INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME = "indel-calibration-sensitivity-threshold";
+
+ public static final String SNP_KEY_LONG_NAME = "snp-key";
+ public static final String SCORE_KEY_LONG_NAME = "score-key";
+ public static final String CALIBRATION_SENSITIVITY_KEY_LONG_NAME = "calibration-sensitivity-key";
+ public static final String LOW_SCORE_FILTER_NAME_LONG_NAME = "low-score-filter-name";
+ public static final String DOUBLE_FORMAT_LONG_NAME = "double-format";
+
+ public static final String DEFAULT_SNP_KEY = LabeledVariantAnnotationsData.SNP_LABEL;
+ public static final String DEFAULT_SCORE_KEY = "SCORE";
+ public static final String DEFAULT_CALIBRATION_SENSITIVITY_KEY = "CALIBRATION_SENSITIVITY";
+ public static final String DEFAULT_LOW_SCORE_FILTER_NAME = "LOW_SCORE";
+ public static final String DEFAULT_DOUBLE_FORMAT = "%.4f";
+
+ public static final String SCORES_HDF5_SUFFIX = ".scores.hdf5";
+
+ @Argument(
+ fullName = MODEL_PREFIX_LONG_NAME,
+ doc = "Prefix for model files. This should denote the path of model files produced by TrainVariantAnnotationsModel.")
+ private String modelPrefix;
+
+ @Argument(
+ fullName = MODEL_BACKEND_LONG_NAME,
+ doc = "Backend to use for scoring. " +
+ "JAVA_BGMM will use a pure Java implementation (ported from Python scikit-learn) of the Bayesian Gaussian Mixture Model. " +
+ "PYTHON_IFOREST will use the Python scikit-learn implementation of the IsolationForest method and " +
+ "will require that the corresponding Python dependencies are present in the environment. " +
+ "PYTHON_SCRIPT will use the script specified by the " + PYTHON_SCRIPT_LONG_NAME + " argument. " +
+ "See the tool documentation for more details." )
+ private VariantAnnotationsModelBackend modelBackend = VariantAnnotationsModelBackend.PYTHON_IFOREST;
+
+ @Argument(
+ fullName = PYTHON_SCRIPT_LONG_NAME,
+ doc = "Python script used for specifying a custom scoring backend. If provided, " + MODEL_BACKEND_LONG_NAME + " must also be set to PYTHON_SCRIPT.",
+ optional = true)
+ private File pythonScriptFile;
+
+ @Argument(
+ fullName = SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME,
+ doc = "If specified, SNPs with scores corresponding to a calibration sensitivity that is greater than or equal to this threshold will be hard filtered.",
+ optional = true,
+ minValue = 0.,
+ maxValue = 1.)
+ private Double snpCalibrationSensitivityThreshold;
+
+ @Argument(
+ fullName = INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME,
+ doc = "If specified, indels with scores corresponding to a calibration sensitivity that is greater than or equal to this threshold will be hard filtered.",
+ optional = true,
+ minValue = 0.,
+ maxValue = 1.)
+ private Double indelCalibrationSensitivityThreshold;
+
+ @Argument(
+ fullName = SNP_KEY_LONG_NAME,
+ doc = "Annotation flag to use for labeling sites as SNPs in output. " +
+ "Set this to \"null\" to omit these labels.")
+ private String snpKey = DEFAULT_SNP_KEY;
+
+ @Argument(
+ fullName = SCORE_KEY_LONG_NAME,
+ doc = "Annotation key to use for score values in output.")
+ private String scoreKey = DEFAULT_SCORE_KEY;
+
+ @Argument(
+ fullName = CALIBRATION_SENSITIVITY_KEY_LONG_NAME,
+ doc = "Annotation key to use for calibration-sensitivity values in output.")
+ private String calibrationSensitivityKey = DEFAULT_CALIBRATION_SENSITIVITY_KEY;
+
+ @Argument(
+ fullName = LOW_SCORE_FILTER_NAME_LONG_NAME,
+ doc = "Name to use for low-score filter in output.")
+ private String lowScoreFilterName = DEFAULT_LOW_SCORE_FILTER_NAME;
+
+ @Argument(
+ fullName = DOUBLE_FORMAT_LONG_NAME,
+ doc = "Format string to use for formatting score and calibration-sensitivity values in output.")
+ private String doubleFormat = DEFAULT_DOUBLE_FORMAT;
+
+ private File outputScoresFile;
+ private Iterator<Double> scoresIterator;
+ private Iterator<Boolean> isSNPIterator;
+
+ private VariantAnnotationsScorer snpScorer;
+ private VariantAnnotationsScorer indelScorer;
+
+ private Function<Double, Double> snpCalibrationSensitivityConverter;
+ private Function<Double, Double> indelCalibrationSensitivityConverter;
+
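+ // Two passes over the input VCF: the first extracts annotations for scored sites and scores
+ // them in bulk after the pass completes; the second annotates each site with its score (and
+ // calibration sensitivity, if available) and writes the output VCF.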
+ @Override
+ protected int numberOfPasses() {
+ return 2;
+ }
+
+ @Override
+ public void afterOnTraversalStart() {
+
+ Utils.nonNull(scoreKey);
+ Utils.nonNull(calibrationSensitivityKey);
+ Utils.nonNull(lowScoreFilterName);
+ Utils.nonNull(doubleFormat);
+
+ switch (modelBackend) {
+ case JAVA_BGMM:
+ Utils.validateArg(pythonScriptFile == null,
+ "Python script should not be provided when using JAVA_BGMM backend.");
+ logger.info("Running in JAVA_BGMM mode...");
+ snpScorer = deserializeScorerFromSerFiles(VariantType.SNP);
+ indelScorer = deserializeScorerFromSerFiles(VariantType.INDEL);
+ break;
+ case PYTHON_IFOREST:
+ Utils.validateArg(pythonScriptFile == null,
+ "Python script should not be provided when using PYTHON_IFOREST backend.");
+
+ pythonScriptFile = IOUtils.writeTempResource(new Resource(TrainVariantAnnotationsModel.ISOLATION_FOREST_PYTHON_SCRIPT, TrainVariantAnnotationsModel.class));
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("argparse");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("h5py");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("numpy");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("sklearn");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("dill");
+ logger.info("Running in PYTHON_IFOREST mode...");
+ snpScorer = deserializeScorerFromPklFiles(VariantType.SNP);
+ indelScorer = deserializeScorerFromPklFiles(VariantType.INDEL);
+ break;
+ case PYTHON_SCRIPT:
+ IOUtils.canReadFile(pythonScriptFile);
+ logger.info("Running in PYTHON_SCRIPT mode...");
+ snpScorer = deserializeScorerFromPklFiles(VariantType.SNP);
+ indelScorer = deserializeScorerFromPklFiles(VariantType.INDEL);
+ break;
+ default:
+ throw new GATKException.ShouldNeverReachHereException("Unknown model-backend mode.");
+ }
+
+ if (snpScorer == null && indelScorer == null) {
+ throw new UserException.BadInput(String.format("At least one serialized scorer must be present " +
+ "in the model files with the prefix %s.", modelPrefix));
+ }
+ if (variantTypesToExtract.contains(VariantType.SNP) && snpScorer == null) {
+ throw new UserException.BadInput(String.format("SNPs were indicated for extraction via the %s argument, " +
+ "but no serialized SNP scorer was available in the model files with the prefix %s.", MODE_LONG_NAME, modelPrefix));
+ }
+ if (variantTypesToExtract.contains(VariantType.INDEL) && indelScorer == null) {
+ throw new UserException.BadInput(String.format("INDELs were indicated for extraction via the %s argument, " +
+ "but no serialized INDEL scorer was available in the model files with the prefix %s.", MODE_LONG_NAME, modelPrefix));
+ }
+
+ snpCalibrationSensitivityConverter = readCalibrationScoresAndCreateConverter(VariantType.SNP);
+ indelCalibrationSensitivityConverter = readCalibrationScoresAndCreateConverter(VariantType.INDEL);
+
+ if (snpCalibrationSensitivityConverter == null && snpCalibrationSensitivityThreshold != null) {
+ throw new UserException.BadInput(String.format("The %s argument was specified, " +
+ "but no SNP calibration scores were provided in the model files with the prefix %s.",
+ SNP_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME, modelPrefix));
+ }
+ if (indelCalibrationSensitivityConverter == null && indelCalibrationSensitivityThreshold != null) {
+ throw new UserException.BadInput(String.format("The %s argument was specified, " +
+ "but no INDEL calibration scores were provided in the model files with the prefix %s.",
+ INDEL_CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME, modelPrefix));
+ }
+
+ outputScoresFile = new File(outputPrefix + SCORES_HDF5_SUFFIX);
+
+ // TODO this validation method should perhaps be moved outside of the CNV code
+ CopyNumberArgumentValidationUtils.validateOutputFiles(outputScoresFile);
+ }
+
+ @Override
+ protected void nthPassApply(final VariantContext variant,
+ final ReadsContext readsContext,
+ final ReferenceContext referenceContext,
+ final FeatureContext featureContext,
+ final int n) {
+ final List<Triple<List<Allele>, VariantType, TreeSet<String>>> metadata = extractVariantMetadata(variant, featureContext, true);
+ final boolean isVariantExtracted = !metadata.isEmpty();
+ if (n == 0 && isVariantExtracted) {
+ addExtractedVariantToData(data, variant, metadata);
+ }
+ if (n == 1) {
+ if (isVariantExtracted) {
+ writeExtractedVariantToVCF(variant, metadata);
+ } else {
+ vcfWriter.add(variant);
+ }
+ }
+ }
+
+ @Override
+ protected void afterNthPass(final int n) {
+ if (n == 0) {
+ // TODO if BGMM, preprocess annotations and write to HDF5 with BGMMVariantAnnotationsScorer.preprocessAnnotationsWithBGMMAndWriteHDF5
+ writeAnnotationsToHDF5();
+ if (data.size() > 0) {
+ data.clear();
+ readAnnotationsAndWriteScoresToHDF5();
+ scoresIterator = Arrays.stream(VariantAnnotationsScorer.readScores(outputScoresFile)).iterator();
+ isSNPIterator = LabeledVariantAnnotationsData.readLabel(outputAnnotationsFile, LabeledVariantAnnotationsData.SNP_LABEL).iterator();
+ } else {
+ scoresIterator = Collections.emptyIterator();
+ isSNPIterator = Collections.emptyIterator();
+ }
+ }
+ if (n == 1) {
+ if (scoresIterator.hasNext()) {
+ throw new IllegalStateException("Traversals of scores and variants " +
+ "(or alleles, in allele-specific mode) were not correctly synchronized.");
+ }
+ if (vcfWriter != null) {
+ vcfWriter.close();
+ }
+ }
+ }
+
+ private VariantAnnotationsScorer deserializeScorerFromPklFiles(final VariantType variantType) {
+ final String variantTypeTag = '.' + variantType.toString().toLowerCase();
+ final File scorerPklFile = new File(
+ modelPrefix + variantTypeTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX);
+ final File negativeScorerPklFile = new File(
+ modelPrefix + variantTypeTag + TrainVariantAnnotationsModel.NEGATIVE_TAG + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX);
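+ // if the positive scorer is absent, return null; if a negative scorer (produced by
+ // positive-unlabeled training) is also present, combine it with the positive scorer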
+ return scorerPklFile.canRead()
+ ? negativeScorerPklFile.canRead()
+ ? VariantAnnotationsScorer.combinePositiveAndNegativeScorer(
+ new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, scorerPklFile),
+ new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, negativeScorerPklFile))
+ : new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, scorerPklFile)
+ : null;
+ }
+
+ private VariantAnnotationsScorer deserializeScorerFromSerFiles(final VariantType variantType) {
+ final String variantTypeTag = '.' + variantType.toString().toLowerCase();
+ final File scorerSerFile = new File(
+ modelPrefix + variantTypeTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX);
+ final File negativeScorerSerFile = new File(
+ modelPrefix + variantTypeTag + TrainVariantAnnotationsModel.NEGATIVE_TAG + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX);
+ return scorerSerFile.canRead()
+ ? negativeScorerSerFile.canRead()
+ ? VariantAnnotationsScorer.combinePositiveAndNegativeScorer(
+ BGMMVariantAnnotationsScorer.deserialize(scorerSerFile),
+ BGMMVariantAnnotationsScorer.deserialize(negativeScorerSerFile))
+ : BGMMVariantAnnotationsScorer.deserialize(scorerSerFile)
+ : null;
+ }
+
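+ // Builds a converter from scores to calibration sensitivities, if calibration scores are available.
+ // Roughly, a score maps to the fraction of calibration scores at or above it (see
+ // VariantAnnotationsScorer.createScoreToCalibrationSensitivityConverter for the precise definition),
+ // so lower scores correspond to higher calibration sensitivities.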
+ private Function<Double, Double> readCalibrationScoresAndCreateConverter(final VariantType variantType) {
+ final String variantTypeTag = '.' + variantType.toString().toLowerCase();
+ final File calibrationScores = new File(
+ modelPrefix + variantTypeTag + TrainVariantAnnotationsModel.CALIBRATION_SCORES_HDF5_SUFFIX);
+ return calibrationScores.canRead()
+ ? VariantAnnotationsScorer.createScoreToCalibrationSensitivityConverter(VariantAnnotationsScorer.readScores(calibrationScores))
+ : null;
+ }
+
+ private void readAnnotationsAndWriteScoresToHDF5() {
+ final List<String> annotationNames = LabeledVariantAnnotationsData.readAnnotationNames(outputAnnotationsFile);
+ final List<Boolean> isSNP = LabeledVariantAnnotationsData.readLabel(outputAnnotationsFile, LabeledVariantAnnotationsData.SNP_LABEL);
+ final double[][] allAnnotations = LabeledVariantAnnotationsData.readAnnotations(outputAnnotationsFile);
+ final int numAll = allAnnotations.length;
+ final List<Double> allScores = new ArrayList<>(Collections.nCopies(numAll, Double.NaN));
+ if (variantTypesToExtract.contains(VariantType.SNP)) {
+ logger.info("Scoring SNP variants...");
+ scoreVariantTypeAndSetElementsOfAllScores(annotationNames, allAnnotations, isSNP, snpScorer, allScores);
+ }
+ if (variantTypesToExtract.contains(VariantType.INDEL)) {
+ logger.info("Scoring INDEL variants...");
+ final List<Boolean> isIndel = isSNP.stream().map(x -> !x).collect(Collectors.toList());
+ scoreVariantTypeAndSetElementsOfAllScores(annotationNames, allAnnotations, isIndel, indelScorer, allScores);
+ }
+ VariantAnnotationsScorer.writeScores(outputScoresFile, Doubles.toArray(allScores));
+ logger.info(String.format("Scores written to %s.", outputScoresFile.getAbsolutePath()));
+ }
+
+ private static void scoreVariantTypeAndSetElementsOfAllScores(final List<String> annotationNames,
+ final double[][] allAnnotations,
+ final List<Boolean> isVariantType,
+ final VariantAnnotationsScorer variantTypeScorer,
+ final List<Double> allScores) {
+ final File variantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, allAnnotations, isVariantType);
+ final File variantTypeScoresFile = IOUtils.createTempFile("temp", ".scores.hdf5");
+ variantTypeScorer.score(variantTypeAnnotationsFile, variantTypeScoresFile); // TODO we do not fail until here in the case of mismatched annotation names; we could fail earlier
+ final double[] variantTypeScores = VariantAnnotationsScorer.readScores(variantTypeScoresFile);
+ final Iterator<Double> variantTypeScoresIterator = Arrays.stream(variantTypeScores).iterator();
+ IntStream.range(0, allScores.size()).filter(isVariantType::get).forEach(i -> allScores.set(i, variantTypeScoresIterator.next()));
+ }
+
+ @Override
+ void writeExtractedVariantToVCF(final VariantContext vc,
+ final List<Allele> altAlleles,
+ final Set<String> labels) {
+ final VariantContextBuilder builder = new VariantContextBuilder(vc);
+ labels.forEach(l -> builder.attribute(l, true)); // labels should already be sorted as a TreeSet
+
+ final List<Double> scores = useASAnnotations
+ ? altAlleles.stream().map(a -> scoresIterator.next()).collect(Collectors.toList())
+ : Collections.singletonList(scoresIterator.next());
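+ // in allele-specific mode, the maximum score over alt alleles represents the site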
+ final double score = Collections.max(scores);
+ final int scoreIndex = scores.indexOf(score);
+ builder.attribute(scoreKey, formatDouble(score));
+
+ final List<Boolean> isSNP = useASAnnotations
+ ? altAlleles.stream().map(a -> isSNPIterator.next()).collect(Collectors.toList())
+ : Collections.singletonList(isSNPIterator.next());
+ final boolean isSNPMax = isSNP.get(scoreIndex);
+
+ if (snpKey != null) {
+ builder.attribute(snpKey, isSNPMax);
+ }
+
+ final Function<Double, Double> calibrationSensitivityConverter = isSNPMax ? snpCalibrationSensitivityConverter : indelCalibrationSensitivityConverter;
+ if (calibrationSensitivityConverter != null) {
+ final double calibrationSensitivity = calibrationSensitivityConverter.apply(score);
+ builder.attribute(calibrationSensitivityKey, formatDouble(calibrationSensitivity));
+ final Double calibrationSensitivityThreshold = isSNPMax ? snpCalibrationSensitivityThreshold : indelCalibrationSensitivityThreshold;
+ if (calibrationSensitivityThreshold != null && calibrationSensitivity >= calibrationSensitivityThreshold) {
+ builder.filter(lowScoreFilterName); // TODO does this sufficiently cover the desired behavior when dealing with previously filtered sites, etc.?
+ }
+ }
+
+ vcfWriter.add(builder.make());
+ }
+
+ private String formatDouble(final double x) {
+ return String.format(doubleFormat, x);
+ }
+
+ /**
+ * Copies the header from the input VCF and adds info lines for the score, calibration-sensitivity, and label keys,
+ * as well as the filter line.
+ */
+ @Override
+ VCFHeader constructVCFHeader(final List<String> sortedLabels) {
+ final VCFHeader inputHeader = getHeaderForVariants();
+ final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();
+
+ final Set<VCFHeaderLine> hInfo = new HashSet<>(inputHeaders);
+ hInfo.add(new VCFInfoHeaderLine(scoreKey, 1, VCFHeaderLineType.Float,
+ "Score according to the model applied by ScoreVariantAnnotations"));
+ hInfo.add(new VCFInfoHeaderLine(calibrationSensitivityKey, 1, VCFHeaderLineType.Float,
+ String.format("Calibration sensitivity corresponding to the value of %s", scoreKey)));
+ hInfo.add(new VCFFilterHeaderLine(lowScoreFilterName, "Low score (corresponding to high calibration sensitivity)"));
+
+ hInfo.addAll(getDefaultToolVCFHeaderLines());
+ if (snpKey != null) {
+ hInfo.add(new VCFInfoHeaderLine(snpKey, 1, VCFHeaderLineType.Flag, "This site was considered a SNP during filtering"));
+ }
+ hInfo.addAll(sortedLabels.stream()
+ .map(l -> new VCFInfoHeaderLine(l, 1, VCFHeaderLineType.Flag, String.format(RESOURCE_LABEL_INFO_HEADER_LINE_FORMAT_STRING, l)))
+ .collect(Collectors.toList()));
+
+ return new VCFHeader(hInfo, inputHeader.getGenotypeSamples());
+ }
+
+ @Override
+ public Object onTraversalSuccess() {
+
+ logger.info(String.format("%s complete.", getClass().getSimpleName()));
+
+ return null;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java
new file mode 100644
index 00000000000..9a8a1c8b845
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/TrainVariantAnnotationsModel.java
@@ -0,0 +1,570 @@
+package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable;
+
+import com.google.common.collect.Streams;
+import com.google.common.primitives.Doubles;
+import org.apache.commons.math3.stat.descriptive.moment.Variance;
+import org.apache.commons.math3.stat.descriptive.rank.Percentile;
+import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.BetaFeature;
+import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
+import org.broadinstitute.barclay.help.DocumentedFeature;
+import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
+import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
+import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.VariantRecalibrator;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsModel;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.BGMMVariantAnnotationsScorer;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsModel;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.PythonSklearnVariantAnnotationsScorer;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModel;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModelBackend;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsScorer;
+import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.io.IOUtils;
+import org.broadinstitute.hellbender.utils.io.Resource;
+import org.broadinstitute.hellbender.utils.python.PythonScriptExecutor;
+import picard.cmdline.programgroups.VariantFilteringProgramGroup;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/**
+ * Trains a model for scoring variant calls based on site-level annotations.
+ *
+ *
+ * This tool is intended to be used as the second step in a variant-filtering workflow that supersedes the
+ * {@link VariantRecalibrator} workflow. Given training (and optionally, calibration) sets of site-level annotations
+ * produced by {@link ExtractVariantAnnotations}, this tool can be used to train a model for scoring variant
+ * calls. The outputs of the tool are TODO
+ *
+ *
+ *
+ * The model trained by this tool can in turn be provided along with a VCF file to the {@link ScoreVariantAnnotations}
+ * tool, which assigns a score to each call (with a lower score indicating that a call is more likely to be an artifact
+ * and should perhaps be filtered). Each score can also be converted to a corresponding sensitivity to a
+ * calibration set, if the latter is available.
+ *
+ *
+ *
+ * TODO model definition
+ *
+ *
+ *
+ * TODO calibration-sensitivity conversion, considerations, and comparison to tranche files
+ *
+ *
+ *
+ * TODO positive vs. positive-negative
+ *
+ *
+ *
+ * TODO IsolationForest section with description of method and hyperparameters
+ *
+ *
+ *
+ * Note that HDF5 files may be viewed using hdfview
+ * or loaded in Python using PyTables or h5py.
+ *
+ *
+ *
+ * <h3>Inputs</h3>
+ *
+ *
+ *
+ * Labeled-annotations HDF5 file (.annot.hdf5). Annotation data and metadata for labeled sites are stored in the
+ * HDF5 directory structure given in the documentation for the {@link ExtractVariantAnnotations} tool. In typical
+ * usage, both the {@value LabeledVariantAnnotationsData#TRAINING_LABEL} and
+ * {@value LabeledVariantAnnotationsData#CALIBRATION_LABEL} labels would be available for non-empty sets of
+ * sites of the requested variant type.
+ *
+ *
+ * (Optional) Unlabeled-annotations HDF5 file (.unlabeled.annot.hdf5). Annotation data and metadata for
+ * unlabeled sites are stored in the HDF5 directory structure given in the documentation for the
+ * {@link ExtractVariantAnnotations} tool. If provided, a positive-negative modeling approach (similar to
+ * that used in {@link VariantRecalibrator}) will be used.
+ *
+ *
+ * Variant types (i.e., SNP and/or INDEL) for which to train models. Logic for determining variant type was retained from
+ * {@link VariantRecalibrator}; see {@link VariantType}. A separate model will be trained for each variant type
+ * and separate sets of outputs with corresponding tags in the filenames (i.e., "snp" or "indel") will be produced.
+ * TODO can run tool twice
+ *
+ *
+ * (Optional) Model backend. The default Python IsolationForest implementation requires either the GATK Python environment
+ * or that certain Python packages (argparse, h5py, numpy, sklearn, and dill) are otherwise available.
+ * A custom backend can also be specified in conjunction with the {@value PYTHON_SCRIPT_LONG_NAME} argument.
+ *
+ *
+ * (Optional) Model hyperparameters JSON file. TODO
+ *
+ *
+ * (Optional) Calibration-set sensitivity threshold. TODO if separate SNP/INDEL thresholds, run tool twice
+ *
+ *
+ * Output prefix.
+ * This is used as the basename for output files.
+ *
+ *
+ *
+ *
+ * <h3>Outputs</h3>
+ *
+ *
+ *
+ * TODO
+ *
+ *
+ * (Optional) TODO
+ *
+ *
+ *
+ *
+ * <h3>Usage examples</h3>
+ *
+ *
+ * TODO, positive-only, producing the outputs 1)
+ *
+ *
+ * gatk TrainVariantAnnotationsModel \
+ * TODO
+ *
+ *
+ *
+ *
+ * TODO, positive-negative, producing the outputs 1)
+ *
+ *
+ * gatk TrainVariantAnnotationsModel \
+ * TODO
+ *
+ *
+ *
+ *
+ * <h3>Custom modeling/scoring backends (ADVANCED)</h3>
+ *
+ *
+ * The primary modeling functionality performed by this tool is accomplished by a "modeling backend"
+ * whose fundamental contract is to take an input HDF5 file containing an annotation matrix for sites of a
+ * single variant type (i.e., SNP or INDEL) and to output a serialized scorer for that variant type.
+ * Rather than using one of the available, implemented backends, advanced users may provide their own backend
+ * via the {@value PYTHON_SCRIPT_LONG_NAME} argument. See documentation in the modeling and scoring interfaces
+ * ({@link VariantAnnotationsModel} and {@link VariantAnnotationsScorer}, respectively), as well as the default
+ * Python IsolationForest implementation at org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/isolation-forest.py.
+ *
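+ * As a rough illustration of this contract, consider the following sketch (hypothetical, and assuming
+ * that {@link VariantAnnotationsModel} can be treated as a functional interface over
+ * {@code trainAndSerialize(File trainingAnnotationsFile, String outputPrefix)}):
+ *
+ * <pre>{@code
+ * // a do-nothing model; a real implementation would fit a model to the training annotations
+ * // and serialize a scorer to files named with the given output prefix
+ * final VariantAnnotationsModel noOpModel = (trainingAnnotationsFile, outputPrefix) -> {
+ *     final double[][] annotations = LabeledVariantAnnotationsData.readAnnotations(trainingAnnotationsFile);
+ *     System.out.println("Would train on " + annotations.length + " sites.");
+ * };
+ * }</pre>
+ *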
+ *
+ *
+ * Extremely advanced users could potentially substitute their own implementation for the entire
+ * {@link TrainVariantAnnotationsModel} tool, while still making use of the up/downstream
+ * {@link ExtractVariantAnnotations} and {@link ScoreVariantAnnotations} tools. To do so, one would additionally
+ * have to implement functionality for subsetting training/calibration sets by variant type,
+ * calling modeling backends as appropriate, and scoring calibration sets.
+ *
+ *
+ * @author Samuel Lee <slee@broadinstitute.org>
+ */
+@CommandLineProgramProperties(
+ summary = "Trains a model for scoring variant calls based on site-level annotations.",
+ oneLineSummary = "Trains a model for scoring variant calls based on site-level annotations",
+ programGroup = VariantFilteringProgramGroup.class
+)
+@DocumentedFeature
+@BetaFeature
+public final class TrainVariantAnnotationsModel extends CommandLineProgram {
+
+ public static final String MODE_LONG_NAME = "mode";
+ public static final String ANNOTATIONS_HDF5_LONG_NAME = "annotations-hdf5";
+ public static final String UNLABELED_ANNOTATIONS_HDF5_LONG_NAME = "unlabeled-annotations-hdf5";
+ public static final String MODEL_BACKEND_LONG_NAME = "model-backend";
+ public static final String PYTHON_SCRIPT_LONG_NAME = "python-script";
+ public static final String HYPERPARAMETERS_JSON_LONG_NAME = "hyperparameters-json";
+ public static final String CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME = "calibration-sensitivity-threshold";
+
+ public static final String ISOLATION_FOREST_PYTHON_SCRIPT = "isolation-forest.py";
+ public static final String ISOLATION_FOREST_HYPERPARAMETERS_JSON = "isolation-forest-hyperparameters.json";
+
+ enum AvailableLabelsMode {
+ POSITIVE_ONLY, POSITIVE_UNLABELED
+ }
+
+ public static final String TRAINING_SCORES_HDF5_SUFFIX = ".trainingScores.hdf5";
+ public static final String CALIBRATION_SCORES_HDF5_SUFFIX = ".calibrationScores.hdf5";
+ public static final String UNLABELED_SCORES_HDF5_SUFFIX = ".unlabeledScores.hdf5";
+ public static final String NEGATIVE_TAG = ".negative";
+
+ @Argument(
+ fullName = ANNOTATIONS_HDF5_LONG_NAME,
+ doc = "HDF5 file containing annotations extracted with ExtractVariantAnnotations.")
+ private File inputAnnotationsFile;
+
+ @Argument(
+ fullName = UNLABELED_ANNOTATIONS_HDF5_LONG_NAME,
+ doc = "HDF5 file containing annotations extracted with ExtractVariantAnnotations. " +
+ "If specified with " + CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME + ", " +
+ "a positive-unlabeled modeling approach will be used; otherwise, a positive-only modeling " +
+ "approach will be used.",
+ optional = true)
+ private File inputUnlabeledAnnotationsFile;
+
+ @Argument(
+ fullName = MODEL_BACKEND_LONG_NAME,
+ doc = "Backend to use for training models. " +
+ "JAVA_BGMM will use a pure Java implementation (ported from Python scikit-learn) of the Bayesian Gaussian Mixture Model. " +
+ "PYTHON_IFOREST will use the Python scikit-learn implementation of the IsolationForest method and " +
+ "will require that the corresponding Python dependencies are present in the environment. " +
+ "PYTHON_SCRIPT will use the script specified by the " + PYTHON_SCRIPT_LONG_NAME + " argument. " +
+ "See the tool documentation for more details.")
+ private VariantAnnotationsModelBackend modelBackend = VariantAnnotationsModelBackend.PYTHON_IFOREST;
+
+ @Argument(
+ fullName = PYTHON_SCRIPT_LONG_NAME,
+ doc = "Python script used for specifying a custom scoring backend. If provided, " + MODEL_BACKEND_LONG_NAME + " must also be set to PYTHON_SCRIPT.",
+ optional = true)
+ private File pythonScriptFile;
+
+ @Argument(
+ fullName = HYPERPARAMETERS_JSON_LONG_NAME,
+ doc = "JSON file containing hyperparameters. Optional if the PYTHON_IFOREST backend is used " +
+ "(if not specified, a default set of hyperparameters will be used); otherwise required.",
+ optional = true)
+ private File hyperparametersJSONFile;
+
+ @Argument(
+ fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
+ shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
+ doc = "Output prefix.")
+ private String outputPrefix;
+
+ @Argument(
+ fullName = CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME,
+ doc = "Calibration-sensitivity threshold that determines which sites will be used for training the negative model " +
+ "in the positive-unlabeled modeling approach. " +
+ "Increasing this will decrease the corresponding positive-model score threshold; sites with scores below this score " +
+ "threshold will be used for training the negative model. Thus, this parameter should typically be chosen to " +
+ "be close to 1, so that sites that score highly according to the positive model will not be used to train the negative model. " +
+ "The " + UNLABELED_ANNOTATIONS_HDF5_LONG_NAME + " argument must be specified in conjunction with this argument. " +
+ "If separate thresholds for SNP and INDEL models are desired, run the tool separately for each mode with its respective threshold.",
+ optional = true,
+ minValue = 0.,
+ maxValue = 1.)
+ private Double calibrationSensitivityThreshold;
+
+ @Argument(
+ fullName = MODE_LONG_NAME,
+ doc = "Variant types for which to train models. Duplicate values will be ignored.",
+ minElements = 1)
+ public List<VariantType> variantTypes = new ArrayList<>(Arrays.asList(VariantType.SNP, VariantType.INDEL));
+
+ private AvailableLabelsMode availableLabelsMode;
+
+ @Override
+ protected Object doWork() {
+
+ validateArgumentsAndSetModes();
+
+ logger.info("Starting training...");
+
+ for (final VariantType variantType : VariantType.values()) { // enforces order in which models are trained
+ if (variantTypes.contains(variantType)) {
+ doModelingWorkForVariantType(variantType);
+ }
+ }
+
+ logger.info(String.format("%s complete.", getClass().getSimpleName()));
+
+ return null;
+ }
+
+ private void validateArgumentsAndSetModes() {
+ IOUtils.canReadFile(inputAnnotationsFile);
+
+ Utils.validateArg((inputUnlabeledAnnotationsFile == null) == (calibrationSensitivityThreshold == null),
+ "Unlabeled annotations and calibration-sensitivity threshold must both be unspecified (for positive-only model training) " +
+ "or specified (for positive-unlabeled model training).");
+
+ availableLabelsMode = inputUnlabeledAnnotationsFile != null && calibrationSensitivityThreshold != null
+ ? AvailableLabelsMode.POSITIVE_UNLABELED
+ : AvailableLabelsMode.POSITIVE_ONLY;
+
+ if (inputUnlabeledAnnotationsFile != null) {
+ IOUtils.canReadFile(inputUnlabeledAnnotationsFile);
+ final List<String> annotationNames = LabeledVariantAnnotationsData.readAnnotationNames(inputAnnotationsFile);
+ final List<String> unlabeledAnnotationNames = LabeledVariantAnnotationsData.readAnnotationNames(inputUnlabeledAnnotationsFile);
+ Utils.validateArg(annotationNames.equals(unlabeledAnnotationNames), "Annotation names must be identical for positive and unlabeled annotations.");
+ }
+
+ switch (modelBackend) {
+ case JAVA_BGMM:
+ Utils.validateArg(pythonScriptFile == null,
+ "Python script should not be provided when using JAVA_BGMM backend.");
+ IOUtils.canReadFile(hyperparametersJSONFile);
+ logger.info("Running in JAVA_BGMM mode...");
+ break;
+ case PYTHON_IFOREST:
+ Utils.validateArg(pythonScriptFile == null,
+ "Python script should not be provided when using PYTHON_IFOREST backend.");
+
+ pythonScriptFile = IOUtils.writeTempResource(new Resource(ISOLATION_FOREST_PYTHON_SCRIPT, TrainVariantAnnotationsModel.class));
+ if (hyperparametersJSONFile == null) {
+ hyperparametersJSONFile = IOUtils.writeTempResource(new Resource(ISOLATION_FOREST_HYPERPARAMETERS_JSON, TrainVariantAnnotationsModel.class));
+ }
+ IOUtils.canReadFile(hyperparametersJSONFile);
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("argparse");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("h5py");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("numpy");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("sklearn");
+ PythonScriptExecutor.checkPythonEnvironmentForPackage("dill");
+ logger.info("Running in PYTHON_IFOREST mode...");
+ break;
+ case PYTHON_SCRIPT:
+ IOUtils.canReadFile(pythonScriptFile);
+ IOUtils.canReadFile(hyperparametersJSONFile);
+ logger.info("Running in PYTHON_SCRIPT mode...");
+ break;
+ default:
+ throw new GATKException.ShouldNeverReachHereException("Unknown model-backend mode.");
+ }
+ }
+
+ /**
+ * TODO
+ */
+ private void doModelingWorkForVariantType(final VariantType variantType) {
+ // positive model
+ final List<String> annotationNames = LabeledVariantAnnotationsData.readAnnotationNames(inputAnnotationsFile);
+ final double[][] annotations = LabeledVariantAnnotationsData.readAnnotations(inputAnnotationsFile);
+
+ final List<Boolean> isTraining = LabeledVariantAnnotationsData.readLabel(inputAnnotationsFile, LabeledVariantAnnotationsData.TRAINING_LABEL);
+ final List<Boolean> isCalibration = LabeledVariantAnnotationsData.readLabel(inputAnnotationsFile, LabeledVariantAnnotationsData.CALIBRATION_LABEL);
+ final List<Boolean> isSNP = LabeledVariantAnnotationsData.readLabel(inputAnnotationsFile, LabeledVariantAnnotationsData.SNP_LABEL);
+ final List<Boolean> isVariantType = variantType == VariantType.SNP ? isSNP : isSNP.stream().map(x -> !x).collect(Collectors.toList());
+
+ final List<Boolean> isTrainingAndVariantType = Streams.zip(isTraining.stream(), isVariantType.stream(), (a, b) -> a && b).collect(Collectors.toList());
+ final int numTrainingAndVariantType = numPassingFilter(isTrainingAndVariantType);
+
+ final String variantTypeString = variantType.toString();
+ final String outputPrefixTag = '.' + variantType.toString().toLowerCase();
+
+ if (numTrainingAndVariantType > 0) {
+ logger.info(String.format("Training %s model with %d training sites x %d annotations %s...",
+ variantTypeString, numTrainingAndVariantType, annotationNames.size(), annotationNames));
+ final File labeledTrainingAndVariantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isTrainingAndVariantType);
+ trainAndSerializeModel(labeledTrainingAndVariantTypeAnnotationsFile, outputPrefixTag);
+ logger.info(String.format("%s model trained and serialized with output prefix \"%s\".", variantTypeString, outputPrefix + outputPrefixTag));
+
+ if (modelBackend == VariantAnnotationsModelBackend.JAVA_BGMM) {
+ BGMMVariantAnnotationsScorer.preprocessAnnotationsWithBGMMAndWriteHDF5(
+ annotationNames, outputPrefix + outputPrefixTag, labeledTrainingAndVariantTypeAnnotationsFile, logger);
+ }
+
+ logger.info(String.format("Scoring %d %s training sites...", numTrainingAndVariantType, variantTypeString));
+ final File labeledTrainingAndVariantTypeScoresFile = score(labeledTrainingAndVariantTypeAnnotationsFile, outputPrefixTag, TRAINING_SCORES_HDF5_SUFFIX);
+ logger.info(String.format("%s training scores written to %s.", variantTypeString, labeledTrainingAndVariantTypeScoresFile.getAbsolutePath()));
+
+ final List<Boolean> isLabeledCalibrationAndVariantType = Streams.zip(isCalibration.stream(), isVariantType.stream(), (a, b) -> a && b).collect(Collectors.toList());
+ final int numLabeledCalibrationAndVariantType = numPassingFilter(isLabeledCalibrationAndVariantType);
+ if (numLabeledCalibrationAndVariantType > 0) {
+ logger.info(String.format("Scoring %d %s calibration sites...", numLabeledCalibrationAndVariantType, variantTypeString));
+ final File labeledCalibrationAndVariantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isLabeledCalibrationAndVariantType);
+ final File labeledCalibrationAndVariantTypeScoresFile = score(labeledCalibrationAndVariantTypeAnnotationsFile, outputPrefixTag, CALIBRATION_SCORES_HDF5_SUFFIX);
+ logger.info(String.format("%s calibration scores written to %s.", variantTypeString, labeledCalibrationAndVariantTypeScoresFile.getAbsolutePath()));
+ } else {
+ logger.warn(String.format("No %s calibration sites were available.", variantTypeString));
+ }
+
+ // negative model
+ if (availableLabelsMode == AvailableLabelsMode.POSITIVE_UNLABELED) {
+ final double[][] unlabeledAnnotations = LabeledVariantAnnotationsData.readAnnotations(inputUnlabeledAnnotationsFile);
+ final List<Boolean> unlabeledIsSNP = LabeledVariantAnnotationsData.readLabel(inputUnlabeledAnnotationsFile, LabeledVariantAnnotationsData.SNP_LABEL);
+ final List<Boolean> isUnlabeledVariantType = variantType == VariantType.SNP ? unlabeledIsSNP : unlabeledIsSNP.stream().map(x -> !x).collect(Collectors.toList());
+
+ final int numUnlabeledVariantType = numPassingFilter(isUnlabeledVariantType);
+
+ if (numUnlabeledVariantType > 0) {
+ final File labeledCalibrationAndVariantTypeScoresFile = new File(outputPrefix + outputPrefixTag + CALIBRATION_SCORES_HDF5_SUFFIX);
+ final double[] labeledCalibrationAndVariantTypeScores = VariantAnnotationsScorer.readScores(labeledCalibrationAndVariantTypeScoresFile);
+ final double scoreThreshold = calibrationSensitivityThreshold == 1. // Percentile requires quantile > 0, so we treat this as a special case
+ ? Doubles.min(labeledCalibrationAndVariantTypeScores)
+ : new Percentile(100. * (1. - calibrationSensitivityThreshold)).evaluate(labeledCalibrationAndVariantTypeScores);
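+ // e.g., a calibration-sensitivity threshold of 0.95 yields the 5th percentile of the calibration
+ // scores, so that roughly 95% of calibration sites score at or above the resulting scoreThreshold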
+ logger.info(String.format("Using %s score threshold of %.4f corresponding to specified calibration-sensitivity threshold of %.4f ...",
+ variantTypeString, scoreThreshold, calibrationSensitivityThreshold));
+
+ final double[] labeledTrainingAndVariantTypeScores = VariantAnnotationsScorer.readScores(labeledTrainingAndVariantTypeScoresFile);
+ final List<Boolean> isNegativeTrainingFromLabeledTrainingAndVariantType = Arrays.stream(labeledTrainingAndVariantTypeScores).boxed().map(s -> s < scoreThreshold).collect(Collectors.toList());
+ final int numNegativeTrainingFromLabeledTrainingAndVariantType = numPassingFilter(isNegativeTrainingFromLabeledTrainingAndVariantType);
+ logger.info(String.format("Selected %d labeled %s sites below score threshold of %.4f for negative-model training...",
+ numNegativeTrainingFromLabeledTrainingAndVariantType, variantTypeString, scoreThreshold));
+
+ logger.info(String.format("Scoring %d unlabeled %s sites...", numUnlabeledVariantType, variantTypeString));
+ final File unlabeledVariantTypeAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, unlabeledAnnotations, isUnlabeledVariantType);
+ final File unlabeledVariantTypeScoresFile = score(unlabeledVariantTypeAnnotationsFile, outputPrefixTag, UNLABELED_SCORES_HDF5_SUFFIX);
+ final double[] unlabeledVariantTypeScores = VariantAnnotationsScorer.readScores(unlabeledVariantTypeScoresFile);
+ final List<Boolean> isNegativeTrainingFromUnlabeledVariantType = Arrays.stream(unlabeledVariantTypeScores).boxed().map(s -> s < scoreThreshold).collect(Collectors.toList()); // length matches unlabeledAnnotationsFile
+ final int numNegativeTrainingFromUnlabeledVariantType = numPassingFilter(isNegativeTrainingFromUnlabeledVariantType);
+ logger.info(String.format("Selected %d unlabeled %s sites below score threshold of %.4f for negative-model training...",
+ numNegativeTrainingFromUnlabeledVariantType, variantTypeString, scoreThreshold));
+
+ final double[][] negativeTrainingAndVariantTypeAnnotations = concatenateLabeledAndUnlabeledNegativeTrainingData(
+ annotationNames, annotations, unlabeledAnnotations, isNegativeTrainingFromLabeledTrainingAndVariantType, isNegativeTrainingFromUnlabeledVariantType);
+ final int numNegativeTrainingAndVariantType = negativeTrainingAndVariantTypeAnnotations.length;
+ final List<Boolean> isNegativeTrainingAndVariantType = Collections.nCopies(numNegativeTrainingAndVariantType, true);
+
+ logger.info(String.format("Training %s negative model with %d negative-training sites x %d annotations %s...",
+ variantTypeString, numNegativeTrainingAndVariantType, annotationNames.size(), annotationNames));
+ final File negativeTrainingAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(
+ annotationNames, negativeTrainingAndVariantTypeAnnotations, isNegativeTrainingAndVariantType);
+ trainAndSerializeModel(negativeTrainingAnnotationsFile, outputPrefixTag + NEGATIVE_TAG);
+ logger.info(String.format("%s negative model trained and serialized with output prefix \"%s\".", variantTypeString, outputPrefix + outputPrefixTag + NEGATIVE_TAG));
+
+ if (numLabeledCalibrationAndVariantType > 0) {
+ logger.info(String.format("Re-scoring %d %s calibration sites...", numLabeledCalibrationAndVariantType, variantTypeString));
+ final File labeledCalibrationAnnotationsFile = LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isLabeledCalibrationAndVariantType);
+ final File labeledCalibrationScoresFile = positiveNegativeScore(labeledCalibrationAnnotationsFile, outputPrefixTag, CALIBRATION_SCORES_HDF5_SUFFIX);
+ logger.info(String.format("Calibration scores written to %s.", labeledCalibrationScoresFile.getAbsolutePath()));
+ }
+ } else {
+ throw new UserException.BadInput(String.format("Attempted to train %s negative model, " +
+ "but no suitable sites were found in the provided annotations.", variantTypeString));
+ }
+ }
+ } else {
+ throw new UserException.BadInput(String.format("Attempted to train %s model, " +
+ "but no suitable training sites were found in the provided annotations.", variantTypeString));
+ }
+ }
+
+ private static int numPassingFilter(final List<Boolean> isPassing) {
+ return isPassing.stream().mapToInt(x -> x ? 1 : 0).sum();
+ }
+
+ private void trainAndSerializeModel(final File trainingAnnotationsFile,
+ final String outputPrefixTag) {
+ readAndValidateTrainingAnnotations(trainingAnnotationsFile, outputPrefixTag);
+ final VariantAnnotationsModel model;
+ switch (modelBackend) {
+ case JAVA_BGMM:
+ model = new BGMMVariantAnnotationsModel(hyperparametersJSONFile);
+ break;
+ case PYTHON_IFOREST:
+ model = new PythonSklearnVariantAnnotationsModel(pythonScriptFile, hyperparametersJSONFile);
+ break;
+ case PYTHON_SCRIPT:
+ model = new PythonSklearnVariantAnnotationsModel(pythonScriptFile, hyperparametersJSONFile);
+ break;
+ default:
+ throw new GATKException.ShouldNeverReachHereException("Unknown model mode.");
+ }
+ model.trainAndSerialize(trainingAnnotationsFile, outputPrefix + outputPrefixTag);
+ }
+
+ /**
+ * When training models on data that has been subset to a given variant type,
+ * we FAIL if any annotation is completely missing and WARN if any annotation has zero variance.
+ */
+ private void readAndValidateTrainingAnnotations(final File trainingAnnotationsFile,
+ final String outputPrefixTag) {
+ final List<String> annotationNames = LabeledVariantAnnotationsData.readAnnotationNames(trainingAnnotationsFile);
+ final double[][] annotations = LabeledVariantAnnotationsData.readAnnotations(trainingAnnotationsFile);
+
+ // these checks are redundant, but we err on the side of robustness
+ final int numAnnotationNames = annotationNames.size();
+ final int numData = annotations.length;
+ Utils.validateArg(numAnnotationNames > 0, "Number of annotation names must be positive.");
+ Utils.validateArg(numData > 0, "Number of data points must be positive.");
+ final int numFeatures = annotations[0].length;
+ Utils.validateArg(numAnnotationNames == numFeatures,
+ "Number of annotation names must match the number of features in the annotation data.");
+
+ final List<String> completelyMissingAnnotationNames = new ArrayList<>(numFeatures);
+ IntStream.range(0, numFeatures).forEach(
+ i -> {
+ if (new Variance().evaluate(IntStream.range(0, numData).mapToDouble(n -> annotations[n][i]).toArray()) == 0.) {
+ logger.warn(String.format("All values of the annotation %s are identical in the training data for the %s model.",
+ annotationNames.get(i), outputPrefix + outputPrefixTag));
+ }
+ if (IntStream.range(0, numData).boxed().map(n -> annotations[n][i]).allMatch(x -> Double.isNaN(x))) {
+ completelyMissingAnnotationNames.add(annotationNames.get(i));
+ }
+ }
+ );
+
+ if (!completelyMissingAnnotationNames.isEmpty()) {
+ throw new UserException.BadInput(
+ String.format("All values of the following annotations are missing in the training data for the %s model: %s. " +
+ "Consider repeating the extraction step with this annotation dropped. " +
+ "If this is a negative model and the amount of negative training data is small, " +
+ "perhaps also consider lowering the value of the %s argument so that more " +
+ "training data is considered, which may ultimately admit data with non-missing values for the annotation " +
+ "(although note that this will also have implications for the resulting model fit); " +
+ "alternatively, consider excluding the %s and %s arguments and running positive-only modeling.",
+ outputPrefix + outputPrefixTag, completelyMissingAnnotationNames,
+ CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME, UNLABELED_ANNOTATIONS_HDF5_LONG_NAME, CALIBRATION_SENSITIVITY_THRESHOLD_LONG_NAME));
+ }
+ }
+
+ private File score(final File annotationsFile,
+ final String outputPrefixTag,
+ final String outputSuffix) {
+ final VariantAnnotationsScorer scorer;
+ switch (modelBackend) {
+ case JAVA_BGMM:
+ scorer = BGMMVariantAnnotationsScorer.deserialize(new File(outputPrefix + outputPrefixTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX));
+ break;
+ case PYTHON_IFOREST:
+ case PYTHON_SCRIPT:
+ scorer = new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX));
+ break;
+
+ default:
+ throw new GATKException.ShouldNeverReachHereException("Unknown model mode.");
+ }
+ final File outputScoresFile = new File(outputPrefix + outputPrefixTag + outputSuffix);
+ scorer.score(annotationsFile, outputScoresFile);
+ return outputScoresFile;
+ }
+
+ private File positiveNegativeScore(final File annotationsFile,
+ final String outputPrefixTag,
+ final String outputSuffix) {
+ final VariantAnnotationsScorer scorer;
+ switch (modelBackend) {
+ case JAVA_BGMM:
+ scorer = VariantAnnotationsScorer.combinePositiveAndNegativeScorer(
+ BGMMVariantAnnotationsScorer.deserialize(new File(outputPrefix + outputPrefixTag + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX)),
+ BGMMVariantAnnotationsScorer.deserialize(new File(outputPrefix + outputPrefixTag + NEGATIVE_TAG + BGMMVariantAnnotationsScorer.BGMM_SCORER_SER_SUFFIX)));
+ break;
+ case PYTHON_IFOREST:
+ case PYTHON_SCRIPT:
+ scorer = VariantAnnotationsScorer.combinePositiveAndNegativeScorer(
+ new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX)),
+ new PythonSklearnVariantAnnotationsScorer(pythonScriptFile, new File(outputPrefix + outputPrefixTag + NEGATIVE_TAG + PythonSklearnVariantAnnotationsScorer.PYTHON_SCORER_PKL_SUFFIX)));
+ break;
+ default:
+ throw new GATKException.ShouldNeverReachHereException("Unknown model mode.");
+ }
+ final File outputScoresFile = new File(outputPrefix + outputPrefixTag + outputSuffix);
+ scorer.score(annotationsFile, outputScoresFile);
+ return outputScoresFile;
+ }
+
+ private static double[][] concatenateLabeledAndUnlabeledNegativeTrainingData(final List<String> annotationNames,
+ final double[][] annotations,
+ final double[][] unlabeledAnnotations,
+ final List<Boolean> isNegativeTrainingFromLabeledTrainingAndVariantType,
+ final List<Boolean> isNegativeTrainingFromUnlabeledVariantType) {
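+ // subset each annotation source via the temporary-file utility, then concatenate the
+ // resulting matrices row-wise to yield the combined negative-training data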
+ final File negativeTrainingFromLabeledTrainingAndVariantTypeAnnotationsFile =
+ LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, annotations, isNegativeTrainingFromLabeledTrainingAndVariantType);
+ final double[][] negativeTrainingFromLabeledTrainingAndVariantTypeAnnotations = LabeledVariantAnnotationsData.readAnnotations(negativeTrainingFromLabeledTrainingAndVariantTypeAnnotationsFile);
+
+ final File negativeTrainingFromUnlabeledVariantTypeAnnotationsFile =
+ LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(annotationNames, unlabeledAnnotations, isNegativeTrainingFromUnlabeledVariantType);
+ final double[][] negativeTrainingFromUnlabeledVariantTypeAnnotations = LabeledVariantAnnotationsData.readAnnotations(negativeTrainingFromUnlabeledVariantTypeAnnotationsFile);
+
+ return Streams.concat(
+ Arrays.stream(negativeTrainingFromLabeledTrainingAndVariantTypeAnnotations),
+ Arrays.stream(negativeTrainingFromUnlabeledVariantTypeAnnotations)).toArray(double[][]::new);
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsData.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsData.java
new file mode 100644
index 00000000000..2abd7fce48b
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsData.java
@@ -0,0 +1,284 @@
+package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data;
+
+import com.google.common.collect.ImmutableList;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.broadinstitute.hdf5.HDF5File;
+import org.broadinstitute.hdf5.HDF5LibException;
+import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsModel;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.modeling.VariantAnnotationsScorer;
+import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.io.IOUtils;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+/**
+ * Represents a collection of {@link LabeledVariantAnnotationsDatum} as a list of lists of datums.
+ * The outer list is always per-variant. In allele-specific mode, each datum in the inner lists
+ * corresponds to a single allele; otherwise, each inner list trivially contains a single datum corresponding
+ * to the variant.
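+ * For example, in allele-specific mode, a site with two alt alleles yields an inner list of two
+ * datums, while in non-allele-specific mode every site yields an inner list with exactly one datum.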
+ */
+public final class LabeledVariantAnnotationsData {
+ private static final Logger logger = LogManager.getLogger(LabeledVariantAnnotationsData.class);
+
+ // chunk size in temporary annotation files
+ // TODO this could be exposed
+ private static final int CHUNK_DIVISOR = 16;
+ private static final int MAXIMUM_CHUNK_SIZE = HDF5Utils.MAX_NUMBER_OF_VALUES_PER_HDF5_MATRIX / CHUNK_DIVISOR;
+
+ private static final int INITIAL_SIZE = 10_000_000;
+
+ public static final String TRAINING_LABEL = "training";
+ public static final String CALIBRATION_LABEL = "calibration";
+ public static final String SNP_LABEL = "snp";
+
+ public static final String INTERVALS_PATH = "/intervals";
+ public static final String ALLELES_REF_PATH = "/alleles/ref";
+ public static final String ALLELES_ALT_PATH = "/alleles/alt";
+ public static final String ANNOTATIONS_NAMES_PATH = "/annotations/names";
+ public static final String ANNOTATIONS_PATH = "/annotations";
+ public static final String LABELS_PATH = "/labels";
+ public static final String LABELS_SNP_PATH = LABELS_PATH + "/snp";
+
+ private final List<String> sortedAnnotationNames;
+ final List<String> sortedLabels;
+
+ private final List<List<LabeledVariantAnnotationsDatum>> data;
+ private final boolean useASAnnotations;
+
+ public LabeledVariantAnnotationsData(final Collection<String> annotationNames,
+ final Collection<String> labels,
+ final boolean useASAnnotations,
+ final int initialSize) {
+ data = new ArrayList<>(initialSize);
+ sortedAnnotationNames = ImmutableList.copyOf(annotationNames.stream().distinct().sorted().collect(Collectors.toList()));
+ Utils.validateArg(sortedAnnotationNames.size() > 0, "Number of annotation names must be positive.");
+ if (sortedAnnotationNames.size() != annotationNames.size()) {
+ logger.warn(String.format("Ignoring duplicate annotations: %s.", Utils.getDuplicatedItems(annotationNames)));
+ }
+ sortedLabels = ImmutableList.copyOf(labels.stream().distinct().sorted().collect(Collectors.toList()));
+ if (sortedLabels.size() != labels.size()) {
+ logger.warn(String.format("Ignoring duplicate labels: %s.", Utils.getDuplicatedItems(labels)));
+ }
+ this.useASAnnotations = useASAnnotations;
+ }
+
+ public LabeledVariantAnnotationsData(final Collection<String> annotationNames,
+ final Collection<String> labels,
+ final boolean useASAnnotations) {
+ this(annotationNames, labels, useASAnnotations, INITIAL_SIZE);
+ }
+
+ public List<String> getSortedAnnotationNames() {
+ return sortedAnnotationNames;
+ }
+
+ public List<String> getSortedLabels() {
+ return sortedLabels;
+ }
+
+ public int size() {
+ return data.size();
+ }
+
+ public void clear() {
+ data.clear();
+ }
+
+ /**
+ * Adds an element to the underlying {@link #data} collection.
+ */
+ public void add(final VariantContext vc,
+ final List<List<Allele>> altAllelesPerDatum,
+ final List<VariantType> variantTypePerDatum,
+ final List<TreeSet<String>> labelsPerDatum) {
+ if (!useASAnnotations) {
+ data.add(Collections.singletonList(new LabeledVariantAnnotationsDatum(
+ vc, altAllelesPerDatum.get(0), variantTypePerDatum.get(0), labelsPerDatum.get(0), sortedAnnotationNames, useASAnnotations)));
+ } else {
+ data.add(IntStream.range(0, altAllelesPerDatum.size()).boxed()
+ .map(i -> new LabeledVariantAnnotationsDatum(
+ vc, altAllelesPerDatum.get(i), variantTypePerDatum.get(i), labelsPerDatum.get(i), sortedAnnotationNames, useASAnnotations))
+ .collect(Collectors.toList()));
+ }
+ }
+
+ /**
+ * Sets the element at a specified index in the underlying {@link #data} collection.
+ */
+ public void set(final int index,
+ final VariantContext vc,
+ final List<List<Allele>> altAllelesPerDatum,
+ final List<VariantType> variantTypePerDatum,
+ final List<TreeSet<String>> labelsPerDatum) {
+ if (!useASAnnotations) {
+ data.set(index, Collections.singletonList(new LabeledVariantAnnotationsDatum(
+ vc, altAllelesPerDatum.get(0), variantTypePerDatum.get(0), labelsPerDatum.get(0), sortedAnnotationNames, useASAnnotations)));
+ } else {
+ data.set(index, IntStream.range(0, altAllelesPerDatum.size()).boxed()
+ .map(i -> new LabeledVariantAnnotationsDatum(
+ vc, altAllelesPerDatum.get(i), variantTypePerDatum.get(i), labelsPerDatum.get(i), sortedAnnotationNames, useASAnnotations))
+ .collect(Collectors.toList()));
+ }
+ }
+
+ /**
+ * @return list of {@link VariantType} indicators, with length given by the number of corresponding sites
+ */
+ public List<VariantType> getVariantTypeFlat() {
+ return streamFlattenedData().map(datum -> datum.variantType).collect(Collectors.toList());
+ }
+
+ /**
+ * @return list of boolean label indicators, with length given by the number of sites;
+ * an element in the list will be true if the corresponding site is assigned to the specified label
+ */
+ public List<Boolean> isLabelFlat(final String label) {
+ return streamFlattenedData().map(datum -> datum.labels.contains(label)).collect(Collectors.toList());
+ }
+
+ private Stream<LabeledVariantAnnotationsDatum> streamFlattenedData() {
+ return data.stream().flatMap(List::stream);
+ }
+
+ /**
+ * Writes a representation of the collection to an HDF5 file with the following directory structure
+ * (paths are given by the constants defined in this class):
+ *
+ * <pre>
+ *   /intervals
+ *   /alleles/ref
+ *   /alleles/alt
+ *   /annotations/names
+ *   /annotations        (chunked double matrix)
+ *   /labels/snp
+ *   /labels/{label}     (one array per label)
+ * </pre>
+ *
+ * Here, each chunk is a double matrix, with dimensions given by (number of sites in the chunk) x (number of annotations).
+ * See the methods {@link HDF5Utils#writeChunkedDoubleMatrix} and {@link HDF5Utils#writeIntervals} for additional details.
+ *
+ * @param omitAllelesInHDF5 string arrays containing ref/alt alleles can be large, so we allow the option of omitting them
+ */
+ public void writeHDF5(final File outputFile,
+ final boolean omitAllelesInHDF5) {
+
+ try (final HDF5File outputHDF5File = new HDF5File(outputFile, HDF5File.OpenMode.CREATE)) {
+ IOUtils.canReadFile(outputHDF5File.getFile());
+ HDF5Utils.writeIntervals(outputHDF5File, INTERVALS_PATH,
+ streamFlattenedData().map(datum -> datum.interval).collect(Collectors.toList()));
+ if (!omitAllelesInHDF5) {
+ outputHDF5File.makeStringArray(ALLELES_REF_PATH,
+ streamFlattenedData().map(datum -> datum.refAllele.getDisplayString()).toArray(String[]::new));
+ if (!useASAnnotations) {
+ outputHDF5File.makeStringArray(ALLELES_ALT_PATH,
+ streamFlattenedData()
+ .map(datum -> datum.altAlleles.stream().map(Allele::getDisplayString).collect(Collectors.joining(",")))
+ .toArray(String[]::new));
+ } else {
+ outputHDF5File.makeStringArray(ALLELES_ALT_PATH,
+ streamFlattenedData().map(datum -> datum.altAlleles.get(0).getDisplayString()).toArray(String[]::new));
+ }
+ }
+ outputHDF5File.makeStringArray(ANNOTATIONS_NAMES_PATH, sortedAnnotationNames.toArray(new String[0]));
+ HDF5Utils.writeChunkedDoubleMatrix(outputHDF5File, ANNOTATIONS_PATH,
+ streamFlattenedData().map(datum -> datum.annotations).toArray(double[][]::new), MAXIMUM_CHUNK_SIZE);
+ outputHDF5File.makeDoubleArray(LABELS_SNP_PATH,
+ streamFlattenedData().mapToDouble(datum -> datum.variantType == VariantType.SNP ? 1 : 0).toArray());
+ for (final String label : sortedLabels) {
+ outputHDF5File.makeDoubleArray(String.format("%s/%s", LABELS_PATH, label),
+ streamFlattenedData().mapToDouble(datum -> datum.labels.contains(label) ? 1 : 0).toArray());
+ }
+ } catch (final HDF5LibException exception) {
+ throw new GATKException(String.format("Exception encountered during writing of annotations and metadata (%s). Output file at %s may be in a bad state.",
+ exception, outputFile.getAbsolutePath()));
+ }
+ }
+
+ /**
+ * @return list of annotation names, with length given by the number of annotations, read from the specified file
+ */
+ public static List<String> readAnnotationNames(final File annotationsFile) {
+ try (final HDF5File annotationsHDF5File = new HDF5File(annotationsFile, HDF5File.OpenMode.READ_ONLY)) {
+ IOUtils.canReadFile(annotationsHDF5File.getFile());
+ return Arrays.asList(annotationsHDF5File.readStringArray(ANNOTATIONS_NAMES_PATH));
+ } catch (final HDF5LibException exception) {
+ throw new GATKException(String.format("Exception encountered during reading of annotation names from %s: %s",
+ annotationsFile.getAbsolutePath(), exception));
+ }
+ }
+
+ /**
+ * @return matrix with dimensions (number of sites) x (number of annotations), read from the specified file
+ */
+ public static double[][] readAnnotations(final File annotationsFile) {
+ try (final HDF5File annotationsHDF5File = new HDF5File(annotationsFile, HDF5File.OpenMode.READ_ONLY)) {
+ IOUtils.canReadFile(annotationsHDF5File.getFile());
+ return HDF5Utils.readChunkedDoubleMatrix(annotationsHDF5File, ANNOTATIONS_PATH);
+ } catch (final HDF5LibException exception) {
+ throw new GATKException(String.format("Exception encountered during reading of annotations from %s: %s",
+ annotationsFile.getAbsolutePath(), exception));
+ }
+ }
+
+ /**
+ * @return list of boolean label indicators, with length given by the number of corresponding sites, read from the specified file;
+ * an element in the list will be true if the corresponding site is assigned to the specified label
+ */
+ public static List<Boolean> readLabel(final File annotationsFile,
+ final String label) {
+ try (final HDF5File annotationsHDF5File = new HDF5File(annotationsFile, HDF5File.OpenMode.READ_ONLY)) {
+ IOUtils.canReadFile(annotationsHDF5File.getFile());
+ return Arrays.stream(annotationsHDF5File.readDoubleArray(String.format("%s/%s", LABELS_PATH, label))).boxed().map(d -> d == 1).collect(Collectors.toList());
+ } catch (final HDF5LibException exception) {
+ throw new GATKException(String.format("Exception encountered during reading of label %s from %s: %s",
+ label, annotationsFile.getAbsolutePath(), exception));
+ }
+ }
+
+ /**
+ * Subsets annotation data according to a boolean filter and writes a limited representation to a temporary HDF5 file.
+ * Intended for passing annotations via the file interfaces of {@link VariantAnnotationsModel} and {@link VariantAnnotationsScorer}.
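+ * <p>A hypothetical round-trip sketch (the "training" label is illustrative):
+ * <pre>{@code
+ * final List<String> names = LabeledVariantAnnotationsData.readAnnotationNames(annotationsFile);
+ * final double[][] annotations = LabeledVariantAnnotationsData.readAnnotations(annotationsFile);
+ * final List<Boolean> isTraining = LabeledVariantAnnotationsData.readLabel(annotationsFile, "training");
+ * final File trainingAnnotationsFile =
+ *     LabeledVariantAnnotationsData.subsetAnnotationsToTemporaryFile(names, annotations, isTraining);
+ * }</pre>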
+ */
+ public static File subsetAnnotationsToTemporaryFile(final List<String> annotationNames,
+ final double[][] allAnnotations,
+ final List<Boolean> isSubset) {
+ Utils.validateArg(annotationNames.size() > 0, "Number of annotation names must be positive.");
+ Utils.validateArg(allAnnotations.length > 0, "Number of annotation data points must be positive.");
+ Utils.validateArg(annotationNames.size() == allAnnotations[0].length,
+ "Number of annotation names must match number of features in annotation data.");
+ final double[][] subsetData = IntStream.range(0, isSubset.size()).boxed().filter(isSubset::get).map(i -> allAnnotations[i]).toArray(double[][]::new);
+ final File subsetAnnotationsFile = IOUtils.createTempFile("subset.annot", ".hdf5");
+ try (final HDF5File subsetAnnotationsHDF5File = new HDF5File(subsetAnnotationsFile, HDF5File.OpenMode.CREATE)) {
+ subsetAnnotationsHDF5File.makeStringArray(ANNOTATIONS_NAMES_PATH, annotationNames.toArray(new String[0]));
+ HDF5Utils.writeChunkedDoubleMatrix(subsetAnnotationsHDF5File, ANNOTATIONS_PATH, subsetData, MAXIMUM_CHUNK_SIZE);
+ } catch (final HDF5LibException exception) {
+ throw new GATKException(String.format("Exception encountered during writing of annotations (%s). Output file at %s may be in a bad state.",
+ exception, subsetAnnotationsFile.getAbsolutePath()));
+ }
+ return subsetAnnotationsFile;
+ }
+}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java
new file mode 100644
index 00000000000..884529f5c56
--- /dev/null
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/data/LabeledVariantAnnotationsDatum.java
@@ -0,0 +1,104 @@
+package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import htsjdk.samtools.util.Locatable;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.LabeledVariantAnnotationsWalker;
+import org.broadinstitute.hellbender.utils.SimpleInterval;
+import org.broadinstitute.hellbender.utils.Utils;
+import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
+
+import java.util.List;
+import java.util.TreeSet;
+
+/**
+ * Represents metadata and annotations extracted from either a variant or a single alt allele (if in allele-specific mode).
+ * Intended to be package-private and accessed only by {@link LabeledVariantAnnotationsData}.
+ */
+final class LabeledVariantAnnotationsDatum implements Locatable {
+ final SimpleInterval interval;
+ final Allele refAllele;
+ final ImmutableList<Allele> altAlleles; // in allele-specific mode, this contains a single alt allele; otherwise, it contains all alt alleles that passed variant-type checks
+ final VariantType variantType;
+ final ImmutableSet<String> labels; // sorted TreeSet
+ final double[] annotations; // TODO use ImmutableDoubleArray?
+
+ LabeledVariantAnnotationsDatum(final VariantContext vc,
+ final List<Allele> altAlleles,
+ final VariantType variantType,
+ final TreeSet<String> labels,
+ final List<String> sortedAnnotationNames,
+ final boolean useASAnnotations) {
+ Utils.validate(!useASAnnotations || altAlleles.size() == 1,
+ "Datum should only be associated with one alt allele in allele-specific mode.");
+ this.interval = new SimpleInterval(vc);
+ this.refAllele = vc.getReference();
+ this.altAlleles = ImmutableList.copyOf(altAlleles);
+ this.variantType = variantType;
+ this.labels = ImmutableSet.copyOf(labels);
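+ // decode one value per annotation name, in sorted order, so that feature columns align across all data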
+ this.annotations = sortedAnnotationNames.stream()
+ .mapToDouble(a -> decodeAnnotation(vc, altAlleles, a, useASAnnotations))
+ .toArray();
+ }
+
+ @Override
+ public String getContig() {
+ return interval.getContig();
+ }
+
+ @Override
+ public int getStart() {
+ return interval.getStart();
+ }
+
+ @Override
+ public int getEnd() {
+ return interval.getEnd();
+ }
+
+ // code mostly retained from VQSR; some exception catching added
+ private static double decodeAnnotation(final VariantContext vc,
+ final List altAlleles,
+ final String annotationName,
+ final boolean useASAnnotations) {
+ double value;
+ try {
+ // if we're in allele-specific mode and an allele-specific annotation has been requested, parse the appropriate value from the list
+ // TODO: can we trigger allele-specific parsing based on annotation prefix or some other logic?
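+ // e.g., for the allele-specific annotation AS_QD with attribute value "12.3,4.5" and site alt alleles [A, T],
+ // a datum constructed for alt allele T would decode the value 4.5 (annotation name and values illustrative)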
+ if (useASAnnotations && annotationName.startsWith(GATKVCFConstants.ALLELE_SPECIFIC_PREFIX)) {
+ final List