From a7fada1d40a4a42b7323dd244a29b62ecccaf5a3 Mon Sep 17 00:00:00 2001 From: takutosato Date: Mon, 7 May 2018 16:24:10 -0400 Subject: [PATCH 1/7] 2 pass variant walker class --- .../engine/TwoPassVariantWalker.java | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java diff --git a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java new file mode 100644 index 00000000000..a5ed8133e3d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java @@ -0,0 +1,70 @@ +package org.broadinstitute.hellbender.engine; + +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.engine.filters.CountingReadFilter; +import org.broadinstitute.hellbender.engine.filters.VariantFilter; +import org.broadinstitute.hellbender.utils.SimpleInterval; + +import java.util.stream.StreamSupport; + +public abstract class TwoPassVariantWalker extends VariantWalker { + + @Override + public void traverse(){ + final VariantFilter variantFilter = makeVariantFilter(); + final CountingReadFilter readFilter = makeReadFilter(); + + // First pass through the variant + traverseVariants(variantFilter, readFilter, this::firstPassApply); + logger.info("Finished first pass through the variants"); + + afterFirstPass(); + + // Second pass + logger.info("Starting second pass through the variants"); + traverseVariants(variantFilter, readFilter, this::secondPassApply); + logger.info(readFilter.getSummaryLine()); + } + + + protected abstract void firstPassApply(final VariantContext variant, + final ReadsContext readsContext, + final ReferenceContext referenceContext, + final FeatureContext featureContext ); + + protected abstract void secondPassApply(final VariantContext variant, + final ReadsContext readsContext, + final ReferenceContext referenceContext, + final FeatureContext featureContext); + + protected abstract void afterFirstPass(); + + private void traverseVariants(final VariantFilter variantFilter, final CountingReadFilter readFilter, final VariantConsumer variantConsumer){ + StreamSupport.stream(getSpliteratorForDrivingVariants(), false) + .filter(variantFilter) + .forEach(variant -> { + final SimpleInterval variantInterval = new SimpleInterval(variant); + variantConsumer.consume(variant, + new ReadsContext(reads, variantInterval, readFilter), + new ReferenceContext(reference, variantInterval), + new FeatureContext(features, variantInterval)); + + progressMeter.update(variantInterval); + }); + } + + /** + * a common abstraction for first and second pass apply functions + */ + @FunctionalInterface + private interface VariantConsumer { + void consume(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext reference, final FeatureContext features); + } + + + // Make this final to hide it from subclasses + @Override + public final void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + } + +} From 191065132025dbde2616b025c99e23dc38671978 Mon Sep 17 00:00:00 2001 From: takutosato Date: Thu, 10 May 2018 11:44:08 -0400 Subject: [PATCH 2/7] wrote docs, tests --- .../engine/TwoPassVariantWalker.java | 51 ++++++++--- .../examples/ExampleTwoPassVariantWalker.java | 89 +++++++++++++++++++ .../engine/TwoPassVariantWalkerUnitTest.java | 53 +++++++++++ ...leTwoPassVariantWalkerIntegrationTest.java | 48 ++++++++++ 4 files changed, 228 insertions(+), 13 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java create mode 100644 src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java diff --git a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java index a5ed8133e3d..3d9a23e2042 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java @@ -7,36 +7,65 @@ import java.util.stream.StreamSupport; +/** + * A modification of a VariantWalker that makes two passes through the variants. + * This allows the user to store internal states during the first pass, which the user then + * can process and access during the second pass + **/ public abstract class TwoPassVariantWalker extends VariantWalker { - + /** + * Overrides the traversal framework of {@link VariantWalkerBase} + */ @Override public void traverse(){ - final VariantFilter variantFilter = makeVariantFilter(); + final VariantFilter variantContextFilter = makeVariantFilter(); final CountingReadFilter readFilter = makeReadFilter(); // First pass through the variant - traverseVariants(variantFilter, readFilter, this::firstPassApply); + traverseVariants(variantContextFilter, readFilter, this::firstPassApply); logger.info("Finished first pass through the variants"); + // Process the data accumulated during the first pass afterFirstPass(); // Second pass logger.info("Starting second pass through the variants"); - traverseVariants(variantFilter, readFilter, this::secondPassApply); + traverseVariants(variantContextFilter, readFilter, this::secondPassApply); logger.info(readFilter.getSummaryLine()); } - + /** + * + * First pass through the variants. Store relevant data as a private member of the walker + * and process it in {@link #afterFirstPass} before making a second pass + * + * @param variant A variant record in vcf + * @param readsContext Reads overlapping the current variant. Will be empty if a read source (e.g. bam) isn't provided + * @param referenceContext Reference bases spanning the current variant + * @param featureContext A vcf record overlapping the current variant from an auxiliary data source (e.g. gnomad) + */ protected abstract void firstPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext ); + /** + * + * Having seen all of the variants in a vcf, make a second pass through the variants + * + * @param variant A variant record in vcf + * @param readsContext Reads overlapping the current variant. Will be empty if a read source (e.g. bam) isn't provided + * @param referenceContext Reference bases spanning the current variant + * @param featureContext A vcf record overlapping the current variant from an auxiliary data source (e.g. gnomad) + */ protected abstract void secondPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext); + /** + * Process the data collected during the first pass + */ protected abstract void afterFirstPass(); private void traverseVariants(final VariantFilter variantFilter, final CountingReadFilter readFilter, final VariantConsumer variantConsumer){ @@ -48,23 +77,19 @@ private void traverseVariants(final VariantFilter variantFilter, final CountingR new ReadsContext(reads, variantInterval, readFilter), new ReferenceContext(reference, variantInterval), new FeatureContext(features, variantInterval)); - progressMeter.update(variantInterval); }); } - /** - * a common abstraction for first and second pass apply functions - */ @FunctionalInterface private interface VariantConsumer { void consume(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext reference, final FeatureContext features); } - // Make this final to hide it from subclasses + /** + * Make final to hide it from subclasses + */ @Override - public final void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { - } - + public final void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {} } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java new file mode 100644 index 00000000000..76c5183c94d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java @@ -0,0 +1,89 @@ +package org.broadinstitute.hellbender.tools.examples; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.*; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.ExampleProgramGroup; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadsContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; +import org.ojalgo.array.blas.COPY; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.QUAL_BY_DEPTH_KEY; + +@CommandLineProgramProperties( + summary = "Example variant walker that makes two passes through a vcf", + oneLineSummary = "Example two variant walker", + programGroup = ExampleProgramGroup.class, + omitFromCommandLine = true +) +public class ExampleTwoPassVariantWalker extends TwoPassVariantWalker { + @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + doc = "Output vcf", optional = true) + private File outputVcf = null; + + final List qualByDepths = new ArrayList<>(); + + private VariantContextWriter vcfWriter; + + public static String QD_P_VALUE_KEY_NAME = "P_VALUE"; + + public static String COPY_OF_QD_KEY_NAME = "QD_COPY"; + + private double averageQualByDepth; + + private double varianceOfQDs; + + int counter = 0; + + @Override + public void onTraversalStart() { + final VCFHeader inputHeader = getHeaderForVariants(); + inputHeader.addMetaDataLine(new VCFInfoHeaderLine(QD_P_VALUE_KEY_NAME, 1, VCFHeaderLineType.Float, "p value of the quality by depth")); + inputHeader.addMetaDataLine(new VCFInfoHeaderLine(COPY_OF_QD_KEY_NAME, 1, VCFHeaderLineType.Float, "copy of the QD INFO field")); + vcfWriter = createVCFWriter(outputVcf); + vcfWriter.writeHeader(inputHeader); + } + + @Override + protected void firstPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + qualByDepths.add(variant.getAttributeAsDouble(QUAL_BY_DEPTH_KEY, 0.0)); + } + + @Override + protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + VariantContextBuilder vcb = new VariantContextBuilder(variant); + final double qualByDepth = qualByDepths.get(counter++); + final double pValue = 0.05; + vcb.attribute(QD_P_VALUE_KEY_NAME, pValue); + vcb.attribute(COPY_OF_QD_KEY_NAME, qualByDepth); + + vcfWriter.add(vcb.make()); + } + + @Override + protected void afterFirstPass() { + final int n = qualByDepths.size(); + if (n == 1){ + return; + } + averageQualByDepth = qualByDepths.stream().mapToDouble(x -> x).average().orElse(0.0); + varianceOfQDs = (1/(n-1))* qualByDepths.stream().mapToDouble(x -> Math.pow((x - averageQualByDepth), 2.0)).sum(); + } + + @Override + public void closeTool() { + if ( vcfWriter != null ) { + vcfWriter.close(); + } + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java new file mode 100644 index 00000000000..4127356a1c2 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java @@ -0,0 +1,53 @@ +package org.broadinstitute.hellbender.engine; + +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.cmdline.TestProgramGroup; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class TwoPassVariantWalkerUnitTest { + @CommandLineProgramProperties( + summary = "An example subclass of TwoPassVariantWalker", + oneLineSummary = "An example subclass of TwoPassVariantWalker", + programGroup = TestProgramGroup.class + ) + private static class DummyTwoPassVariantWalker extends TwoPassVariantWalker { + public int firstPass = 0; + public int secondPass = 0; + public boolean visitedAfterFirstPass = false; + + @Override + protected void firstPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + firstPass++; + } + + @Override + protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + secondPass++; + } + + @Override + protected void afterFirstPass(){ + visitedAfterFirstPass = true; + } + } + + @Test + public void testNum() { + final DummyTwoPassVariantWalker walker = new DummyTwoPassVariantWalker(); + final String testVcf = "src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/multiallelic.vcf"; + + final String[] args = { + "-V", testVcf, + }; + + walker.instanceMain(args); + + final int expectedNumberOfVariantContexts = 52; + Assert.assertEquals(walker.firstPass, expectedNumberOfVariantContexts); + Assert.assertEquals(walker.secondPass, expectedNumberOfVariantContexts); + Assert.assertTrue(walker.visitedAfterFirstPass); + } + +} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java new file mode 100644 index 00000000000..0c7de4bc845 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java @@ -0,0 +1,48 @@ +package org.broadinstitute.hellbender.tools.examples; + +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.COPY_OF_QD_KEY_NAME; +import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.QD_P_VALUE_KEY_NAME; +import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.QUAL_BY_DEPTH_KEY; + +public class ExampleTwoPassVariantWalkerIntegrationTest extends CommandLineProgramTest { + @Test + public void test() throws IOException { + final File outputVcf = File.createTempFile("output", "vcf"); + final String inputVcf = "src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/haploid-multisample.vcf"; + + final String[] args = { + "-" + StandardArgumentDefinitions.VARIANT_SHORT_NAME, inputVcf, + "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME, outputVcf.getAbsolutePath() + }; + + runCommandLine(args); + + final FeatureDataSource variantsBefore = new FeatureDataSource<>(inputVcf); + final FeatureDataSource variantsAfter = new FeatureDataSource<>(outputVcf); + final Iterator afterIterator = variantsAfter.iterator(); + final Iterator beforeIterator = variantsBefore.iterator(); + + while (afterIterator.hasNext() && beforeIterator.hasNext()){ + final VariantContext before = beforeIterator.next(); + final VariantContext after = afterIterator.next(); + + Assert.assertEquals(before.getAttributeAsDouble(QUAL_BY_DEPTH_KEY, -1.0), + after.getAttributeAsDouble(COPY_OF_QD_KEY_NAME, -2.0)); + Assert.assertTrue(after.getAttributeAsDouble(QD_P_VALUE_KEY_NAME, -1.0) >= 0); + } + + Assert.assertTrue(! afterIterator.hasNext() && ! beforeIterator.hasNext()); + } + +} \ No newline at end of file From e002c1b6a178ae2a60248830a61e3d4efa5406b4 Mon Sep 17 00:00:00 2001 From: takutosato Date: Thu, 10 May 2018 11:59:17 -0400 Subject: [PATCH 3/7] edit the docs --- .../engine/TwoPassVariantWalker.java | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java index 3d9a23e2042..2473edc1cf3 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java @@ -8,13 +8,13 @@ import java.util.stream.StreamSupport; /** - * A modification of a VariantWalker that makes two passes through the variants. - * This allows the user to store internal states during the first pass, which the user then - * can process and access during the second pass + * A VariantWalker that makes two passes through the variants. + * This allows the user to store internal states during the first pass, which the user can then + * process and access during the second pass **/ public abstract class TwoPassVariantWalker extends VariantWalker { /** - * Overrides the traversal framework of {@link VariantWalkerBase} + * Overrides the default, single-pass traversal framework of {@link VariantWalkerBase} */ @Override public void traverse(){ @@ -31,15 +31,14 @@ public void traverse(){ // Second pass logger.info("Starting second pass through the variants"); traverseVariants(variantContextFilter, readFilter, this::secondPassApply); - logger.info(readFilter.getSummaryLine()); } /** * - * First pass through the variants. Store relevant data as a private member of the walker - * and process it in {@link #afterFirstPass} before making a second pass + * First pass through the variants. The user may store data in instance variables of the walker as you go + * and process them in {@link #afterFirstPass} before making a second pass * - * @param variant A variant record in vcf + * @param variant A variant record in a vcf * @param readsContext Reads overlapping the current variant. Will be empty if a read source (e.g. bam) isn't provided * @param referenceContext Reference bases spanning the current variant * @param featureContext A vcf record overlapping the current variant from an auxiliary data source (e.g. gnomad) @@ -53,11 +52,8 @@ protected abstract void firstPassApply(final VariantContext variant, * * Having seen all of the variants in a vcf, make a second pass through the variants * - * @param variant A variant record in vcf - * @param readsContext Reads overlapping the current variant. Will be empty if a read source (e.g. bam) isn't provided - * @param referenceContext Reference bases spanning the current variant - * @param featureContext A vcf record overlapping the current variant from an auxiliary data source (e.g. gnomad) - */ + * See argument docs of {@link #firstPassApply} and {@link VariantWalkerBase#apply} + **/ protected abstract void secondPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, @@ -85,8 +81,7 @@ private void traverseVariants(final VariantFilter variantFilter, final CountingR private interface VariantConsumer { void consume(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext reference, final FeatureContext features); } - - + /** * Make final to hide it from subclasses */ From a673774c7369c839d8618c738ccd4f2355a166dc Mon Sep 17 00:00:00 2001 From: takutosato Date: Thu, 10 May 2018 13:28:14 -0400 Subject: [PATCH 4/7] update test --- .../hellbender/engine/TwoPassVariantWalker.java | 2 +- .../tools/examples/ExampleTwoPassVariantWalker.java | 13 ++++++------- .../engine/TwoPassVariantWalkerUnitTest.java | 4 +--- .../ExampleTwoPassVariantWalkerIntegrationTest.java | 9 +++++---- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java index 2473edc1cf3..9722a128177 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java @@ -81,7 +81,7 @@ private void traverseVariants(final VariantFilter variantFilter, final CountingR private interface VariantConsumer { void consume(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext reference, final FeatureContext features); } - + /** * Make final to hide it from subclasses */ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java index 76c5183c94d..6256a5a5f4a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java @@ -12,7 +12,6 @@ import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; -import org.ojalgo.array.blas.COPY; import java.io.File; import java.util.ArrayList; @@ -35,20 +34,20 @@ public class ExampleTwoPassVariantWalker extends TwoPassVariantWalker { private VariantContextWriter vcfWriter; - public static String QD_P_VALUE_KEY_NAME = "P_VALUE"; + public static String QD_DISTANCE_FROM_MEAN = "QD_DIST"; public static String COPY_OF_QD_KEY_NAME = "QD_COPY"; private double averageQualByDepth; - private double varianceOfQDs; + private double sampleVarianceOfQDs; int counter = 0; @Override public void onTraversalStart() { final VCFHeader inputHeader = getHeaderForVariants(); - inputHeader.addMetaDataLine(new VCFInfoHeaderLine(QD_P_VALUE_KEY_NAME, 1, VCFHeaderLineType.Float, "p value of the quality by depth")); + inputHeader.addMetaDataLine(new VCFInfoHeaderLine(QD_DISTANCE_FROM_MEAN, 1, VCFHeaderLineType.Float, "distance from the average QD value in the units of standard deviations")); inputHeader.addMetaDataLine(new VCFInfoHeaderLine(COPY_OF_QD_KEY_NAME, 1, VCFHeaderLineType.Float, "copy of the QD INFO field")); vcfWriter = createVCFWriter(outputVcf); vcfWriter.writeHeader(inputHeader); @@ -63,8 +62,8 @@ protected void firstPassApply(VariantContext variant, ReadsContext readsContext, protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { VariantContextBuilder vcb = new VariantContextBuilder(variant); final double qualByDepth = qualByDepths.get(counter++); - final double pValue = 0.05; - vcb.attribute(QD_P_VALUE_KEY_NAME, pValue); + final double distanceFromMean = Math.abs((qualByDepth - averageQualByDepth)/ Math.sqrt(sampleVarianceOfQDs)); + vcb.attribute(QD_DISTANCE_FROM_MEAN, distanceFromMean); vcb.attribute(COPY_OF_QD_KEY_NAME, qualByDepth); vcfWriter.add(vcb.make()); @@ -77,7 +76,7 @@ protected void afterFirstPass() { return; } averageQualByDepth = qualByDepths.stream().mapToDouble(x -> x).average().orElse(0.0); - varianceOfQDs = (1/(n-1))* qualByDepths.stream().mapToDouble(x -> Math.pow((x - averageQualByDepth), 2.0)).sum(); + sampleVarianceOfQDs = (1.0/(n - 1.0))* qualByDepths.stream().mapToDouble(x -> Math.pow((x - averageQualByDepth), 2.0)).sum(); } @Override diff --git a/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java index 4127356a1c2..01036aa1ae6 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java @@ -38,9 +38,7 @@ public void testNum() { final DummyTwoPassVariantWalker walker = new DummyTwoPassVariantWalker(); final String testVcf = "src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/multiallelic.vcf"; - final String[] args = { - "-V", testVcf, - }; + final String[] args = { "-V", testVcf }; walker.instanceMain(args); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java index 0c7de4bc845..b1b928d8e5e 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java @@ -12,7 +12,7 @@ import java.util.Iterator; import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.COPY_OF_QD_KEY_NAME; -import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.QD_P_VALUE_KEY_NAME; +import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.QD_DISTANCE_FROM_MEAN; import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.QUAL_BY_DEPTH_KEY; public class ExampleTwoPassVariantWalkerIntegrationTest extends CommandLineProgramTest { @@ -30,16 +30,17 @@ public void test() throws IOException { final FeatureDataSource variantsBefore = new FeatureDataSource<>(inputVcf); final FeatureDataSource variantsAfter = new FeatureDataSource<>(outputVcf); - final Iterator afterIterator = variantsAfter.iterator(); final Iterator beforeIterator = variantsBefore.iterator(); + final Iterator afterIterator = variantsAfter.iterator(); + - while (afterIterator.hasNext() && beforeIterator.hasNext()){ + while (beforeIterator.hasNext() && afterIterator.hasNext()){ final VariantContext before = beforeIterator.next(); final VariantContext after = afterIterator.next(); Assert.assertEquals(before.getAttributeAsDouble(QUAL_BY_DEPTH_KEY, -1.0), after.getAttributeAsDouble(COPY_OF_QD_KEY_NAME, -2.0)); - Assert.assertTrue(after.getAttributeAsDouble(QD_P_VALUE_KEY_NAME, -1.0) >= 0); + Assert.assertTrue(after.getAttributeAsDouble(QD_DISTANCE_FROM_MEAN, -1.0) > 0); } Assert.assertTrue(! afterIterator.hasNext() && ! beforeIterator.hasNext()); From 02253f314554b73a6a9f7936b4bbc0ffafaf25a8 Mon Sep 17 00:00:00 2001 From: takutosato Date: Tue, 15 May 2018 09:47:37 -0400 Subject: [PATCH 5/7] review edits --- .../engine/TwoPassVariantWalker.java | 20 +++++---- .../examples/ExampleTwoPassVariantWalker.java | 41 +++++++++++-------- .../engine/TwoPassVariantWalkerUnitTest.java | 16 ++++---- ...leTwoPassVariantWalkerIntegrationTest.java | 35 ++++++++-------- 4 files changed, 60 insertions(+), 52 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java index 9722a128177..936d5c777df 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java @@ -21,7 +21,8 @@ public void traverse(){ final VariantFilter variantContextFilter = makeVariantFilter(); final CountingReadFilter readFilter = makeReadFilter(); - // First pass through the variant + // First pass through the variants + logger.info("Starting first pass through the variants"); traverseVariants(variantContextFilter, readFilter, this::firstPassApply); logger.info("Finished first pass through the variants"); @@ -31,22 +32,28 @@ public void traverse(){ // Second pass logger.info("Starting second pass through the variants"); traverseVariants(variantContextFilter, readFilter, this::secondPassApply); + + logger.info(readFilter.getSummaryLine()); } /** * - * First pass through the variants. The user may store data in instance variables of the walker as you go - * and process them in {@link #afterFirstPass} before making a second pass + * First pass through the variants. The user may store data in instance variables of the walker as they traverse the + * vcf the first time and process them in {@link #afterFirstPass} before making a second pass * * @param variant A variant record in a vcf * @param readsContext Reads overlapping the current variant. Will be empty if a read source (e.g. bam) isn't provided * @param referenceContext Reference bases spanning the current variant - * @param featureContext A vcf record overlapping the current variant from an auxiliary data source (e.g. gnomad) + * @param featureContext A record overlapping the current variant from an auxiliary data source (e.g. gnomad) */ protected abstract void firstPassApply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext ); + /** + * Process the data collected during the first pass. This method is called between the two traversals + */ + protected abstract void afterFirstPass(); /** * @@ -59,11 +66,6 @@ protected abstract void secondPassApply(final VariantContext variant, final ReferenceContext referenceContext, final FeatureContext featureContext); - /** - * Process the data collected during the first pass - */ - protected abstract void afterFirstPass(); - private void traverseVariants(final VariantFilter variantFilter, final CountingReadFilter readFilter, final VariantConsumer variantConsumer){ StreamSupport.stream(getSpliteratorForDrivingVariants(), false) .filter(variantFilter) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java index 6256a5a5f4a..062acad8c4d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java @@ -12,16 +12,23 @@ import org.broadinstitute.hellbender.engine.ReadsContext; import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.TwoPassVariantWalker; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import java.io.File; import java.util.ArrayList; import java.util.List; -import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.QUAL_BY_DEPTH_KEY; +/** + * This walker makes two traversals through variants in a vcf. During the first pass, it collects the qual-by-depth (QD) annotation + * of each variant and stores it in a list. After the first traversal, it computes the vcf-wide average and variance of QD's. + * The walker will then traverse the variants the second time and annotate each variant with the distance of its QD from the mean + * in units of standard deviations. + * + */ @CommandLineProgramProperties( summary = "Example variant walker that makes two passes through a vcf", - oneLineSummary = "Example two variant walker", + oneLineSummary = "Example two-pass variant walker", programGroup = ExampleProgramGroup.class, omitFromCommandLine = true ) @@ -30,19 +37,19 @@ public class ExampleTwoPassVariantWalker extends TwoPassVariantWalker { doc = "Output vcf", optional = true) private File outputVcf = null; - final List qualByDepths = new ArrayList<>(); + private final List qualByDepths = new ArrayList<>(); private VariantContextWriter vcfWriter; - public static String QD_DISTANCE_FROM_MEAN = "QD_DIST"; + static String QD_DISTANCE_FROM_MEAN = "QD_DIST"; - public static String COPY_OF_QD_KEY_NAME = "QD_COPY"; + static String COPY_OF_QD_KEY_NAME = "QD_COPY"; private double averageQualByDepth; private double sampleVarianceOfQDs; - int counter = 0; + private int counter = 0; @Override public void onTraversalStart() { @@ -55,7 +62,17 @@ public void onTraversalStart() { @Override protected void firstPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { - qualByDepths.add(variant.getAttributeAsDouble(QUAL_BY_DEPTH_KEY, 0.0)); + qualByDepths.add(variant.getAttributeAsDouble(GATKVCFConstants.QUAL_BY_DEPTH_KEY, 0.0)); + } + + @Override + protected void afterFirstPass() { + final int n = qualByDepths.size(); + if (n == 1){ + return; + } + averageQualByDepth = qualByDepths.stream().mapToDouble(x -> x).average().orElse(0.0); + sampleVarianceOfQDs = (1.0/(n - 1.0))* qualByDepths.stream().mapToDouble(x -> Math.pow((x - averageQualByDepth), 2.0)).sum(); } @Override @@ -69,16 +86,6 @@ protected void secondPassApply(VariantContext variant, ReadsContext readsContext vcfWriter.add(vcb.make()); } - @Override - protected void afterFirstPass() { - final int n = qualByDepths.size(); - if (n == 1){ - return; - } - averageQualByDepth = qualByDepths.stream().mapToDouble(x -> x).average().orElse(0.0); - sampleVarianceOfQDs = (1.0/(n - 1.0))* qualByDepths.stream().mapToDouble(x -> Math.pow((x - averageQualByDepth), 2.0)).sum(); - } - @Override public void closeTool() { if ( vcfWriter != null ) { diff --git a/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java index 01036aa1ae6..7672630efb2 100644 --- a/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalkerUnitTest.java @@ -2,15 +2,17 @@ import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.TestProgramGroup; import org.testng.Assert; import org.testng.annotations.Test; -public class TwoPassVariantWalkerUnitTest { +public class TwoPassVariantWalkerUnitTest extends GATKBaseTest { @CommandLineProgramProperties( summary = "An example subclass of TwoPassVariantWalker", oneLineSummary = "An example subclass of TwoPassVariantWalker", - programGroup = TestProgramGroup.class + programGroup = TestProgramGroup.class, + omitFromCommandLine = true ) private static class DummyTwoPassVariantWalker extends TwoPassVariantWalker { public int firstPass = 0; @@ -23,18 +25,18 @@ protected void firstPassApply(VariantContext variant, ReadsContext readsContext, } @Override - protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { - secondPass++; + protected void afterFirstPass(){ + visitedAfterFirstPass = true; } @Override - protected void afterFirstPass(){ - visitedAfterFirstPass = true; + protected void secondPassApply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { + secondPass++; } } @Test - public void testNum() { + public void testTwoPassTraversal() { final DummyTwoPassVariantWalker walker = new DummyTwoPassVariantWalker(); final String testVcf = "src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/multiallelic.vcf"; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java index b1b928d8e5e..719ca05be81 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalkerIntegrationTest.java @@ -4,21 +4,18 @@ import org.broadinstitute.hellbender.CommandLineProgramTest; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.engine.FeatureDataSource; +import org.broadinstitute.hellbender.utils.test.BaseTest; +import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; -import java.io.IOException; import java.util.Iterator; -import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.COPY_OF_QD_KEY_NAME; -import static org.broadinstitute.hellbender.tools.examples.ExampleTwoPassVariantWalker.QD_DISTANCE_FROM_MEAN; -import static org.broadinstitute.hellbender.utils.variant.GATKVCFConstants.QUAL_BY_DEPTH_KEY; - public class ExampleTwoPassVariantWalkerIntegrationTest extends CommandLineProgramTest { @Test - public void test() throws IOException { - final File outputVcf = File.createTempFile("output", "vcf"); + public void test() { + final File outputVcf = BaseTest.createTempFile("output", "vcf"); final String inputVcf = "src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/SelectVariants/haploid-multisample.vcf"; final String[] args = { @@ -28,22 +25,22 @@ public void test() throws IOException { runCommandLine(args); - final FeatureDataSource variantsBefore = new FeatureDataSource<>(inputVcf); - final FeatureDataSource variantsAfter = new FeatureDataSource<>(outputVcf); - final Iterator beforeIterator = variantsBefore.iterator(); - final Iterator afterIterator = variantsAfter.iterator(); + try (final FeatureDataSource variantsBefore = new FeatureDataSource<>(inputVcf); + final FeatureDataSource variantsAfter = new FeatureDataSource<>(outputVcf)){ + final Iterator beforeIterator = variantsBefore.iterator(); + final Iterator afterIterator = variantsAfter.iterator(); + while (beforeIterator.hasNext() && afterIterator.hasNext()){ + final VariantContext before = beforeIterator.next(); + final VariantContext after = afterIterator.next(); - while (beforeIterator.hasNext() && afterIterator.hasNext()){ - final VariantContext before = beforeIterator.next(); - final VariantContext after = afterIterator.next(); + Assert.assertEquals(before.getAttributeAsDouble(GATKVCFConstants.QUAL_BY_DEPTH_KEY, -1.0), + after.getAttributeAsDouble(ExampleTwoPassVariantWalker.COPY_OF_QD_KEY_NAME, -2.0)); + Assert.assertTrue(after.getAttributeAsDouble(ExampleTwoPassVariantWalker.QD_DISTANCE_FROM_MEAN, -1.0) > 0); + } - Assert.assertEquals(before.getAttributeAsDouble(QUAL_BY_DEPTH_KEY, -1.0), - after.getAttributeAsDouble(COPY_OF_QD_KEY_NAME, -2.0)); - Assert.assertTrue(after.getAttributeAsDouble(QD_DISTANCE_FROM_MEAN, -1.0) > 0); + Assert.assertTrue(! afterIterator.hasNext() && ! beforeIterator.hasNext()); } - - Assert.assertTrue(! afterIterator.hasNext() && ! beforeIterator.hasNext()); } } \ No newline at end of file From 067f6fe3fb58c0191c8ca594c3c7d0a054293264 Mon Sep 17 00:00:00 2001 From: takutosato Date: Tue, 15 May 2018 09:50:19 -0400 Subject: [PATCH 6/7] reword --- .../hellbender/engine/TwoPassVariantWalker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java index 936d5c777df..b0ff611956e 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/TwoPassVariantWalker.java @@ -38,8 +38,8 @@ public void traverse(){ /** * - * First pass through the variants. The user may store data in instance variables of the walker as they traverse the - * vcf the first time and process them in {@link #afterFirstPass} before making a second pass + * First pass through the variants. The user may store data in instance variables of the walker + * and process them in {@link #afterFirstPass} before making a second pass * * @param variant A variant record in a vcf * @param readsContext Reads overlapping the current variant. Will be empty if a read source (e.g. bam) isn't provided From 147f65e8de977e07426bcf979149f2de01f9f8e6 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 17 May 2018 16:04:14 -0400 Subject: [PATCH 7/7] static -> static final --- .../tools/examples/ExampleTwoPassVariantWalker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java index 062acad8c4d..3c7eb85b6c6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/examples/ExampleTwoPassVariantWalker.java @@ -41,9 +41,9 @@ public class ExampleTwoPassVariantWalker extends TwoPassVariantWalker { private VariantContextWriter vcfWriter; - static String QD_DISTANCE_FROM_MEAN = "QD_DIST"; + public static final String QD_DISTANCE_FROM_MEAN = "QD_DIST"; - static String COPY_OF_QD_KEY_NAME = "QD_COPY"; + public static final String COPY_OF_QD_KEY_NAME = "QD_COPY"; private double averageQualByDepth;