Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Contig stratification should defer to user-defined intervals #7238

Merged
merged 6 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,10 @@ public long getnProcessedLoci() {
return traversalIntervals.stream().mapToLong(SimpleInterval::size).sum();
}

public List<SimpleInterval> getTraversalIntervals() {
return Collections.unmodifiableList(traversalIntervals);
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This transformation seems pretty specific to the contig strat class, so I think it would make more sense to keep it there, and instead expose an intervals getter on VariantEvalEngine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

private boolean compHasMatchingEval(final VariantContext comp, final Collection<VariantContext> evals) {
// find all of the matching comps
for ( final VariantContext eval : evals ) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,30 +1,37 @@
package org.broadinstitute.hellbender.tools.walkers.varianteval.stratifications;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.tools.walkers.varianteval.VariantEvalEngine;
import org.broadinstitute.hellbender.tools.walkers.varianteval.util.VariantEvalContext;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.util.*;

/**
* Stratifies the evaluation by each contig in the reference sequence
* Stratifies the evaluation by each contig in the reference sequence. Note: if the user supplies custom intervals, it will defer to these rather than the full sequence dictionary
*/
public class Contig extends VariantStratifier {
public Contig(VariantEvalEngine engine) {
super(engine);

states.addAll(getContigNames(getEngine().getSequenceDictionaryForDrivingVariants()));
states.addAll(getContigNames());
states.add("all");
}

private Set<String> getContigNames(SAMSequenceDictionary dict) {
/**
* @return The list of contig names to be traversed, preferentially taking user supplied intervals, but otherwise defaulting to driving variants
*/
private List<String> getContigNames() {
final TreeSet<String> contigs = new TreeSet<>();
for( final SAMSequenceRecord r : dict.getSequences()) {
contigs.add(r.getSequenceName());
if (getEngine().getTraversalIntervals() == null) {
getEngine().getSequenceDictionaryForDrivingVariants().getSequences().stream().map(SAMSequenceRecord::getSequenceName).forEach(contigs::add);
}
return contigs;
else {
getEngine().getTraversalIntervals().stream().map(SimpleInterval::getContig).forEach(contigs::add);
}

return new ArrayList<>(contigs);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
package org.broadinstitute.hellbender.tools.walkers.varianteval;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.IOUtil;
import org.apache.commons.lang3.StringUtils;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.*;
import java.util.stream.Collectors;

public class VariantEvalIntegrationTest extends CommandLineProgramTest {

Expand Down Expand Up @@ -106,6 +112,31 @@ public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() throws IOExc
spec.executeTest(name, this);
}

@DataProvider(name = "testContigStratWithUserSuppliedIntervalsData")
public Object[][] testContigStratWithUserSuppliedIntervalsData() {
return new Object[][]{
new Object[]{"1:1-1480226", "testContigStratWithUserSuppliedIntervals"},
new Object[]{"1", "testContigStratWithUserSuppliedIntervals2"},
new Object[]{null, "testContigStratWithUserSuppliedIntervals3"}
};
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original version of these tests was much easier to understand - it just needed a data provider, which for the old version of the tests would just be:

    @DataProvider(name = "testContigStratWithIntervals")
    public Object[][] testContigStratWithIntervals() {
        return new Object[][] {
                { "testContigStratWithUserSuppliedIntervals", "-L 1:1-1480226" },
                { "testContigStratWithUserSuppliedIntervals2", "-L 1" },
                { "testContigStratWithUserSuppliedIntervals3", null },
        };
    }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

discussed below - made the change


@Test(dataProvider = "testContigStratWithUserSuppliedIntervalsData")
public void testContigStratWithUserSuppliedIntervals(String intervalString, String expectedOutputFile) throws IOException {
IntegrationTestSpec spec = new IntegrationTestSpec(
" -R " + b37Reference +
" --eval " + snpEffVcf +
" -no-ev" +
" -EV CountVariants" +
" -no-st" +
" -ST Contig" +
(intervalString == null ? "" : " -L " + intervalString) +
" -O %s",
Arrays.asList(getExpectedFile(expectedOutputFile))
);
spec.executeTest(expectedOutputFile, this);
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two tests are identical except for the file name and interval, so they can be replaced with a single test that uses a @DataProvider. That will also nicely document the test files contents. Also please add a test case with no intervals specified.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if you've used data providers before - if not let me know and I'll add one here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i've used them. i reworked the test so it doesnt need the static test files anymore. i dont know what GATK's vision is in terms of having the full output checked in vs. testing specific features of the output. most of VariantEvalIntegration test uses the former since it was ported from GATK3, but i assume you dont want infinite test outputs checked in.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems much more convoluted and error-prone than the previous version, which was much easier to comprehend.

@Test
public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() throws IOException {
String name = "testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter";
Expand Down Expand Up @@ -636,7 +667,6 @@ public Object[][] testValidationReportData() {
return tests.toArray(new Object[][]{});
}


@Test
public void testPrintMissingComp() throws IOException {
String name = "testPrintMissingComp";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#:GATKReport.v1.1:1
#:GATKTable:29:2:%s:%s:%s:%s:%d:%d:%d:%d:%.8f:%.8f:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%.2e:%.2f:%.2f:%.2e:%.2f:%.2f:;
#:GATKTable:CountVariants:Counts different classes of variants in the sample
CountVariants CompFeatureInput Contig EvalFeatureInput nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate variantRatePerBp nSNPs nMNPs nInsertions nDeletions nComplex nSymbolic nMixed nNoCalls nHets nHomRef nHomVar nSingletons nHomDerived heterozygosity heterozygosityPerBp hetHomRatio indelRate indelRatePerBp insertionDeletionRatio
CountVariants none 1 eval 1480226 969 0 969 0.00065463 1527.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 8.11e-03 123.00 1.75 3.31e-05 30208.00 0.48
CountVariants none all eval 1480226 969 0 969 0.00065463 1527.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 8.11e-03 123.00 1.75 3.31e-05 30208.00 0.48
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#:GATKReport.v1.1:1
#:GATKTable:29:2:%s:%s:%s:%s:%d:%d:%d:%d:%.8f:%.8f:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%.2e:%.2f:%.2f:%.2e:%.2f:%.2f:;
#:GATKTable:CountVariants:Counts different classes of variants in the sample
CountVariants CompFeatureInput Contig EvalFeatureInput nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate variantRatePerBp nSNPs nMNPs nInsertions nDeletions nComplex nSymbolic nMixed nNoCalls nHets nHomRef nHomVar nSingletons nHomDerived heterozygosity heterozygosityPerBp hetHomRatio indelRate indelRatePerBp insertionDeletionRatio
CountVariants none 1 eval 249250621 969 0 969 0.00000389 257224.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 4.82e-05 20751.00 1.75 1.97e-07 5086747.00 0.48
CountVariants none all eval 249250621 969 0 969 0.00000389 257224.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 4.82e-05 20751.00 1.75 1.97e-07 5086747.00 0.48

Loading