-
Notifications
You must be signed in to change notification settings - Fork 597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Contig stratification should defer to user-defined intervals #7238
Changes from all commits
c805743
7874be6
75e470a
7ca616d
f228a3d
61cd15b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,27 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.varianteval; | ||
|
||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.samtools.SAMSequenceRecord; | ||
import htsjdk.samtools.util.IOUtil; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.broadinstitute.barclay.argparser.CommandLineException; | ||
import org.broadinstitute.hellbender.CommandLineProgramTest; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.exceptions.GATKException; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; | ||
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; | ||
import org.broadinstitute.hellbender.utils.SimpleInterval; | ||
import org.broadinstitute.hellbender.utils.io.IOUtils; | ||
import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; | ||
import org.testng.Assert; | ||
import org.testng.annotations.DataProvider; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.File; | ||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
||
public class VariantEvalIntegrationTest extends CommandLineProgramTest { | ||
|
||
|
@@ -106,6 +112,31 @@ public void testFundamentalsCountVariantsSNPsAndIndelsWithNovelty() throws IOExc | |
spec.executeTest(name, this); | ||
} | ||
|
||
@DataProvider(name = "testContigStratWithUserSuppliedIntervalsData") | ||
public Object[][] testContigStratWithUserSuppliedIntervalsData() { | ||
return new Object[][]{ | ||
new Object[]{"1:1-1480226", "testContigStratWithUserSuppliedIntervals"}, | ||
new Object[]{"1", "testContigStratWithUserSuppliedIntervals2"}, | ||
new Object[]{null, "testContigStratWithUserSuppliedIntervals3"} | ||
}; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The original version of these tests was much easier to understand - it just needed a data provider, which for the old version of the tests would just be:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. discussed below - made the change |
||
|
||
@Test(dataProvider = "testContigStratWithUserSuppliedIntervalsData") | ||
public void testContigStratWithUserSuppliedIntervals(String intervalString, String expectedOutputFile) throws IOException { | ||
IntegrationTestSpec spec = new IntegrationTestSpec( | ||
" -R " + b37Reference + | ||
" --eval " + snpEffVcf + | ||
" -no-ev" + | ||
" -EV CountVariants" + | ||
" -no-st" + | ||
" -ST Contig" + | ||
(intervalString == null ? "" : " -L " + intervalString) + | ||
" -O %s", | ||
Arrays.asList(getExpectedFile(expectedOutputFile)) | ||
); | ||
spec.executeTest(expectedOutputFile, this); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These two tests are identical except for the file name and interval, so they can be replaced with a single test that uses a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure if you've used data providers before - if not let me know and I'll add one here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, i've used them. i reworked the test so it doesnt need the static test files anymore. i dont know what GATK's vision is in terms of having the full output checked in vs. testing specific features of the output. most of VariantEvalIntegration test uses the former since it was ported from GATK3, but i assume you dont want infinite test outputs checked in. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems much more convoluted and error-prone than the previous version, which was much easier to comprehend. |
||
@Test | ||
public void testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter() throws IOException { | ||
String name = "testFundamentalsCountVariantsSNPsAndIndelsWithNoveltyAndFilter"; | ||
|
@@ -636,7 +667,6 @@ public Object[][] testValidationReportData() { | |
return tests.toArray(new Object[][]{}); | ||
} | ||
|
||
|
||
@Test | ||
public void testPrintMissingComp() throws IOException { | ||
String name = "testPrintMissingComp"; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
#:GATKReport.v1.1:1 | ||
#:GATKTable:29:2:%s:%s:%s:%s:%d:%d:%d:%d:%.8f:%.8f:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%.2e:%.2f:%.2f:%.2e:%.2f:%.2f:; | ||
#:GATKTable:CountVariants:Counts different classes of variants in the sample | ||
CountVariants CompFeatureInput Contig EvalFeatureInput nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate variantRatePerBp nSNPs nMNPs nInsertions nDeletions nComplex nSymbolic nMixed nNoCalls nHets nHomRef nHomVar nSingletons nHomDerived heterozygosity heterozygosityPerBp hetHomRatio indelRate indelRatePerBp insertionDeletionRatio | ||
CountVariants none 1 eval 1480226 969 0 969 0.00065463 1527.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 8.11e-03 123.00 1.75 3.31e-05 30208.00 0.48 | ||
CountVariants none all eval 1480226 969 0 969 0.00065463 1527.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 8.11e-03 123.00 1.75 3.31e-05 30208.00 0.48 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#:GATKReport.v1.1:1 | ||
#:GATKTable:29:2:%s:%s:%s:%s:%d:%d:%d:%d:%.8f:%.8f:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%d:%.2e:%.2f:%.2f:%.2e:%.2f:%.2f:; | ||
#:GATKTable:CountVariants:Counts different classes of variants in the sample | ||
CountVariants CompFeatureInput Contig EvalFeatureInput nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate variantRatePerBp nSNPs nMNPs nInsertions nDeletions nComplex nSymbolic nMixed nNoCalls nHets nHomRef nHomVar nSingletons nHomDerived heterozygosity heterozygosityPerBp hetHomRatio indelRate indelRatePerBp insertionDeletionRatio | ||
CountVariants none 1 eval 249250621 969 0 969 0.00000389 257224.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 4.82e-05 20751.00 1.75 1.97e-07 5086747.00 0.48 | ||
CountVariants none all eval 249250621 969 0 969 0.00000389 257224.00000000 920 0 16 33 0 0 0 7519 12011 123783 6882 287 0 4.82e-05 20751.00 1.75 1.97e-07 5086747.00 0.48 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This transformation seems pretty specific to the contig strat class, so I think it would make more sense to keep it there, and instead expose an intervals getter on
VariantEvalEngine
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok