-
Notifications
You must be signed in to change notification settings - Fork 597
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
274 additions
and
1 deletion.
There are no files selected for viewing
167 changes: 167 additions & 0 deletions
167
src/main/java/org/broadinstitute/hellbender/tools/sv/CondenseDepthEvidence.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
package org.broadinstitute.hellbender.tools.sv; | ||
|
||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.tribble.Feature; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.barclay.argparser.ExperimentalFeature; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; | ||
import org.broadinstitute.hellbender.engine.*; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.utils.MathUtils; | ||
import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodec; | ||
import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodecFinder; | ||
import org.broadinstitute.hellbender.utils.codecs.FeatureSink; | ||
|
||
/** | ||
* <p>Combines adjacent intervals in DepthEvidence files.</p> | ||
* | ||
* <h3>Inputs</h3> | ||
* | ||
* <ul> | ||
* <li> | ||
* A locus-sorted DepthEvidence file (name ends with ".rd.txt", ".rd.txt.gz", or "rd.bci"). | ||
* </li> | ||
* </ul> | ||
* | ||
* <h3>Output</h3> | ||
* | ||
* <ul> | ||
* <li> | ||
* An output file containing merged evidence from the inputs. | ||
* </li> | ||
* </ul> | ||
* | ||
* <h3>Usage example</h3> | ||
* | ||
* <pre> | ||
* gatk CondenseDepthEvidence \ | ||
* -F input.rd.txt.gz \ | ||
* -O merged.rd.txt.gz | ||
* </pre> | ||
* | ||
* @author Ted Sharpe <tsharpe@broadinstitute.org> | ||
*/ | ||
@CommandLineProgramProperties( | ||
summary = "Merges adjacent DepthEvidence records.", | ||
oneLineSummary = "Merges adjacent DepthEvidence records.", | ||
programGroup = StructuralVariantDiscoveryProgramGroup.class | ||
) | ||
@ExperimentalFeature | ||
public class CondenseDepthEvidence extends FeatureWalker<DepthEvidence> { | ||
public static final String INPUT_ARGNAME = "depth-evidence"; | ||
public static final String COMPRESSION_LEVEL_ARGNAME = "compression-level"; | ||
public static final String MAX_INTERVAL_SIZE_ARGNAME = "max-interval-size"; | ||
public static final String MIN_INTERVAL_SIZE_ARGNAME = "min-interval-size"; | ||
|
||
@Argument( | ||
doc = "DepthEvidence input file", | ||
fullName = INPUT_ARGNAME, | ||
shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME | ||
) | ||
private GATKPath inputPath; | ||
|
||
@Argument( | ||
doc = "Merged DepthEvidence output file", | ||
fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME | ||
) | ||
private GATKPath outputPath; | ||
|
||
@Argument( | ||
doc = "Maximum interval size", | ||
fullName = MAX_INTERVAL_SIZE_ARGNAME, | ||
optional = true | ||
) | ||
private int maxIntervalLength = 1000; | ||
|
||
@Argument( | ||
doc = "Minimum interval size", | ||
fullName = MIN_INTERVAL_SIZE_ARGNAME, | ||
optional = true | ||
) | ||
private int minIntervalLength = 0; | ||
|
||
@Argument( | ||
doc = "Output compression level", | ||
fullName = COMPRESSION_LEVEL_ARGNAME, | ||
minValue = 0, maxValue = 9, optional = true | ||
) | ||
private int compressionLevel = 4; | ||
|
||
private FeatureSink<DepthEvidence> outputSink; | ||
private DepthEvidence accumulator; | ||
|
||
|
||
@Override | ||
public GATKPath getDrivingFeaturePath() { | ||
return inputPath; | ||
} | ||
|
||
@Override | ||
protected boolean isAcceptableFeatureType( final Class<? extends Feature> featureType ) { | ||
return featureType.equals(DepthEvidence.class); | ||
} | ||
|
||
@Override | ||
@SuppressWarnings("unchecked") | ||
public void onTraversalStart() { | ||
super.onTraversalStart(); | ||
|
||
if ( minIntervalLength > maxIntervalLength ) { | ||
throw new UserException("Minimum interval length exceeds maximum interval length."); | ||
} | ||
|
||
final FeatureOutputCodec<? extends Feature, ? extends FeatureSink<? extends Feature>> codec = | ||
FeatureOutputCodecFinder.find(outputPath); | ||
final Class<? extends Feature> codecFeatureClass = codec.getFeatureType(); | ||
if ( !codecFeatureClass.equals(DepthEvidence.class) ) { | ||
throw new UserException("Output file " + outputPath + " implies Feature subtype " + | ||
codecFeatureClass.getSimpleName() + | ||
", but this tool expects to write DepthEvidence."); | ||
} | ||
final SVFeaturesHeader header = (SVFeaturesHeader)getDrivingFeaturesHeader(); | ||
final SAMSequenceDictionary dict = | ||
header.getDictionary() != null ? header.getDictionary() : getBestAvailableSequenceDictionary(); | ||
outputSink = (FeatureSink<DepthEvidence>)codec.makeSink(outputPath, dict, | ||
header.getSampleNames(), compressionLevel); | ||
} | ||
|
||
@Override | ||
public void apply( final DepthEvidence feature, | ||
final ReadsContext readsContext, | ||
final ReferenceContext referenceContext, | ||
final FeatureContext featureContext ) { | ||
if ( accumulator == null ) { | ||
accumulator = feature; | ||
return; | ||
} | ||
final int intervalLength = accumulator.getLengthOnReference(); | ||
if ( !isAdjacent(accumulator, feature) || intervalLength >= maxIntervalLength ) { | ||
if ( intervalLength >= minIntervalLength ) { | ||
outputSink.write(accumulator); | ||
} | ||
accumulator = feature; | ||
return; | ||
} | ||
final int[] accumCounts = accumulator.getCounts(); | ||
MathUtils.addToArrayInPlace(accumCounts, feature.getCounts()); | ||
accumulator = new DepthEvidence(feature.getContig(), accumulator.getStart(), | ||
feature.getEnd(), accumCounts); | ||
} | ||
|
||
@Override | ||
public Object onTraversalSuccess() { | ||
super.onTraversalSuccess(); | ||
if ( accumulator != null && accumulator.getLengthOnReference() >= minIntervalLength ) { | ||
outputSink.write(accumulator); | ||
} | ||
outputSink.close(); | ||
return null; | ||
} | ||
|
||
private static boolean isAdjacent( final DepthEvidence ev1, final DepthEvidence ev2 ) { | ||
return ev1.getContig().equals(ev2.getContig()) && ev1.getEnd() + 1 == ev2.getStart(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
...est/java/org/broadinstitute/hellbender/tools/sv/CondenseDepthEvidenceIntegrationTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package org.broadinstitute.hellbender.tools.sv; | ||
|
||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import htsjdk.samtools.util.Log; | ||
import org.broadinstitute.hellbender.CommandLineProgramTest; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; | ||
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; | ||
import org.testng.annotations.Test; | ||
|
||
import java.io.IOException; | ||
import java.util.Collections; | ||
|
||
public class CondenseDepthEvidenceIntegrationTest extends CommandLineProgramTest { | ||
public static final String printEvidenceTestDir = toolsTestDir + "walkers/sv/printevidence/"; | ||
public static final String sequenceDict = largeFileTestDir + "human_g1k_v37.20.21.dict"; | ||
|
||
@Test | ||
public void positiveTest() throws IOException { | ||
final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); | ||
argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); | ||
argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, sequenceDict); | ||
argsBuilder.add(CondenseDepthEvidence.MIN_INTERVAL_SIZE_ARGNAME, 101); | ||
argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, printEvidenceTestDir + "output.test_hg38.rd.txt"); | ||
argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); | ||
final IntegrationTestSpec testSpec = | ||
new IntegrationTestSpec(argsBuilder.getString(), | ||
Collections.singletonList(printEvidenceTestDir + "merged.rd.txt")); | ||
testSpec.setOutputFileExtension("rd.txt"); | ||
testSpec.executeTest("positive test", this); | ||
} | ||
|
||
@Test | ||
public void negativeTest() throws IOException { | ||
final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); | ||
argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); | ||
argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, sequenceDict); | ||
argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, printEvidenceTestDir + "no_condense.rd.txt"); | ||
argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); | ||
|
||
// output equal to input, because this file contains rows that can't be merged for various reasons | ||
final IntegrationTestSpec testSpec = | ||
new IntegrationTestSpec(argsBuilder.getString(), | ||
Collections.singletonList(printEvidenceTestDir + "no_condense.rd.txt")); | ||
testSpec.setOutputFileExtension("rd.txt"); | ||
testSpec.executeTest("negative test", this); | ||
} | ||
} |
51 changes: 51 additions & 0 deletions
51
...test/resources/org/broadinstitute/hellbender/tools/walkers/sv/printevidence/merged.rd.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#Chr Start End HG00096 HG00129 HG00140 | ||
chr22 30499964 30500964 217 182 243 | ||
chr22 30500964 30501964 242 175 218 | ||
chr22 30501964 30502964 244 220 261 | ||
chr22 30502964 30503964 261 223 243 | ||
chr22 30503964 30504964 224 204 234 | ||
chr22 30504964 30505964 313 217 307 | ||
chr22 30505964 30506964 241 197 247 | ||
chr22 30506964 30507964 240 226 238 | ||
chr22 30507964 30508964 295 240 262 | ||
chr22 30508964 30509964 234 179 206 | ||
chr22 30509964 30510964 212 214 250 | ||
chr22 30510964 30511964 243 210 247 | ||
chr22 30511964 30512964 263 206 231 | ||
chr22 30512964 30513964 235 202 231 | ||
chr22 30513964 30514964 286 237 290 | ||
chr22 30514964 30515964 252 208 241 | ||
chr22 30515964 30516964 192 203 200 | ||
chr22 30516964 30517964 253 185 225 | ||
chr22 30517964 30518964 226 188 243 | ||
chr22 30518964 30519964 208 166 204 | ||
chr22 30519964 30520964 253 230 202 | ||
chr22 30520964 30521964 252 279 257 | ||
chr22 30521964 30522964 229 189 229 | ||
chr22 30522964 30523964 234 201 235 | ||
chr22 30523964 30524964 270 217 215 | ||
chr22 30524964 30525964 313 239 267 | ||
chr22 30525964 30526964 301 202 245 | ||
chr22 30526964 30527964 206 237 223 | ||
chr22 30527964 30528964 260 203 272 | ||
chr22 30528964 30529964 248 195 295 | ||
chr22 30529964 30530964 239 193 241 | ||
chr22 30530964 30531964 267 236 265 | ||
chr22 30531964 30532964 306 238 317 | ||
chr22 30532964 30533964 283 238 274 | ||
chr22 30533964 30534964 243 177 294 | ||
chr22 30534964 30535964 257 220 287 | ||
chr22 30535964 30536964 229 204 210 | ||
chr22 30536964 30537964 275 210 275 | ||
chr22 30537964 30538964 201 227 193 | ||
chr22 30538964 30539964 243 219 234 | ||
chr22 30539964 30540964 263 213 250 | ||
chr22 30540964 30541964 270 242 261 | ||
chr22 30541964 30542964 281 197 254 | ||
chr22 30542964 30543964 271 197 249 | ||
chr22 30543964 30544964 233 226 246 | ||
chr22 30544964 30545964 245 210 233 | ||
chr22 30545964 30546964 218 149 229 | ||
chr22 30546964 30547964 250 165 287 | ||
chr22 30547964 30548964 241 215 243 | ||
chr22 30548964 30549964 262 248 285 |
7 changes: 7 additions & 0 deletions
7
...resources/org/broadinstitute/hellbender/tools/walkers/sv/printevidence/no_condense.rd.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#Chr Start End HG00096 HG00129 HG00140 | ||
chr20 30499964 30500964 217 182 243 | ||
chr20 30500963 30501964 242 175 218 | ||
chr20 30501965 30502964 244 220 261 | ||
chr21 30502964 30503964 261 223 243 | ||
chr21 30504000 30505000 261 223 243 | ||
chr21 30505000 30506000 261 223 243 |