From 2fd080dcd17b4d19f9efbed8028bb44f631db793 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Mon, 26 Mar 2018 20:25:38 -0400 Subject: [PATCH] 1st class vcf output directory as suggested --- ...cturalVariationDiscoveryPipelineSpark.java | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java index 7f63918ed6f..f70a2479915 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java @@ -17,6 +17,7 @@ import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; import org.broadinstitute.hellbender.engine.spark.GATKSparkTool; +import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.sv.discovery.AnnotatedVariantProducer; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoverFromLocalAssemblyContigAlignmentsSpark; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputData; @@ -33,10 +34,13 @@ import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment; import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignmentUtils; import org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter; import scala.Serializable; +import java.io.IOException; +import java.nio.file.Paths; import java.util.EnumMap; import java.util.List; import java.util.stream.Collectors; @@ 
-113,10 +117,11 @@ public class StructuralVariationDiscoveryPipelineSpark extends GATKSparkTool { @Argument(doc = "sam file for aligned contigs", fullName = "contig-sam-file") private String outputAssemblyAlignments; - @Argument(doc = "prefix for output vcf; sample name will be appended after the provided argument", + @Argument(doc = "directory for VCF output, including those from experimental interpretation tool if so requested, " + + "will be created if not present; sample name will be appended after the provided argument", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) - private String outputPrefix; + private String variantsOutDir; @Advanced @Argument(doc = "prefix to output files of our experimental breakpoint and type inference tool;", @@ -166,14 +171,7 @@ protected void runTool( final JavaSparkContext ctx ) { // todo: when we call imprecise variants don't return here if(parsedAlignments.isEmpty()) return; - final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast = broadcastCNVCalls(ctx, headerForReads, discoverStageArgs.cnvCallsFile); - final String outputPrefixWithSampleName = outputPrefix + (outputPrefix.endsWith("/") ?
"" : "/") + SVUtils.getSampleId(headerForReads) + "_"; - final SvDiscoveryInputData svDiscoveryInputData = - new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName, - assembledEvidenceResults.getReadMetadata(), assembledEvidenceResults.getAssembledIntervals(), - makeEvidenceLinkTree(assembledEvidenceResults.getEvidenceTargetLinks()), - cnvCallsBroadcast, - getReads(), getHeaderForReads(), getReference(), localLogger); + final SvDiscoveryInputData svDiscoveryInputData = getSvDiscoveryInputData(ctx, headerForReads, assembledEvidenceResults); // TODO: 1/14/18 this is to be phased-out: old way of calling precise variants // assembled breakpoints @@ -194,6 +192,29 @@ protected void runTool( final JavaSparkContext ctx ) { } } + private SvDiscoveryInputData getSvDiscoveryInputData(final JavaSparkContext ctx, + final SAMFileHeader headerForReads, + final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults) { + final Broadcast> cnvCallsBroadcast = + broadcastCNVCalls(ctx, headerForReads, discoverStageArgs.cnvCallsFile); + try { + if ( !java.nio.file.Files.exists(Paths.get(variantsOutDir)) ) { + IOUtils.createDirectory(variantsOutDir); + } + } catch (final IOException ioex) { + throw new GATKException("Failed to create output directory " + variantsOutDir + " though it does not yet exist", ioex); + } + + final String outputPrefixWithSampleName = variantsOutDir + (variantsOutDir.endsWith("/") ? "" : "/") + + SVUtils.getSampleId(headerForReads) + "_"; + + return new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName, + assembledEvidenceResults.getReadMetadata(), assembledEvidenceResults.getAssembledIntervals(), + makeEvidenceLinkTree(assembledEvidenceResults.getEvidenceTargetLinks()), + cnvCallsBroadcast, + getReads(), getHeaderForReads(), getReference(), localLogger); + } + /** * Uses the input EvidenceTargetLinks to *
    @@ -267,7 +288,7 @@ private void experimentalInterpretation(final JavaSparkContext ctx, final SvDiscoveryInputData updatedSvDiscoveryInputData = new SvDiscoveryInputData(sampleId, svDiscoveryInputData.discoverStageArgs, - expVariantsOutPrefix + (expVariantsOutPrefix.endsWith("/") ? "" : "_") + sampleId + "_", + svDiscoveryInputData.outputPath + "_" + expVariantsOutPrefix + "_", svDiscoveryInputData.metadata, svDiscoveryInputData.assembledIntervals, svDiscoveryInputData.evidenceTargetLinks, reads, svDiscoveryInputData.toolLogger, referenceBroadcast, referenceSequenceDictionaryBroadcast, headerBroadcast, cnvCallsBroadcast);