From 2fd080dcd17b4d19f9efbed8028bb44f631db793 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Mon, 26 Mar 2018 20:25:38 -0400 Subject: [PATCH] 1st class vcf output directory as suggested --- ...cturalVariationDiscoveryPipelineSpark.java | 43 ++++++++++++++----- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java index 7f63918ed6f..f70a2479915 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java @@ -17,6 +17,7 @@ import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; import org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource; import org.broadinstitute.hellbender.engine.spark.GATKSparkTool; +import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.spark.sv.discovery.AnnotatedVariantProducer; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoverFromLocalAssemblyContigAlignmentsSpark; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputData; @@ -33,10 +34,13 @@ import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignment; import org.broadinstitute.hellbender.utils.bwa.BwaMemAlignmentUtils; import org.broadinstitute.hellbender.utils.fermi.FermiLiteAssembly; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter; import scala.Serializable; +import java.io.IOException; +import java.nio.file.Paths; import java.util.EnumMap; import java.util.List; import java.util.stream.Collectors; @@ 
-113,10 +117,11 @@ public class StructuralVariationDiscoveryPipelineSpark extends GATKSparkTool { @Argument(doc = "sam file for aligned contigs", fullName = "contig-sam-file") private String outputAssemblyAlignments; - @Argument(doc = "prefix for output vcf; sample name will be appended after the provided argument", + @Argument(doc = "directory for VCF output, including those from experimental interpretation tool if so requested, " + + "will be created if not present; sample name will be appended after the provided argument", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) - private String outputPrefix; + private String variantsOutDir; @Advanced @Argument(doc = "prefix to output files of our experimental breakpoint and type inference tool;", @@ -166,14 +171,7 @@ protected void runTool( final JavaSparkContext ctx ) { // todo: when we call imprecise variants don't return here if(parsedAlignments.isEmpty()) return; - final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast = broadcastCNVCalls(ctx, headerForReads, discoverStageArgs.cnvCallsFile); - final String outputPrefixWithSampleName = outputPrefix + (outputPrefix.endsWith("/") ?
"" : "/") + SVUtils.getSampleId(headerForReads) + "_"; - final SvDiscoveryInputData svDiscoveryInputData = - new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName, - assembledEvidenceResults.getReadMetadata(), assembledEvidenceResults.getAssembledIntervals(), - makeEvidenceLinkTree(assembledEvidenceResults.getEvidenceTargetLinks()), - cnvCallsBroadcast, - getReads(), getHeaderForReads(), getReference(), localLogger); + final SvDiscoveryInputData svDiscoveryInputData = getSvDiscoveryInputData(ctx, headerForReads, assembledEvidenceResults); // TODO: 1/14/18 this is to be phased-out: old way of calling precise variants // assembled breakpoints @@ -194,6 +192,29 @@ protected void runTool( final JavaSparkContext ctx ) { } } + private SvDiscoveryInputData getSvDiscoveryInputData(final JavaSparkContext ctx, + final SAMFileHeader headerForReads, + final FindBreakpointEvidenceSpark.AssembledEvidenceResults assembledEvidenceResults) { + final Broadcast> cnvCallsBroadcast = + broadcastCNVCalls(ctx, headerForReads, discoverStageArgs.cnvCallsFile); + try { + if ( !java.nio.file.Files.exists(Paths.get(variantsOutDir)) ) { + IOUtils.createDirectory(variantsOutDir); + } + } catch (final IOException ioex) { + throw new GATKException("Failed to create output directory " + variantsOutDir + " though it does not yet exist", ioex); + } + + final String outputPrefixWithSampleName = variantsOutDir + (variantsOutDir.endsWith("/") ? "" : "/") + + SVUtils.getSampleId(headerForReads) + "_"; + + return new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName, + assembledEvidenceResults.getReadMetadata(), assembledEvidenceResults.getAssembledIntervals(), + makeEvidenceLinkTree(assembledEvidenceResults.getEvidenceTargetLinks()), + cnvCallsBroadcast, + getReads(), getHeaderForReads(), getReference(), localLogger); + } + /** * Uses the input EvidenceTargetLinks to *
    @@ -267,7 +288,7 @@ private void experimentalInterpretation(final JavaSparkContext ctx, final SvDiscoveryInputData updatedSvDiscoveryInputData = new SvDiscoveryInputData(sampleId, svDiscoveryInputData.discoverStageArgs, - expVariantsOutPrefix + (expVariantsOutPrefix.endsWith("/") ? "" : "_") + sampleId + "_", + svDiscoveryInputData.outputPath + "_" + expVariantsOutPrefix + "_", svDiscoveryInputData.metadata, svDiscoveryInputData.assembledIntervals, svDiscoveryInputData.evidenceTargetLinks, reads, svDiscoveryInputData.toolLogger, referenceBroadcast, referenceSequenceDictionaryBroadcast, headerBroadcast, cnvCallsBroadcast);