diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java index 1306142b335..7f63918ed6f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java @@ -109,14 +109,17 @@ public class StructuralVariationDiscoveryPipelineSpark extends GATKSparkTool { @ArgumentCollection private final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs = new DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection(); + @Argument(doc = "sam file for aligned contigs", fullName = "contig-sam-file") private String outputAssemblyAlignments; + @Argument(doc = "prefix for output vcf; sample name will be appended after the provided argument", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME) private String outputPrefix; + @Advanced - @Argument(doc = "prefix to output files of our prototyping breakpoint and type inference tool in addition to the master VCF;", + @Argument(doc = "prefix to output files of our experimental breakpoint and type inference tool;", fullName = "exp-variants-out-prefix", optional = true) private String expVariantsOutPrefix; @@ -164,7 +167,7 @@ protected void runTool( final JavaSparkContext ctx ) { if(parsedAlignments.isEmpty()) return; final Broadcast> cnvCallsBroadcast = broadcastCNVCalls(ctx, headerForReads, discoverStageArgs.cnvCallsFile); - final String outputPrefixWithSampleName = outputPrefix + (outputPrefix.endsWith("/") ? "" : "_") + SVUtils.getSampleId(headerForReads); + final String outputPrefixWithSampleName = outputPrefix + (outputPrefix.endsWith("/") ? "" : "/") + SVUtils.getSampleId(headerForReads) + "_"; final SvDiscoveryInputData svDiscoveryInputData = new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName, assembledEvidenceResults.getReadMetadata(), assembledEvidenceResults.getAssembledIntervals(), @@ -389,9 +392,9 @@ public static Iterable getAlignedContigsInOneAssembly(final Align .mapToObj( contigIdx -> { final byte[] contigSequence = assembly.getContig(contigIdx).getSequence(); final String contigName = AlignedAssemblyOrExcuse.formatContigName(alignedAssembly.getAssemblyId(), contigIdx); - final List arOfAContig + final List alignmentsForOneContig = getAlignmentsForOneContig(contigName, contigSequence, allAlignments.get(contigIdx), refNames, header); - return new AlignedContig(contigName, contigSequence, arOfAContig); + return new AlignedContig(contigName, contigSequence, alignmentsForOneContig); } ).collect(Collectors.toList()); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java index 6f4e700e0e0..87e51f47172 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/SvDiscoverFromLocalAssemblyContigAlignmentsSpark.java @@ -138,7 +138,7 @@ protected void runTool(final JavaSparkContext ctx) { final Broadcast> cnvCallsBroadcast = StructuralVariationDiscoveryPipelineSpark.broadcastCNVCalls(ctx, getHeaderForReads(), discoverStageArgs.cnvCallsFile); - final String outputPrefixWithSampleName = outputPrefix + SVUtils.getSampleId(getHeaderForReads()); + final String outputPrefixWithSampleName = outputPrefix + SVUtils.getSampleId(getHeaderForReads()) + "_"; final SvDiscoveryInputData svDiscoveryInputData = new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName, null, null, null, @@ -187,9 +187,9 @@ public static EnumMap> final JavaRDD ambiguous = contigsByPossibleRawTypes.get(RawTypes.Ambiguous); final JavaRDD incomplete = contigsByPossibleRawTypes.get(RawTypes.Incomplete); final JavaRDD misAssemblySuspect = contigsByPossibleRawTypes.get(RawTypes.MisAssemblySuspect); - writeSAM(ambiguous, RawTypes.Ambiguous.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, toolLogger); - writeSAM(incomplete, RawTypes.Incomplete.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, toolLogger); - writeSAM(misAssemblySuspect, RawTypes.MisAssemblySuspect.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, toolLogger); + writeSAM(ambiguous, RawTypes.Ambiguous.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, ".bam", toolLogger); + writeSAM(incomplete, RawTypes.Incomplete.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, ".bam", toolLogger); + writeSAM(misAssemblySuspect, RawTypes.MisAssemblySuspect.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, ".bam", toolLogger); } return contigsByPossibleRawTypes; @@ -299,8 +299,6 @@ private static Iterator getVariantContextIterator(final Tuple2 getVariantContextIterator(final Tuple2 filteredContigs, final String rawTypeString, final JavaRDD originalAlignments, final Broadcast headerBroadcast, - final String outputPrefix, final Logger toolLogger) { + final String outputPrefix, final String extension, final Logger toolLogger) { final Set filteredReadNames = new HashSet<>( filteredContigs.map(AssemblyContigWithFineTunedAlignments::getContigName).distinct().collect() ); toolLogger.info(filteredReadNames.size() + " contigs indicating " + rawTypeString); final SAMFileHeader header = headerBroadcast.getValue(); final JavaRDD splitLongReads = originalAlignments.filter(read -> filteredReadNames.contains(read.getName())) .map(read -> read.convertToSAMRecord(header)); - header.setSortOrder(SAMFileHeader.SortOrder.queryname); - SVFileUtils.writeSAMFile(outputPrefix + rawTypeString + ".sam", - splitLongReads.collect().stream().sorted(samRecordQueryNameComparator).iterator(), + final SAMRecordComparator localComparator; + if(extension.toLowerCase().endsWith("bam")) { + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + localComparator = new SAMRecordCoordinateComparator(); + } + else if (extension.toLowerCase().endsWith("sam")) { + header.setSortOrder(SAMFileHeader.SortOrder.queryname); + localComparator = new SAMRecordQueryNameComparator(); + } else { + throw new IllegalArgumentException("Unsupported output format " + extension); + } + SVFileUtils.writeSAMFile( + outputPrefix + rawTypeString + (extension.startsWith(".") ? "" : ".") + extension, + splitLongReads.collect().stream().sorted(localComparator).iterator(), header, true); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java index d7f3b60b547..6bd6142dd28 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java @@ -440,8 +440,8 @@ private static AssemblyContigWithFineTunedAlignments updateContigMappingsWithGap /** * when two configurations are the same, * implement ordering that - * prefer the configuration with less alignments, then - * prefer the configuration less summed mismatches if still tie + * prefers the configuration with less alignments, + * and prefers the configuration with a lower number of summed mismatches in case of a tie */ @VisibleForTesting static Comparator getConfigurationComparator() { diff --git a/src/test/java/org/broadinstitute/hellbender/utils/StringSplitSpeedUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/StringSplitSpeedUnitTest.java index e8147b1b0b4..828713277d2 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/StringSplitSpeedUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/StringSplitSpeedUnitTest.java @@ -19,7 +19,7 @@ public class StringSplitSpeedUnitTest extends GATKBaseTest { "Italiam, fato profugus, Laviniaque venit " + "litora, multum ille et terris iactatus et alto " + "vi superum saevae memorem Iunonis ob iram; " + - "multa quoque et bello passus, testConfigurationSorting conderet urbem, " + + "multa quoque et bello passus, dum conderet urbem, " + "inferretque deos Latio, genus unde Latinum, " + "Albanique patres, atque altae moenia Romae. " + "Musa, mihi causas memora, quo numine laeso, " +