Skip to content

Commit

Permalink
addressing all comments except dir creation
Browse files Browse the repository at this point in the history
  • Loading branch information
SHuang-Broad committed Mar 27, 2018
1 parent 7e37a74 commit b44c0a4
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,17 @@ public class StructuralVariationDiscoveryPipelineSpark extends GATKSparkTool {
@ArgumentCollection
private final DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection discoverStageArgs
= new DiscoverVariantsFromContigsAlignmentsSparkArgumentCollection();

@Argument(doc = "sam file for aligned contigs", fullName = "contig-sam-file")
private String outputAssemblyAlignments;

@Argument(doc = "prefix for output vcf; sample name will be appended after the provided argument",
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME)
private String outputPrefix;

@Advanced
@Argument(doc = "prefix to output files of our prototyping breakpoint and type inference tool in addition to the master VCF;",
@Argument(doc = "prefix to output files of our experimental breakpoint and type inference tool;",
fullName = "exp-variants-out-prefix", optional = true)
private String expVariantsOutPrefix;

Expand Down Expand Up @@ -164,7 +167,7 @@ protected void runTool( final JavaSparkContext ctx ) {
if(parsedAlignments.isEmpty()) return;

final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast = broadcastCNVCalls(ctx, headerForReads, discoverStageArgs.cnvCallsFile);
final String outputPrefixWithSampleName = outputPrefix + (outputPrefix.endsWith("/") ? "" : "_") + SVUtils.getSampleId(headerForReads);
final String outputPrefixWithSampleName = outputPrefix + (outputPrefix.endsWith("/") ? "" : "/") + SVUtils.getSampleId(headerForReads) + "_";
final SvDiscoveryInputData svDiscoveryInputData =
new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName,
assembledEvidenceResults.getReadMetadata(), assembledEvidenceResults.getAssembledIntervals(),
Expand Down Expand Up @@ -389,9 +392,9 @@ public static Iterable<AlignedContig> getAlignedContigsInOneAssembly(final Align
.mapToObj( contigIdx -> {
final byte[] contigSequence = assembly.getContig(contigIdx).getSequence();
final String contigName = AlignedAssemblyOrExcuse.formatContigName(alignedAssembly.getAssemblyId(), contigIdx);
final List<AlignmentInterval> arOfAContig
final List<AlignmentInterval> alignmentsForOneContig
= getAlignmentsForOneContig(contigName, contigSequence, allAlignments.get(contigIdx), refNames, header);
return new AlignedContig(contigName, contigSequence, arOfAContig);
return new AlignedContig(contigName, contigSequence, alignmentsForOneContig);
} ).collect(Collectors.toList());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ protected void runTool(final JavaSparkContext ctx) {
final Broadcast<SVIntervalTree<VariantContext>> cnvCallsBroadcast =
StructuralVariationDiscoveryPipelineSpark.broadcastCNVCalls(ctx, getHeaderForReads(),
discoverStageArgs.cnvCallsFile);
final String outputPrefixWithSampleName = outputPrefix + SVUtils.getSampleId(getHeaderForReads());
final String outputPrefixWithSampleName = outputPrefix + SVUtils.getSampleId(getHeaderForReads()) + "_";
final SvDiscoveryInputData svDiscoveryInputData =
new SvDiscoveryInputData(ctx, discoverStageArgs, outputPrefixWithSampleName,
null, null, null,
Expand Down Expand Up @@ -187,9 +187,9 @@ public static EnumMap<RawTypes, JavaRDD<AssemblyContigWithFineTunedAlignments>>
final JavaRDD<AssemblyContigWithFineTunedAlignments> ambiguous = contigsByPossibleRawTypes.get(RawTypes.Ambiguous);
final JavaRDD<AssemblyContigWithFineTunedAlignments> incomplete = contigsByPossibleRawTypes.get(RawTypes.Incomplete);
final JavaRDD<AssemblyContigWithFineTunedAlignments> misAssemblySuspect = contigsByPossibleRawTypes.get(RawTypes.MisAssemblySuspect);
writeSAM(ambiguous, RawTypes.Ambiguous.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, toolLogger);
writeSAM(incomplete, RawTypes.Incomplete.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, toolLogger);
writeSAM(misAssemblySuspect, RawTypes.MisAssemblySuspect.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, toolLogger);
writeSAM(ambiguous, RawTypes.Ambiguous.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, ".bam", toolLogger);
writeSAM(incomplete, RawTypes.Incomplete.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, ".bam", toolLogger);
writeSAM(misAssemblySuspect, RawTypes.MisAssemblySuspect.name(), assemblyRawAlignments, headerBroadcast, outputPrefix, ".bam", toolLogger);
}

return contigsByPossibleRawTypes;
Expand Down Expand Up @@ -299,25 +299,34 @@ private static Iterator<VariantContext> getVariantContextIterator(final Tuple2<S

//==================================================================================================================

private static final SAMRecordQueryNameComparator samRecordQueryNameComparator = new SAMRecordQueryNameComparator();

/**
* Writes a SAM or BAM file for the provided {@code filteredContigs}
* by extracting their original alignments from {@code originalAlignments};
* the output path is built from {@code outputPrefix}, {@code rawTypeString} and the file extension.
*/
private static void writeSAM(final JavaRDD<AssemblyContigWithFineTunedAlignments> filteredContigs, final String rawTypeString,
final JavaRDD<GATKRead> originalAlignments, final Broadcast<SAMFileHeader> headerBroadcast,
final String outputPrefix, final Logger toolLogger) {
final String outputPrefix, final String extension, final Logger toolLogger) {

final Set<String> filteredReadNames = new HashSet<>( filteredContigs.map(AssemblyContigWithFineTunedAlignments::getContigName).distinct().collect() );
toolLogger.info(filteredReadNames.size() + " contigs indicating " + rawTypeString);
final SAMFileHeader header = headerBroadcast.getValue();
final JavaRDD<SAMRecord> splitLongReads = originalAlignments.filter(read -> filteredReadNames.contains(read.getName()))
.map(read -> read.convertToSAMRecord(header));
header.setSortOrder(SAMFileHeader.SortOrder.queryname);
SVFileUtils.writeSAMFile(outputPrefix + rawTypeString + ".sam",
splitLongReads.collect().stream().sorted(samRecordQueryNameComparator).iterator(),
final SAMRecordComparator localComparator;
if(extension.toLowerCase().endsWith("bam")) {
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
localComparator = new SAMRecordCoordinateComparator();
}
else if (extension.toLowerCase().endsWith("sam")) {
header.setSortOrder(SAMFileHeader.SortOrder.queryname);
localComparator = new SAMRecordQueryNameComparator();
} else {
throw new IllegalArgumentException("Unsupported output format " + extension);
}
SVFileUtils.writeSAMFile(
outputPrefix + rawTypeString + (extension.startsWith(".") ? "" : ".") + extension,
splitLongReads.collect().stream().sorted(localComparator).iterator(),
header, true);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,8 @@ private static AssemblyContigWithFineTunedAlignments updateContigMappingsWithGap
/**
* when two configurations are the same,
* implement ordering that
* prefer the configuration with less alignments, then
* prefer the configuration less summed mismatches if still tie
* prefers the configuration with fewer alignments,
* and, in case of a tie, prefers the configuration with the lower number of summed mismatches
*/
@VisibleForTesting
static Comparator<AssemblyContigWithFineTunedAlignments> getConfigurationComparator() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class StringSplitSpeedUnitTest extends GATKBaseTest {
"Italiam, fato profugus, Laviniaque venit " +
"litora, multum ille et terris iactatus et alto " +
"vi superum saevae memorem Iunonis ob iram; " +
"multa quoque et bello passus, testConfigurationSorting conderet urbem, " +
"multa quoque et bello passus, dum conderet urbem, " +
"inferretque deos Latio, genus unde Latinum, " +
"Albanique patres, atque altae moenia Romae. " +
"Musa, mihi causas memora, quo numine laeso, " +
Expand Down

0 comments on commit b44c0a4

Please sign in to comment.