diff --git a/src/main/java/org/broadinstitute/hellbender/engine/ActivityProfileStateIterator.java b/src/main/java/org/broadinstitute/hellbender/engine/ActivityProfileStateIterator.java
index 3ff08105aba..03da3f17cd3 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/ActivityProfileStateIterator.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/ActivityProfileStateIterator.java
@@ -60,7 +60,7 @@ public ActivityProfileStateIterator(final MultiIntervalShard readShard
         // We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci
         // for uncovered locations. This is critical for reproducing GATK 3.x behavior!
-        LocusIteratorByState libs = new LocusIteratorByState(readShard.iterator(), DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true);
+        LocusIteratorByState libs = new LocusIteratorByState(readShard.iterator(), DownsamplingMethod.NONE, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true);
         final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(readShard.getIntervals().iterator());
         this.locusIterator = new IntervalAlignmentContextIterator(libs, intervalLocusIterator, readHeader.getSequenceDictionary());
     }
diff --git a/src/main/java/org/broadinstitute/hellbender/engine/AssemblyRegionIterator.java b/src/main/java/org/broadinstitute/hellbender/engine/AssemblyRegionIterator.java
index f7114af56ea..79bc7658b4a 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/AssemblyRegionIterator.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/AssemblyRegionIterator.java
@@ -89,7 +89,7 @@ public AssemblyRegionIterator(final MultiIntervalShard readShard,
         // We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci
         // for uncovered locations. This is critical for reproducing GATK 3.x behavior!
-        this.libs = new LocusIteratorByState(readCachingIterator, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true);
+        this.libs = new LocusIteratorByState(readCachingIterator, DownsamplingMethod.NONE, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true);
         final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(readShard.getIntervals().iterator());
         this.locusIterator = new IntervalAlignmentContextIterator(libs, intervalLocusIterator, readHeader.getSequenceDictionary());
diff --git a/src/main/java/org/broadinstitute/hellbender/engine/LocusWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/LocusWalker.java
index f9c3b874d64..217fcf14e52 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/LocusWalker.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/LocusWalker.java
@@ -8,7 +8,6 @@
 import org.broadinstitute.hellbender.engine.filters.ReadFilter;
 import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
 import org.broadinstitute.hellbender.engine.filters.WellformedReadFilter;
-import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.utils.SimpleInterval;
 import org.broadinstitute.hellbender.utils.locusiterator.AlignmentContextIteratorBuilder;
 import org.broadinstitute.hellbender.utils.locusiterator.LIBSDownsamplingInfo;
@@ -43,13 +42,6 @@ public abstract class LocusWalker extends WalkerBase {
     @Argument(fullName = MAX_DEPTH_PER_SAMPLE_NAME, shortName = MAX_DEPTH_PER_SAMPLE_NAME, doc = "Maximum number of reads to retain per sample per locus. Reads above this threshold will be downsampled. Set to 0 to disable.", optional = true)
     protected int maxDepthPerSample = defaultMaxDepthPerSample();
-    /**
-     * Should the LIBS keep unique reads? Tools that do should override to return {@code true}.
-     */
-    protected boolean keepUniqueReadListInLibs() {
-        return false;
-    }
-
     /**
      * LocusWalkers requires read sources
      */
@@ -185,7 +177,6 @@ final Iterator getAlignmentContextIterator(final CountingReadF
         alignmentContextIteratorBuilder.setDownsamplingInfo(getDownsamplingInfo());
         alignmentContextIteratorBuilder.setEmitEmptyLoci(emitEmptyLoci());
         alignmentContextIteratorBuilder.setIncludeDeletions(includeDeletions());
-        alignmentContextIteratorBuilder.setKeepUniqueReadListInLibs(keepUniqueReadListInLibs());
         alignmentContextIteratorBuilder.setIncludeNs(includeNs());
         return alignmentContextIteratorBuilder.build(
diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/LocusWalkerSpark.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/LocusWalkerSpark.java
index 1c8b4f130ed..89158ed9d1b 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/spark/LocusWalkerSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/LocusWalkerSpark.java
@@ -10,7 +10,6 @@
 import org.broadinstitute.barclay.argparser.Argument;
 import org.broadinstitute.barclay.argparser.CommandLineException;
 import org.broadinstitute.hellbender.engine.*;
-import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceMultiSparkSource;
 import org.broadinstitute.hellbender.utils.IntervalUtils;
 import org.broadinstitute.hellbender.utils.SimpleInterval;
 import org.broadinstitute.hellbender.utils.io.IOUtils;
@@ -19,7 +18,10 @@
 import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
-import java.util.*;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Spliterators;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
@@ -116,7 +118,6 @@ private static FlatMapFunction, LocusWalkerContext> getAlignment
         final AlignmentContextIteratorBuilder alignmentContextIteratorBuilder = new AlignmentContextIteratorBuilder();
         alignmentContextIteratorBuilder.setDownsamplingInfo(downsamplingInfo);
         alignmentContextIteratorBuilder.setEmitEmptyLoci(isEmitEmptyLoci);
-        alignmentContextIteratorBuilder.setKeepUniqueReadListInLibs(false);
         alignmentContextIteratorBuilder.setIncludeNs(false);
         final Iterator alignmentContextIterator = alignmentContextIteratorBuilder.build(
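Note on usage (not part of the patch): with the keep-unique-read-list hook gone, a walker's iterator setup reduces to the surviving builder setters shown in the LocusWalker and LocusWalkerSpark hunks above. Below is a minimal sketch assuming only the GATK classes named in this diff; the holder class and parameter values are illustrative, and the subsequent build(...) call is unchanged by this patch and therefore omitted here.

    // Sketch: builder configuration after this change (hypothetical helper, for illustration only).
    import org.broadinstitute.hellbender.utils.locusiterator.AlignmentContextIteratorBuilder;
    import org.broadinstitute.hellbender.utils.locusiterator.LIBSDownsamplingInfo;

    final class LocusIterationSetup {
        static AlignmentContextIteratorBuilder configure(final LIBSDownsamplingInfo downsamplingInfo,
                                                         final boolean emitEmptyLoci,
                                                         final boolean includeDeletions,
                                                         final boolean includeNs) {
            final AlignmentContextIteratorBuilder builder = new AlignmentContextIteratorBuilder();
            builder.setDownsamplingInfo(downsamplingInfo);   // e.g. derived from the tool's maxDepthPerSample setting
            builder.setEmitEmptyLoci(emitEmptyLoci);         // emit empty AlignmentContexts for uncovered loci
            builder.setIncludeDeletions(includeDeletions);   // keep reads with a deletion at the locus
            builder.setIncludeNs(includeNs);                 // usually false
            // setKeepUniqueReadListInLibs(...) no longer exists; former callers simply delete that line.
            return builder;
        }
    }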
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java
index a16301d61d2..20459ab4ae7 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java
@@ -454,8 +454,7 @@ public static List getPileupsOverReference(final SAMFileHeader reads
         final List reads = new ArrayList<>(readLikelihoods.sampleEvidence(0));
         reads.sort(new ReadCoordinateComparator(readsHeader)); //because we updated the reads based on the local realignments we have to re-sort or the pileups will be unpredictable
-        final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
-                false, samples.asSetOfSamples(), readsHeader, true);
+        final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, samples.asSetOfSamples(), readsHeader, true);
         final int startPos = activeRegionSpan.getStart();
         final List pileups = new ArrayList<>(activeRegionSpan.getEnd() - startPos);
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupReadErrorCorrector.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupReadErrorCorrector.java
index f484569ff46..6a819986368 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupReadErrorCorrector.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/PileupReadErrorCorrector.java
@@ -38,8 +38,7 @@ public PileupReadErrorCorrector(final double logOddsThreshold, final SAMFileHead
     public final List correctReads(final Collection originalReads) {
         final List reads = originalReads.stream().map(GATKRead::deepCopy).collect(Collectors.toList());
-        final Iterator locusIterator = new LocusIteratorByState(reads.iterator(), DownsamplingMethod.NONE,
-                false, ReadUtils.getSamplesFromHeader(header), header, false);
+        final Iterator locusIterator = new LocusIteratorByState(reads.iterator(), DownsamplingMethod.NONE, ReadUtils.getSamplesFromHeader(header), header, false);
         final Map>> potentialCorrections = reads.stream().collect(Collectors.toMap(read -> read, read -> new ArrayList<>()));
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/realignmentfilter/FilterAlignmentArtifacts.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/realignmentfilter/FilterAlignmentArtifacts.java
index f7e0ffa061d..dc3c0681e9b 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/realignmentfilter/FilterAlignmentArtifacts.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/realignmentfilter/FilterAlignmentArtifacts.java
@@ -225,7 +225,7 @@ public void apply(List variantContexts, ReferenceContext referen
         readLikelihoods.changeEvidence(readRealignments);
         writeBamOutput(assemblyResult, readLikelihoods, new HashSet<>(readLikelihoods.alleles()), regionForGenotyping.getSpan());
-        final LocusIteratorByState libs = new LocusIteratorByState(regionForGenotyping.getReads().iterator(), DownsamplingMethod.NONE, false, samplesList.asListOfSamples(), bamHeader, true);
+        final LocusIteratorByState libs = new LocusIteratorByState(regionForGenotyping.getReads().iterator(), DownsamplingMethod.NONE, samplesList.asListOfSamples(), bamHeader, true);
         final List unitigs = getUnitigs(libs);
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/AlignmentContextIteratorBuilder.java b/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/AlignmentContextIteratorBuilder.java
index 2fb714b4fc0..dc1def4c222 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/AlignmentContextIteratorBuilder.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/AlignmentContextIteratorBuilder.java
@@ -27,7 +27,6 @@ public class AlignmentContextIteratorBuilder {
     protected static final Logger logger = LogManager.getLogger(AlignmentContextIteratorBuilder.class);
     private boolean isEmitEmptyLoci;
-    private boolean isKeepUniqueReadListInLibs;
     private boolean isIncludeDeletions;
     private boolean isIncludeNs;
     private LIBSDownsamplingInfo downsamplingInfo;
@@ -36,10 +35,6 @@ public void setEmitEmptyLoci(boolean emitEmptyLoci) {
         isEmitEmptyLoci = emitEmptyLoci;
     }
-    public void setKeepUniqueReadListInLibs(boolean keepUniqueReadListInLibs) {
-        isKeepUniqueReadListInLibs = keepUniqueReadListInLibs;
-    }
-
     public void setIncludeDeletions(boolean includeDeletions) {
         isIncludeDeletions = includeDeletions;
     }
@@ -54,7 +49,6 @@ public void setDownsamplingInfo(LIBSDownsamplingInfo downsamplingInfo) {
     public AlignmentContextIteratorBuilder() {
         isEmitEmptyLoci = false;
-        isKeepUniqueReadListInLibs = false;
         isIncludeDeletions = true;
         isIncludeNs = false;
         downsamplingInfo = LocusIteratorByState.NO_DOWNSAMPLING;
@@ -75,7 +69,7 @@ public Iterator build(final Iterator readIterator, f
         Utils.nonNull(readIterator, "Read iterator cannot be null");
         final boolean isDefinitelyReference = (dictionary != null) && isReference ;
         return createAlignmentContextIterator(intervalsForTraversal, header, readIterator, dictionary, downsamplingInfo,
-                isDefinitelyReference, isEmitEmptyLoci, isKeepUniqueReadListInLibs, isIncludeDeletions, isIncludeNs);
+                isDefinitelyReference, isEmitEmptyLoci, isIncludeDeletions, isIncludeNs);
     }
     /**
@@ -90,8 +84,6 @@ public Iterator build(final Iterator readIterator, f
      * @param downsamplingInfo how to downsample (for {@link LocusIteratorByState})
      * @param isReference the dictionary specified above is a reference, {@code false} if no reference being used or it is not a reference.
      * @param emitEmptyLoci whether loci with no coverage should be emitted. In this case, the AlignmentContext will be empty (not null).
-     * @param isKeepUniqueReadListInLibs if true, we will keep the unique reads from the samIterator and make them
-     *                                   available via the transferReadsFromAllPreviousPileups interface (this parameter is specific to {@link LocusIteratorByState})
      * @param isIncludeDeletions include reads with deletion on the loci in question
      * @param isIncludeNs include reads with N on the loci in question
      * @return iterator that produces AlignmentContexts ready for consumption (e.g. by a {@link org.broadinstitute.hellbender.engine.LocusWalker})
@@ -103,7 +95,6 @@ private static Iterator createAlignmentContextIterator(final L
                                                            final LIBSDownsamplingInfo downsamplingInfo,
                                                            final boolean isReference,
                                                            boolean emitEmptyLoci,
-                                                           boolean isKeepUniqueReadListInLibs,
                                                            boolean isIncludeDeletions,
                                                            boolean isIncludeNs) {
@@ -113,7 +104,7 @@ private static Iterator createAlignmentContextIterator(final L
                 .collect(Collectors.toSet());
         // get the LIBS
-        final LocusIteratorByState libs = new LocusIteratorByState(readIterator, downsamplingInfo, isKeepUniqueReadListInLibs, samples, header, isIncludeDeletions, isIncludeNs);
+        final LocusIteratorByState libs = new LocusIteratorByState(readIterator, downsamplingInfo, samples, header, isIncludeDeletions, isIncludeNs);
         List finalIntervals = intervalsForTraversal;
         validateEmitEmptyLociParameters(emitEmptyLoci, dictionary, intervalsForTraversal, isReference);
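Aside (not part of the patch): after this change a default-constructed AlignmentContextIteratorBuilder carries only the settings left in the constructor hunk above, so code that never touched the removed setter behaves exactly as before. A minimal sketch of the defaults, with comments restating the values assigned in that constructor:

    // Sketch: the builder's no-argument defaults after this change.
    final AlignmentContextIteratorBuilder builder = new AlignmentContextIteratorBuilder();
    // isEmitEmptyLoci    = false   (no empty AlignmentContexts unless requested)
    // isIncludeDeletions = true
    // isIncludeNs        = false
    // downsamplingInfo   = LocusIteratorByState.NO_DOWNSAMPLING
    // There is no keep-unique-read-list flag left to configure.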
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByState.java b/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByState.java
index 9348c3d8c48..2e281a17047 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByState.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByState.java
@@ -1,6 +1,5 @@
 package org.broadinstitute.hellbender.utils.locusiterator;
-import com.google.common.annotations.VisibleForTesting;
 import htsjdk.samtools.CigarOperator;
 import htsjdk.samtools.SAMFileHeader;
 import htsjdk.samtools.util.Locatable;
@@ -38,9 +37,6 @@
 * -- A read that could be aligned to a position will actually occur in the pileup (downsampled away)
 * -- A read that appears in a previous pileup that could align to a future position will actually occur
 *    in that pileup. That is, a read might show up at position i but be downsampled away in the pileup at j
- * -- LIBS can optionally capture all of the reads that come off the iterator, before any leveling downsampling
- *    occurs, if requested. This allows users of LIBS to see both a ReadPileup view of the data as well as
- *    a stream of unique, sorted reads
 */
 public final class LocusIteratorByState implements Iterator {
     /** Indicates that we shouldn't do any downsampling */
@@ -98,8 +94,6 @@ public final class LocusIteratorByState implements Iterator {
     * @param samIterator the iterator of reads to process into pileups. Reads must be ordered
     *                    according to standard coordinate-sorted BAM conventions
     * @param downsamplingMethod information about how to downsample the reads
-    * @param keepUniqueReadListInLIBS if true, we will keep the unique reads from the samIterator and make them
-    *                                 available via the transferReadsFromAllPreviousPileups interface
     * @param samples a complete list of samples present in the read groups for the reads coming from samIterator.
     *                This is generally just the set of read group sample fields in the SAMFileHeader. This
     *                list of samples may contain a null element, and all reads without read groups will
@@ -109,13 +103,11 @@ public LocusIteratorByState(final Iterator samIterator,
     */
    public LocusIteratorByState(final Iterator samIterator,
                                final DownsamplingMethod downsamplingMethod,
-                               final boolean keepUniqueReadListInLIBS,
                                final Collection samples,
                                final SAMFileHeader header,
                                final boolean includeReadsWithDeletionAtLoci) {
        this(samIterator,
                LIBSDownsamplingInfo.toDownsamplingInfo(downsamplingMethod),
-               keepUniqueReadListInLIBS,
                samples,
                header,
                includeReadsWithDeletionAtLoci
@@ -124,30 +116,25 @@ public LocusIteratorByState(final Iterator samIterator,
    /**
     * Create a new LocusIteratorByState
-    *
-    * @param samIterator the iterator of reads to process into pileups. Reads must be ordered
+    * @param samIterator the iterator of reads to process into pileups. Reads must be ordered
     *                    according to standard coordinate-sorted BAM conventions
     * @param downsamplingMethod information about how to downsample the reads
-    * @param keepUniqueReadListInLIBS if true, we will keep the unique reads from the samIterator and make them
-    *                                 available via the transferReadsFromAllPreviousPileups interface
     * @param samples a complete list of samples present in the read groups for the reads coming from samIterator.
-    *                This is generally just the set of read group sample fields in the SAMFileHeader. This
-    *                list of samples may contain a null element, and all reads without read groups will
-    *                be mapped to this null sample
+    *                This is generally just the set of read group sample fields in the SAMFileHeader. This
+    *                list of samples may contain a null element, and all reads without read groups will
+    *                be mapped to this null sample
     * @param header header from the reads
     * @param includeReadsWithDeletionAtLoci Include reads with deletion at loci
     * @param includeReadsWithNsAtLoci Include reads with Ns at loci (usually it is not needed)
     */
    public LocusIteratorByState(final Iterator samIterator,
                                final DownsamplingMethod downsamplingMethod,
-                               final boolean keepUniqueReadListInLIBS,
                                final Collection samples,
                                final SAMFileHeader header,
                                final boolean includeReadsWithDeletionAtLoci,
                                final boolean includeReadsWithNsAtLoci) {
        this(samIterator,
                LIBSDownsamplingInfo.toDownsamplingInfo(downsamplingMethod),
-               keepUniqueReadListInLIBS,
                samples,
                header,
                includeReadsWithDeletionAtLoci,
@@ -161,8 +148,6 @@ public LocusIteratorByState(final Iterator samIterator,
     * @param samIterator the iterator of reads to process into pileups. Reads must be ordered
     *                    according to standard coordinate-sorted BAM conventions
     * @param downsamplingInfo meta-information about how to downsample the reads
-    * @param keepUniqueReadListInLIBS if true, we will keep the unique reads from the samIterator and make them
-    *                                 available via the transferReadsFromAllPreviousPileups interface
     * @param samples a complete list of samples present in the read groups for the reads coming from samIterator.
     *                This is generally just the set of read group sample fields in the SAMFileHeader. This
     *                list of samples may contain a null element, and all reads without read groups will
@@ -172,13 +157,11 @@
     */
    public LocusIteratorByState(final Iterator samIterator,
                                final LIBSDownsamplingInfo downsamplingInfo,
-                               final boolean keepUniqueReadListInLIBS,
                                final Collection samples,
                                final SAMFileHeader header,
                                final boolean includeReadsWithDeletionAtLoci) {
        this(samIterator,
                downsamplingInfo,
-               keepUniqueReadListInLIBS,
                samples,
                header,
                includeReadsWithDeletionAtLoci,
@@ -188,23 +171,19 @@ public LocusIteratorByState(final Iterator samIterator,
    /**
     * Create a new LocusIteratorByState
-    *
-    * @param samIterator the iterator of reads to process into pileups. Reads must be ordered
+    * @param samIterator the iterator of reads to process into pileups. Reads must be ordered
     *                    according to standard coordinate-sorted BAM conventions
     * @param downsamplingInfo meta-information about how to downsample the reads
-    * @param keepUniqueReadListInLIBS if true, we will keep the unique reads from the samIterator and make them
-    *                                 available via the transferReadsFromAllPreviousPileups interface
     * @param samples a complete list of samples present in the read groups for the reads coming from samIterator.
-    *                This is generally just the set of read group sample fields in the SAMFileHeader. This
-    *                list of samples may contain a null element, and all reads without read groups will
-    *                be mapped to this null sample
+    *                This is generally just the set of read group sample fields in the SAMFileHeader. This
+    *                list of samples may contain a null element, and all reads without read groups will
+    *                be mapped to this null sample
     * @param header header from the reads
     * @param includeReadsWithDeletionAtLoci Include reads with deletion at loci
     * @param includeReadsWithNsAtLoci Include reads with Ns at loci (usually it is not needed)
     */
    public LocusIteratorByState(final Iterator samIterator,
                                final LIBSDownsamplingInfo downsamplingInfo,
-                               final boolean keepUniqueReadListInLIBS,
                                final Collection samples,
                                final SAMFileHeader header,
                                final boolean includeReadsWithDeletionAtLoci,
@@ -223,7 +202,7 @@ public LocusIteratorByState(final Iterator samIterator,
        this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci;
        this.includeReadsWithNsAtLoci = includeReadsWithNsAtLoci;
        this.samples = new ArrayList<>(samples);
-       this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, keepUniqueReadListInLIBS, header);
+       this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, header);
    }
    /**
@@ -366,40 +345,4 @@ private boolean dontIncludeReadInPileup(final GATKRead rec, final long pos) {
        return ReadUtils.isBaseInsideAdaptor(rec, pos);
    }
-   /**
-    * Transfer current list of all unique reads that have ever been used in any pileup, clearing old list
-    *
-    * This list is guaranteed to only contain unique reads, even across calls to the this function. It is
-    * literally the unique set of reads ever seen.
-    *
-    * The list occurs in the same order as they are encountered in the underlying iterator.
-    *
-    * Takes the maintained list of submitted reads, and transfers it to the caller of this
-    * function. The old list of set to a new, cleanly allocated list so the caller officially
-    * owns the list returned by this call. This is the only way to clear the tracking
-    * of submitted reads, if enabled.
-    *
-    * The purpose of this function is allow users of LIBS to keep track of all of the reads pulled off the
-    * underlying GATKRead iterator and that appeared at any point in the list of SAMRecordAlignmentState for
-    * any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads
-    * used across all pileups. This is necessary for LIBS to handle because attempting to do
-    * so from the pileups coming out of LIBS is extremely expensive.
-    *
-    * This functionality is only available if LIBS was created with the argument to track the reads
-    *
-    * @return the current list
-    * @throws UnsupportedOperationException if called when keepingSubmittedReads is false
-    */
-   public List transferReadsFromAllPreviousPileups() {
-       return readStates.transferSubmittedReads();
-   }
-
-   /**
-    * Get the underlying list of tracked reads. For testing only
-    * @return a non-null list
-    */
-   @VisibleForTesting
-   List getReadsFromAllPreviousPileups() {
-       return readStates.getSubmittedReads();
-   }
 }
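Usage sketch (not part of the patch): the surviving public constructors shown above simply drop the boolean, so a direct caller now passes the read iterator, a DownsamplingMethod or LIBSDownsamplingInfo, the sample names, the header, and the deletion/N flags. The sketch below assumes only the GATK types named in this diff (imports for DownsamplingMethod and ReadPileup elided); the read source, header and loop body are placeholders the caller already has.

    // Sketch: direct construction with the surviving DownsamplingMethod overload, mirroring the
    // updated call sites (e.g. FilterAlignmentArtifacts, PileupReadErrorCorrector) in this diff.
    import htsjdk.samtools.SAMFileHeader;
    import java.util.Iterator;
    import org.broadinstitute.hellbender.engine.AlignmentContext;
    import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState;
    import org.broadinstitute.hellbender.utils.read.GATKRead;
    import org.broadinstitute.hellbender.utils.read.ReadUtils;

    final class PileupWalkSketch {
        static void walkPileups(final Iterator<GATKRead> coordinateSortedReads, final SAMFileHeader header) {
            // Before this change the third argument was the keep-unique-read-list boolean; it is now gone.
            final LocusIteratorByState libs = new LocusIteratorByState(
                    coordinateSortedReads, DownsamplingMethod.NONE,
                    ReadUtils.getSamplesFromHeader(header), header, /* includeReadsWithDeletionAtLoci */ true);
            while (libs.hasNext()) {
                final AlignmentContext context = libs.next();
                final ReadPileup pileup = context.getBasePileup();
                // ... inspect the pileup for this locus ...
            }
        }
    }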
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/ReadStateManager.java b/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/ReadStateManager.java
index aca6c00bf24..8f3e329b1a9 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/ReadStateManager.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/locusiterator/ReadStateManager.java
@@ -1,6 +1,5 @@
 package org.broadinstitute.hellbender.utils.locusiterator;
-import com.google.common.annotations.VisibleForTesting;
 import htsjdk.samtools.SAMFileHeader;
 import htsjdk.samtools.util.PeekableIterator;
 import org.broadinstitute.hellbender.utils.Utils;
@@ -9,14 +8,7 @@
 import java.util.*;
 /**
- * Manages and updates mapping from sample -> List of SAMRecordAlignmentState
- *
- * Optionally can keep track of all of the reads pulled off the iterator and
- * that appeared at any point in the list of SAMRecordAlignmentState for any reads.
- * This functionality is only possible at this stage, as this object does the popping of
- * reads off the underlying source iterator, and presents only a pileup-like interface
- * of samples -> SAMRecordAlignmentStates. Reconstructing the unique set of reads
- * used across all pileups is extremely expensive from that data structure.
+ * Manages and updates mapping from sample -> Iterable
 */
 final class ReadStateManager implements Iterable> {
     private final List samples;
@@ -31,15 +23,11 @@ final class ReadStateManager implements Iterable
     readStatesBySample = new LinkedHashMap<>();
-    private List submittedReads;
-    private final boolean keepSubmittedReads;
-
     private int totalReadStates = 0;
     public ReadStateManager(final Iterator source,
                             final List samples,
                             final LIBSDownsamplingInfo info,
-                            final boolean keepSubmittedReads,
                             final SAMFileHeader header) {
         Utils.nonNull(source, "source");
         Utils.nonNull(samples, "samples");
@@ -48,9 +36,6 @@ public ReadStateManager(final Iterator source,
         this.samples = samples;
         this.iterator = new PeekableIterator<>(source);
-        this.keepSubmittedReads = keepSubmittedReads;
-        this.submittedReads = new LinkedList<>();
-
         for (final String sample : samples) {
             // because this is a linked hash map the order of iteration will be in sample order
             readStatesBySample.put(sample, new PerSampleReadStateManager(info));
@@ -165,14 +150,6 @@ public void collectPendingReads() {
         for (final String sample : samples) {
             final Collection newReads = samplePartitioner.getReadsForSample(sample);
-            // if we're keeping reads, take the (potentially downsampled) list of new reads for this sample
-            // and add to the list of reads. Note this may reorder the list of reads someone (it groups them
-            // by sample, but it cannot change their absolute position on the genome as they all must
-            // start at the current location
-            if ( keepSubmittedReads ) {
-                submittedReads.addAll(newReads);
-            }
-
             final PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
             addReadsToSample(statesBySample, newReads);
         }
@@ -188,58 +165,6 @@ void submitRead(final GATKRead read) {
         samplePartitioner.submitRead(read);
     }
-    /**
-     * Transfer current list of submitted reads, clearing old list
-     *
-     * Takes the maintained list of submitted reads, and transfers it to the caller of this
-     * function. The old list of set to a new, cleanly allocated list so the caller officially
-     * owns the list returned by this call. This is the only way to clear the tracking
-     * of submitted reads, if enabled.
-     *
-     * How to use this function:
-     *
-     * while ( doing some work unit, such as creating pileup at some locus ):
-     *   interact with ReadStateManager in some way to make work unit
-     *   readsUsedInPileup = transferSubmittedReads)
-     *
-     * @throws IllegalStateException if called when keepSubmittedReads is false (check this by calling {@link #isKeepingSubmittedReads})
-     *
-     * @return the current list of submitted reads
-     */
-    public List transferSubmittedReads() {
-        Utils.validate(keepSubmittedReads,"cannot transferSubmittedReads if you aren't keeping them");
-        final List prevSubmittedReads = submittedReads;
-        this.submittedReads = new LinkedList<>();
-
-        return prevSubmittedReads;
-    }
-
-    /**
-     * Are we keeping submitted reads, or not?
-     * @return true if we are keeping them, false otherwise
-     */
-    public boolean isKeepingSubmittedReads() {
-        return keepSubmittedReads;
-    }
-
-    /**
-     * Obtain a pointer to the list of submitted reads.
-     *
-     * This is not a copy of the list; it is shared with this ReadStateManager. It should
-     * not be modified. Updates to this ReadStateManager may change the contains of the
-     * list entirely.
-     *
-     * For testing purposes only.
-     *
-     * Will always be empty if we are are not keepSubmittedReads
-     *
-     * @return a non-null list of reads that have been submitted to this ReadStateManager
-     */
-    @VisibleForTesting
-    List getSubmittedReads() {
-        return submittedReads;
-    }
-
     /**
      * Add reads with the given sample name to the given hanger entry.
     *
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngineUnitTest.java
index dbf62674437..8f63c09979f 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngineUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngineUnitTest.java
@@ -62,7 +62,7 @@ public void testIsActive() throws IOException {
         }
         final Iterator readIter = new ReadFilteringIterator(reads.query(paddedShardInterval), hcCombinedFilter);
-        final LocusIteratorByState libs = new LocusIteratorByState(readIter, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(reads.getHeader()), reads.getHeader(), true);
+        final LocusIteratorByState libs = new LocusIteratorByState(readIter, DownsamplingMethod.NONE, ReadUtils.getSamplesFromHeader(reads.getHeader()), reads.getHeader(), true);
         libs.forEachRemaining(pileup -> {
             final SimpleInterval pileupInterval = new SimpleInterval(pileup.getLocation());
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/IntervalAlignmentContextIteratorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/IntervalAlignmentContextIteratorUnitTest.java
index 9130d4f27ed..dac23e063e7 100644
--- a/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/IntervalAlignmentContextIteratorUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/IntervalAlignmentContextIteratorUnitTest.java
@@ -146,7 +146,7 @@ private List getAlignmentContexts(final List l
         final SAMSequenceDictionary dictionary = header.getSequenceDictionary();
-        final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, false, sampleNames, header, true);
+        final LocusIteratorByState locusIteratorByState = new LocusIteratorByState(filteredReads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING, sampleNames, header, true);
         List relevantIntervals = locusIntervals;
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateBaseTest.java b/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateBaseTest.java
index 9d341eb03b3..098b65d899d 100644
--- a/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateBaseTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateBaseTest.java
@@ -41,30 +41,27 @@ public void beforeClass() {
     }
     protected LocusIteratorByState makeLIBS(final List reads, final SAMFileHeader header) {
-        return makeLIBS(reads, null, false, header);
+        return makeLIBS(reads, null, header);
     }
     protected LocusIteratorByState makeLIBSwithNs(final List reads, final SAMFileHeader header) {
-        return makeLIBS(reads, null, true, false, header);
+        return makeLIBS(reads, null, true, header);
     }
     protected LocusIteratorByState makeLIBS(final List reads,
                                             final DownsamplingMethod downsamplingMethod,
-                                            final boolean keepUniqueReadList,
                                             final SAMFileHeader header) {
-        return makeLIBS(reads, downsamplingMethod, false, keepUniqueReadList, header);
+        return makeLIBS(reads, downsamplingMethod, false, header);
     }
     protected LocusIteratorByState makeLIBS(final List reads,
                                             final DownsamplingMethod downsamplingMethod,
                                             final boolean includeNs,
-                                            final boolean keepUniqueReadList,
                                             final SAMFileHeader header) {
         reads.sort(new ReadCoordinateComparator(header));
         return new LocusIteratorByState(
                 new FakeCloseableIterator<>(reads.iterator()),
                 downsamplingMethod,
-                keepUniqueReadList,
                 sampleListForSAMWithoutReadGroups(),
                 header,
                 true,
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateUnitTest.java
index 3e7db6df3c5..fe8dfd3e91e 100644
--- a/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/locusiterator/LocusIteratorByStateUnitTest.java
@@ -30,7 +30,7 @@ public void testIteratingBeyondElements() {
         final List reads = Arrays.asList(mapped1);
         final LocusIteratorByState li;
-        li = makeLIBS(reads, DownsamplingMethod.NONE, true, header);
+        li = makeLIBS(reads, DownsamplingMethod.NONE, header);
         for (int i = 0; i < readLength; i++) {
             Assert.assertTrue(li.hasNext());
             Assert.assertNotNull(li.next());
@@ -48,7 +48,7 @@ public void testAdvance() {
         final List reads = Arrays.asList(mapped1);
         final LocusIteratorByState li;
-        li = makeLIBS(reads, DownsamplingMethod.NONE, true, header);
+        li = makeLIBS(reads, DownsamplingMethod.NONE, header);
         final AlignmentContext alignmentContext2 = li.advanceToLocus(2, true);
         Assert.assertEquals(alignmentContext2.getPosition(), 2);
@@ -76,15 +76,12 @@ public void testUnmappedAndAllIReadsPassThrough() {
         // create the iterator by state with the fake reads and fake records
         final LocusIteratorByState li;
-        li = makeLIBS(reads, DownsamplingMethod.NONE, true, header);
+        li = makeLIBS(reads, DownsamplingMethod.NONE, header);
         Assert.assertTrue(li.hasNext());
         AlignmentContext context = li.next();
         ReadPileup pileup = context.getBasePileup();
         Assert.assertEquals(pileup.size(), 2, "Should see only 2 reads in pileup, even with unmapped and all I reads");
-
-        final List rawReads = li.transferReadsFromAllPreviousPileups();
-        Assert.assertEquals(rawReads, reads, "Input and transferred read lists should be the same, and include the unmapped and all I reads");
     }
     @Test
@@ -100,15 +97,12 @@ public void testAllIReadsPassThrough() {
         // create the iterator by state with the fake reads and fake records
         final LocusIteratorByState li;
-        li = makeLIBS(reads, DownsamplingMethod.NONE, true, header);
+        li = makeLIBS(reads, DownsamplingMethod.NONE, header);
         Assert.assertTrue(li.hasNext());
         final AlignmentContext context = li.next();
         final ReadPileup pileup = context.getBasePileup();
         Assert.assertEquals(pileup.size(), 2, "Should see only 2 reads in pileup, even with all I reads");
-
-        final List rawReads = li.transferReadsFromAllPreviousPileups();
-        Assert.assertEquals(rawReads, reads, "Input and transferred read lists should be the same, and include and all I reads");
     }
     @Test
@@ -220,7 +214,7 @@ public void testWholeIndelRead() {
         // create the iterator by state with the fake reads and fake records
         final LocusIteratorByState li;
-        li = makeLIBS(reads, null, false, header);
+        li = makeLIBS(reads, null, header);
         int currentLocus = firstLocus;
         int numAlignmentContextsFound = 0;
@@ -262,7 +256,7 @@ public void testWholeIndelReadRepresentedTest() {
         // create the iterator by state with the fake reads and fake records
         LocusIteratorByState li;
-        li = makeLIBS(reads, null, false, header);
+        li = makeLIBS(reads, null, header);
         while(li.hasNext()) {
             final AlignmentContext alignmentContext = li.next();
@@ -282,7 +276,7 @@ public void testWholeIndelReadRepresentedTest() {
         reads = Arrays.asList(read2);
         // create the iterator by state with the fake reads and fake records
-        li = makeLIBS(reads, null, false, header);
+        li = makeLIBS(reads, null, header);
         while(li.hasNext()) {
             final AlignmentContext alignmentContext = li.next();
@@ -354,7 +348,7 @@ public Object[][] makeIndelLengthAndBasesTest() {
     public void testIndelLengthAndBasesTest(final GATKRead read, final CigarOperator op, final int eventSize, final String eventBases) {
         // create the iterator by state with the fake reads and fake records
         final LocusIteratorByState li;
-        li = makeLIBS(Arrays.asList(read), null, false, header);
+        li = makeLIBS(Arrays.asList(read), null, header);
         Assert.assertTrue(li.hasNext());
@@ -399,7 +393,7 @@ public void testLIBS(final LIBSTest params) {
         // create the iterator by state with the fake reads and fake records
         final GATKRead read = params.makeRead();
         final LocusIteratorByState li;
-        li = makeLIBS(Arrays.asList(read), null, false, header);
+        li = makeLIBS(Arrays.asList(read), null, header);
         final LIBS_position tester = new LIBS_position(read);
         int bpVisited = 0;
@@ -459,13 +453,7 @@ public Object[][] makeLIBS_ComplexPileupTests() {
         for ( final int nReadsPerLocus : Arrays.asList(1, 10, 60) ) {
             for ( final int nLoci : Arrays.asList(1, 10, 25) ) {
                 for ( final int nSamples : Arrays.asList(1) ) {
-                    for ( final boolean keepReads : Arrays.asList(true, false) ) {
-                        for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) {
-                            tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples,
-                                    keepReads, grabReadsAfterEachCycle,
-                                    downsampleTo});
-                        }
-                    }
+                    tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, downsampleTo});
                 }
             }
         }
@@ -478,8 +466,6 @@ public Object[][] makeLIBS_ComplexPileupTests() {
     public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
                                             final int nLoci,
                                             final int nSamples,
-                                            final boolean keepReads,
-                                            final boolean grabReadsAfterEachCycle,
                                             final int downsampleTo) {
         final int readLength = 10;
@@ -496,14 +482,12 @@ public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
         li = new LocusIteratorByState(
                 new FakeCloseableIterator<>(reads.iterator()),
                 downsampler,
-                keepReads,
                 bamBuilder.getSamples(),
                 bamBuilder.getHeader(),
                 true
         );
         final Set seenSoFar = new LinkedHashSet<>();
-        final Set keptReads = new LinkedHashSet<>();
         int bpVisited = 0;
         while ( li.hasNext() ) {
             bpVisited++;
@@ -512,10 +496,8 @@ public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
             AssertWellOrderedPileup(p);
-            if ( downsample ) {
-                // just not a safe test
-                //Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling");
-            } else {
+
+            if ( !downsample ) {
                 final int minPileupSize = nReadsPerLocus * nSamples;
                 Assert.assertTrue(p.size() >= minPileupSize);
             }
@@ -529,75 +511,12 @@ public void testLIBS_ComplexPileupTests(final int nReadsPerLocus,
             // we can have no more than maxDownsampledCoverage per sample
             final int maxCoveragePerLocus = downsample ? downsampleTo : nReadsPerLocus;
             Assert.assertTrue(nReadsStartingHere <= maxCoveragePerLocus * nSamples);
-
-            seenSoFar.addAll(p.getReads());
-            if ( keepReads && grabReadsAfterEachCycle ) {
-                final List locusReads = li.transferReadsFromAllPreviousPileups();
-
-
-                if ( downsample ) {
-                    // with downsampling we might have some reads here that were downsampled away
-                    // in the pileup. We want to ensure that no more than the max coverage per sample is added
-                    Assert.assertTrue(locusReads.size() >= nReadsStartingHere);
-                    Assert.assertTrue(locusReads.size() <= maxCoveragePerLocus * nSamples);
-                } else {
-                    Assert.assertEquals(locusReads.size(), nReadsStartingHere);
-                }
-                keptReads.addAll(locusReads);
-
-                // check that all reads we've seen so far are in our keptReads
-                for ( final GATKRead read : seenSoFar ) {
-                    Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
-                }
-            }
-
-            if ( ! keepReads )
-                Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty");
         }
-        if ( keepReads && ! grabReadsAfterEachCycle )
-            keptReads.addAll(li.transferReadsFromAllPreviousPileups());
-
         if ( ! downsample ) { // downsampling may drop loci
             final int expectedBpToVisit = nLoci + readLength - 1;
             Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
         }
-
-        if ( keepReads ) {
-            // check we have the right number of reads
-            final int totalReads = nLoci * nReadsPerLocus * nSamples;
-            if ( ! downsample ) { // downsampling may drop reads
-                Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal");
-
-                // check that the order of reads is the same as in our read list
-                for ( int i = 0; i < reads.size(); i++ ) {
-                    final GATKRead inputRead = reads.get(i);
-                    final GATKRead keptRead = reads.get(i);
-                    Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i);
-                }
-            } else {
-                Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal");
-            }
-
-            // check uniqueness
-            final Set readNames = new LinkedHashSet<>();
-            for ( final GATKRead read : keptReads ) {
-                Assert.assertFalse(readNames.contains(read.getName()), "Found duplicate reads in the kept reads");
-                readNames.add(read.getName());
-            }
-
-            // check that all reads we've seen are in our keptReads
-            for ( final GATKRead read : seenSoFar ) {
-                Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
-            }
-
-            if ( ! downsample ) {
-                // check that every read in the list of keep reads occurred at least once in one of the pileups
-                for ( final GATKRead keptRead : keptReads ) {
-                    Assert.assertTrue(seenSoFar.contains(keptRead), "There's a read " + keptRead + " in our keptReads list that never appeared in any pileup");
-                }
-            }
-        }
     }
     private void AssertWellOrderedPileup(final ReadPileup pileup) {
@@ -659,7 +578,6 @@ public void testLIBS_NotHoldingTooManyReads(final int nReadsPerLocus, final int
         li = new LocusIteratorByState(
                 iterator,
                 downsampler,
-                false,
                 samples,
                 header,
                 true
@@ -757,7 +675,6 @@ public void testAdapterClipping(final int nClipsOnLeft, final int nReadContainin
         li = new LocusIteratorByState(
                 new FakeCloseableIterator<>(Collections.singletonList(read).iterator()),
                 DownsamplingMethod.NONE,
-                false,
                 sampleListForSAMWithoutReadGroups(),
                 header,
                 true
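Migration sketch (not part of this change): the removed transferReadsFromAllPreviousPileups() returned the unique reads seen so far, captured before leveling downsampling, and there is no drop-in replacement. A caller that only needs the reads that actually appear in the emitted pileups can accumulate them itself, much as the deleted test logic did with its LinkedHashSet, with the caveat that reads removed by downsampling are never visible this way. The snippet assumes a LocusIteratorByState named libs and uses only the pileup accessors that appear elsewhere in this diff; imports are elided.

    // Sketch: collect the unique reads observed in the emitted pileups, in first-seen order.
    final Set<GATKRead> readsSeen = new LinkedHashSet<>();
    while (libs.hasNext()) {
        final ReadPileup pileup = libs.next().getBasePileup();
        readsSeen.addAll(pileup.getReads());   // unlike the removed API, downsampled-away reads do not show up here
    }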