diff --git a/hmf-common/src/main/java/com/hartwig/hmftools/common/bam/FastBamWriter.java b/hmf-common/src/main/java/com/hartwig/hmftools/common/bam/FastBamWriter.java
new file mode 100644
index 0000000000..1eac72e74c
--- /dev/null
+++ b/hmf-common/src/main/java/com/hartwig/hmftools/common/bam/FastBamWriter.java
@@ -0,0 +1,119 @@
+package com.hartwig.hmftools.common.bam;
+
+import static com.hartwig.hmftools.common.bam.SamRecordUtils.SAM_LOGGER;
+
+import java.io.File;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import htsjdk.samtools.BAMRecordCodec;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMFileWriter;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SAMTextHeaderCodec;
+import htsjdk.samtools.util.BinaryCodec;
+import htsjdk.samtools.util.BlockCompressedOutputStream;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.ProgressLoggerInterface;
+import htsjdk.samtools.util.zip.DeflaterFactory;
+
+// Minimal BAM writer which performs no per-record ordering (presorted) checks - callers
+// are responsible for supplying records in the order implied by the header's sort order.
+public class FastBamWriter implements SAMFileWriter
+{
+    private final String mFilename;
+    private final SAMFileHeader mHeader;
+
+    private final BinaryCodec mOutputBinaryCodec;
+    private final BAMRecordCodec mBamRecordCodec;
+    private final BlockCompressedOutputStream mBlockCompressedOutputStream;
+    private boolean mClosed;
+
+    // values taken from HTS JDK
+    private static final int HTSJDK_COMPRESSION_LEVEL = 5;
+    private static final byte[] HTSJDK_BAM_MAGIC = "BAM\u0001".getBytes(StandardCharsets.UTF_8);
+
+    // buffer size for the (possibly buffered) file output stream
+    private static final int BUFFER_SIZE = 131072;
+
+    public FastBamWriter(final SAMFileHeader header, final String filename)
+    {
+        mHeader = header;
+        mFilename = filename;
+        mClosed = false;
+
+        try
+        {
+            DeflaterFactory deflaterFactory = BlockCompressedOutputStream.getDefaultDeflaterFactory();
+            File outputFile = new File(filename);
+
+            OutputStream os = IOUtil.maybeBufferOutputStream(Files.newOutputStream(outputFile.toPath()), BUFFER_SIZE);
+            mBlockCompressedOutputStream = new BlockCompressedOutputStream(os, (Path)null, HTSJDK_COMPRESSION_LEVEL, deflaterFactory);
+            mOutputBinaryCodec = new BinaryCodec(mBlockCompressedOutputStream);
+            mOutputBinaryCodec.setOutputFileName(filename);
+
+            writeHeader();
+
+            mBamRecordCodec = new BAMRecordCodec(mHeader);
+            mBamRecordCodec.setOutputStream(mOutputBinaryCodec.getOutputStream(), mFilename);
+        }
+        catch(Exception e)
+        {
+            SAM_LOGGER.error("failed to open BAM({}) for writing: {}", filename, e.toString());
+
+            // fail fast with the cause preserved rather than leaving null codecs which
+            // would NPE on the first addAlignment() or close() call
+            throw new RuntimeException("failed to open BAM for writing: " + filename, e);
+        }
+    }
+
+    public String filename() { return mFilename; }
+
+    @Override
+    public void addAlignment(SAMRecord record)
+    {
+        writeAlignment(record);
+    }
+
+    @Override
+    public SAMFileHeader getFileHeader() { return mHeader; }
+
+    @Override
+    public void setProgressLogger(final ProgressLoggerInterface loggerInterface) {}
+
+    @Override
+    public void close()
+    {
+        if(mClosed)
+            return;
+
+        // closing the codec closes the underlying block-compressed stream, flushing
+        // remaining data and writing the BGZF terminator block
+        mOutputBinaryCodec.close();
+        mClosed = true;
+    }
+
+    private void writeHeader()
+    {
+        // BAM layout: magic bytes, SAM-text header, then the sequence dictionary
+        StringWriter headerTextBuffer = new StringWriter();
+        new SAMTextHeaderCodec().encode(headerTextBuffer, mHeader);
+
+        mOutputBinaryCodec.writeBytes(HTSJDK_BAM_MAGIC);
+        mOutputBinaryCodec.writeString(headerTextBuffer.toString(), true, false);
+        mOutputBinaryCodec.writeInt(mHeader.getSequenceDictionary().size());
+
+        for(SAMSequenceRecord sequenceRecord : mHeader.getSequenceDictionary().getSequences())
+        {
+            mOutputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true);
+            mOutputBinaryCodec.writeInt(sequenceRecord.getSequenceLength());
+        }
+    }
+
+    private void writeAlignment(final SAMRecord record)
+    {
+        mBamRecordCodec.encode(record);
+    }
+}
diff --git a/redux/src/main/java/com/hartwig/hmftools/redux/write/FileWriterCache.java
b/redux/src/main/java/com/hartwig/hmftools/redux/write/FileWriterCache.java
index f565c7f09c..f7e2f699f1 100644
--- a/redux/src/main/java/com/hartwig/hmftools/redux/write/FileWriterCache.java
+++ b/redux/src/main/java/com/hartwig/hmftools/redux/write/FileWriterCache.java
@@ -17,6 +17,7 @@
 import java.util.stream.Collectors;
 
 import com.google.common.collect.Lists;
+import com.hartwig.hmftools.common.bam.FastBamWriter;
 import com.hartwig.hmftools.common.bamops.BamOperations;
 import com.hartwig.hmftools.common.bamops.BamToolName;
 import com.hartwig.hmftools.common.basequal.jitter.JitterAnalyser;
@@ -194,26 +195,14 @@ public SAMFileWriter initialiseSamFileWriter(final String filename, boolean isSorted)
     {
         SAMFileHeader fileHeader = buildCombinedHeader(mConfig.BamFiles, mConfig.RefGenomeFile);
 
-        // the sorted BAM writers make use of the pre-sorted mode, which still checks that each read is after the previous
-        // however since this check is redundant given that Redux sorts records itself, this presort checking is disabled after
-        // the first sorted BAM has been written. The first BAM needs to retain this presorted setting so the header is 'coordinate' sorted
-
-        if(isSorted)
-        {
-            if(!mHasWrittenFirstSorted)
-                mHasWrittenFirstSorted = true;
-            else
-                isSorted = false;
-        }
-
+        // FastBamWriter performs no presorted checking, so the first-sorted-BAM tracking
+        // previously needed to disable the htsjdk presort check is no longer required
         if(isSorted)
             fileHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
         else
             fileHeader.setSortOrder(SAMFileHeader.SortOrder.unsorted);
 
-        boolean presorted = isSorted;
-        SAMFileWriter samFileWriter = new SAMFileWriterFactory().makeBAMWriter(fileHeader, presorted, new File(filename));
+        SAMFileWriter samFileWriter = new FastBamWriter(fileHeader, filename);
 
         return samFileWriter;
     }