diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index 1067ce9a18..c0012cd97c 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -407,20 +407,29 @@ public boolean hasIndex() { */ @Override public BAMIndex getIndex() { - if(!hasIndex()) + if(!hasIndex()) { throw new SAMException("No index is available for this BAM file."); + } if(mIndex == null) { - SamIndexes samIndex = getIndexType(); - if (samIndex == null) { - mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary()) - : new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary()); - } else if (samIndex.equals(SamIndexes.BAI)) { - mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping) - : new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping); - } else if (samIndex.equals(SamIndexes.CSI)) { - mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, getFileHeader().getSequenceDictionary()); - } else { - throw new SAMFormatException("Unsupported BAM index file: " + mIndexFile.getName()); + final SamIndexes samIndexType = getIndexType(); + final SAMSequenceDictionary sequenceDictionary = getFileHeader().getSequenceDictionary(); + if(mIndexFile != null) { + if (samIndexType.equals(SamIndexes.BAI)) { + mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping) + : new DiskBasedBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping); + } else if (samIndexType.equals(SamIndexes.CSI)) { + mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, sequenceDictionary); + } else { + throw new SAMFormatException("Unsupported BAM index file format: " + mIndexFile.getName()); + } + } else if(mIndexStream != null) { + if (samIndexType.equals(SamIndexes.BAI)) { + mIndex = new CachingBAMFileIndex(mIndexStream, sequenceDictionary); + } else if (samIndexType.equals(SamIndexes.CSI)) { + mIndex = new CSIIndex(mIndexStream, sequenceDictionary); + } else { + throw new SAMFormatException("Unsupported BAM index file format: " + mIndexStream.getSource()); + } } } @@ -438,8 +447,13 @@ public SamIndexes getIndexType() { } else if (mIndexFile.getName().toLowerCase().endsWith(FileExtensions.CSI)) { return SamIndexes.CSI; } - throw new SAMFormatException("Unknown BAM index file type: " + mIndexFile.getName()); + } else if (mIndexStream != null) { + final SamIndexes samIndexesType = SamIndexes.getSAMIndexTypeFromStream(mIndexStream); + if (samIndexesType == SamIndexes.BAI || samIndexesType == SamIndexes.CSI) { + return samIndexesType; + } + throw new SAMFormatException(String.format("Unknown BAM index file type: %s in %s", samIndexesType, mIndexStream.getSource())); } return null; diff --git a/src/main/java/htsjdk/samtools/CSIIndex.java b/src/main/java/htsjdk/samtools/CSIIndex.java index eacf4cbe45..706eb450f8 100644 --- a/src/main/java/htsjdk/samtools/CSIIndex.java +++ b/src/main/java/htsjdk/samtools/CSIIndex.java @@ -35,7 +35,7 @@ public class CSIIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex */ public CSIIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) { - this(new IndexStreamBuffer(stream), stream.getSource(), dictionary); + this(IndexFileBufferFactory.getBuffer(stream), stream.getSource(), dictionary); } public CSIIndex(final Path path, final SAMSequenceDictionary dictionary) throws IOException { diff --git a/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java b/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java index fa3328a13f..2c208b4f6c 100644 --- a/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java +++ b/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.RuntimeIOException; @@ -25,6 +26,11 @@ class CompressedIndexFileBuffer implements IndexFileBuffer { } } + CompressedIndexFileBuffer(SeekableStream seekableStream) { + mCompressedStream = new BlockCompressedInputStream(seekableStream); + binaryCodec = new BinaryCodec(mCompressedStream); + } + @Override public void readBytes(final byte[] bytes) { binaryCodec.readBytes(bytes); diff --git a/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java b/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java index d3c9d1cc0a..4592ce060f 100644 --- a/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java +++ b/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java @@ -1,5 +1,6 @@ package htsjdk.samtools; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; @@ -18,4 +19,13 @@ static IndexFileBuffer getBuffer(File file, boolean enableMemoryMapping) { return isCompressed ? new CompressedIndexFileBuffer(file) : (enableMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file)); } + + static IndexFileBuffer getBuffer(SeekableStream seekableStream) { + boolean isCompressed; + isCompressed = IOUtil.isGZIPInputStream(seekableStream); + + return isCompressed ? + new CompressedIndexFileBuffer(seekableStream) : + new IndexStreamBuffer(seekableStream); + } } diff --git a/src/main/java/htsjdk/samtools/SamIndexes.java b/src/main/java/htsjdk/samtools/SamIndexes.java index d45e503895..98b530c279 100644 --- a/src/main/java/htsjdk/samtools/SamIndexes.java +++ b/src/main/java/htsjdk/samtools/SamIndexes.java @@ -4,12 +4,15 @@ import htsjdk.samtools.seekablestream.SeekableBufferedStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.FileExtensions; +import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.RuntimeIOException; import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URL; +import java.util.zip.GZIPInputStream; /** * A helper class to read BAI and CRAI indexes. Main goal is to provide BAI stream as a sort of common API for all index types. @@ -102,6 +105,36 @@ public static SeekableStream asBaiSeekableStreamOrNull(final SeekableStream inpu return null; } + public static SamIndexes getSAMIndexTypeFromStream(final SeekableStream seekableStream) { + SamIndexes indexType = null; + try { + seekableStream.seek(0); + final SeekableBufferedStream bss = new SeekableBufferedStream(seekableStream); + + if (IOUtil.isGZIPInputStream(bss)) { + bss.seek(0); + GZIPInputStream gzipStream = new GZIPInputStream(bss); + if (doesStreamStartWith(gzipStream, CSI.magic)) { + indexType = CSI; + } else { + // the CRAI format has no signature bytes, so optimistically call it CRAI + // if its gzipped but not CSI + indexType = CRAI; + } + } else { + bss.seek(0); + if (doesStreamStartWith(bss, BAI.magic)) { + indexType = BAI; + } + } + seekableStream.seek(0); + } catch (final IOException e) { + throw new RuntimeIOException("Error interrogating index input stream", e); + } + + return indexType; + } + private static boolean doesStreamStartWith(final InputStream is, final byte[] bytes) throws IOException { for (final byte b : bytes) { if (is.read() != (0xFF & b)) { diff --git a/src/test/java/htsjdk/samtools/BAMFileReaderTest.java b/src/test/java/htsjdk/samtools/BAMFileReaderTest.java index 426b164f9e..5f5c7c83b7 100644 --- a/src/test/java/htsjdk/samtools/BAMFileReaderTest.java +++ b/src/test/java/htsjdk/samtools/BAMFileReaderTest.java @@ -1,7 +1,6 @@ package htsjdk.samtools; import htsjdk.HtsjdkTest; -import htsjdk.samtools.seekablestream.ByteArraySeekableStream; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CoordMath; import org.testng.Assert; @@ -10,7 +9,8 @@ import java.io.File; import java.io.IOException; -import java.util.List; +import java.net.URL; +import java.nio.file.Paths; public class BAMFileReaderTest extends HtsjdkTest { private final static File bamFile = new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam"); @@ -36,6 +36,23 @@ public void init() throws IOException { bamFileReaderNull = new BAMFileReader(bamFile, null, true, false, ValidationStringency.DEFAULT_STRINGENCY, DefaultSAMRecordFactory.getInstance()); } + @Test + public static void testCSIFromURL() throws IOException { + // https://github.com/samtools/htsjdk/issues/1507 + final URL bamURL = Paths.get(bamFile.toURI()).toUri().toURL(); + final URL csiURL = Paths.get(csiFileIndex.toURI()).toUri().toURL(); + final SamInputResource resource = SamInputResource.of(bamURL).index(csiURL); + final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); + try (final SamReader samReader = factory.open(resource)) { + Assert.assertTrue(samReader.hasIndex()); + final BAMIndex index = samReader.indexing().getIndex(); + Assert.assertTrue(index instanceof CSIIndex); + try (final SAMRecordIterator unusedIterator = + samReader.queryAlignmentStart("chr1_random", 1)) {} + try (final SAMRecordIterator unusedIterator = samReader.queryUnmapped()) {} + } + } + @Test public static void testGetIndexTypeOK() { BAMIndexMetaData.printIndexStats(bamFile); diff --git a/src/test/java/htsjdk/samtools/SamIndexesTest.java b/src/test/java/htsjdk/samtools/SamIndexesTest.java index 4c3bfa6ce1..40c1f2dacc 100644 --- a/src/test/java/htsjdk/samtools/SamIndexesTest.java +++ b/src/test/java/htsjdk/samtools/SamIndexesTest.java @@ -5,8 +5,10 @@ import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.samtools.seekablestream.SeekableMemoryStream; import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.util.IOUtil; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.ByteArrayInputStream; @@ -188,4 +190,21 @@ public void testOpenIndexUrlAsBaiOrNull() throws IOException { Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset()); Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1); } -} + + @DataProvider(name = "getSAMIndexTypeFromStreamTests") + public Object[][] getSAMIndexTypeFromStreamTests() { + return new Object[][]{ + { new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai"), SamIndexes.BAI }, + { new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.csi"), SamIndexes.CSI }, + { new File("src/test/resources/htsjdk/samtools/cram/cramQueryWithCRAI.cram.crai"), SamIndexes.CRAI}, + }; + } + + @Test(dataProvider = "getSAMIndexTypeFromStreamTests") + public void testGetSAMIndexTypeFromStream(final File indexFile, final SamIndexes expectedIndexType) throws IOException { + try (final SeekableStream seekableStream = SeekableStreamFactory.getInstance().getStreamFor(indexFile.getPath())) { + Assert.assertEquals(SamIndexes.getSAMIndexTypeFromStream(seekableStream),expectedIndexType); + Assert.assertEquals(seekableStream.position(), 0); + } + } +} \ No newline at end of file