diff --git a/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java b/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java
index f78b8ea68a..45b50eb35e 100644
--- a/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java
+++ b/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java
@@ -34,8 +34,7 @@ public FASTADecoderV1_0(final Bundle inputBundle) {
this.displayName = inputBundle.getPrimaryResource().getDisplayName();
final BundleResource referenceResource = inputBundle.getOrThrow(BundleResourceType.CT_HAPLOID_REFERENCE);
if (referenceResource.getIOPath().isPresent()) {
- referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(
- referenceResource.getIOPath().get().toPath(), true);
+ referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle(inputBundle, true, true);
} else {
final SeekableStream seekableStream = referenceResource.getSeekableStream().orElseThrow(
() -> new IllegalArgumentException(
diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java b/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java
index d6987c9561..d083b06107 100644
--- a/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java
+++ b/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java
@@ -56,6 +56,7 @@ public class BundleResourceType {
/** Secondary content types for {@link BundleResourceType#CT_HAPLOID_REFERENCE} resources*/
public static final String CT_REFERENCE_DICTIONARY = "REFERENCE_DICTIONARY";
public static final String CT_REFERENCE_INDEX = "REFERENCE_INDEX";
+ public static final String CT_REFERENCE_INDEX_GZI = "REFERENCE_INDEX_GZI";
/****************************************** Resource types for FEATURES ********************************/
diff --git a/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java b/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java
index fa8cdba980..b1eceaacc6 100644
--- a/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java
+++ b/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java
@@ -9,9 +9,18 @@
import htsjdk.beta.plugin.hapref.HaploidReferenceCodec;
import htsjdk.beta.plugin.hapref.HaploidReferenceDecoder;
import htsjdk.beta.plugin.hapref.HaploidReferenceDecoderOptions;
+import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
+import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
+import htsjdk.samtools.util.GZIIndex;
+import htsjdk.samtools.util.IOUtil;
import htsjdk.utils.ValidationUtils;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.function.Function;
+
/**
* Class with methods for resolving inputs and outputs to haploid reference encoders and decoders.
*
@@ -66,9 +75,7 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder(
ValidationUtils.nonNull(inputPath, "Input path");
ValidationUtils.nonNull(HaploidReferenceDecoderOptions, "Decoder options");
- final Bundle referenceBundle = new BundleBuilder().addPrimary(
- new IOPathResource(inputPath, BundleResourceType.CT_HAPLOID_REFERENCE)).build();
-
+ final Bundle referenceBundle = referenceBundleFromFastaPath(inputPath, HtsPath::new);
return getHaploidReferenceDecoder(referenceBundle, HaploidReferenceDecoderOptions);
}
@@ -110,4 +117,47 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder(
return (HaploidReferenceDecoder) resolveForDecoding(inputBundle).getDecoder(inputBundle, HaploidReferenceDecoderOptions);
}
+ /**
+ * Create a reference bundle given only a fasta path, including a dictionary, a fasta index and, for a
+ * block-compressed fasta, a GZI index, if they are present and located in the same directory as the fasta.
+ *
+ * @param <T> the IOPath-derived type produced by {@code ioPathConstructor}
+ * @param fastaPath location of the fasta
+ * @param ioPathConstructor a constructor used to create IOPath-derived objects for the bundle
+ * @return a reference Bundle
+ */
+ public static <T extends IOPath> Bundle referenceBundleFromFastaPath(final IOPath fastaPath, final Function<String, T> ioPathConstructor) {
+ final BundleBuilder referenceBundleBuilder = new BundleBuilder();
+ referenceBundleBuilder.addPrimary(new IOPathResource(fastaPath, BundleResourceType.CT_HAPLOID_REFERENCE));
+
+ final Path dictPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(fastaPath.toPath());
+ if (Files.exists(dictPath)) {
+ referenceBundleBuilder.addSecondary(
+ new IOPathResource(
+ ioPathConstructor.apply(dictPath.toUri().toString()),
+ BundleResourceType.CT_REFERENCE_DICTIONARY));
+ }
+
+ final Path idxPath = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaPath.toPath());
+ if (Files.exists(idxPath)) {
+ referenceBundleBuilder.addSecondary(
+ new IOPathResource(
+ ioPathConstructor.apply(idxPath.toUri().toString()),
+ BundleResourceType.CT_REFERENCE_INDEX));
+ }
+
+ try {
+ if (IOUtil.isBlockCompressed(fastaPath.toPath(), true)) {
+ final Path gziPath = GZIIndex.resolveIndexNameForBgzipFile(fastaPath.toPath());
+ if (Files.exists(gziPath)) { // like the dictionary and fasta index, only list a GZI index that actually exists
+ referenceBundleBuilder.addSecondary(new IOPathResource(
+ ioPathConstructor.apply(gziPath.toUri().toString()), BundleResourceType.CT_REFERENCE_INDEX_GZI));
+ }
+ }
+ } catch (IOException e) {
+ throw new HtsjdkException("Error while checking for block compression", e);
+ }
+ return referenceBundleBuilder.build();
+ }
+
}
diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java
index a8bce40cc5..18cad8fc9d 100644
--- a/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java
+++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java
@@ -18,7 +18,7 @@
/**
* A {@link Bundle} for variants and variants-related resources that are backed by on disk files. A {@link
* htsjdk.beta.plugin.variants.VariantsBundle} has a primary resource with content type {@link
- * BundleResourceType#PRIMARY_CT_VARIANT_CONTEXTS}; and an optional index resource. A VariantsBundle can also
+ * BundleResourceType#CT_VARIANT_CONTEXTS}; and an optional index resource. A VariantsBundle can also
* contain additional resources.
*
* Note that this class is merely a convenience class for the case where the variants are backed by files on disk.
@@ -31,6 +31,7 @@ public class VariantsBundle extends Bundle implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
private static final Log LOG = Log.getInstance(VariantsBundle.class);
+
/**
* Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} containing only a variants resource.
*
diff --git a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
index aa921a53f6..f0d2585741 100644
--- a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
+++ b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
@@ -24,16 +24,16 @@
package htsjdk.samtools.reference;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
-import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMTextHeaderCodec;
-import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Lazy;
import java.io.File;
+import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.file.Files;
@@ -84,13 +84,25 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
/** Attempts to find and load the sequence dictionary if present. */
protected SAMSequenceDictionary findAndLoadSequenceDictionary(final Path fasta) {
final Path dictPath = findSequenceDictionary(path);
- if (dictPath == null) return null;
+ if (dictPath == null) {
+ return null;
+ }
+ return loadSequenceDictionary(new HtsPath(dictPath.toUri().toString()));
+ }
- IOUtil.assertFileIsReadable(dictPath);
- try (InputStream dictionaryIn = IOUtil.openFileForReading(dictPath)) {
- return ReferenceSequenceFileFactory.loadDictionary(dictionaryIn);
+ /**
+ * Load a sequence dictionary from the given path. A null path yields a null dictionary; an IOException while reading is rethrown as a SAMException.
+ * @param dictPath the dictionary file to open; may be null
+ * @return the SAMSequenceDictionary, or null if {@code dictPath} is null
+ */
+ protected static SAMSequenceDictionary loadSequenceDictionary(final IOPath dictPath) {
+ if (dictPath == null) {
+ return null;
}
- catch (Exception e) {
+ IOUtil.assertFileIsReadable(dictPath.toPath());
+ try (final InputStream dictionaryStream = IOUtil.openFileForReading(dictPath.toPath())) {
+ return ReferenceSequenceFileFactory.loadDictionary(dictionaryStream);
+ } catch (final IOException e) {
throw new SAMException("Could not open sequence dictionary file: " + dictPath, e);
}
}
diff --git a/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java
index d922a6dea5..c08c87ff18 100644
--- a/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java
+++ b/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java
@@ -24,6 +24,7 @@
package htsjdk.samtools.reference;
+import htsjdk.io.IOPath;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
@@ -69,6 +70,27 @@ protected AbstractIndexedFastaSequenceFile(final Path path, final FastaSequenceI
}
}
+ /**
+ * Create an AbstractIndexedFastaSequenceFile from explicitly provided files. No assumptions are made
+ * about the relative location of the files (i.e., that they are siblings).
+ *
+ * @param fastaPath the path to the fasta file. may not be null.
+ * @param dictPath the path to the sequence dictionary. may be null.
+ * @param index the associated index object; may not be null (an IllegalArgumentException is thrown otherwise).
+ */
+ protected AbstractIndexedFastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final FastaSequenceIndex index) {
+ super(fastaPath.toPath(), fastaPath.getURIString(), loadSequenceDictionary(dictPath));
+ if (index == null) {
+ throw new IllegalArgumentException("Null index for fasta " + fastaPath);
+ }
+ this.index = index;
+ IOUtil.assertFileIsReadable(fastaPath.toPath());
+ reset();
+ if (getSequenceDictionary() != null) {
+ sanityCheckDictionaryAgainstIndex(fastaPath.getRawInputString(), getSequenceDictionary(), index);
+ }
+ }
+
/**
* Initialise the given indexed fasta sequence file stream.
* @param source The named source of the reference file (used in error messages).
diff --git a/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java
index 1bfc354ba0..96d655d288 100644
--- a/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java
+++ b/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java
@@ -24,6 +24,7 @@
package htsjdk.samtools.reference;
+import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.seekablestream.SeekablePathStream;
@@ -54,6 +55,32 @@ public BlockCompressedIndexedFastaSequenceFile(final Path path)
this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path))));
}
+ /**
+ * Create a BlockCompressedIndexedFastaSequenceFile from explicitly provided files. No assumptions are made
+ * about the relative location of the files (i.e., no assumption is made that they are siblings).
+ * @param fastaPath the fasta file; may not be null, and is checked with {@code assertIsBlockCompressed}
+ * @param dictPath the associated dictionary file; may be null
+ * @param index the associated index; may not be null
+ * @param gziIndex the associated gziIndex; may not be null (an IllegalArgumentException is thrown otherwise)
+ */
+ public BlockCompressedIndexedFastaSequenceFile(
+ final IOPath fastaPath,
+ final IOPath dictPath,
+ final FastaSequenceIndex index,
+ final GZIIndex gziIndex) {
+ super(fastaPath, dictPath, index);
+ if (gziIndex == null) {
+ throw new IllegalArgumentException("null gzi index");
+ }
+ assertIsBlockCompressed(fastaPath.toPath());
+ try {
+ stream = new BlockCompressedInputStream(new SeekablePathStream(fastaPath.toPath()));
+ gzindex = gziIndex;
+ } catch (IOException e) {
+ throw new SAMException("Fasta file should be readable but is not: " + fastaPath, e);
+ }
+ }
+
public BlockCompressedIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
this(path, index, loadFastaGziIndex(path));
}
diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java
index 98807b8489..9449ba0ff9 100644
--- a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java
+++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java
@@ -24,6 +24,7 @@
package htsjdk.samtools.reference;
+import htsjdk.io.IOPath;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
@@ -64,6 +65,21 @@ public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespac
this.in = new FastLineReader(IOUtil.openFileForReading(path));
}
+ /**
+ * Constructs a FastaSequenceFile that reads from the specified fasta and dictionary file. Makes no
+ * assumptions that the fasta and dict file are in the same directory.
+ *
+ * @param fastaPath may not be null
+ * @param dictPath may be null
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
+ */
+ public FastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final boolean truncateNamesAtWhitespace) {
+ super(fastaPath.toPath(), fastaPath.toString(), dictPath == null ? null : loadSequenceDictionary(dictPath));
+ this.truncateNamesAtWhitespace = truncateNamesAtWhitespace;
+ this.seekableStream = null;
+ this.in = new FastLineReader(IOUtil.openFileForReading(fastaPath.toPath()));
+ }
+
/**
* Constructs a FastaSequenceFile that reads from the specified stream (which must not be compressed, i.e.
* the caller is responsible for decompressing the stream).
diff --git a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
index 226d539c0b..e180c95b9d 100644
--- a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
+++ b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
@@ -24,18 +24,16 @@
package htsjdk.samtools.reference;
+import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.seekablestream.ReadableSeekableStreamByteChannel;
import htsjdk.samtools.seekablestream.SeekableStream;
-import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.IOUtil;
-import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
@@ -89,6 +87,28 @@ public IndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index)
}
}
+ /**
+ * Open the given indexed fasta sequence file using an explicitly provided dictionary and index. Throw an
+ * exception if the file cannot be opened. Block-compressed files are rejected.
+ *
+ * @param path The file to open.
+ * @param dictPath the dictionary path (may be null)
+ * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk. may not be null.
+ * @throws SAMException if the file is block-compressed or cannot be opened
+ */
+ public IndexedFastaSequenceFile(final IOPath path, final IOPath dictPath, final FastaSequenceIndex index) {
+ super(path, dictPath, index);
+ try {
+ // reject block-compressed files (use BlockCompressedIndexedFastaSequenceFile)
+ if (IOUtil.isBlockCompressed(path.toPath(), true)) {
+ throw new SAMException("Indexed block-compressed FASTA file cannot be handled: " + path);
+ }
+ this.channel = Files.newByteChannel(path.toPath());
+ } catch (IOException e) {
+ throw new SAMException("FASTA file should be readable but is not: " + path, e);
+ }
+ }
+
/**
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
* @param path The file to open.
diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
index 02db50564b..18058e0b34 100644
--- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
+++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
@@ -24,8 +24,16 @@
package htsjdk.samtools.reference;
+import htsjdk.beta.io.bundle.Bundle;
+import htsjdk.beta.io.bundle.BundleJSON;
+import htsjdk.beta.io.bundle.BundleResource;
+import htsjdk.beta.io.bundle.BundleResourceType;
+import htsjdk.beta.plugin.IOUtils;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
-import htsjdk.samtools.util.BlockCompressedInputStream;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
+import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.GZIIndex;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
@@ -34,17 +42,16 @@
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.samtools.util.IOUtil;
+import htsjdk.utils.ValidationUtils;
-import java.io.BufferedInputStream;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.Collections;
-import java.util.HashSet;
+import java.util.Optional;
import java.util.Set;
+import java.util.function.Function;
/**
* Factory class for creating ReferenceSequenceFile instances for reading reference
@@ -97,7 +104,7 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final File file, fi
* @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader
*/
public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) {
- return getReferenceSequenceFile(IOUtil.toPath(file), truncateNamesAtWhitespace, preferIndexed);
+ return getReferenceSequenceFile(IOUtil.toPath(file), HtsPath::new, truncateNamesAtWhitespace, preferIndexed);
}
/**
@@ -119,29 +126,131 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path) {
* @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
*/
public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) {
- return getReferenceSequenceFile(path, truncateNamesAtWhitespace, true);
+ return getReferenceSequenceFile(path, HtsPath::new, truncateNamesAtWhitespace, true);
}
/**
* Attempts to determine the type of the reference file and return an instance
- * of ReferenceSequenceFile that is appropriate to read it.
+ * of ReferenceSequenceFile that is appropriate to read it. If the file represents
+ * a Bundle file, the reference sequence file is created from the bundle.
*
+ * @param <T> the IOPath-derived type to use for IOPathResources
* @param path the reference sequence file path
+ * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type {@code T}
+ * @return a ReferenceSequenceFile appropriate for reading the fasta, or the fasta described by the bundle
* @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
* @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader
*/
- public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) {
- // this should thrown an exception if the fasta file is not supported
- getFastaExtension(path);
- // Using faidx requires truncateNamesAtWhitespace
- if (truncateNamesAtWhitespace && preferIndexed && canCreateIndexedFastaReader(path)) {
- try {
- return IOUtil.isBlockCompressed(path, true) ? new BlockCompressedIndexedFastaSequenceFile(path) : new IndexedFastaSequenceFile(path);
- } catch (final IOException e) {
- throw new SAMException("Error opening FASTA: " + path, e);
+ public static <T extends IOPath> ReferenceSequenceFile getReferenceSequenceFile(
+ final Path path,
+ final Function<String, T> ioPathConstructor,
+ final boolean truncateNamesAtWhitespace,
+ final boolean preferIndexed) {
+ final IOPath refIOPath = ioPathConstructor.apply(path.toUri().toString());
+ if (refIOPath.hasExtension(BundleJSON.BUNDLE_EXTENSION)) {
+ final Bundle referenceBundle = BundleJSON.toBundle(IOUtils.getStringFromPath(refIOPath), ioPathConstructor);
+ return getReferenceSequenceFileFromBundle(referenceBundle, truncateNamesAtWhitespace, preferIndexed);
+ }
+ else {
+ // this should throw an exception if the fasta file is not supported
+ getFastaExtension(path);
+ // Using faidx requires truncateNamesAtWhitespace
+ if (truncateNamesAtWhitespace && preferIndexed && canCreateIndexedFastaReader(path)) {
+ try {
+ return IOUtil.isBlockCompressed(path, true) ?
+ new BlockCompressedIndexedFastaSequenceFile(path) :
+ new IndexedFastaSequenceFile(path);
+ } catch (final IOException e) {
+ throw new SAMException("Error opening FASTA: " + path, e);
+ }
+ } else {
+ return new FastaSequenceFile(path, truncateNamesAtWhitespace);
+ }
+ }
+ }
+
+ /**
+ * Determine the type of the reference specified in the bundle, and return a ReferenceSequenceFile appropriate for reading it.
+ *
+ * @param referenceBundle a Bundle containing resources for the reference file, index, and dictionary
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
+ * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader; requires truncateNamesAtWhitespace
+ * @return an indexed reader when requested and the bundle supplies the index file(s) it needs, otherwise a {@link FastaSequenceFile}
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFileFromBundle(
+ final Bundle referenceBundle,
+ final boolean truncateNamesAtWhitespace,
+ final boolean preferIndexed) {
+ ValidationUtils.nonNull(referenceBundle, "reference bundle");
+
+ // required fasta path
+ final BundleResource fastaResource = referenceBundle.getOrThrow(BundleResourceType.CT_HAPLOID_REFERENCE);
+ final IOPath fastaPath = fastaResource.getIOPath().orElseThrow(
+ () -> new RuntimeException("The fasta bundle resource must contain a fasta resource that is backed by an IOPath."));
+ if (!Files.exists(fastaPath.toPath())) {
+ throw new RuntimeException(String.format("FASTA file %s does not exist", fastaPath));
+ }
+
+ // optional dictionary path
+ IOPath dictPath = null;
+ final Optional<BundleResource> dictPathResource = referenceBundle.get(BundleResourceType.CT_REFERENCE_DICTIONARY);
+ if (dictPathResource.isPresent()) {
+ final BundleResource dictResource = dictPathResource.get();
+ final Optional<IOPath> optDictPath = dictResource.getIOPath();
+ if (optDictPath.isPresent()) {
+ dictPath = optDictPath.get();
+ if (!Files.exists(dictPath.toPath())) {
+ throw new RuntimeException(String.format("Sequence dictionary file %s does not exist", dictPath));
+ }
+ }
+ }
+
+ // optional index. Using faidx requires truncateNamesAtWhitespace
+ IOPath indexPath = null;
+ IOPath gziIndexPath = null;
+ if (preferIndexed) {
+ if (!truncateNamesAtWhitespace) {
+ throw new RuntimeException("preferIndexed option requires truncateNamesAtWhitespace");
+ }
+ final Optional<BundleResource> indexPathResource = referenceBundle.get(BundleResourceType.CT_REFERENCE_INDEX);
+ if (indexPathResource.isPresent()) {
+ final BundleResource indexResource = indexPathResource.get();
+ final Optional<IOPath> optIndexIOPath = indexResource.getIOPath();
+ if (optIndexIOPath.isPresent()) {
+ indexPath = optIndexIOPath.get();
+ if (!Files.exists(indexPath.toPath())) {
+ throw new RuntimeException(String.format("FASTA index file %s does not exist", indexPath));
+ }
+ }
+ }
+
+ final Optional<BundleResource> gziIndexPathResource = referenceBundle.get(BundleResourceType.CT_REFERENCE_INDEX_GZI);
+ if (gziIndexPathResource.isPresent()) {
+ final BundleResource gziIndexResource = gziIndexPathResource.get();
+ final Optional<IOPath> optGziIndexPath = gziIndexResource.getIOPath();
+ if (optGziIndexPath.isPresent()) {
+ gziIndexPath = optGziIndexPath.get();
+ if (!Files.exists(gziIndexPath.toPath())) {
+ throw new RuntimeException(String.format("GZI index file %s does not exist", gziIndexPath));
+ }
+ }
+ }
+ }
+
+ try {
+ // a block-compressed fasta can only be read by index when BOTH the fasta index and the GZI index are available
+ final boolean blockCompressed = IOUtil.isBlockCompressed(fastaPath.toPath(), true);
+ if (blockCompressed && indexPath != null && gziIndexPath != null) {
+ return new BlockCompressedIndexedFastaSequenceFile(
+ fastaPath, dictPath,
+ new FastaSequenceIndex(indexPath.toPath()), GZIIndex.loadIndex(gziIndexPath.toPath()));
+ } else if (!blockCompressed && indexPath != null) {
+ return new IndexedFastaSequenceFile(fastaPath, dictPath, new FastaSequenceIndex(indexPath.toPath()));
+ } else {
+ return new FastaSequenceFile(fastaPath, dictPath, truncateNamesAtWhitespace);
+ }
- } else {
- return new FastaSequenceFile(path, truncateNamesAtWhitespace);
+ } catch (final IOException e) {
+ throw new SAMException("Error opening FASTA: " + fastaPath, e);
}
}
diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java
index cfc56777ce..e0cf33a314 100644
--- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java
+++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java
@@ -23,6 +23,7 @@
*/
package htsjdk.samtools.reference;
+import htsjdk.io.HtsPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
@@ -47,7 +48,7 @@ public ReferenceSequenceFileWalker(final ReferenceSequenceFile referenceSequence
}
public ReferenceSequenceFileWalker(final Path path) {
- this(ReferenceSequenceFileFactory.getReferenceSequenceFile(path, true, false));
+ this(ReferenceSequenceFileFactory.getReferenceSequenceFile(path, HtsPath::new, true, false));
}
public ReferenceSequenceFileWalker(final File file) {
diff --git a/src/main/java/htsjdk/samtools/util/GZIIndex.java b/src/main/java/htsjdk/samtools/util/GZIIndex.java
index 179d253417..095ddab349 100644
--- a/src/main/java/htsjdk/samtools/util/GZIIndex.java
+++ b/src/main/java/htsjdk/samtools/util/GZIIndex.java
@@ -273,7 +273,7 @@ public void writeIndex(final OutputStream output) throws IOException {
*
* @throws IOException if an I/O error occurs.
*/
- public static final GZIIndex loadIndex(final Path indexPath) throws IOException {
+ public static GZIIndex loadIndex(final Path indexPath) throws IOException {
if (indexPath == null) {
throw new IllegalArgumentException("null input path");
}
diff --git a/src/test/java/htsjdk/beta/codecs/hapref/fasta/HaploidReferenceResolverTest.java b/src/test/java/htsjdk/beta/codecs/hapref/fasta/HaploidReferenceResolverTest.java
new file mode 100644
index 0000000000..239b035ab9
--- /dev/null
+++ b/src/test/java/htsjdk/beta/codecs/hapref/fasta/HaploidReferenceResolverTest.java
@@ -0,0 +1,132 @@
+package htsjdk.beta.codecs.hapref.fasta;
+
+import com.google.common.jimfs.Configuration;
+import com.google.common.jimfs.Jimfs;
+import htsjdk.HtsjdkTest;
+import htsjdk.beta.io.bundle.Bundle;
+import htsjdk.beta.io.bundle.BundleResource;
+import htsjdk.beta.io.bundle.BundleResourceType;
+import htsjdk.beta.plugin.registry.HaploidReferenceResolver;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+
+public class HaploidReferenceResolverTest extends HtsjdkTest {
+
+ @DataProvider
+ public Object[][] bundleCases() {
+ final String dataDir = "src/test/resources/htsjdk/samtools/reference";
+
+ return new Object[][] {
+ {
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta").getAbsolutePath()),
+ null,
+ null,
+ null
+ },
+ {
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta").getAbsolutePath()),
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.dict").getAbsolutePath()),
+ null,
+ null
+ },
+ {
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta").getAbsolutePath()),
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.dict").getAbsolutePath()),
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.fai").getAbsolutePath()),
+ null
+ },
+ {
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.gz").getAbsolutePath()),
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.dict").getAbsolutePath()),
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.gz.fai").getAbsolutePath()),
+ new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.gz.gzi").getAbsolutePath()),
+ },
+ };
+ }
+
+ @Test(dataProvider = "bundleCases")
+ public void testReferenceBundleFromPath(
+ final IOPath fastaFile,
+ final IOPath dictFile, // may be null
+ final IOPath indexFile, // may be null
+ final IOPath gziIndexFile // may be null
+ ) throws IOException {
+ // copy whatever subset of files are provided into a jimfs NIO file system, with each file in the same
+ // directory, so we can test inference of siblings and tolerance of missing siblings
+ try (final FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
+ final Path fastaDir = jimfs.getPath("fastaDir");
+ final Path nioFastaDir = Files.createDirectory(fastaDir);
+ Assert.assertEquals(nioFastaDir, fastaDir);
+
+ // copy the required fasta
+ final IOPath remoteFasta = new HtsPath(
+ Files.copy(
+ fastaFile.toPath(),
+ nioFastaDir.resolve(fastaFile.getBaseName().get() + fastaFile.getExtension().get())).toUri().toString());
+
+ // copy the optional dictionary file
+ IOPath remoteDict = null;
+ if (dictFile != null) {
+ remoteDict = new HtsPath(
+ Files.copy(
+ dictFile.toPath(),
+ nioFastaDir.resolve(dictFile.getBaseName().get() + dictFile.getExtension().get())).toUri().toString());
+ }
+
+ // copy the optional index
+ IOPath remoteIndex = null;
+ if (indexFile != null) {
+ remoteIndex = new HtsPath(
+ Files.copy(
+ indexFile.toPath(),
+ nioFastaDir.resolve(indexFile.getBaseName().get() + indexFile.getExtension().get())).toUri().toString());
+ }
+
+ // copy the optional gzi index
+ IOPath remoteGZI = null;
+ if (gziIndexFile != null) {
+ remoteGZI =
+ new HtsPath(
+ Files.copy(
+ gziIndexFile.toPath(),
+ nioFastaDir.resolve(gziIndexFile.getBaseName().get() + gziIndexFile.getExtension().get())
+ ).toUri().toString()
+ );
+ }
+
+ final Bundle bundle = HaploidReferenceResolver.referenceBundleFromFastaPath(remoteFasta, HtsPath::new);
+ Assert.assertEquals(bundle.getPrimaryResource().getIOPath().get(), remoteFasta);
+
+ final Optional<BundleResource> optDictResource = bundle.get(BundleResourceType.CT_REFERENCE_DICTIONARY);
+ // a sibling that was not copied must not show up in the bundle
+ Assert.assertEquals(optDictResource.isPresent(), dictFile != null);
+ if (dictFile != null) {
+ Assert.assertEquals(optDictResource.get().getIOPath().get(), remoteDict);
+ }
+
+ final Optional<BundleResource> optIndexResource = bundle.get(BundleResourceType.CT_REFERENCE_INDEX);
+ // the fasta index is only expected when it was copied next to the fasta
+ Assert.assertEquals(optIndexResource.isPresent(), indexFile != null);
+ if (indexFile != null) {
+ Assert.assertEquals(optIndexResource.get().getIOPath().get(), remoteIndex);
+ }
+
+ final Optional<BundleResource> optGZIResource = bundle.get(BundleResourceType.CT_REFERENCE_INDEX_GZI);
+ // the gzi index is only expected for the block-compressed case, where it was copied next to the fasta
+ Assert.assertEquals(optGZIResource.isPresent(), gziIndexFile != null);
+ if (gziIndexFile != null) {
+ Assert.assertEquals(optGZIResource.get().getIOPath().get(), remoteGZI);
+ }
+ }
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/reference/AbstractFastaSequenceFileTest.java b/src/test/java/htsjdk/samtools/reference/AbstractFastaSequenceFileTest.java
new file mode 100644
index 0000000000..fd90899809
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/reference/AbstractFastaSequenceFileTest.java
@@ -0,0 +1,44 @@
+package htsjdk.samtools.reference;
+
+import com.google.common.jimfs.Configuration;
+import com.google.common.jimfs.Jimfs;
+import htsjdk.HtsjdkTest;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
+import htsjdk.samtools.SAMSequenceDictionary;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+
+/** Tests for the sequence dictionary loading provided by {@link AbstractFastaSequenceFile}. */
+public class AbstractFastaSequenceFileTest extends HtsjdkTest {
+    final String dataDir = "src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/";
+
+    // simple test to ensure that a dictionary loaded from an nio (jimfs) path matches the local one
+    @Test
+    public void testLoadSequenceDictionaryFromNIO() throws IOException {
+        final IOPath testDictIOPath = new HtsPath(dataDir + "Homo_sapiens_assembly18.trimmed.dict");
+        final SAMSequenceDictionary originalDict = AbstractFastaSequenceFile.loadSequenceDictionary(testDictIOPath);
+
+        try (final FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
+            final IOPath remoteDictIOPath = new HtsPath(jimfs.getPath("seqDict.dict").toUri().toString());
+            final IOPath remoteDict = new HtsPath(
+                    Files.copy(testDictIOPath.toPath(), remoteDictIOPath.toPath()).toUri().toString());
+            Assert.assertEquals(remoteDict, remoteDictIOPath);
+
+            final SAMSequenceDictionary remoteSamDict = AbstractFastaSequenceFile.loadSequenceDictionary(remoteDictIOPath);
+            Assert.assertEquals(remoteSamDict, originalDict);
+        }
+    }
+
+    // a null dictionary path is expected to produce a null dictionary
+    @Test
+    public void testLoadSequenceDictionaryWithNull() {
+        final SAMSequenceDictionary dict = AbstractFastaSequenceFile.loadSequenceDictionary(null);
+        Assert.assertNull(dict);
+    }
+}
+
diff --git a/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java b/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java
index ce2d23fa5b..0c18d429c2 100644
--- a/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java
+++ b/src/test/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFileTest.java
@@ -24,7 +24,11 @@
package htsjdk.samtools.reference;
+import com.google.common.jimfs.Configuration;
+import com.google.common.jimfs.Jimfs;
import htsjdk.HtsjdkTest;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.CloserUtil;
@@ -40,6 +44,7 @@
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
@@ -369,4 +374,95 @@ public void testCanCreateBlockCompressedIndexedWithSpecifiedGZIAndDict() throws
withFilesAdjacent.getSubsequenceAt("chrM", 100, 1000).getBases());
}
}
+
+    // Test for IndexedFastaSequenceFile (non-gzipped) where the fasta and its index are NIO paths.
+    @Test
+    public void testIndexedFastaSequenceFileFromNio() throws IOException {
+        final String dataDir = "src/test/resources/htsjdk/samtools/reference";
+        final IOPath fastaFile = new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta").getAbsolutePath());
+        final IOPath indexFile = new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.fai").getAbsolutePath());
+
+        // copy each file into its own directory on a jimfs NIO file system, so we can catch any downstream
+        // code that assumes the index is a sibling of the fasta in the same directory
+        try (final FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
+            // copy the fasta
+            final Path fastaDir = jimfs.getPath("fastaDir");
+            final Path nioFastaDir = Files.createDirectory(fastaDir);
+            Assert.assertEquals(nioFastaDir, fastaDir);
+            final IOPath remoteFasta = new HtsPath(
+                    Files.copy(
+                            fastaFile.toPath(),
+                            nioFastaDir.resolve(fastaFile.getBaseName().get() + fastaFile.getExtension().get())).toUri().toString());
+
+            // copy the index file into a separate dir (not the fasta's dir, which would defeat the test)
+            final Path indexDir = jimfs.getPath("indexDir");
+            final Path nioIndexDir = Files.createDirectory(indexDir);
+            Assert.assertEquals(nioIndexDir, indexDir);
+            final IOPath remoteIndex = new HtsPath(
+                    Files.copy(
+                            indexFile.toPath(),
+                            nioIndexDir.resolve(indexFile.getBaseName().get() + indexFile.getExtension().get())).toUri().toString());
+
+            final FastaSequenceIndex fsi = new FastaSequenceIndex(remoteIndex.toPath());
+            // close the reader before the jimfs file system that backs it is closed
+            try (final IndexedFastaSequenceFile ifsf = new IndexedFastaSequenceFile(remoteFasta, null, fsi)) {
+                final ReferenceSequence rs = ifsf.getSubsequenceAt("chrM", 4, 10);
+                Assert.assertEquals(rs.getBaseString(), "CACAGGT");
+            }
+        }
+    }
+
+    // Test for BlockCompressedIndexedFastaSequenceFile (.fasta.gz input) where the fasta, its fai index
+    // and its gzi index are all supplied as NIO paths.
+    @Test
+    public void testBlockCompressedIndexedFastaSequenceFileFromNio() throws IOException {
+        final String dataDir = "src/test/resources/htsjdk/samtools/reference";
+        final IOPath fastaFile = new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.gz").getAbsolutePath());
+        final IOPath indexFile = new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.fai").getAbsolutePath());
+        final IOPath gziIndexFile = new HtsPath(new File(dataDir, "Homo_sapiens_assembly18.trimmed.fasta.gz.gzi").getAbsolutePath());
+
+        // copy each file into its own directory on a jimfs NIO file system, so we can catch any downstream
+        // code that assumes the index files are siblings of the fasta in the same directory
+        try (final FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
+            // copy the fasta
+            final Path fastaDir = jimfs.getPath("fastaDir");
+            final Path nioFastaDir = Files.createDirectory(fastaDir);
+            Assert.assertEquals(nioFastaDir, fastaDir);
+            final IOPath remoteFasta = new HtsPath(
+                    Files.copy(
+                            fastaFile.toPath(),
+                            nioFastaDir.resolve(fastaFile.getBaseName().get() + fastaFile.getExtension().get())).toUri().toString());
+
+            // copy the index file into a completely separate dir (not the fasta's dir)
+            final Path indexDir = jimfs.getPath("indexDir");
+            final Path nioIndexDir = Files.createDirectory(indexDir);
+            Assert.assertEquals(nioIndexDir, indexDir);
+            final IOPath remoteIndex = new HtsPath(
+                    Files.copy(
+                            indexFile.toPath(),
+                            nioIndexDir.resolve(indexFile.getBaseName().get() + indexFile.getExtension().get())).toUri().toString());
+
+            // copy the gzi index into yet another separate dir
+            final Path gziDir = jimfs.getPath("gziDir");
+            final Path nioGZIDir = Files.createDirectory(gziDir);
+            Assert.assertEquals(nioGZIDir, gziDir);
+            final IOPath remoteGZI =
+                    new HtsPath(
+                            Files.copy(
+                                    gziIndexFile.toPath(),
+                                    nioGZIDir.resolve(gziIndexFile.getBaseName().get() + gziIndexFile.getExtension().get())).toUri().toString());
+
+            final FastaSequenceIndex fsi = new FastaSequenceIndex(remoteIndex.toPath());
+            // close the reader before the jimfs file system that backs it is closed
+            try (final BlockCompressedIndexedFastaSequenceFile ifsf = new BlockCompressedIndexedFastaSequenceFile(
+                    remoteFasta,
+                    null,
+                    fsi,
+                    GZIIndex.loadIndex(remoteGZI.toPath()))) {
+                final ReferenceSequence rs = ifsf.getSubsequenceAt("chrM", 4, 10);
+                Assert.assertEquals(rs.getBaseString(), "CACAGGT");
+            }
+        }
+    }
+
}
diff --git a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java
index 56921750f6..60bfea6d37 100644
--- a/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java
+++ b/src/test/java/htsjdk/samtools/reference/ReferenceSequenceFileFactoryTests.java
@@ -1,11 +1,24 @@
package htsjdk.samtools.reference;
+import com.google.common.jimfs.Configuration;
+import com.google.common.jimfs.Jimfs;
import htsjdk.HtsjdkTest;
+import htsjdk.beta.io.bundle.Bundle;
+import htsjdk.beta.io.bundle.BundleBuilder;
+import htsjdk.beta.io.bundle.BundleResourceType;
+import htsjdk.beta.io.bundle.IOPathResource;
+import htsjdk.io.HtsPath;
+import htsjdk.io.IOPath;
+import htsjdk.samtools.SAMSequenceDictionary;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
+import java.io.IOException;
+import java.nio.file.FileSystem;
+import java.nio.file.Files;
+import java.nio.file.Path;
/**
* Simple tests for the reference sequence file factory
@@ -76,4 +89,144 @@ public Object[][] fastaNames() {
public void testGetDefaultDictionaryForReferenceSequence(final String fastaFile, final String expectedDict) throws Exception {
Assert.assertEquals(ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(new File(fastaFile)), new File(expectedDict));
}
+
+    /**
+     * Supplies the inputs for the reference bundle tests. Each row is { fasta, dictionary, fai index,
+     * gzi index }; every entry except the fasta may be null.
+     */
+    @DataProvider
+    public Object[][] bundleCases() {
+        final String dataDir = "src/test/resources/htsjdk/samtools/reference";
+        final String fasta = "Homo_sapiens_assembly18.trimmed.fasta";
+        final String bgzFasta = "Homo_sapiens_assembly18.trimmed.fasta.gz";
+        final String dict = "Homo_sapiens_assembly18.trimmed.dict";
+
+        // file names relative to dataDir; a null entry means the resource is absent for that case
+        final String[][] fileNames = {
+                { fasta, null, null, null },
+                { fasta, dict, null, null },
+                { fasta, dict, "Homo_sapiens_assembly18.trimmed.fasta.fai", null },
+                { bgzFasta, dict, "Homo_sapiens_assembly18.trimmed.fasta.gz.fai", "Homo_sapiens_assembly18.trimmed.fasta.gz.gzi" },
+        };
+
+        // resolve every non-null name to an absolute HtsPath, keeping nulls in place
+        final Object[][] cases = new Object[fileNames.length][];
+        for (int row = 0; row < fileNames.length; row++) {
+            cases[row] = new Object[fileNames[row].length];
+            for (int col = 0; col < fileNames[row].length; col++) {
+                final String name = fileNames[row][col];
+                cases[row][col] = name == null ? null : new HtsPath(new File(dataDir, name).getAbsolutePath());
+            }
+        }
+
+        return cases;
+    }
+
+    /** Runs the shared bundle checks directly against the local test files. */
+    @Test(dataProvider = "bundleCases")
+    public void testReferenceSequenceForLocalBundle(
+            final IOPath fastaFile,
+            final IOPath dictFile, // may be null
+            final IOPath indexFile, // may be null
+            final IOPath gziIndexFile) { // may be null
+        doBundleTest(fastaFile, dictFile, indexFile, gziIndexFile);
+    }
+
+    /**
+     * Runs the shared bundle checks after copying every resource into its own directory on a jimfs NIO
+     * file system; all inputs other than the fasta may be null.
+     */
+    @Test(dataProvider = "bundleCases")
+    public void testReferenceSequenceForNioBundle(
+            final IOPath fastaFile,
+            final IOPath dictFile,
+            final IOPath indexFile,
+            final IOPath gziIndexFile // may be null
+    ) throws IOException {
+        // copy everything to a jimfs NIO file system so that each file is in a separate directory, so we can
+        // catch any downstream code that makes assumptions that the files are siblings in the same dir
+        try (final FileSystem jimfs = Jimfs.newFileSystem(Configuration.unix())) {
+            // copy the fasta
+            final Path fastaDir = jimfs.getPath("fastaDir");
+            final Path nioFastaDir = Files.createDirectory(fastaDir);
+            Assert.assertEquals(nioFastaDir, fastaDir);
+            final IOPath remoteFasta = new HtsPath(
+                    Files.copy(
+                            fastaFile.toPath(),
+                            nioFastaDir.resolve(fastaFile.getBaseName().get() + fastaFile.getExtension().get())).toUri().toString());
+
+            // copy the optional dictionary file
+            IOPath remoteDict = null;
+            if (dictFile != null) {
+                final Path dictDir = jimfs.getPath("dictDir");
+                final Path nioDictDir = Files.createDirectory(dictDir);
+                Assert.assertEquals(nioDictDir, dictDir);
+                remoteDict = new HtsPath(
+                        Files.copy(
+                                dictFile.toPath(),
+                                nioDictDir.resolve(dictFile.getBaseName().get() + dictFile.getExtension().get())).toUri().toString());
+            }
+
+            // copy the optional index into its own directory (indexDir), not next to the fasta
+            IOPath remoteIndex = null;
+            if (indexFile != null) {
+                final Path indexDir = jimfs.getPath("indexDir");
+                final Path nioIndexDir = Files.createDirectory(indexDir);
+                Assert.assertEquals(nioIndexDir, indexDir);
+                remoteIndex = new HtsPath(
+                        Files.copy(
+                                indexFile.toPath(),
+                                nioIndexDir.resolve(indexFile.getBaseName().get() + indexFile.getExtension().get())).toUri().toString());
+            }
+
+            // copy the optional gzi index
+            IOPath remoteGZI = null;
+            if (gziIndexFile != null) {
+                final Path gziDir = jimfs.getPath("gziDir");
+                final Path nioGZIDir = Files.createDirectory(gziDir);
+                Assert.assertEquals(nioGZIDir, gziDir);
+                remoteGZI = new HtsPath(
+                        Files.copy(
+                                gziIndexFile.toPath(),
+                                nioGZIDir.resolve(gziIndexFile.getBaseName().get() + gziIndexFile.getExtension().get())).toUri().toString());
+            }
+
+            doBundleTest(remoteFasta, remoteDict, remoteIndex, remoteGZI);
+        }
+    }
+
+    /** Opens a reference bundle built from the given paths (all but fastaFile may be null) and checks it. */
+    private void doBundleTest(final IOPath fastaFile, final IOPath dictFile, final IOPath indexFile, final IOPath gziIndexFile) {
+        // create a bundle for all of our resources
+        final BundleBuilder bundleBuilder = new BundleBuilder();
+        bundleBuilder.addPrimary(new IOPathResource(fastaFile, BundleResourceType.CT_HAPLOID_REFERENCE));
+        if (null != dictFile) {
+            bundleBuilder.addSecondary(new IOPathResource(dictFile, BundleResourceType.CT_REFERENCE_DICTIONARY));
+        }
+        if (null != indexFile) {
+            bundleBuilder.addSecondary(new IOPathResource(indexFile, BundleResourceType.CT_REFERENCE_INDEX));
+        }
+        if (null != gziIndexFile) {
+            bundleBuilder.addSecondary(new IOPathResource(gziIndexFile, BundleResourceType.CT_REFERENCE_INDEX_GZI));
+        }
+        final Bundle referenceBundle = bundleBuilder.build();
+
+        // the reader is closed once the checks are done; TestNG's assertEquals takes (actual, expected)
+        try (final ReferenceSequenceFile rsf = ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle(referenceBundle, true, true)) {
+            Assert.assertEquals(rsf.isIndexed(), indexFile != null);
+
+            if (dictFile != null) {
+                final SAMSequenceDictionary samDict = AbstractFastaSequenceFile.loadSequenceDictionary(dictFile);
+                Assert.assertNotNull(rsf.getSequenceDictionary());
+                Assert.assertEquals(rsf.getSequenceDictionary(), samDict);
+            }
+
+            if (indexFile != null) {
+                final String seq = rsf.getSubsequenceAt("chrM", 4, 10).getBaseString();
+                Assert.assertEquals(seq, "CACAGGT");
+            }
+        } catch (final IOException e) {
+            throw new java.io.UncheckedIOException(e);
+        }
+    }
}