Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support reference bundles. #1713

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ public FASTADecoderV1_0(final Bundle inputBundle) {
this.displayName = inputBundle.getPrimaryResource().getDisplayName();
final BundleResource referenceResource = inputBundle.getOrThrow(BundleResourceType.CT_HAPLOID_REFERENCE);
if (referenceResource.getIOPath().isPresent()) {
referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(
referenceResource.getIOPath().get().toPath(), true);
referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle(inputBundle, true, true);
} else {
final SeekableStream seekableStream = referenceResource.getSeekableStream().orElseThrow(
() -> new IllegalArgumentException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public class BundleResourceType {
/** Secondary content types for {@link BundleResourceType#CT_HAPLOID_REFERENCE} resources*/
public static final String CT_REFERENCE_DICTIONARY = "REFERENCE_DICTIONARY";
public static final String CT_REFERENCE_INDEX = "REFERENCE_INDEX";
public static final String CT_REFERENCE_INDEX_GZI = "REFERENCE_INDEX_GZI";


/****************************************** Resource types for FEATURES ********************************/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,18 @@
import htsjdk.beta.plugin.hapref.HaploidReferenceCodec;
import htsjdk.beta.plugin.hapref.HaploidReferenceDecoder;
import htsjdk.beta.plugin.hapref.HaploidReferenceDecoderOptions;
import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.GZIIndex;
import htsjdk.samtools.util.IOUtil;
import htsjdk.utils.ValidationUtils;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.function.Function;

/**
* Class with methods for resolving inputs and outputs to haploid reference encoders and decoders.
* <p>
Expand Down Expand Up @@ -66,9 +75,7 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder(
ValidationUtils.nonNull(inputPath, "Input path");
ValidationUtils.nonNull(HaploidReferenceDecoderOptions, "Decoder options");

final Bundle referenceBundle = new BundleBuilder().addPrimary(
new IOPathResource(inputPath, BundleResourceType.CT_HAPLOID_REFERENCE)).build();

final Bundle referenceBundle = referenceBundleFromFastaPath(inputPath, HtsPath::new);
return getHaploidReferenceDecoder(referenceBundle, HaploidReferenceDecoderOptions);
}

Expand Down Expand Up @@ -110,4 +117,47 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder(
return (HaploidReferenceDecoder) resolveForDecoding(inputBundle).getDecoder(inputBundle, HaploidReferenceDecoderOptions);
}

/**
* Create a reference bundle given only a fasta path, including an index and a dictionary
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo: q

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Create q reference bundle given only a fasta path, including an index and a dictionary
* Create a reference bundle given only a fasta path, including an index and a dictionary

* file if they are present and located in the same directory as the fasta.
*
* @param fastaPath location of the fasta
* @param ioPathConstructor a constructor used to create IOPath-derived objects for the bundle
* @return a reference Bundle
* @param <T> the IOPath-derived type produced by {@code ioPathConstructor}
*/
public static <T extends IOPath> Bundle referenceBundleFromFastaPath(final IOPath fastaPath, final Function<String, T> ioPathConstructor) {
    // Resolve the fasta location once; every companion file name below is derived from it.
    final Path fastaFilePath = fastaPath.toPath();

    final BundleBuilder referenceBundleBuilder = new BundleBuilder();
    referenceBundleBuilder.addPrimary(new IOPathResource(fastaPath, BundleResourceType.CT_HAPLOID_REFERENCE));

    // Companion resources are optional: each one is added to the bundle only if the file exists.
    final Path dictPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(fastaFilePath);
    if (Files.exists(dictPath)) {
        referenceBundleBuilder.addSecondary(
                new IOPathResource(
                        ioPathConstructor.apply(dictPath.toUri().toString()),
                        BundleResourceType.CT_REFERENCE_DICTIONARY));
    }

    final Path idxPath = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFilePath);
    if (Files.exists(idxPath)) {
        referenceBundleBuilder.addSecondary(
                new IOPathResource(
                        ioPathConstructor.apply(idxPath.toUri().toString()),
                        BundleResourceType.CT_REFERENCE_INDEX));
    }

    try {
        // A gzi index is only meaningful for a block-compressed fasta.
        if (IOUtil.isBlockCompressed(fastaFilePath, true)) {
            final Path gziPath = GZIIndex.resolveIndexNameForBgzipFile(fastaFilePath);
            // Same rule as the dictionary and fasta index: never reference a file that is not there.
            if (Files.exists(gziPath)) {
                referenceBundleBuilder.addSecondary(
                        new IOPathResource(
                                ioPathConstructor.apply(gziPath.toUri().toString()),
                                BundleResourceType.CT_REFERENCE_INDEX_GZI));
            }
        }
    } catch (IOException e) {
        throw new HtsjdkException("Error while checking for block compression", e);
    }
    return referenceBundleBuilder.build();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/**
* A {@link Bundle} for variants and variants-related resources that are backed by on disk files. A {@link
* htsjdk.beta.plugin.variants.VariantsBundle} has a primary resource with content type {@link
* BundleResourceType#PRIMARY_CT_VARIANT_CONTEXTS}; and an optional index resource. A VariantsBundle can also
* BundleResourceType#CT_VARIANT_CONTEXTS}; and an optional index resource. A VariantsBundle can also
* contain additional resources.
*
* Note that this class is merely a convenience class for the case where the variants are backed by files on disk.
Expand All @@ -31,6 +31,7 @@ public class VariantsBundle extends Bundle implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
private static final Log LOG = Log.getInstance(VariantsBundle.class);

/**
* Create a {@link htsjdk.beta.plugin.variants.VariantsBundle} containing only a variants resource.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,16 @@

package htsjdk.samtools.reference;

import htsjdk.io.HtsPath;
import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.FileExtensions;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Lazy;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.file.Files;
Expand Down Expand Up @@ -84,13 +84,25 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
/** Attempts to find and load the sequence dictionary if present. */
protected SAMSequenceDictionary findAndLoadSequenceDictionary(final Path fasta) {
final Path dictPath = findSequenceDictionary(path);
if (dictPath == null) return null;
if (dictPath == null) {
return null;
}
return loadSequenceDictionary(new HtsPath(dictPath.toUri().toString()));
}

IOUtil.assertFileIsReadable(dictPath);
try (InputStream dictionaryIn = IOUtil.openFileForReading(dictPath)) {
return ReferenceSequenceFileFactory.loadDictionary(dictionaryIn);
/**
* Attempt to load a sequence dictionary given a file path. Path may be null.
* @param dictPath the dictionary file to open
* @return the SAMSequenceDictionary, or null
*/
protected static SAMSequenceDictionary loadSequenceDictionary(final IOPath dictPath) {
if (dictPath == null) {
return null;
}
catch (Exception e) {
IOUtil.assertFileIsReadable(dictPath.toPath());
try (final InputStream dictionaryStream = IOUtil.openFileForReading(dictPath.toPath())) {
return ReferenceSequenceFileFactory.loadDictionary(dictionaryStream);
} catch (final IOException e) {
throw new SAMException("Could not open sequence dictionary file: " + dictPath, e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

package htsjdk.samtools.reference;

import htsjdk.io.IOPath;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
Expand Down Expand Up @@ -69,6 +70,27 @@ protected AbstractIndexedFastaSequenceFile(final Path path, final FastaSequenceI
}
}

/**
 * Create an AbstractIndexedFastaSequenceFile from explicitly provided files. No assumptions are made
 * about the relative location of the files (i.e., that they are siblings).
 *
 * @param fastaPath the path to the fasta file. may not be null.
 * @param dictPath the path to the sequence dictionary. may be null.
 * @param index the associated index object; may not be null.
 * @throws IllegalArgumentException if {@code index} is null
 */
protected AbstractIndexedFastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final FastaSequenceIndex index) {
    super(fastaPath.toPath(), fastaPath.getURIString(), loadSequenceDictionary(dictPath));
    if (index == null) {
        // Name the fasta in the message; the index itself is null here and carries no information.
        throw new IllegalArgumentException("Null index for fasta " + fastaPath);
    }
    this.index = index;
    IOUtil.assertFileIsReadable(fastaPath.toPath());
    reset();
    // The dictionary is optional; cross-check it against the index only when one was loaded.
    if (getSequenceDictionary() != null) {
        sanityCheckDictionaryAgainstIndex(fastaPath.getRawInputString(), getSequenceDictionary(), index);
    }
}

/**
* Initialise the given indexed fasta sequence file stream.
* @param source The named source of the reference file (used in error messages).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

package htsjdk.samtools.reference;

import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.seekablestream.SeekablePathStream;
Expand Down Expand Up @@ -54,6 +55,32 @@ public BlockCompressedIndexedFastaSequenceFile(final Path path)
this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path))));
}

/**
* Create a BlockCompressedIndexedFastaSequenceFile from explicitly provided files. No assumptions are made
* about the relative location of the files (i.e., no assumption is made that they are siblings).
* @param fastaPath the fasta file
* @param dictPath the associated dictionary file
* @param index the associated index
* @param gziIndex the associated gziIndex
*/
public BlockCompressedIndexedFastaSequenceFile(
final IOPath fastaPath,
final IOPath dictPath,
final FastaSequenceIndex index,
final GZIIndex gziIndex) {
super(fastaPath, dictPath, index);
if (gziIndex == null) {
throw new IllegalArgumentException("null gzi index");
}
assertIsBlockCompressed(fastaPath.toPath());
try {
stream = new BlockCompressedInputStream(new SeekablePathStream(fastaPath.toPath()));
gzindex = gziIndex;
} catch (IOException e) {
throw new SAMException("Fasta file should be readable but is not: " + fastaPath, e);
}
}

public BlockCompressedIndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
this(path, index, loadFastaGziIndex(path));
}
Expand Down
16 changes: 16 additions & 0 deletions src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

package htsjdk.samtools.reference;

import htsjdk.io.IOPath;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
Expand Down Expand Up @@ -64,6 +65,21 @@ public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespac
this.in = new FastLineReader(IOUtil.openFileForReading(path));
}

/**
* Constructs a FastaSequenceFile that reads from the specified fasta and dictionary file. Makes no
* assumptions that the fasta and dict file are in the same directory.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* assumptions that the fata and dict file are in the same directory.
* assumptions that the fasta and dict file are in the same directory.

*
* @param fastaPath may not be null
* @param dictPath may be null
* @param truncateNamesAtWhitespace
*/
public FastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final boolean truncateNamesAtWhitespace) {
    // The dictionary is loaded from its explicit location only when one was supplied.
    super(
            fastaPath.toPath(),
            fastaPath.toString(),
            dictPath != null ? loadSequenceDictionary(dictPath) : null);
    // This constructor reads from a path, so no seekable stream is retained.
    seekableStream = null;
    in = new FastLineReader(IOUtil.openFileForReading(fastaPath.toPath()));
    this.truncateNamesAtWhitespace = truncateNamesAtWhitespace;
}

/**
* Constructs a FastaSequenceFile that reads from the specified stream (which must not be compressed, i.e.
* the caller is responsible for decompressing the stream).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,16 @@

package htsjdk.samtools.reference;

import htsjdk.io.IOPath;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.seekablestream.ReadableSeekableStreamByteChannel;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.IOUtil;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
Expand Down Expand Up @@ -89,6 +87,28 @@ public IndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index)
}
}

/**
*/
/**
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
*
* @param path The file to open.
* @param dictPath the dictionary path (may be null)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* @param dictPath the dictionar path (may be null)
* @param dictPath the dictionary path (may be null)

* @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk. may not be null.
*/
public IndexedFastaSequenceFile(final IOPath path, final IOPath dictPath, final FastaSequenceIndex index) {
super(path, dictPath, index);
try {
// reject block-compressed files (use BlockCompressedIndexedFastaSequenceFile)
if (IOUtil.isBlockCompressed(path.toPath(), true)) {
throw new SAMException("Indexed block-compressed FASTA file cannot be handled: " + path);
}
this.channel = Files.newByteChannel(path.toPath());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should add getChannel methods to IOPath. We have getStream ones.

} catch (IOException e) {
throw new SAMException("FASTA file should be readable but is not: " + path, e);
}
}

/**
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
* @param path The file to open.
Expand Down
Loading
Loading