diff --git a/src/sourmash/index.py b/src/sourmash/index.py
index 5b57860661..0966f0ebb7 100644
--- a/src/sourmash/index.py
+++ b/src/sourmash/index.py
@@ -993,9 +993,18 @@ def select(self, **kwargs):
 class DirectoryIndex(Index):
     """
     Notes:
+    * manifests for directories are _generated_ by MultiIndex :grin: :shrug:
     * Lazy - loads signatures only when requested
+    * could/should be implemented on top of FSStorage?
+    * maybe this becomes StorageIndex? basically a storage with a manifest?
+    * different from MultiIndex because it uses pre-existing manifest and
+      lazy loading/does not keep signatures in memory.
+    * different from LazyLoadedIndex because that requires a concrete index.
+    * different from LazyMultiIndex because that also requires a concrete
+      index.
     """
     def __init__(self, parent, manifest):
+        "Constructor; 'parent' is the top-level directory."
         self.parent = parent
         self.manifest = manifest
 
@@ -1019,12 +1028,18 @@ def signatures(self):
             yield ss
 
     def signatures_with_location(self):
+        "Yield (signature, full path) for each signature in the manifest."
         for location in self.manifest.locations():
             fullpath = os.path.join(self.parent, location)
             for ss in load_signatures(fullpath):
                 if ss in self.manifest:
                     yield ss, fullpath
 
+    def select(self, **kwargs):
+        "Run 'select' on the manifest."
+        new_manifest = self.manifest.select_to_manifest(**kwargs)
+        return DirectoryIndex(self.parent, new_manifest)
+
     def __len__(self):
         return len(self.manifest)
 
@@ -1034,11 +1049,6 @@ def insert(self, *args):
     def save(self, *args):
         raise NotImplementedError
 
-    def select(self, **kwargs):
-        "Run 'select' on the manifest."
-        new_manifest = self.manifest.select_to_manifest(**kwargs)
-        return DirectoryIndex(self.parent, new_manifest)
-
 
 class LazyLoadedIndex(Index):
     """Given an index location and a manifest, do select only on the manifest
diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py
index 64c118fb4b..a8c0823a7c 100644
--- a/src/sourmash/sourmash_args.py
+++ b/src/sourmash/sourmash_args.py
@@ -18,7 +18,8 @@
 
 from .logging import notify, error, debug_literal
 
-from .index import (LinearIndex, ZipFileLinearIndex, MultiIndex)
+from .index import (LinearIndex, ZipFileLinearIndex,
+                    MultiIndex, DirectoryIndex)
 from . import signature as sigmod
 from .picklist import SignaturePicklist, PickStyle
 from .manifest import CollectionManifest
@@ -270,6 +271,13 @@ def _multiindex_load_from_pathlist(filename, **kwargs):
     return db
 
 
+def _directoryindex_load(filename, **kwargs):
+    "Load collection from a directory with a manifest."
+    db = DirectoryIndex.load(filename)
+
+    return db
+
+
 def _multiindex_load_from_path(filename, **kwargs):
     "Load collection from a directory."
     traverse_yield_all = kwargs['traverse_yield_all']
@@ -309,6 +317,7 @@ def _load_zipfile(filename, **kwargs):
 # all loader functions, in order.
 _loader_functions = [
     ("load from stdin", _load_stdin),
+    ("load from directory w.manifest", _directoryindex_load),
     ("load from path (file or directory)", _multiindex_load_from_path),
     ("load from file list", _multiindex_load_from_pathlist),
     ("load SBT", _load_sbt),