Skip to content

Commit

Permalink
add directory load w/manifest into _load_databases
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Jul 4, 2021
1 parent 7c4949f commit 512bbf2
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
20 changes: 15 additions & 5 deletions src/sourmash/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,9 +993,18 @@ def select(self, **kwargs):
class DirectoryIndex(Index):
"""
Notes:
* manifests for directories are _generated_ by MultiIndex :grin: :shrug:
* Lazy - loads signatures only when requested
* could/should be implemented on top of FSStorage?
* maybe this becomes StorageIndex? basically a storage with a manifest?
* different from MultiIndex because it uses pre-existing manifest and
lazy loading/does not keep signatures in memory.
* different from LazyLoadedIndex because that requires a concrete index.
* different from LazyMultiIndex because that also requires a concrete
index.
"""
def __init__(self, parent, manifest):
    """Constructor.

    'parent' is the top-level directory; 'manifest' is the manifest
    object for that directory. Both are stored unchanged on the instance.
    """
    self.parent, self.manifest = parent, manifest

Expand All @@ -1019,12 +1028,18 @@ def signatures(self):
yield ss

def signatures_with_location(self):
    """Load signatures listed in the manifest, yielding each with its path.

    For every location reported by the manifest, the location is joined
    onto 'self.parent' and the resulting file is loaded. Only signatures
    that the manifest contains are yielded, as (signature, full path).
    """
    for relpath in self.manifest.locations():
        sigfile = os.path.join(self.parent, relpath)
        for sig in load_signatures(sigfile):
            # skip anything in the file that the manifest does not list.
            if sig not in self.manifest:
                continue
            yield sig, sigfile

def select(self, **kwargs):
    """Run 'select' on the manifest and wrap the result in a new index.

    All keyword arguments are forwarded to the manifest's
    'select_to_manifest'; the returned DirectoryIndex keeps the same
    parent directory.
    """
    narrowed = self.manifest.select_to_manifest(**kwargs)
    return DirectoryIndex(self.parent, narrowed)

def __len__(self):
    "Return len() of the manifest."
    count = len(self.manifest)
    return count

Expand All @@ -1034,11 +1049,6 @@ def insert(self, *args):
def save(self, *args):
    "Not implemented for this index; always raises NotImplementedError."
    raise NotImplementedError()

def select(self, **kwargs):
    """Run 'select' on the manifest and wrap the result in a new index.

    All keyword arguments are forwarded to the manifest's
    'select_to_manifest'; the returned DirectoryIndex keeps the same
    parent directory.
    """
    narrowed = self.manifest.select_to_manifest(**kwargs)
    return DirectoryIndex(self.parent, narrowed)


class LazyLoadedIndex(Index):
"""Given an index location and a manifest, do select only on the manifest
Expand Down
11 changes: 10 additions & 1 deletion src/sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@

from .logging import notify, error, debug_literal

from .index import (LinearIndex, ZipFileLinearIndex, MultiIndex)
from .index import (LinearIndex, ZipFileLinearIndex,
MultiIndex, DirectoryIndex)
from . import signature as sigmod
from .picklist import SignaturePicklist, PickStyle
from .manifest import CollectionManifest
Expand Down Expand Up @@ -270,6 +271,13 @@ def _multiindex_load_from_pathlist(filename, **kwargs):
return db


def _directoryindex_load(filename, **kwargs):
    """Load collection from a directory with a manifest.

    Delegates to DirectoryIndex.load(filename). Keyword arguments are
    accepted but are not used here.
    """
    return DirectoryIndex.load(filename)


def _multiindex_load_from_path(filename, **kwargs):
"Load collection from a directory."
traverse_yield_all = kwargs['traverse_yield_all']
Expand Down Expand Up @@ -309,6 +317,7 @@ def _load_zipfile(filename, **kwargs):
# all loader functions, in order.
_loader_functions = [
("load from stdin", _load_stdin),
("load from directory w.manifest", _directoryindex_load),
("load from path (file or directory)", _multiindex_load_from_path),
("load from file list", _multiindex_load_from_pathlist),
("load SBT", _load_sbt),
Expand Down

0 comments on commit 512bbf2

Please sign in to comment.