Skip to content

Commit

Permalink
[MRG] minor improvements to Index tests (#1900)
Browse files Browse the repository at this point in the history
* add -d/--debug to various commands

* initial implementation of StandaloneManifestIndex

* support prefix if not abspath

* clean up

* some standalone manifests tests - incl CLI

* iterate over internal locations instead

* switch to picklist API

* aaaaand swap out for load_file_as_index 🎉

* remove unnecessary spaces

* more tests

* more better prefix test

* remove unnec space

* upgrade output error messages

* fix SBT subdir loading error

* add message about using --debug

* doc etc

* rationalize _signatures_with_internal

* test describe and fileinfo on manifests

* think through more manifest stuff

* fix descr

* rationalize _signatures_with_internal

* fix docstring

* add heading anchors config; fix napoleon package ref

* pin versions for doc building

* fix internal refs

* fix one last ref target

* add docs

* clarify language

* add docs

* add more/better tests for lazy loading

* clarify

* a few more tests

* update docs

* cleanup and comment on index code

* minor improvements to Index tests

* add explicit test for lazy-loading prefetch on StandaloneManifestIndex

* add explicit test for lazy-loading prefetch on StandaloneManifestIndex

* update comments/docstrings

* Apply suggestions from code review

Co-authored-by: Tessa Pierce Ward <bluegenes@users.noreply.github.com>

Co-authored-by: Tessa Pierce Ward <bluegenes@users.noreply.github.com>
  • Loading branch information
ctb and bluegenes authored Mar 29, 2022
1 parent c26ac4e commit a78361d
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
6 changes: 3 additions & 3 deletions src/sourmash/index/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ def sigloc_iter():
for ss in idx.signatures():
yield ss, iloc

# build manifest; note, signatures are stored in memory.
# build manifest; note, ALL signatures are stored in memory.
# CTB: could do this on demand?
# CTB: should we use get_manifest functionality?
# CTB: note here that the manifest is created by iteration
Expand Down Expand Up @@ -976,11 +976,11 @@ def load_from_directory(cls, pathname, *, force=False):

rel = os.path.relpath(thisfile, pathname)
source_list.append(rel)
except (IOError, sourmash.exceptions.SourmashError):
except (IOError, sourmash.exceptions.SourmashError) as exc:
if force:
continue # ignore error
else:
raise # stop loading!
raise ValueError(exc) # stop loading!

# did we load anything? if not, error
if not index_list:
Expand Down
30 changes: 26 additions & 4 deletions tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1312,8 +1312,21 @@ def test_multi_index_load_from_directory_2():
assert len(sigs) == 7


def test_multi_index_load_from_directory_3_simple_bad_file(runtmp):
# check that force=False fails properly when confronted with non-JSON
# files.
c = runtmp

with open(runtmp.output('badsig.sig'), 'wt') as fp:
fp.write('bad content.')

with pytest.raises(ValueError):
mi = MultiIndex.load_from_directory(runtmp.location, force=False)


def test_multi_index_load_from_directory_3(runtmp):
# check that force works ok on a directory
# check that force=False fails properly when confronted with non-JSON
# files that are legit sourmash files...
c = runtmp

dirname = utils.get_test_data('prot')
Expand All @@ -1327,12 +1340,13 @@ def test_multi_index_load_from_directory_3(runtmp):
shutil.copyfile(fullname, copyto)
count += 1

with pytest.raises(sourmash.exceptions.SourmashError):
with pytest.raises(ValueError):
mi = MultiIndex.load_from_directory(c.location, force=False)


def test_multi_index_load_from_directory_3_yield_all_true(runtmp):
# check that force works ok on a directory w/force=True
# Note here that only .sig/.sig.gz files are loaded.
c = runtmp

dirname = utils.get_test_data('prot')
Expand All @@ -1353,7 +1367,8 @@ def test_multi_index_load_from_directory_3_yield_all_true(runtmp):


def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp):
# check that force works ok on subdirectories
# check that force works ok on subdirectories.
# Note here that only .sig/.sig.gz files are loaded.
c = runtmp
dirname = utils.get_test_data('prot')

Expand All @@ -1371,6 +1386,9 @@ def test_multi_index_load_from_directory_3_yield_all_true_subdir(runtmp):

mi = MultiIndex.load_from_directory(c.location, force=True)

locations = set([ row['internal_location'] for row in mi.manifest.rows ])
print(locations)

sigs = list(mi.signatures())
assert len(sigs) == 8

Expand Down Expand Up @@ -1466,6 +1484,7 @@ def test_multi_index_load_from_pathlist_1(runtmp):

def test_multi_index_load_from_pathlist_2(runtmp):
# create a pathlist file with _all_ files under dir, and try to load it.
# this will fail on one of several CSV or .sh files in there.

# CTB note: if you create extra files under this directory,
# it will fail :)
Expand All @@ -1479,9 +1498,12 @@ def test_multi_index_load_from_pathlist_2(runtmp):
with open(file_list, 'wt') as fp:
print("\n".join(files), file=fp)

with pytest.raises(ValueError):
with pytest.raises(ValueError) as exc:
mi = MultiIndex.load_from_pathlist(file_list)

print(str(exc))
assert 'Error while reading signatures from' in str(exc)


def test_multi_index_load_from_pathlist_3_zipfile(runtmp):
# can we load zipfiles in a pathlist? yes please.
Expand Down

0 comments on commit a78361d

Please sign in to comment.