Skip to content

Commit

Permalink
make sure md5 selector is unique in collection
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Jul 1, 2020
1 parent a724430 commit 11e519a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
13 changes: 11 additions & 2 deletions sourmash/sourmash_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,20 @@ def load_query_signature(filename, ksize, select_moltype, select_md5=None):
sys.exit(-1)

if len(sl) and select_md5:
found_sig = None
for sig in sl:
sig_md5 = sig.md5sum()
if sig_md5.startswith(select_md5.lower()):
sl = [sig]
break
# make sure we pick only one --
if found_sig is not None:
error("Error! Multiple signatures start with md5 '{}'",
select_md5)
error("Please use a longer --md5 selector.")
sys.exit(-1)
else:
found_sig = sig

sl = [found_sig]

if len(sl) and ksize is None:
ksizes = set([ ss.minhash.ksize for ss in sl ])
Expand Down
12 changes: 12 additions & 0 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,18 @@ def test_gather_query_db_md5(c):
assert '340.9 kbp 100.0% 100.0% ...01593925.1_ASM159392v1_protein.faa.gz' in str(c)


@utils.in_thisdir
def test_gather_query_db_md5_ambiguous(c):
    """Check that 'gather' reports an error for an ambiguous --md5 prefix.

    Runs 'sourmash gather' with the protein SBT test database as both the
    query and the database, selecting the query with '--md5 1'.  The run is
    expected to raise ValueError, and the captured error output must contain
    the "Multiple signatures start with md5" message.
    """
    # what if we give an ambiguous md5 prefix?
    # NOTE(review): presumably more than one signature in this database has
    # an md5 starting with '1' -- confirm against the test data.
    db = utils.get_test_data('prot/protein.sbt.zip')

    # The exception object itself is never inspected, so it is not bound.
    with pytest.raises(ValueError):
        c.run_sourmash('gather', db, db, '--md5', '1')

    err = c.last_result.err
    assert "Error! Multiple signatures start with md5 '1'" in err


@utils.in_tempdir
def test_gather_lca_db(c):
# can we do a 'sourmash gather' on an LCA database?
Expand Down

0 comments on commit 11e519a

Please sign in to comment.