From 11e519a875483c4e7da06a2334825501caca4483 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Wed, 1 Jul 2020 16:52:44 -0700
Subject: [PATCH] make sure md5 selector is unique in collection

---
 sourmash/sourmash_args.py | 13 +++++++++++--
 tests/test_sourmash.py    | 12 ++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/sourmash/sourmash_args.py b/sourmash/sourmash_args.py
index 0087da18eb..094a535077 100644
--- a/sourmash/sourmash_args.py
+++ b/sourmash/sourmash_args.py
@@ -74,11 +74,20 @@ def load_query_signature(filename, ksize, select_moltype, select_md5=None):
         sys.exit(-1)
 
     if len(sl) and select_md5:
+        found_sig = None
         for sig in sl:
             sig_md5 = sig.md5sum()
             if sig_md5.startswith(select_md5.lower()):
-                sl = [sig]
-                break
+                # make sure we pick only one --
+                if found_sig is not None:
+                    error("Error! Multiple signatures start with md5 '{}'",
+                          select_md5)
+                    error("Please use a longer --md5 selector.")
+                    sys.exit(-1)
+                else:
+                    found_sig = sig
+
+        sl = [found_sig]
 
     if len(sl) and ksize is None:
         ksizes = set([ ss.minhash.ksize for ss in sl ])
diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py
index f09e4fa09a..bdd3d2082a 100644
--- a/tests/test_sourmash.py
+++ b/tests/test_sourmash.py
@@ -773,6 +773,18 @@ def test_gather_query_db_md5(c):
     assert '340.9 kbp 100.0% 100.0% ...01593925.1_ASM159392v1_protein.faa.gz' in str(c)
 
 
+@utils.in_thisdir
+def test_gather_query_db_md5_ambiguous(c):
+    # what if we give an ambiguous md5 prefix?
+    db = utils.get_test_data('prot/protein.sbt.zip')
+
+    with pytest.raises(ValueError) as exc:
+        c.run_sourmash('gather', db, db, '--md5', '1')
+
+    err = c.last_result.err
+    assert "Error! Multiple signatures start with md5 '1'" in err
+
+
 @utils.in_tempdir
 def test_gather_lca_db(c):
     # can we do a 'sourmash gather' on an LCA database?