# Patch summary (explanatory preamble placed before the first "diff --git" header;
# the hunks below are unchanged apart from restored line breaks).
#
# - src/sourmash/lca/lca_db.py: downsample_scaled() now builds its filtered
#   hash table (new_hashvals) as defaultdict(set) instead of a plain dict.
# - tests/test_lca.py: adds test_api_create_insert_two_then_scale_then_add, which
#   inserts one signature into a scaled=1000 LCA_Database, downsamples the
#   database to scaled=5000, inserts a second signature, and then checks that
#   the number of stored hash values equals the size of the union of both
#   signatures' hashes after downsampling them to scaled=5000.
#
# NOTE(review): presumably new_hashvals replaces self._hashval_to_idx after the
# loop, and insert() relies on missing keys defaulting to an empty set -- neither
# is visible in these hunks; confirm against lca_db.py.
# NOTE(review): the hunk does not show an import of defaultdict; confirm that
# lca_db.py already imports it from collections.
diff --git a/src/sourmash/lca/lca_db.py b/src/sourmash/lca/lca_db.py
index 280f810426..21ff15a2c1 100644
--- a/src/sourmash/lca/lca_db.py
+++ b/src/sourmash/lca/lca_db.py
@@ -460,7 +460,7 @@ def downsample_scaled(self, scaled):
         max_hash = _get_max_hash_for_scaled(scaled)
 
         # filter out all hashes over max_hash in value.
-        new_hashvals = {}
+        new_hashvals = defaultdict(set)
         for k, v in self._hashval_to_idx.items():
             if k < max_hash:
                 new_hashvals[k] = v
diff --git a/tests/test_lca.py b/tests/test_lca.py
index 61bd4905cd..af9778c76b 100644
--- a/tests/test_lca.py
+++ b/tests/test_lca.py
@@ -425,6 +425,32 @@ def test_api_create_insert_two_then_scale():
     assert len(lca_db._hashval_to_idx) == len(combined_mins)
 
 
+def test_api_create_insert_two_then_scale_then_add():
+    # construct database, THEN downsample, then add another
+    ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'),
+                                     ksize=31)
+    ss2 = sourmash.load_one_signature(utils.get_test_data('63.fa.sig'),
+                                      ksize=31)
+
+    lca_db = sourmash.lca.LCA_Database(ksize=31, scaled=1000)
+    lca_db.insert(ss)
+
+    # downsample everything to 5000
+    lca_db.downsample_scaled(5000)
+
+    # insert another after downsample
+    lca_db.insert(ss2)
+
+    # now test -
+    ss.minhash = ss.minhash.downsample(scaled=5000)
+    ss2.minhash = ss2.minhash.downsample(scaled=5000)
+
+    # & check...
+    combined_mins = set(ss.minhash.hashes.keys())
+    combined_mins.update(set(ss2.minhash.hashes.keys()))
+    assert len(lca_db._hashval_to_idx) == len(combined_mins)
+
+
 def test_api_create_insert_scale_two():
     # downsample while constructing database
     ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'),