Skip to content

Commit

Permalink
test insert after downsample for LCA_Database
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Jul 11, 2022
1 parent ebe14dd commit 8ce3e3d
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/sourmash/lca/lca_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def downsample_scaled(self, scaled):
max_hash = _get_max_hash_for_scaled(scaled)

# filter out all hashes over max_hash in value.
- new_hashvals = {}
+ new_hashvals = defaultdict(set)
for k, v in self._hashval_to_idx.items():
if k < max_hash:
new_hashvals[k] = v
Expand Down
26 changes: 26 additions & 0 deletions tests/test_lca.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,32 @@ def test_api_create_insert_two_then_scale():
assert len(lca_db._hashval_to_idx) == len(combined_mins)


def test_api_create_insert_two_then_scale_then_add():
    """Insert a signature into an LCA_Database after it has been downsampled.

    One signature is inserted at scaled=1000, the database is downsampled
    to scaled=5000, and a second signature is inserted afterwards. The
    number of hash values held by the database must then equal the number
    of distinct hashes across both signatures once each has been
    downsampled to scaled=5000.
    """
    sig_47 = sourmash.load_one_signature(
        utils.get_test_data('47.fa.sig'), ksize=31)
    sig_63 = sourmash.load_one_signature(
        utils.get_test_data('63.fa.sig'), ksize=31)

    # build the database at the finer resolution with one signature only
    db = sourmash.lca.LCA_Database(ksize=31, scaled=1000)
    db.insert(sig_47)

    # coarsen the existing contents, then add the second signature
    db.downsample_scaled(5000)
    db.insert(sig_63)

    # bring both signatures to the database's new resolution so their
    # hashes can be compared against what the database holds
    sig_47.minhash = sig_47.minhash.downsample(scaled=5000)
    sig_63.minhash = sig_63.minhash.downsample(scaled=5000)

    # expected content: union of the two downsampled hash sets
    expected_hashes = (set(sig_47.minhash.hashes.keys())
                       | set(sig_63.minhash.hashes.keys()))
    assert len(db._hashval_to_idx) == len(expected_hashes)


def test_api_create_insert_scale_two():
# downsample while constructing database
ss = sourmash.load_one_signature(utils.get_test_data('47.fa.sig'),
Expand Down

0 comments on commit 8ce3e3d

Please sign in to comment.