Skip to content

Commit

Permalink
Bugfix: MAX_CLUSTERS capping
Browse files Browse the repository at this point in the history
When MAX_CLUSTERS is reached, use that number of clusters in the output,
rather than putting each sequence in its own cluster.
  • Loading branch information
bricoletc committed Oct 26, 2020
1 parent f99c35d commit a496215
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
4 changes: 2 additions & 2 deletions make_prg/from_msa/cluster_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ def kmeans_cluster_seqs_in_interval(

while cluster_further(seqclustering):
num_clusters += 1
if num_clusters >= MAX_CLUSTERS:
num_clusters = num_sequences
if num_clusters > MAX_CLUSTERS:
break
if num_clusters == num_sequences:
break
start = time.time()
Expand Down
3 changes: 2 additions & 1 deletion tests/from_msa/test_cluster_sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
get_distances,
DISTANCE_THRESHOLD,
LENGTH_THRESHOLD,
MAX_CLUSTERS,
get_one_ref_like_threshold_distance,
sequences_are_one_reference_like,
cluster_further,
Expand Down Expand Up @@ -388,7 +389,7 @@ def test_GivenManyVeryDifferentSequences_EachSeqInOwnCluster(self):
all_5mers = list(map("".join, product(standard_bases, repeat=5)))
alignment = make_alignment(all_5mers)
result = kmeans_cluster_seqs_in_interval([0, 5], alignment, 5)
self.assertEqual(len(result), len(all_5mers))
self.assertEqual(len(result), MAX_CLUSTERS)


class TestMergeClusters(TestCase):
Expand Down

0 comments on commit a496215

Please sign in to comment.