From f02f7b29cbdede1551f819b5bc7f62f63de67808 Mon Sep 17 00:00:00 2001 From: njgheorghita Date: Fri, 8 Sep 2017 15:09:04 -0600 Subject: [PATCH 1/4] Add binary search to get bucket for node --- evm/p2p/kademlia.py | 30 ++++++++++++++++++++++++------ evm/p2p/test_kademlia.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/evm/p2p/kademlia.py b/evm/p2p/kademlia.py index aff132499b..e2a5bdadd5 100644 --- a/evm/p2p/kademlia.py +++ b/evm/p2p/kademlia.py @@ -1,6 +1,7 @@ import asyncio import ipaddress import logging +import bisect import operator import random import struct @@ -106,6 +107,7 @@ def __hash__(self): return hash(self.pubkey) +@total_ordering class KBucket: """A bucket of nodes whose IDs fall between the bucket's start and end. @@ -189,6 +191,11 @@ def __contains__(self, node): def __len__(self): return len(self.nodes) + def __le__(self, other): + if not isinstance(other, self.__class__): + return super(KBucket, self).__le__(other) + return self.end <= other.end + class RoutingTable: @@ -212,12 +219,12 @@ def not_full_buckets(self): return [b for b in self.buckets if not b.is_full] def remove_node(self, node): - self.get_bucket_for_node(node).remove_node(node) + binary_get_bucket_for_node(self.buckets, node) def add_node(self, node): if node == self.this_node: raise ValueError("Cannot add this_node to routing table") - bucket = self.get_bucket_for_node(node) + bucket = binary_get_bucket_for_node(self.buckets, node) eviction_candidate = bucket.add(node) if eviction_candidate is not None: # bucket is full # Split if the bucket has the local node in its range or if the depth is not congruent @@ -231,10 +238,7 @@ def add_node(self, node): return None # successfully added to not full bucket def get_bucket_for_node(self, node): - for bucket in self.buckets: - if node.id < bucket.end: - return bucket - raise ValueError("No bucket found for node with id {}".format(node.id)) + return binary_get_bucket_for_node(self.buckets, node) def buckets_by_distance_to(self, id): return sorted(self.buckets, key=operator.methodcaller('distance_to', id)) @@ -264,6 +268,20 @@ def neighbours(self, node_id, k=k_bucket_size): return sort_by_distance(nodes, node_id)[:k] +def binary_get_bucket_for_node(buckets, node): + """Return the bucket for a given node.""" + sorted_buckets = sorted(buckets) + bucket_ends = list(KBucket.end for KBucket in sorted_buckets) + bucket_position = bisect.bisect_left(bucket_ends, node.id) + # Prevents edge cases where bisect_left returns an out of range index + if bucket_position >= len(buckets): + raise ValueError("No bucket found for node with id {}".format(node.id)) + bucket = sorted_buckets[bucket_position] + if not bucket.start <= node.id <= bucket.end: + raise ValueError("No bucket found for node with id {}".format(node.id)) + return bucket + + class KademliaProtocol: logger = logging.getLogger("evm.p2p.discovery.KademliaProtocol") diff --git a/evm/p2p/test_kademlia.py b/evm/p2p/test_kademlia.py index 370de5a7d4..41a0a487d0 100644 --- a/evm/p2p/test_kademlia.py +++ b/evm/p2p/test_kademlia.py @@ -263,6 +263,39 @@ def test_kbucket_split(): assert len(bucket2) == bucket.k / 2 +def test_bucket_ordering(): + first = kademlia.KBucket(51,100) + second = kademlia.KBucket(0, 50) + assert first > second + + +@pytest.mark.parametrize( + "bucket_list, node_id, correct", + ( + (list([]), 5, None), + # test for node.id < bucket.end + (list([kademlia.KBucket(0, 4)]), 5, None), + # test for node.id > bucket.start + (list([kademlia.KBucket(6, 10)]), 5, None), + # test multiple buckets that don't contain node.id + (list([kademlia.KBucket(0, 1), kademlia.KBucket(50, 100), kademlia.KBucket(6, 49)]), 5, None), + # test single bucket + (list([kademlia.KBucket(0, 100)]), 5, 0), + (list([kademlia.KBucket(50, 100), kademlia.KBucket(0, 49)]), 5, 1), + # test multiple unordered buckets + (list([kademlia.KBucket(50, 100), kademlia.KBucket(0, 1), kademlia.KBucket(2, 5), kademlia.KBucket(6, 49) ]), 5, 2), + ) +) +def test_binary_get_bucket_for_node(bucket_list, node_id, correct): + node = random_node() + node.id = node_id + if correct is None: + with pytest.raises(ValueError): + kademlia.binary_get_bucket_for_node(bucket_list, node) + else: + assert kademlia.binary_get_bucket_for_node(bucket_list, node) == bucket_list[correct] + + def test_compute_shared_prefix_bits(): # When we have less than 2 nodes, the depth is k_id_size. nodes = [random_node()] From 04da8147a2e1c468a599518e257a3832a3a68567 Mon Sep 17 00:00:00 2001 From: njgheorghita Date: Sat, 9 Sep 2017 09:23:12 -0600 Subject: [PATCH 2/4] Refactoring and pr fixes --- evm/p2p/kademlia.py | 8 +++++--- evm/p2p/test_kademlia.py | 26 +++++++++++++++++++++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/evm/p2p/kademlia.py b/evm/p2p/kademlia.py index e2a5bdadd5..fadfe48503 100644 --- a/evm/p2p/kademlia.py +++ b/evm/p2p/kademlia.py @@ -193,7 +193,7 @@ def __len__(self): def __le__(self, other): if not isinstance(other, self.__class__): - return super(KBucket, self).__le__(other) + raise TypeError("Cannot compare KBucket with type {}.".format(other.__class__)) return self.end <= other.end @@ -271,10 +271,12 @@ def neighbours(self, node_id, k=k_bucket_size): def binary_get_bucket_for_node(buckets, node): """Return the bucket for a given node.""" sorted_buckets = sorted(buckets) - bucket_ends = list(KBucket.end for KBucket in sorted_buckets) + bucket_ends = [bucket.end for bucket in sorted_buckets] bucket_position = bisect.bisect_left(bucket_ends, node.id) # Prevents edge cases where bisect_left returns an out of range index - if bucket_position >= len(buckets): + try: + bucket = sorted_buckets[bucket_position] + except IndexError: raise ValueError("No bucket found for node with id {}".format(node.id)) bucket = sorted_buckets[bucket_position] if not bucket.start <= node.id <= bucket.end: diff --git a/evm/p2p/test_kademlia.py b/evm/p2p/test_kademlia.py index 41a0a487d0..2fe835f460 100644 --- a/evm/p2p/test_kademlia.py +++ b/evm/p2p/test_kademlia.py @@ -264,13 +264,16 @@ def test_kbucket_split(): def test_bucket_ordering(): - first = kademlia.KBucket(51,100) + first = kademlia.KBucket(51, 100) second = kademlia.KBucket(0, 50) + third = random_node() assert first > second - + with pytest.raises(TypeError): + first > third + @pytest.mark.parametrize( - "bucket_list, node_id, correct", + "bucket_list, node_id, correct", ( (list([]), 5, None), # test for node.id < bucket.end @@ -278,12 +281,25 @@ def test_bucket_ordering(): # test for node.id > bucket.start (list([kademlia.KBucket(6, 10)]), 5, None), # test multiple buckets that don't contain node.id - (list([kademlia.KBucket(0, 1), kademlia.KBucket(50, 100), kademlia.KBucket(6, 49)]), 5, None), + (list( + [ + kademlia.KBucket(0, 1), + kademlia.KBucket(50, 100), + kademlia.KBucket(6, 49) + ] + ), 5, None), # test single bucket (list([kademlia.KBucket(0, 100)]), 5, 0), (list([kademlia.KBucket(50, 100), kademlia.KBucket(0, 49)]), 5, 1), # test multiple unordered buckets - (list([kademlia.KBucket(50, 100), kademlia.KBucket(0, 1), kademlia.KBucket(2, 5), kademlia.KBucket(6, 49) ]), 5, 2), + (list( + [ + kademlia.KBucket(50, 100), + kademlia.KBucket(0, 1), + kademlia.KBucket(2, 5), + kademlia.KBucket(6, 49) + ] + ), 5, 2), ) ) def test_binary_get_bucket_for_node(bucket_list, node_id, correct): From bcd62205ad680ccca789928c60857f367c1db076 Mon Sep 17 00:00:00 2001 From: njgheorghita Date: Mon, 11 Sep 2017 14:39:57 -0600 Subject: [PATCH 3/4] PR fixes --- evm/p2p/kademlia.py | 14 ++++++----- evm/p2p/test_kademlia.py | 52 +++++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/evm/p2p/kademlia.py b/evm/p2p/kademlia.py index fadfe48503..9f3eba8661 100644 --- a/evm/p2p/kademlia.py +++ b/evm/p2p/kademlia.py @@ -191,10 +191,13 @@ def __contains__(self, node): def __len__(self): return len(self.nodes) - def __le__(self, other): + def __lt__(self, other): if not isinstance(other, self.__class__): raise TypeError("Cannot compare KBucket with type {}.".format(other.__class__)) - return self.end <= other.end + # Check for invalid state of KBuckets + if not self.end < other.start: + raise ValueError("Invalid Buckets.") + return self.end < other.start class RoutingTable: @@ -270,15 +273,14 @@ def neighbours(self, node_id, k=k_bucket_size): def binary_get_bucket_for_node(buckets, node): """Return the bucket for a given node.""" - sorted_buckets = sorted(buckets) - bucket_ends = [bucket.end for bucket in sorted_buckets] + bucket_ends = [bucket.end for bucket in buckets] bucket_position = bisect.bisect_left(bucket_ends, node.id) # Prevents edge cases where bisect_left returns an out of range index try: - bucket = sorted_buckets[bucket_position] + bucket = buckets[bucket_position] except IndexError: raise ValueError("No bucket found for node with id {}".format(node.id)) - bucket = sorted_buckets[bucket_position] + bucket = buckets[bucket_position] if not bucket.start <= node.id <= bucket.end: raise ValueError("No bucket found for node with id {}".format(node.id)) return bucket diff --git a/evm/p2p/test_kademlia.py b/evm/p2p/test_kademlia.py index 2fe835f460..15674e2fa8 100644 --- a/evm/p2p/test_kademlia.py +++ b/evm/p2p/test_kademlia.py @@ -264,52 +264,56 @@ def test_kbucket_split(): def test_bucket_ordering(): - first = kademlia.KBucket(51, 100) - second = kademlia.KBucket(0, 50) + first = kademlia.KBucket(0, 50) + second = kademlia.KBucket(51, 100) third = random_node() - assert first > second + assert first < second with pytest.raises(TypeError): first > third @pytest.mark.parametrize( - "bucket_list, node_id, correct", + "bucket_list, node_id", ( - (list([]), 5, None), + (list([]), 5), # test for node.id < bucket.end - (list([kademlia.KBucket(0, 4)]), 5, None), + (list([kademlia.KBucket(0, 4)]), 5), # test for node.id > bucket.start - (list([kademlia.KBucket(6, 10)]), 5, None), + (list([kademlia.KBucket(6, 10)]), 5), # test multiple buckets that don't contain node.id (list( [ - kademlia.KBucket(0, 1), + kademlia.KBucket(1, 5), + kademlia.KBucket(6, 49), kademlia.KBucket(50, 100), - kademlia.KBucket(6, 49) ] - ), 5, None), - # test single bucket + ), 0), + ) +) +def test_binary_get_bucket_for_node_error(bucket_list, node_id): + node = random_node(nodeid=node_id) + with pytest.raises(ValueError): + kademlia.binary_get_bucket_for_node(bucket_list, node) + + +@pytest.mark.parametrize( + "bucket_list, node_id, correct_position", + ( (list([kademlia.KBucket(0, 100)]), 5, 0), - (list([kademlia.KBucket(50, 100), kademlia.KBucket(0, 49)]), 5, 1), - # test multiple unordered buckets + (list([kademlia.KBucket(0, 49), kademlia.KBucket(50, 100)]), 5, 0), (list( [ - kademlia.KBucket(50, 100), kademlia.KBucket(0, 1), kademlia.KBucket(2, 5), - kademlia.KBucket(6, 49) + kademlia.KBucket(6, 49), + kademlia.KBucket(50, 100) ] - ), 5, 2), + ), 5, 1), ) ) -def test_binary_get_bucket_for_node(bucket_list, node_id, correct): - node = random_node() - node.id = node_id - if correct is None: - with pytest.raises(ValueError): - kademlia.binary_get_bucket_for_node(bucket_list, node) - else: - assert kademlia.binary_get_bucket_for_node(bucket_list, node) == bucket_list[correct] +def test_binary_get_bucket_for_node(bucket_list, node_id, correct_position): + node = random_node(nodeid=node_id) + assert kademlia.binary_get_bucket_for_node(bucket_list, node) == bucket_list[correct_position] def test_compute_shared_prefix_bits(): From 5d7cb25517159975cce468559781755548ca9b2a Mon Sep 17 00:00:00 2001 From: njgheorghita Date: Tue, 12 Sep 2017 15:41:36 -0600 Subject: [PATCH 4/4] Refactor & remove le comparison --- evm/p2p/kademlia.py | 13 ++++--------- evm/p2p/test_kademlia.py | 2 +- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/evm/p2p/kademlia.py b/evm/p2p/kademlia.py index 9f3eba8661..2a9cf9e137 100644 --- a/evm/p2p/kademlia.py +++ b/evm/p2p/kademlia.py @@ -194,9 +194,6 @@ def __len__(self): def __lt__(self, other): if not isinstance(other, self.__class__): raise TypeError("Cannot compare KBucket with type {}.".format(other.__class__)) - # Check for invalid state of KBuckets - if not self.end < other.start: - raise ValueError("Invalid Buckets.") return self.end < other.start @@ -272,18 +269,16 @@ def neighbours(self, node_id, k=k_bucket_size): def binary_get_bucket_for_node(buckets, node): - """Return the bucket for a given node.""" + """Given a list of ordered buckets, returns the bucket for a given node.""" bucket_ends = [bucket.end for bucket in buckets] bucket_position = bisect.bisect_left(bucket_ends, node.id) # Prevents edge cases where bisect_left returns an out of range index try: bucket = buckets[bucket_position] - except IndexError: + assert bucket.start <= node.id <= bucket.end + return bucket + except (IndexError, AssertionError): raise ValueError("No bucket found for node with id {}".format(node.id)) - bucket = buckets[bucket_position] - if not bucket.start <= node.id <= bucket.end: - raise ValueError("No bucket found for node with id {}".format(node.id)) - return bucket class KademliaProtocol: diff --git a/evm/p2p/test_kademlia.py b/evm/p2p/test_kademlia.py index 15674e2fa8..b622c0ad1b 100644 --- a/evm/p2p/test_kademlia.py +++ b/evm/p2p/test_kademlia.py @@ -269,7 +269,7 @@ def test_bucket_ordering(): third = random_node() assert first < second with pytest.raises(TypeError): - first > third + first > third @pytest.mark.parametrize(