Skip to content

Commit e4c05f8

Browse files
committed
[KIP-54] Remove extra dependency on sortedcontainers
1 parent 53cc810 commit e4c05f8

File tree

3 files changed

+82
-24
lines changed

3 files changed

+82
-24
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
class SortedSet:
    """A set that exposes its smallest and largest element by a sort key.

    Elements live in a plain ``set``; ordering is computed on demand. The
    results of ``first()`` and ``last()`` are cached and kept up to date by
    ``add()``, ``remove()`` and ``pop_last()``.

    ``None`` is used internally as the "nothing cached" marker, so a cached
    extreme that is ``None`` is simply recomputed on each call.
    """

    def __init__(self, iterable=None, key=None):
        """Create the set.

        Arguments:
            iterable: optional initial elements (must be hashable).
            key: optional one-argument function mapping an element to the
                value it is ordered by; defaults to the element itself.
        """
        self._key = key if key is not None else lambda x: x
        self._set = set(iterable) if iterable is not None else set()

        self._cached_last = None
        self._cached_first = None

    def first(self):
        """Return the element with the smallest key, or None if empty."""
        if self._cached_first is not None:
            return self._cached_first

        if not self._set:
            return None
        # min() keeps the first element seen among equal keys, which matches
        # a strict "replace only if smaller" scan over the same iteration.
        self._cached_first = min(self._set, key=self._key)
        return self._cached_first

    def last(self):
        """Return the element with the largest key, or None if empty."""
        if self._cached_last is not None:
            return self._cached_last

        if not self._set:
            return None
        # max() keeps the first element seen among equal keys, which matches
        # a strict "replace only if larger" scan over the same iteration.
        self._cached_last = max(self._set, key=self._key)
        return self._cached_last

    def pop_last(self):
        """Remove and return the element with the largest key.

        Raises:
            KeyError: if the set is empty.
        """
        if not self._set:
            raise KeyError('pop_last from an empty SortedSet')
        value = self.last()
        # remove() invalidates BOTH caches when they hold the removed value.
        # Clearing only the "last" cache would leave first() returning an
        # element that is no longer in the set once the popped element was
        # also the minimum (e.g. a single-element set).
        self.remove(value)
        return value

    def add(self, value):
        """Insert value, updating the cached extremes if it extends them."""
        key = self._key
        # Decide on cache updates first, but apply them only after the
        # insertion succeeded, so a failing set.add() cannot leave a cache
        # pointing at a value that is not in the set.
        is_new_last = (self._cached_last is not None
                       and key(value) > key(self._cached_last))
        is_new_first = (self._cached_first is not None
                        and key(value) < key(self._cached_first))

        self._set.add(value)

        if is_new_last:
            self._cached_last = value
        if is_new_first:
            self._cached_first = value

    def remove(self, value):
        """Remove value from the set.

        Raises:
            KeyError: if value is not in the set (caches are left untouched).
        """
        self._set.remove(value)

        # A cached extreme equal to the removed value is stale; drop it so
        # the next first()/last() call recomputes it.
        if self._cached_last is not None and self._cached_last == value:
            self._cached_last = None
        if self._cached_first is not None and self._cached_first == value:
            self._cached_first = None

    def __contains__(self, value):
        return value in self._set

    def __iter__(self):
        """Iterate over a snapshot of the elements in ascending key order."""
        return iter(sorted(self._set, key=self._key))

    def _bool(self):
        """Return True when the set holds at least one element."""
        return bool(self._set)

    # Python 2 looks up __nonzero__, Python 3 looks up __bool__.
    __nonzero__ = _bool
    __bool__ = _bool

kafka/coordinator/assignors/sticky/sticky_assignor.py

+19-23
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@
22
from collections import defaultdict, namedtuple
33
from copy import deepcopy
44

5-
from sortedcontainers import SortedSet, SortedDict, SortedList
6-
75
from kafka.cluster import ClusterMetadata
86
from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor
97
from kafka.coordinator.assignors.sticky.partition_movements import PartitionMovements
8+
from kafka.coordinator.assignors.sticky.sorted_set import SortedSet
109
from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment
1110
from kafka.coordinator.protocol import Schema
1211
from kafka.protocol.struct import Struct
@@ -82,7 +81,7 @@ def __init__(self, cluster, members):
8281
# a mapping of all consumers to all potential topic partitions that can be assigned to them
8382
self.consumer_to_all_potential_partitions = {}
8483
# an ascending sorted set of consumers based on how many topic partitions are already assigned to them
85-
self.sorted_current_subscriptions = set()
84+
self.sorted_current_subscriptions = SortedSet()
8685
# an ascending sorted list of topic partitions based on how many consumers can potentially use them
8786
self.sorted_partitions = []
8887
# all partitions that need to be assigned
@@ -154,9 +153,10 @@ def balance(self):
154153
self._add_consumer_to_current_subscriptions_and_maintain_order(consumer)
155154

156155
def get_final_assignment(self, member_id):
157-
assignment = defaultdict(lambda: SortedList())
156+
assignment = defaultdict(list)
158157
for topic_partition in self.current_assignment[member_id]:
159-
assignment[topic_partition.topic].add(topic_partition.partition)
158+
assignment[topic_partition.topic].append(topic_partition.partition)
159+
assignment = {k: sorted(v) for k, v in six.iteritems(assignment)}
160160
return six.viewitems(assignment)
161161

162162
def _initialize(self, cluster):
@@ -188,7 +188,7 @@ def _init_current_assignments(self, members):
188188
# higher generations overwrite lower generations in case of a conflict
189189
# note that a conflict could exist only if user data is for different generations
190190

191-
# for each partition we create a sorted map of its consumers by generation
191+
# for each partition we create a map of its consumers by generation
192192
sorted_partition_consumers_by_generation = {}
193193
for consumer, member_metadata in six.iteritems(members):
194194
for partitions in member_metadata.partitions:
@@ -204,14 +204,13 @@ def _init_current_assignments(self, members):
204204
else:
205205
consumers[member_metadata.generation] = consumer
206206
else:
207-
sorted_consumers = SortedDict()
208-
sorted_consumers[member_metadata.generation] = consumer
207+
sorted_consumers = {member_metadata.generation: consumer}
209208
sorted_partition_consumers_by_generation[partitions] = sorted_consumers
210209

211210
# previous_assignment holds the prior ConsumerGenerationPair (before current) of each partition
212211
# current and previous consumers are the last two consumers of each partition in the above sorted map
213212
for partitions, consumers in six.iteritems(sorted_partition_consumers_by_generation):
214-
generations = list(reversed(consumers.keys()))
213+
generations = sorted(consumers.keys(), reverse=True)
215214
self.current_assignment[consumers[generations[0]]].append(partitions)
216215
# now update previous assignment if any
217216
if len(generations) > 1:
@@ -236,13 +235,10 @@ def _are_subscriptions_identical(self):
236235
return has_identical_list_elements(list(six.itervalues(self.consumer_to_all_potential_partitions)))
237236

238237
def _populate_sorted_partitions(self):
239-
# an ascending sorted set of topic partitions based on how many consumers can potentially use them
240-
sorted_all_partitions = SortedSet(
241-
iterable=[
242-
(tp, tuple(consumers)) for tp, consumers in six.iteritems(self.partition_to_all_potential_consumers)
243-
],
244-
key=partitions_comparator_key,
245-
)
238+
# set of topic partitions with their respective potential consumers
239+
all_partitions = set((tp, tuple(consumers))
240+
for tp, consumers in six.iteritems(self.partition_to_all_potential_consumers))
241+
partitions_sorted_by_num_of_potential_consumers = sorted(all_partitions, key=partitions_comparator_key)
246242

247243
self.sorted_partitions = []
248244
if not self.is_fresh_assignment and self._are_subscriptions_identical():
@@ -266,7 +262,7 @@ def _populate_sorted_partitions(self):
266262
# how many valid partitions are currently assigned to them
267263
while sorted_consumers:
268264
# take the consumer with the most partitions
269-
consumer, _ = sorted_consumers.pop()
265+
consumer, _ = sorted_consumers.pop_last()
270266
# currently assigned partitions to this consumer
271267
remaining_partitions = assignments[consumer]
272268
# from partitions that had a different consumer before,
@@ -284,13 +280,13 @@ def _populate_sorted_partitions(self):
284280
self.sorted_partitions.append(remaining_partitions.pop())
285281
sorted_consumers.add((consumer, tuple(assignments[consumer])))
286282

287-
while sorted_all_partitions:
288-
partition = sorted_all_partitions.pop(0)[0]
283+
while partitions_sorted_by_num_of_potential_consumers:
284+
partition = partitions_sorted_by_num_of_potential_consumers.pop(0)[0]
289285
if partition not in self.sorted_partitions:
290286
self.sorted_partitions.append(partition)
291287
else:
292-
while sorted_all_partitions:
293-
self.sorted_partitions.append(sorted_all_partitions.pop(0)[0])
288+
while partitions_sorted_by_num_of_potential_consumers:
289+
self.sorted_partitions.append(partitions_sorted_by_num_of_potential_consumers.pop(0)[0])
294290

295291
def _populate_partitions_to_reassign(self):
296292
self.unassigned_partitions = deepcopy(self.sorted_partitions)
@@ -334,10 +330,10 @@ def _initialize_current_subscriptions(self):
334330
)
335331

336332
def _get_consumer_with_least_subscriptions(self):
337-
return self.sorted_current_subscriptions[0][0]
333+
return self.sorted_current_subscriptions.first()[0]
338334

339335
def _get_consumer_with_most_subscriptions(self):
340-
return self.sorted_current_subscriptions[-1][0]
336+
return self.sorted_current_subscriptions.last()[0]
341337

342338
def _remove_consumer_from_current_subscriptions_and_maintain_order(self, consumer):
343339
self.sorted_current_subscriptions.remove((consumer, tuple(self.current_assignment[consumer])))

requirements-dev.txt

-1
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,3 @@ pytest-mock==1.10.0
1515
sphinx-rtd-theme==0.2.4
1616
crc32c==1.7
1717
py==1.8.0
18-
sortedcontainers==2.1.0

0 commit comments

Comments
 (0)