-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhashtable.py
215 lines (190 loc) · 8.96 KB
/
hashtable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!python
from linkedlist import LinkedList
class HashTable(object):
def __init__(self, init_size=8):
"""Initialize this hash table with the given initial size."""
self.buckets = [LinkedList() for i in range(init_size)]
self.size = 0 # Number of key-value entries
def __str__(self):
"""Return a formatted string representation of this hash table."""
items = ['{!r}: {!r}'.format(key, val) for key, val in self.items()]
return '{' + ', '.join(items) + '}'
def __repr__(self):
"""Return a string representation of this hash table."""
return 'HashTable({!r})'.format(self.items())
def __len__(self):
return self.size
def _bucket_index(self, key):
"""Return the bucket index where the given key would be stored."""
return hash(key) % len(self.buckets)
def load_factor(self):
"""Return the load factor, the ratio of number of entries to buckets.
Best and worst case running time: O(1) just dividing 2 constants"""
# TODO: Calculate load factor
return float(self.size) / len(self.buckets)
def keys(self):
"""Return a list of all keys in this hash table.
Best and worst case running time: O(n) has to traverse through every element"""
# Collect all keys in each of the buckets
all_keys = []
for bucket in self.buckets:
for key, _ in bucket.items():
all_keys.append(key)
return all_keys
def values(self):
"""Return a list of all values in this hash table.
Best and worst case running time: O(n) traverses through every element"""
# Collect all values in each of the buckets
all_values = []
for bucket in self.buckets:
for _, value in bucket.items():
all_values.append(value)
return all_values
def items(self):
"""Return a list of all entries (key-value pairs) in this hash table.
Best and worst case running time: O(n) has to traverse over every element"""
# Collect all pairs of key-value entries in each of the buckets
all_items = []
for bucket in self.buckets:
all_items.extend(bucket.items())
return all_items
def length(self):
"""Return the number of key-value entries by traversing its buckets.
Best and worst case running time: O(1) is the best way if you have a property to call,
O(n) worst case because it has to traverse through every bucket once"""
# Count number of key-value entries in each of the buckets
# hashTable_size = 0
# for bucket in self.buckets:
# hashTable_size += bucket.length()
# return hashTable_size
return self.size
def contains(self, key):
"""Return True if this hash table contains the given key, or False.
Best case running time: O(1) key is the head in the bucket
Worst case running time: O(n/k) n is number of items and k is number of buckets
average number of items per bucket so item is tail"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Check if an entry with the given key exists in that bucket
entry = bucket.find(lambda key_value: key_value[0] == key)
return entry is not None # True or False
def get(self, key):
"""Return the value associated with the given key, or raise KeyError.
Best case running time: O(1) item is head in LL
Worst case running time: O(n/k) n is number of items and k is number of buckets
average number of items per bucket so item is tail"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Find the entry with the given key in that bucket, if one exists
entry = bucket.find(lambda key_value: key_value[0] == key)
if entry is not None: # Found
# Return the given key's associated value
assert isinstance(entry, tuple)
assert len(entry) == 2
return entry[1]
else: # Not found
raise KeyError('Key not found: {}'.format(key))
def set(self, key, value):
"""Insert or update the given key with its associated value.
Best case running time: O(1) item is head in LL
Worst case running time: O(n/k) n is number of items and k is number of buckets
average number of items per bucket so item is tail"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Find the entry with the given key in that bucket, if one exists
# Check if an entry with the given key exists in that bucket
entry = bucket.find(lambda key_value: key_value[0] == key)
if entry is not None: # Found
# In this case, the given key's value is being updated
# Remove the old key-value entry from the bucket first
bucket.delete(entry)
self.size -= 1
# Insert the new key-value entry into the bucket in either case
bucket.append((key, value))
self.size += 1
# TODO: Check if the load factor exceeds a threshold such as 0.75
if self.load_factor() > 0.75:
# TODO: If so, automatically resize to reduce the load factor
self._resize()
def delete(self, key):
"""Delete the given key and its associated value, or raise KeyError.
Best case running time: O(1)
Worst case running time: O(n)"""
# Find the bucket the given key belongs in
index = self._bucket_index(key)
bucket = self.buckets[index]
# Find the entry with the given key in that bucket, if one exists
entry = bucket.find(lambda key_value: key_value[0] == key)
if entry is not None: # Found
# Remove the key-value entry from the bucket
bucket.delete(entry)
self.size -= 1
else: # Not found
raise KeyError('Key not found: {}'.format(key))
def _resize(self, new_size=None):
"""Resize this hash table's buckets and rehash all key-value entries.
Should be called automatically when load factor exceeds a threshold
such as 0.75 after an insertion (when set is called with a new key).
Best and worst case running time: O(n) because regardless we need to iter through the current size of the table
Best and worst case space usage: O(n/k) where n is the items and k is the buckets"""
# If unspecified, choose new size dynamically based on current size
if new_size is None:
new_size = len(self.buckets) * 2 # Double size
# Option to reduce size if buckets are sparsely filled (low load factor)
elif new_size is 0:
new_size = len(self.buckets) / 2 # Half size
# TODO: Get a list to temporarily hold all current key-value entries
temp_list = self.items()
# TODO: Create a new list of new_size total empty linked list buckets
new_size = int(new_size)
# TODO: Insert each key-value entry into the new list of buckets,
# which will rehash them into a new bucket index based on the new size
self.__init__(new_size)
for key, value in temp_list: # this breaks up the properties of the item "deconstructor"
self.set(key, value)
def test_hash_table():
ht = HashTable(4)
print('HashTable: ' + str(ht))
print('Setting entries:')
ht.set('I', 1)
print('set(I, 1): ' + str(ht))
ht.set('V', 5)
print('set(V, 5): ' + str(ht))
print('size: ' + str(ht.size))
print('length: ' + str(ht.length()))
print('buckets: ' + str(len(ht.buckets)))
print('load_factor: ' + str(ht.load_factor()))
ht.set('X', 10)
print('set(X, 10): ' + str(ht))
ht.set('L', 50) # Should trigger resize
print('set(L, 50): ' + str(ht))
print('size: ' + str(ht.size))
print('length: ' + str(ht.length()))
print('buckets: ' + str(len(ht.buckets)))
print('load_factor: ' + str(ht.load_factor()))
print('Getting entries:')
print('get(I): ' + str(ht.get('I')))
print('get(V): ' + str(ht.get('V')))
print('get(X): ' + str(ht.get('X')))
print('get(L): ' + str(ht.get('L')))
print('contains(X): ' + str(ht.contains('X')))
print('contains(Z): ' + str(ht.contains('Z')))
print('Deleting entries:')
ht.delete('I')
print('delete(I): ' + str(ht))
ht.delete('V')
print('delete(V): ' + str(ht))
ht.delete('X')
print('delete(X): ' + str(ht))
ht.delete('L')
print('delete(L): ' + str(ht))
print('contains(X): ' + str(ht.contains('X')))
print('size: ' + str(ht.size))
print('length: ' + str(ht.length()))
print('buckets: ' + str(len(ht.buckets)))
print('load_factor: ' + str(ht.load_factor()))
if __name__ == '__main__':
test_hash_table()