-
Notifications
You must be signed in to change notification settings - Fork 25
/
huffman.py
119 lines (89 loc) · 3.3 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import heapq
from functools import total_ordering
"""
Code for Huffman Coding, compression and decompression.
Explanation at http://bhrigu.me/blog/2017/01/17/huffman-coding-python-implementation/
Adapted from https://github.com/bhrigu123/huffman-coding
"""
@total_ordering
class HeapNode:
def __init__(self, token, freq):
self.token = token
self.freq = freq
self.left = None
self.right = None
# defining comparators less_than and equals
def __lt__(self, other):
return self.freq < other.freq
def __eq__(self, other):
if(other == None):
return False
if(not isinstance(other, HeapNode)):
return False
return self.freq == other.freq
class HuffmanCoding:
def __init__(self):
self.heap = []
self.codes = {}
self.reverse_mapping = {}
# functions for compression:
def make_heap(self, frequency):
for key in frequency:
node = HeapNode(key, frequency[key])
heapq.heappush(self.heap, node)
def make_heap_from_array(self, freqs):
for index in range(len(freqs)):
node = HeapNode(index, freqs[index])
heapq.heappush(self.heap, node)
def merge_nodes(self):
while(len(self.heap)>1):
node1 = heapq.heappop(self.heap)
node2 = heapq.heappop(self.heap)
merged = HeapNode(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(self.heap, merged)
def make_codes_helper(self, root, current_code):
if(root == None):
return
if(root.token != None):
self.codes[root.token] = current_code
self.reverse_mapping[current_code] = root.token
return
self.make_codes_helper(root.left, current_code + "0")
self.make_codes_helper(root.right, current_code + "1")
def make_codes(self):
root = heapq.heappop(self.heap)
current_code = ""
self.make_codes_helper(root, current_code)
return root
def get_encoded_tokens(self, token_list):
encoded_text = ""
for token in token_list:
encoded_text += self.codes[token]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if(current_code in self.reverse_mapping):
character = self.reverse_mapping[current_code]
decoded_text += character
current_code = ""
return decoded_text
def decompress(self, input_path):
filename, file_extension = os.path.splitext(self.path)
output_path = filename + "_decompressed" + ".txt"
with open(input_path, 'rb') as file, open(output_path, 'w') as output:
bit_string = ""
byte = file.read(1)
while(len(byte) > 0):
byte = ord(byte)
bits = bin(byte)[2:].rjust(8, '0')
bit_string += bits
byte = file.read(1)
encoded_text = self.remove_padding(bit_string)
decompressed_text = self.decode_text(encoded_text)
output.write(decompressed_text)
return output_path