Skip to content

Commit 4360912

Browse files
Small update
1 parent 3dbd20e commit 4360912

File tree

2 files changed

+188
-0
lines changed

2 files changed

+188
-0
lines changed

neozcompress.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import struct
2+
from typing import List
3+
4+
def lzw_compress(uncompressed: str, max_table_size: int = 4096) -> List[int]:
    """
    Compress a string to a list of output symbols using the LZW algorithm.

    The table is seeded with the 256 single-character strings ``chr(0)`` to
    ``chr(255)``. Each new sequence met during the scan receives the next free
    code (256, 257, ...) until the table holds ``max_table_size`` entries;
    after that the table is frozen and only existing codes are emitted.

    :param uncompressed: The input string to compress. Every character must
        have a code point in the range 0-255.
    :param max_table_size: Maximum size for the dictionary.
    :return: A list of integer codes representing the compressed data
        (an empty list for empty input).
    :raises ValueError: If the input contains a character whose code point is
        above 255, which the seed table cannot represent.
    """
    if not uncompressed:
        return []

    # Seed the table with every single-character string for code points 0-255.
    dict_size = 256
    dictionary = {chr(i): i for i in range(dict_size)}

    w = ""
    result = []
    for c in uncompressed:
        # Characters outside the seed table would otherwise surface later as
        # an opaque KeyError from a table lookup; reject them explicitly.
        if ord(c) > 255:
            raise ValueError(
                f"Cannot compress character {c!r} (code point {ord(c)}): "
                "only code points 0-255 are supported"
            )

        wc = w + c
        if wc in dictionary:
            # Keep extending the current match.
            w = wc
            continue

        # Longest known match ends here: emit its code.
        result.append(dictionary[w])
        # Only learn the new sequence while the table still has room.
        if dict_size < max_table_size:
            dictionary[wc] = dict_size
            dict_size += 1
        w = c

    # Flush the pending match; input is non-empty here, so w is too.
    if w:
        result.append(dictionary[w])

    return result
39+
40+
41+
def lzw_decompress(compressed: List[int], max_table_size: int = 4096) -> str:
    """
    Decompress a list of output symbols to a string using the LZW algorithm.

    The table is seeded with ``chr(0)`` to ``chr(255)`` and is rebuilt while
    decoding: after every code except the first, the previous entry plus the
    first character of the current entry is appended, until the table holds
    ``max_table_size`` entries.

    :param compressed: The list of integer codes to decompress.
    :param max_table_size: Maximum size for the dictionary. It must match the
        value used when the codes were produced, otherwise the rebuilt table
        diverges from the one the codes refer to.
    :return: The original uncompressed string (empty for empty input).
    :raises ValueError: If a code is negative, refers to a table entry that
        does not exist yet, or uses the "next entry" special case while the
        table is already full.
    """
    if not compressed:
        return ""

    # Seed the table with every single-character string for code points 0-255.
    dict_size = 256
    dictionary = [chr(i) for i in range(dict_size)]

    codes = iter(compressed)

    # The first code must be a seed entry: nothing else has been learned yet.
    first = next(codes)
    if not 0 <= first < dict_size:
        raise ValueError(f"Bad compressed code: {first}")
    w = dictionary[first]
    result = [w]

    for k in codes:
        if 0 <= k < dict_size:
            # Lower bound matters: a negative k would index from the list end.
            entry = dictionary[k]
        elif k == dict_size and dict_size < max_table_size:
            # The code refers to the entry about to be added below, which is
            # always the previous entry plus its own first character.
            entry = w + w[0]
        else:
            raise ValueError(f"Bad compressed code: {k}")

        result.append(entry)
        # Add new entry to the table if the maximum size hasn't been reached.
        if dict_size < max_table_size:
            dictionary.append(w + entry[0])
            dict_size += 1
        w = entry

    return "".join(result)
79+
80+
81+
def lzw_compress_bytes(uncompressed: str, max_table_size: int = 4096) -> bytes:
    """
    Compress a string with LZW and serialize the resulting codes as bytes.

    The codes returned by lzw_compress() are written back to back, each one as
    an unsigned 16-bit integer in big-endian byte order, so the output is two
    bytes per code.

    :param uncompressed: The input string to compress.
    :param max_table_size: Maximum size for the dictionary, forwarded to
        lzw_compress().
    :return: A bytes object containing the packed compressed data.
    """
    code_list = lzw_compress(uncompressed, max_table_size)

    # ">" selects big-endian; "<count>H" packs that many unsigned shorts.
    # Note: the "H" format only accepts values in 0..65535; the original
    # author notes that max_table_size=4096 keeps codes within 12 bits.
    layout = f">{len(code_list)}H"
    return struct.pack(layout, *code_list)
98+
99+
100+
def lzw_decompress_bytes(compressed_bytes: bytes, max_table_size: int = 4096) -> str:
    """
    Decompress a bytes object (produced by lzw_compress_bytes) back to a string.

    The input is read as a sequence of big-endian unsigned 16-bit integers and
    the resulting codes are handed to lzw_decompress().

    :param compressed_bytes: The bytes object containing the packed compressed data.
    :param max_table_size: Maximum size for the dictionary, forwarded to
        lzw_decompress().
    :return: The original uncompressed string.
    """
    # Two bytes per code; the format below covers exactly that many codes.
    code_count = len(compressed_bytes) // 2
    unpacked = struct.unpack(f">{code_count}H", compressed_bytes)

    # struct.unpack yields a tuple; pass a list on to the decompressor.
    return lzw_decompress(list(unpacked), max_table_size)
119+
120+
121+
# Example usage:
122+
if __name__ == "__main__":
    # Demo: round-trip a sample string through both representations.
    sample_text = "TOBEORNOTTOBEORTOBEORNOT"
    print("Original Text: ", sample_text)

    # Integer-code representation.
    code_list = lzw_compress(sample_text)
    print("Compressed Codes:", code_list)

    # Packed-bytes representation.
    packed = lzw_compress_bytes(sample_text)
    print("Compressed Bytes:", packed)

    # Restore the text from the integer codes.
    restored_from_codes = lzw_decompress(code_list)
    print("Decompressed Text (from codes):", restored_from_codes)

    # Restore the text from the packed bytes.
    restored_from_bytes = lzw_decompress_bytes(packed)
    print("Decompressed Text (from bytes):", restored_from_bytes)

zcompress.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
def lzw_compress(uncompressed):
    """Compress a string to a list of output symbols.

    The table starts with the 256 single-character strings chr(0)..chr(255);
    every new sequence met during the scan gets the next free code
    (256, 257, ...). The table grows without limit.

    Returns a list of integer codes (empty for empty input).

    Raises ValueError if the input contains a character whose code point is
    above 255, which the initial table cannot represent.
    """
    # Build the dictionary.
    dict_size = 256
    dictionary = {chr(i): i for i in range(dict_size)}

    w = ""
    result = []
    for c in uncompressed:
        # Characters outside the initial table would otherwise surface later
        # as an opaque KeyError from a table lookup; reject them explicitly.
        if ord(c) > 255:
            raise ValueError(
                'Cannot compress character %r (code point %d): '
                'only code points 0-255 are supported' % (c, ord(c))
            )

        wc = w + c
        if wc in dictionary:
            # Keep extending the current match.
            w = wc
            continue

        # Longest known match ends here: emit it and learn the new sequence.
        result.append(dictionary[w])
        dictionary[wc] = dict_size
        dict_size += 1
        w = c

    # Output the code for the pending match, if any.
    if w:
        result.append(dictionary[w])
    return result
23+
24+
def lzw_decompress(compressed):
    """Decompress a list of output symbols to a string.

    The table starts with chr(0)..chr(255) and is rebuilt while decoding:
    after every code except the first, the previous entry plus the first
    character of the current entry is stored under the next free code.

    The input sequence is only read, never modified. Empty input yields an
    empty string, matching what lzw_compress produces for "".

    Raises ValueError if a code does not refer to an existing table entry
    (or to the entry that is about to be added).
    """
    # Build the dictionary.
    dict_size = 256
    dictionary = {i: chr(i) for i in range(dict_size)}

    # Iterate instead of pop(0) so the caller's list is left untouched.
    codes = iter(compressed)
    try:
        first = next(codes)
    except StopIteration:
        return ''

    # The first code must be an initial entry: nothing else is known yet.
    if first not in dictionary:
        raise ValueError('Bad compressed k: %s' % first)
    w = dictionary[first]

    # Collect the decoded pieces and join them once at the end.
    result = [w]
    for k in codes:
        if k in dictionary:
            entry = dictionary[k]
        elif k == dict_size:
            # The code refers to the entry about to be added below, which is
            # always the previous entry plus its own first character.
            entry = w + w[0]
        else:
            raise ValueError('Bad compressed k: %s' % k)
        result.append(entry)

        # Add w+entry[0] to the dictionary.
        dictionary[dict_size] = w + entry[0]
        dict_size += 1

        w = entry
    return ''.join(result)

0 commit comments

Comments
 (0)