-
Notifications
You must be signed in to change notification settings - Fork 18
/
STIL-convert.py
284 lines (228 loc) · 8.16 KB
/
STIL-convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# https://hvsc.brona.dk/HVSC/C64Music/DOCUMENTS/STIL.txt
# TODO: Check /MUSICIANS/H/Hubbard_Rob/Delta.sid
# TODO: skip "folder items" like /MUSICIANS/H/Hubbard_Rob/
# TODO: check /MUSICIANS/Z/Zzap69/Jullov.sid
# has two TITLE and ARTIST entries
# TODO: Chris Huelsbeck entries do not work due to the umlaut u (ü) in the name
from struct import *
import os
import textwrap
import re
import zlib
# Huffman tree implementation adapted from an example found online
class NodeTree(object):
    """Internal node of a binary tree with a left and a right child.

    A child is whatever the caller stores in it: another ``NodeTree`` or a
    leaf value.
    """

    def __init__(self, left=None, right=None):
        self.left = left
        self.right = right

    def children(self):
        """Return the ``(left, right)`` children as a tuple."""
        return self.left, self.right

    def __str__(self):
        # __str__ has to return a str; handing back the children tuple
        # makes str(node) raise TypeError.
        return f"{self.left}_{self.right}"
def huffman_code_tree(node, binString=''):
    """Build the symbol -> bit-string table for a Huffman tree.

    :param node: a leaf (``str``) or an object whose ``children()`` returns
        its ``(left, right)`` subtrees
    :param binString: bits accumulated on the way down from the root
    :return: dict mapping every leaf symbol to its code ('0' = left,
        '1' = right)
    """
    if isinstance(node, str):
        # A tree made of one single leaf would otherwise receive the empty
        # code, which occupies zero bits and cannot be decoded.
        return {node: binString or '0'}
    left, right = node.children()
    codes = huffman_code_tree(left, binString + '0')
    codes.update(huffman_code_tree(right, binString + '1'))
    return codes
def make_tree(nodes):
    """Combine ``(key, count)`` pairs into a single tree.

    :param nodes: list of ``(key, count)`` tuples, ordered by descending count
    :return: the key of the one remaining pair, i.e. the root of the tree
    """
    print("make_tree " + str(nodes))
    while len(nodes) > 1:
        # The two entries at the tail of the list are merged first.
        (last_key, last_count), (prev_key, prev_count) = nodes[-1], nodes[-2]
        merged = (NodeTree(last_key, prev_key), last_count + prev_count)
        # Slicing copies, so the caller's list is never modified.
        nodes = nodes[:-2]
        nodes.append(merged)
        nodes.sort(key=lambda entry: entry[1], reverse=True)
    return nodes[0][0]
def huffmanEncode(stil):
    """Print a Huffman code table for the STIL text and its encoded size.

    Counts every character in all ``stil[path]["lines"]``, builds the code
    table from those counts, prints one ``symbol : code`` line per symbol,
    then prints the total character count and the encoded size in bytes.
    Nothing is returned and ``stil`` is not modified.
    """
    freq = {}
    for entry in stil.values():
        for text in entry["lines"]:
            for symbol in text:
                freq[symbol] = freq.get(symbol, 0) + 1

    ranked = sorted(freq.items(), key=lambda pair: pair[1], reverse=True)
    encoding = huffman_code_tree(make_tree(ranked))
    for symbol, code in encoding.items():
        print(f'{symbol} : {code}')

    charCount = 0
    bitCount = 0
    for entry in stil.values():
        for text in entry["lines"]:
            charCount += len(text)
            bitCount += sum(len(encoding[symbol]) for symbol in text)

    print("CharCount " + str(charCount))
    print("HuffCount " + str(bitCount/8))
def dictionaryCompress(stil):
    """Replace the most frequent words in the STIL text by their rank.

    Words (runs of word characters and ':', plus one optional trailing
    whitespace character) longer than three characters are counted over all
    entries.  The 127 most frequent ones are printed and then substituted,
    in rank order, by their 1-based rank written as a decimal string.
    ``stil[path]["lines"]`` is replaced in place for every entry.
    """
    counts = {}
    for entry in stil.values():
        joined = " ".join(entry["lines"])
        for token in re.findall(r'[\w:]+\s?', joined):
            if len(token) > 3:
                counts[token] = counts.get(token, 0) + 1

    # Set dictionary size
    ranked = sorted(counts.items(), reverse=True, key=lambda item: item[1])
    sortedWords = ranked[0:127]
    print(sortedWords)

    for rank, (token, _) in enumerate(sortedWords, start=1):
        code = str(rank)
        for entry in stil.values():
            entry["lines"] = [text.replace(token, code) for text in entry["lines"]]
def fnv1(data: bytearray):
    """Return the 32-bit FNV-1 hash of ``data``.

    For every byte the running value is multiplied by the prime (truncated
    to 32 bits) and then XORed with the byte.  Empty input yields the
    initial value unchanged.
    """
    hval = 0x811c9dc5
    for byte in data:
        hval = ((hval * 0x01000193) & 0xffffffff) ^ byte
    return hval
def fnvHash(stil):
    """Store the hash of every path in ``stil`` and report collisions.

    The hash is ``fnv1`` over the upper-cased path encoded as ISO-8859-1 and
    is written to ``stil[path]["hash"]``.  For each path whose hash was
    already produced by an earlier path, "Collision!" and the path are
    printed.
    """
    for name, entry in stil.items():
        entry["hash"] = fnv1(name.upper().encode("ISO-8859-1"))

    seen = set()
    for name, entry in stil.items():
        digest = entry["hash"]
        if digest in seen:
            print("Collision!")
            print(name)
            continue
        seen.add(digest)
def _parse_stil(raw_lines, wrapper):
    """Parse the lines of STIL.txt into ``{path: {"lines": [...]}}``.

    :param raw_lines: lines as returned by ``readlines()`` (newlines kept)
    :param wrapper: ``textwrap.TextWrapper`` applied to every output line
    :return: dict keyed by entry path with the file extension removed

    Everything before the first line starting with "/" is ignored.  Entries
    whose path ends in "/" (folder items) are parsed but not stored.
    """
    # Field values are sliced from column 9 below, so labels are matched
    # right-aligned in the 8 columns before the separating space and
    # continuation lines are recognised by 9 leading spaces.
    # NOTE(review): confirm this layout against the STIL.txt in use.
    label_width = 8
    value_start = 9
    continuation = " " * value_start
    captions = (
        ("NAME:".rjust(label_width), "Name: "),
        ("AUTHOR:".rjust(label_width), "Author: "),
        ("TITLE:".rjust(label_width), "Title: "),
        ("ARTIST:".rjust(label_width), "Artist: "),
    )

    stil = {}
    path = None  # None until the first entry header has been seen
    lines = []
    comment = ""
    total = len(raw_lines)

    for idx, raw in enumerate(raw_lines):
        # Start of STIL block
        if raw.startswith("/"):
            # Skip folder items, not supporting those at the moment
            if path is not None and not path.endswith("/"):
                stil[path] = {"lines": lines}
            # Strip line change and extension
            path = os.path.splitext(raw.strip())[0]
            lines = []
            comment = ""
            continue
        if path is None:
            continue

        line = raw.rstrip()
        # An empty string stands in for "end of file" when peeking ahead.
        following = raw_lines[idx + 1] if idx + 1 < total else ""

        # Output fields as wrapped lines
        if line[0:2] == "(#":
            songNumber = line.strip("(#)")
            lines.append("")  # empty line before the song header
            lines += wrapper.wrap("*** Song " + songNumber + " ***")
        for prefix, caption in captions:
            if line.startswith(prefix):
                lines += wrapper.wrap(caption + line[value_start:])

        starts_comment = line.startswith("COMMENT:".rjust(label_width))
        continues_text = line.startswith(continuation)
        if starts_comment:
            comment = line[value_start:]
        elif continues_text:
            # Keep one of the leading spaces as the word separator.
            comment += line[value_start - 1:]
        # Emit the collected text once the next line no longer continues it.
        if (starts_comment or continues_text) and not following.startswith(continuation):
            lines += wrapper.wrap("Comment: " + comment)
            comment = ""

    if path is not None and not path.endswith("/"):
        stil[path] = {"lines": lines}
    return stil


def _serialize_stil(stil):
    """Return ``(index, data)`` byte buffers for the parsed entries.

    Index record: 4-byte big-endian hash followed by a 3-byte big-endian
    offset into ``data``.  Data record: 2-byte big-endian text length, then
    every line as ISO-8859-1 bytes terminated by 0x83 0x03.

    :raises ValueError: if an offset does not fit into three bytes
    """
    index = bytearray()
    data = bytearray()
    for entry in stil.values():
        offset = len(data)
        if offset > 0xffffff:
            # Masking the offset would silently point at the wrong record.
            raise ValueError("STIL data exceeds the 24-bit offset range of the index")
        # Write hash and data offset
        index += pack(">IBBB", entry["hash"],
                      (offset >> 16) & 0xff, (offset >> 8) & 0xff, offset & 0xff)
        # Each line is followed by the two byte line change magic code
        text = b"".join(
            line.encode("ISO-8859-1", "replace") + b"\x83\x03"
            for line in entry["lines"]
        )
        # Big endian, two byte length
        data += pack(">H", len(text))
        data += text
    return index, data


def convertStil():
    """Convert ``STIL.txt`` in the current directory into ``HiP-STIL.db``.

    Output layout: a 4-byte big-endian length of the index section (these
    four bytes included), the index records, then the data records.  The
    index length, data length and number of items are printed.

    The output file is only opened after parsing and serialization have
    succeeded, and no temporary file is used.
    """
    wrapper = textwrap.TextWrapper(width=39, initial_indent="", subsequent_indent=" ")

    with open("STIL.txt", 'r', encoding='latin1') as fileIn:
        raw_lines = fileIn.readlines()

    # Parse into a dictionary
    stil = _parse_stil(raw_lines, wrapper)
    fnvHash(stil)
    index, data = _serialize_stil(stil)

    # The index length counts its own 4-byte header
    idxLength = 4 + len(index)
    print("Idx length: " + str(idxLength))
    print("Db length: " + str(len(data)))

    with open("HiP-STIL.db", 'wb') as fileIdx:
        fileIdx.write(pack(">I", idxLength))
        fileIdx.write(index)
        # Append the data part to the end of the file
        fileIdx.write(data)

    print("Items: " + str(len(stil)))
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    convertStil()