-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdzx7_lom_v1p1.asm
216 lines (170 loc) · 4.71 KB
/
dzx7_lom_v1p1.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
; -----------------------------------------------------------------------------
; ZX7 decoder by Einar Saukas, Antonio Villena & Metalbrain
; with additional size-efficient speed optimizations by introspec ("Life on Mars" version 1)
; 199 bytes long and is always faster than "mega" decompressor (by about 4% on average)
; drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
; -----------------------------------------------------------------------------
; Parameters:
; HL: source address (compressed data)
; DE: destination address (decompressing)
; -----------------------------------------------------------------------------
; Build-time option. With AllowUsingIX defined, the entry code loads IX with the
; address of dzx7l_offset_eoverflow and the four dzx7l_reload_N stubs jump there
; with JP (IX) instead of JP nn. IX is not saved or restored, so it is clobbered.
; NOTE(review): "+0byte" presumably means both variants assemble to the same size
; (one 4-byte LD IX,nn versus one byte saved in each of the four jumps) - confirm.
DEFINE AllowUsingIX ; +0byte
;
; entry point: copies the first source byte, primes the carry flag (and IX), then
; joins the literal loop at dzx7l_reload to fetch the first group of control bits.
; the out-of-line reload stubs used by the offset decoder follow right after it.
;
; In:  HL = source address (compressed data), DE = destination address.
; A is used throughout as the bit buffer: ADD A moves the next bit into carry,
; and a zero result means only the marker bit was left, i.e. a refill is needed.
;
; The first byte is copied unconditionally, before any control bit is read.
dzx7_lom: ldi
; Carry = 1 so that the RLA at dzx7l_reload plants the marker bit in bit 0 of the
; freshly loaded bit buffer (nothing between here and that RLA changes carry).
scf
IFNDEF AllowUsingIX
ELSE
; Cache the "4th offset bit was 1" target for the JP (IX) in the reload stubs.
ld ix, dzx7l_offset_eoverflow
ENDIF
jr dzx7l_reload
; Out-of-line refill stubs for the offset decoder (the code after
; dzx7l_len_value_done). Stub N is entered when the ADD A that fetches the N-th of
; the 4 extra offset bits finds the bit buffer empty (Z set). Each stub loads the
; next source byte, uses RLA to shift the carry left by that ADD A in as the new
; marker while shifting the first data bit out, then reads the remaining extra
; bits inline. No "buffer empty" tests are needed here because at most 4 bits are
; taken from a buffer that was just refilled with 8.
; The first three extra bits are rotated into D; the fourth decides the exit:
;   NC -> dzx7l_copying, C -> dzx7l_offset_eoverflow.
;
; Stub 1: none of the 4 extra bits has been read yet.
dzx7l_reload_1: ld a, (hl)
inc hl
; extra bit 1 -> carry, marker -> bit 0 of A
rla
rl d
; extra bit 2
add a
rl d
; extra bit 3
add a
rl d
; extra bit 4: only tested, never rotated into D
add a
jr nc, dzx7l_copying
IFNDEF AllowUsingIX
jp dzx7l_offset_eoverflow
ELSE
jp (ix)
ENDIF
; Stub 2: extra bit 1 is already in D; bits 2..4 come from the new byte.
dzx7l_reload_2: ld a, (hl)
inc hl
; extra bit 2 -> carry, marker -> bit 0 of A
rla
rl d
; extra bit 3
add a
rl d
; extra bit 4: only tested
add a
jr nc, dzx7l_copying
IFNDEF AllowUsingIX
jp dzx7l_offset_eoverflow
ELSE
jp (ix)
ENDIF
; Stub 3: extra bits 1..2 are already in D; bits 3..4 come from the new byte.
dzx7l_reload_3: ld a, (hl)
inc hl
; extra bit 3 -> carry, marker -> bit 0 of A
rla
rl d
; extra bit 4: only tested
add a
jr nc, dzx7l_copying
IFNDEF AllowUsingIX
jp dzx7l_offset_eoverflow
ELSE
jp (ix)
ENDIF
; Stub 4: extra bits 1..3 are already in D; only bit 4 comes from the new byte.
dzx7l_reload_4: ld a, (hl)
inc hl
; extra bit 4 -> carry, marker -> bit 0 of A
rla
jr nc, dzx7l_copying
IFNDEF AllowUsingIX
jp dzx7l_offset_eoverflow
ELSE
jp (ix)
ENDIF
; Fast path for the value bits of the length: shifts D further bits from the bit
; buffer into C only. B was set to 0 at dzx7l_process_ref and is not touched here,
; so this loop is valid while the value still fits in 8 bits.
; In:  D = number of value bits still to read (non-zero), BC = value so far.
; Falls through to dzx7l_len_value_done once D reaches 0.
;
; next bit -> carry; Z means the bit buffer is empty
dzx7l_len_value_loop: add a
; refill, then carry on in the 16-bit loop (dzx7l_len_value_loop2)
jr z, dzx7l_len_value_reload
rl c
; C overflowed: enter the 16-bit loop at the point where that carry goes into B
jr c, dzx7l_len_value_bincluded
dzx7l_len_value_start: dec d
jr nz, dzx7l_len_value_loop
dzx7l_len_value_done:
;
; the code that determines offset (pretty neat, actually)
;
; On entry: BC = number of bytes to copy minus one (dzx7l_copying does LDIR plus
; one LDI), D = 0 on every path that reaches this label, and the top of the stack
; holds the destination address pushed at dzx7l_process_ref.
; On exit to dzx7l_copying: DE = value that SCF : SBC HL,DE subtracts from the
; destination address to find the start of the sequence to copy.
;
ld e, (hl) ; load offset flag (1 bit) + offset value (7 bits)
inc hl
bit 7, e
; flag clear: E alone is the offset value and D is already 0, so go copy;
; flag set: fall through and read 4 more bits from the bit buffer
jr z, dzx7l_copying ; if offset flag is set, load 4 extra bits
; Extra bits 1..3 are rotated into D. Before each one, Z after ADD A means the bit
; buffer ran dry; the matching dzx7l_reload_N stub refills it and finishes the job.
add a
jr z, dzx7l_reload_1
rl d
add a
jr z, dzx7l_reload_2
rl d
add a
jr z, dzx7l_reload_3
rl d
; Extra bit 4 is not rotated into D. Doing it the long way would be: rotate it in,
; INC D, then SRL D : RR E to move D's lowest bit into bit 7 of E. Bit 7 of E is
; already 1 (the flag), so that result is reproduced directly:
;   bit = 0 (NC)   -> D and E are already correct, nothing to do
;   bit = 1 (C,NZ) -> clear bit 7 of E and add 1 to D
;   C and Z        -> the bit shifted out was the marker: refill via dzx7l_reload_4
add a
jr nc, dzx7l_copying ; we need to put 4-bit value into D, then INC D, then SRL D : RR E
jr z, dzx7l_reload_4
dzx7l_offset_eoverflow: res 7, e ; since bit 7 of E is already 1, we do nothing when NC or RES 7,E : INC D
inc d
; Copies a previously written sequence and then dispatches on the next control bit.
; In:  DE = decoded offset value, BC = number of bytes to copy minus one,
;      HL = source pointer, top of stack = destination (pushed at dzx7l_process_ref).
; Out: HL = source pointer, DE = destination advanced by BC+1 bytes.
dzx7l_copying: ex (sp), hl ; store source, restore destination
push hl ; store destination
; carry = 1 so that SBC subtracts one more than DE
scf
sbc hl, de ; HL = destination - offset - 1
pop de ; DE = destination
ldir ; copy previous sequence
; one more byte, because BC held the count minus one
ldi
pop hl ; restore source address (compressed data)
; Next control bit -> carry:
;   NC      -> 0 bit: a literal follows
;   C, NZ   -> 1 bit: another reference follows
;   C, Z    -> that was the marker: fall through into dzx7l_reload (carry still set)
add a
jr nc, dzx7l_copy_byte_loop
jr nz, dzx7l_process_ref
; Literal loop. A control bit of 0 means "copy one literal byte with LDI"; a 1
; means "a reference follows" and is handled at dzx7l_process_ref.
;
; dzx7l_reload refills the bit buffer. Every path into it arrives with carry set
; (SCF at the entry point, or the marker bit just shifted out by ADD A), so RLA
; moves the first control bit into carry and plants a new marker in bit 0.
dzx7l_reload: ld a, (hl)
inc hl
rla
jr c, dzx7l_process_ref
; Unrolled section that runs right after a refill. It tests carry only and never
; checks for an empty buffer, so it must see data bits only: the RLA above used
; one of the 8 fresh bits, which leaves 7 - hence the maximum of 7 below.
DUP 3 ; increasing this number speeds things up a little (max allowed is 7)
ldi
add a
jr c, dzx7l_process_ref
EDUP
dzx7l_copy_byte_loop: ldi ; copy literal byte
; General section: from here on a set carry may be either a real 1 bit or the
; marker, so every taken branch goes through the Z test at the bottom.
dzx7l_main_loop: DUP 2 ; the more the better, but it may/will break down some JR optimizations
add a
jr c, dzx7l_process_ref_or_reload
ldi
EDUP
add a
jr nc, dzx7l_copy_byte_loop ; next bit indicates either literal or sequence
; Carry is set here. Z set -> the bit was the marker, refill and look again;
; Z clear -> a real 1 bit, fall through to dzx7l_process_ref.
dzx7l_process_ref_or_reload:
jr z, dzx7l_reload
;
; here we determine number of bits used for length (Elias gamma coding) (NB: not too ugly, but...)
;
; Start of a reference. Saves the destination pointer on the stack (it is taken
; back by EX (SP),HL at dzx7l_copying or by POP DE at dzx7l_exit) and counts how
; many value bits the length has: D = number of 0 bits read before the first 1.
; BC starts at 1, so a size of 0 goes straight to the offset decoder with BC = 1.
dzx7l_process_ref: push de
ld bc, 1
; D = 0 (B was just cleared)
ld d, b
; First size bit -> carry:
;   NC      -> 0 bit: start counting in dzx7l_len_size_loop
;   C, NZ   -> 1 bit: no value bits at all, length is done
;   C, Z    -> marker: fall through and refill
add a
jr nc, dzx7l_len_size_loop
jr nz, dzx7l_len_value_done
; Refill while still waiting for the first size bit (carry is set on arrival,
; so RLA plants the new marker and delivers that first bit).
dzx7l_reload_size1: ld a, (hl)
inc hl
rla
jr c, dzx7l_len_value_done
; Unrolled step right after the refill: the bit just read was 0, so count it and
; read the next one without an "empty buffer" test (the buffer was just filled).
DUP 1 ; values above 1 speed things up slightly, but not by much - not really worth it
inc d
add a
jr c, dzx7l_len_value_loop
EDUP
; Generic counting loop: one more 0 bit seen -> one more value bit to read later.
dzx7l_len_size_loop: inc d
add a
jr nc, dzx7l_len_size_loop
; carry set: NZ -> the terminating 1 bit, go read D value bits; Z -> marker, refill
jr nz, dzx7l_len_value_loop
dzx7l_reload_size2: ld a, (hl)
inc hl
rla
jr c, dzx7l_len_value_loop
jp dzx7l_len_size_loop
;
; the length of the reference is determined here (NB: kinda ugly; the commented-out sections run faster, but take too much space. DJNZ?)
;
; 16-bit variant of the length value loop. It is entered from dzx7l_len_value_loop
; either at dzx7l_len_value_reload (the bit buffer ran dry) or at
; dzx7l_len_value_bincluded (C overflowed), and it also detects the end of stream.
; In:  D = number of value bits still to read, BC = value so far.
;
; Refill: carry is still set from the shift that pushed the marker out, and
; neither LD A,(HL) nor INC HL changes it.
dzx7l_len_value_reload: ld a, (hl)
inc hl
; ADC A does double duty:
;  - right after a refill carry is 1, so it shifts the new marker in while
;    shifting the first data bit out (the result cannot be zero in that case);
;  - when reached from the JR NZ below, carry is 0 (RL B left it clear and DEC D
;    does not touch it), so it is a plain shift and Z means "buffer empty".
dzx7l_len_value_loop2: adc a
jr z, dzx7l_len_value_reload
rl c
; entry from the fast loop: carry holds the bit that overflowed out of C
dzx7l_len_value_bincluded: rl b
; a 1 shifted out of B means the value no longer fits in 16 bits
jr c, dzx7l_exit ; check end marker
dec d
jr nz, dzx7l_len_value_loop2
jp dzx7l_len_value_done
; End of stream, reached only from the end-marker check in the 16-bit length loop.
; Pops the destination pointer that dzx7l_process_ref pushed (this also leaves the
; caller's return address on top of the stack) and returns.
; Out: DE = destination pointer as it stood when the final reference began;
;      HL = current position in the compressed data.
dzx7l_exit: pop de
ret
; -----------------------------------------------------------------------------