Skip to content

Commit 9ca87e1

Browse files
authored
Merge pull request #508 from Gustorn/huffman-asm-fix
Fix the x64 assembly version of Huffman encoding
2 parents 54b833b + 760e633 commit 9ca87e1

File tree

1 file changed

+14
-6
lines changed
  • contents/huffman_encoding/code/asm-x64

1 file changed

+14
-6
lines changed

contents/huffman_encoding/code/asm-x64/huffman.s

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ encode_calculate_length:
110110
test al, al # If we're at the terminating null character then we're ready to encode
111111
jz encode_message
112112
lea rdx, [rax + 4*rax] # We get the codebook entry at the specific index
113-
lea r8, [r13 + 4*rdx]
113+
lea r8, [r13 + 8*rdx]
114114
add r14, QWORD PTR [r8 + bitstr_len] # And add the encoded word length to the total
115115
inc rcx
116116
jmp encode_calculate_length
@@ -134,7 +134,7 @@ encode_message_bits:
134134
test dil, dil # If we're at the null terminator we're done
135135
jz encode_done
136136
lea rdx, [rdi + 4*rdi] # Get the codebook entry
137-
lea r10, [r13 + 4*rdx]
137+
lea r10, [r13 + 8*rdx]
138138
mov r11, QWORD PTR [r10 + bitstr_len] # Load the bitstring length
139139
lea r14, [r10] # The bitstring qword we're currently processing
140140
encode_message_bits_qword:
@@ -225,14 +225,22 @@ decode_done:
225225
# rdi - The starting address of the codebook we want to generate
226226
# rsi - Huffman-tree root (ptr)
227227
generate_codebook:
228-
sub rsp, bitstr_size + 8 # 8 extra bytes for alignment
228+
push r12
229+
sub rsp, bitstr_size + 16 # 16 extra bytes for alignment
230+
mov r12, rsi
229231
xorps xmm0, xmm0 # Create a 0-initialized bitstring. This will be
230232
movaps XMMWORD PTR [rsp], xmm0 # used in the recursive function calls
231233
movaps XMMWORD PTR [rsp + 16], xmm0
232234
mov QWORD PTR [rsp + 32], 0
235+
xor rsi, rsi
236+
mov rdx, codebook_size
237+
call memset
238+
mov rdi, rax
239+
mov rsi, r12
233240
mov rdx, rsp
234241
call generate_codebook_recurse
235-
add rsp, bitstr_size + 8
242+
add rsp, bitstr_size + 16
243+
pop r12
236244
ret
237245

238246
# rdi - The codebook's starting address
@@ -254,7 +262,7 @@ generate_codebook_recurse:
254262
movaps xmm1, XMMWORD PTR [rdx + 16]
255263
mov r9, QWORD PTR [rdx + 32]
256264
lea rax, [r8 + 4*r8] # The index calculation needs to add 40 * index. With lea arithmetic this can be represented as
257-
lea r10, [rdi + 4*rax] # base address + 4 * (5 * index). This is done in two lea instructions
265+
lea r10, [rdi + 8*rax] # base address + 8 * (5 * index). This is done in two lea instructions
258266
movups XMMWORD PTR [r10], xmm0 # And copy the data over to it
259267
movups XMMWORD PTR [r10 + 16], xmm1
260268
mov QWORD PTR [r10 + 32], r9
@@ -436,7 +444,7 @@ print_codebook_loop:
436444
cmp rbx, 255
437445
jg print_codebook_done
438446
lea rax, [rbx + 4*rbx] # We get the codebook entry at the specific index
439-
lea r10, [r12 + 4*rax]
447+
lea r10, [r12 + 8*rax]
440448
mov rdx, QWORD PTR [r10 + bitstr_len] # Load the length of the bitstring
441449
test rdx, rdx # If it's zero then the codepoint didn't exist in the original alphabet, skip
442450
jz print_codebook_counters

0 commit comments

Comments
 (0)