forked from minio/simdjson-go
-
Notifications
You must be signed in to change notification settings - Fork 0
/
flatten_bits_amd64.s
60 lines (51 loc) · 1.28 KB
/
flatten_bits_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
//+build !noasm !appengine gc

// NOTE(review): terms separated by spaces on a +build line are OR'ed, so this
// constraint is satisfied as soon as any one of the three terms holds.
// Confirm whether the comma-separated (AND) form was intended.

// Symbolic names for the general-purpose registers shared by the routines
// in this file.

// MASK: the 64-bit mask being scanned; shifted right as set bits are consumed.
#define MASK AX
// INDEX: next slot to write in the table of 32-bit entries addressed through DI.
#define INDEX BX
// ZEROS: trailing-zero count of MASK. It is also used as a variable shift
// count, which x86 takes from CL, so it has to live in CX.
#define ZEROS CX
// CARRIED: count carried across calls; added to the first value emitted by a
// call and recomputed before returning.
#define CARRIED DX
// SHIFTS: number of MASK bits consumed so far during the current call.
#define SHIFTS R8
// POSITION: running sum of every value written to the table.
#define POSITION R10
// _flatten_bits_incremental is the stack-argument entry point around
// ·__flatten_bits_incremental, which works purely on registers.
//
// Stack arguments (five 8-byte words, no results):
//   base_ptr +0   passed through in DI; the worker stores 32-bit entries
//                 at base_ptr[index]
//   pbase    +8   pointer to the 64-bit table index   (read, then written back)
//   mask     +16  64-bit mask to scan                 (by value)
//   carried  +24  pointer to the 64-bit carried count (read, then written back)
//   position +32  pointer to the 64-bit position      (read, then written back)
//
// SI, R11 and R12 keep the three pointers alive across the call; the worker
// does not touch those registers.
TEXT ·_flatten_bits_incremental(SB), $0-40
	// By-value inputs.
	MOVQ	mask+16(FP), MASK
	MOVQ	base_ptr+0(FP), DI

	// In/out state: fetch each pointer, then the value behind it.
	MOVQ	pbase+8(FP), SI
	MOVQ	(SI), INDEX
	MOVQ	carried+24(FP), R11
	MOVQ	(R11), CARRIED
	MOVQ	position+32(FP), R12
	MOVQ	(R12), POSITION

	CALL	·__flatten_bits_incremental(SB)

	// Publish the updated state back through the caller's pointers.
	MOVQ	POSITION, (R12)
	MOVQ	CARRIED, (R11)
	MOVQ	INDEX, (SI)
	RET
// __flatten_bits_incremental walks the set bits of MASK from least to most
// significant. For every set bit it appends one 32-bit entry to the table at
// DI: the distance, in bit positions, from the previously emitted bit (for
// the first bit of a call, CARRIED is added on top).
//
// This routine takes no stack arguments; everything is passed in registers.
//
// In:    DI       = base of the table of 32-bit entries
//        MASK     = 64-bit mask to scan
//        INDEX    = next free table slot
//        CARRIED  = count carried in from the previous call
//        POSITION = running sum of all emitted entries
// Out:   INDEX    advanced by the number of set bits in MASK
//        POSITION increased by the sum of the entries written
//        CARRIED  = 64 - bits consumed (plus the incoming CARRIED when MASK
//                   had no set bit at all)
//        MASK     = 0
// Clobb: ZEROS (CX), SHIFTS (R8), R9, flags
TEXT ·__flatten_bits_incremental(SB), $0
	XORQ	SHIFTS, SHIFTS

	// Lowest set bit: the only one that has to account for CARRIED.
	TZCNTQ	MASK, ZEROS
	JCS	finish			// CF set: MASK is zero, nothing to emit

	// Drop the trailing zeros and the bit itself. This is done as two shifts
	// because a single shift by ZEROS+1 could be 64, which does not fit in
	// the 6-bit shift count.
	SHRQ	ZEROS, MASK
	SHRQ	$1, MASK
	ADDQ	$1, ZEROS
	ADDQ	ZEROS, SHIFTS		// SHIFTS counts mask bits only, never CARRIED
	ADDQ	CARRIED, ZEROS
	XORQ	CARRIED, CARRIED	// consumed; start from zero for the epilogue
	MOVL	ZEROS, (DI)(INDEX*4)
	INCQ	INDEX
	ADDQ	ZEROS, POSITION
	JMP	scan

emit:
	// ZEROS is at most 62 here (the top bit of MASK was cleared by the first
	// shift above), so a single shift by ZEROS+1 is always in range.
	ADDQ	$1, ZEROS
	MOVL	ZEROS, (DI)(INDEX*4)
	INCQ	INDEX
	ADDQ	ZEROS, POSITION
	ADDQ	ZEROS, SHIFTS
	SHRQ	ZEROS, MASK

scan:
	TZCNTQ	MASK, ZEROS
	JCC	emit			// CF clear: another set bit was found

finish:
	// CARRIED += 64 - SHIFTS: the unconsumed upper bits of this word are
	// carried over to the next call.
	MOVQ	$64, R9
	SUBQ	SHIFTS, R9
	ADDQ	R9, CARRIED
	RET