-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgfp.s
115 lines (91 loc) · 2.06 KB
/
gfp.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#include "gfp.h"
#include "mul.h"
#include "mul_bmi2.h"
// gfpNeg stores the negation of the six-word value at a into c.
//
// Stack layout used below (frame $0, 16 bytes of arguments):
//   c+0(FP)  pointer to the 6x64-bit destination
//   a+8(FP)  pointer to the 6x64-bit source
//
// The routine forms p - a word by word and then hands the result to the
// gfpCarry macro. gfpCarry is defined outside this file (presumably gfp.h);
// it looks like the final modular correction step -- confirm there.
TEXT ·gfpNeg(SB),0,$0-16
// R8..R13 = the six 64-bit words of the global ·p, least significant first.
MOVQ ·p+0(SB), R8
MOVQ ·p+8(SB), R9
MOVQ ·p+16(SB), R10
MOVQ ·p+24(SB), R11
MOVQ ·p+32(SB), R12
MOVQ ·p+40(SB), R13
// R8..R13 -= a. SUBQ starts the borrow chain and each SBBQ consumes the
// borrow of the previous word, so these six lines must stay in this order.
MOVQ a+8(FP), DI
SUBQ 0(DI), R8
SBBQ 8(DI), R9
SBBQ 16(DI), R10
SBBQ 24(DI), R11
SBBQ 32(DI), R12
SBBQ 40(DI), R13
// R14 is passed to gfpCarry as a seventh (top) word and is zero here.
// MOVQ leaves the flags untouched.
MOVQ $0, R14
// The registers after R14 (R15,AX,BX,CX,DX,DI,SI) appear to be scratch for
// the macro -- NOTE(review): verify the clobber list in the macro definition.
gfpCarry(R8,R9,R10,R11,R12,R13,R14, R15,AX,BX,CX,DX,DI,SI)
// DI was handed to gfpCarry above, so the destination pointer is only
// loaded now.
MOVQ c+0(FP), DI
// Write R8..R13 to the six words starting at c.
storeBlock(R8,R9,R10,R11,R12,R13, 0(DI))
RET
// gfpAdd stores the sum of the six-word values at a and b into c.
//
// Stack layout used below (frame $0, 24 bytes of arguments):
//   c+0(FP)   pointer to the 6x64-bit destination
//   a+8(FP)   pointer to the first 6x64-bit operand
//   b+16(FP)  pointer to the second 6x64-bit operand
//
// The raw sum is kept as seven words (R8..R13 plus the carry in R14) and then
// passed to the gfpCarry macro, which is defined outside this file
// (presumably gfp.h) and looks like the modular correction step -- confirm
// there.
TEXT ·gfpAdd(SB),0,$0-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
// R8..R13 = the six words of a (loadBlock is a macro defined elsewhere).
loadBlock(0(DI), R8,R9,R10,R11,R12,R13)
// R14 starts at zero so that the final ADCQ below captures the carry out of
// the top word.
MOVQ $0, R14
// R8..R13 += b. ADDQ starts the carry chain and every ADCQ consumes the carry
// of the previous word, so the order of these lines is significant.
ADDQ 0(SI), R8
ADCQ 8(SI), R9
ADCQ 16(SI), R10
ADCQ 24(SI), R11
ADCQ 32(SI), R12
ADCQ 40(SI), R13
// R14 = carry out of the sixth word (0 or 1).
ADCQ $0, R14
// The registers after R14 (R15,AX,BX,CX,DX,DI,SI) appear to be scratch for
// the macro -- NOTE(review): verify the clobber list in the macro definition.
gfpCarry(R8,R9,R10,R11,R12,R13,R14, R15,AX,BX,CX,DX,DI,SI)
// DI was handed to gfpCarry above, so the destination pointer is only
// loaded now.
MOVQ c+0(FP), DI
// Write R8..R13 to the six words starting at c.
storeBlock(R8,R9,R10,R11,R12,R13, 0(DI))
RET
// gfpSub stores the difference of the six-word values at a and b into c.
//
// Stack layout used below (frame $0, 24 bytes of arguments):
//   c+0(FP)   pointer to the 6x64-bit destination
//   a+8(FP)   pointer to the minuend
//   b+16(FP)  pointer to the subtrahend
//
// The difference is computed indirectly in two stages:
//   1. form p - b and run gfpCarry on it (same sequence as gfpNeg);
//   2. add a to that value and run gfpCarry again (same sequence as gfpAdd).
// gfpCarry is a macro defined outside this file (presumably gfp.h) and looks
// like the modular correction step -- confirm there.
TEXT ·gfpSub(SB),0,$0-24
// TODO(brendan): Directly subtract.
// Stage 1: R8..R13 = p - b.
MOVQ ·p+0(SB), R8
MOVQ ·p+8(SB), R9
MOVQ ·p+16(SB), R10
MOVQ ·p+24(SB), R11
MOVQ ·p+32(SB), R12
MOVQ ·p+40(SB), R13
MOVQ b+16(FP), DI
// Borrow chain: SUBQ on the lowest word, SBBQ on each following word.
SUBQ 0(DI), R8
SBBQ 8(DI), R9
SBBQ 16(DI), R10
SBBQ 24(DI), R11
SBBQ 32(DI), R12
SBBQ 40(DI), R13
// The seventh (top) word passed to gfpCarry is zero for this stage.
MOVQ $0, R14
gfpCarry(R8,R9,R10,R11,R12,R13,R14, R15,AX,BX,CX,DX,DI,SI)
// Stage 2: R8..R13 += a, with the carry out collected in R14.
// R14 is cleared again here; it was also passed to the macro above.
MOVQ $0, R14
// DI is reloaded because it was handed to gfpCarry in the previous stage.
MOVQ a+8(FP), DI
// Carry chain: ADDQ on the lowest word, ADCQ on each following word.
ADDQ 0(DI), R8
ADCQ 8(DI), R9
ADCQ 16(DI), R10
ADCQ 24(DI), R11
ADCQ 32(DI), R12
ADCQ 40(DI), R13
// R14 = carry out of the sixth word (0 or 1).
ADCQ $0, R14
gfpCarry(R8,R9,R10,R11,R12,R13,R14, R15,AX,BX,CX,DX,DI,SI)
// Load the destination pointer only after the last macro that receives DI.
MOVQ c+0(FP), DI
// Write R8..R13 to the six words starting at c.
storeBlock(R8,R9,R10,R11,R12,R13, 0(DI))
RET
// gfpMul stores the reduced product of the six-word values at a and b into c.
//
// Stack layout used below (240-byte local frame, 24 bytes of arguments):
//   c+0(FP)   pointer to the 6x64-bit destination
//   a+8(FP)   pointer to the first operand
//   b+16(FP)  pointer to the second operand
//   0(SP)...  buffer for the intermediate product T, passed to the macros
//
// Two code paths exist, selected at run time by the byte variable ·hasBMI2.
// Each path is a multiply macro followed by a reduce macro; all four macros
// come from the included headers (gfp.h, mul.h, mul_bmi2.h) and are not
// visible in this file. The two paths leave their result in different
// registers, so each path has its own final storeBlock.
TEXT ·gfpMul(SB),0,$240-24
MOVQ a+8(FP), DI
MOVQ b+16(FP), SI
// Jump to a slightly different implementation if MULX isn't supported.
CMPB ·hasBMI2(SB), $0
JE nobmi2Mul
// T = a * b
// The six words of a are passed individually; b is passed by its base
// address and the output buffer by 0(SP).
mulBMI2(0(DI),8(DI),16(DI),24(DI),32(DI),40(DI), 0(SI), 0(SP))
// Spill six registers to 48(SP), i.e. directly after the first six words of
// the buffer. NOTE(review): presumably these hold the upper half of T while
// mulBMI2 has already written the lower half to 0(SP) -- confirm in
// mul_bmi2.h.
storeBlock(R14,R15,R8,R9,R10,R11, 48(SP))
// Reduce T.
gfpReduceBMI2(0(SP))
// The reduced value is taken from R14,R15,R8,R9,R10,R11 on this path.
MOVQ c+0(FP), DI
storeBlock(R14,R15,R8,R9,R10,R11, 0(DI))
JMP end
nobmi2Mul:
// T = a * b
// Same argument layout as the BMI2 variant. No extra spill follows here, so
// the macro is expected to leave all of T in the buffer at 0(SP) --
// TODO confirm in mul.h.
mul(0(DI),8(DI),16(DI),24(DI),32(DI),40(DI), 0(SI), 0(SP))
// Reduce T.
gfpReduce(0(SP))
// The reduced value is taken from R14,R15,AX,BX,CX,DX on this path.
MOVQ c+0(FP), DI
storeBlock(R14,R15,AX,BX,CX,DX, 0(DI))
end:
RET