Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRAFT: Replace Field Arithmetic #1260

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ noinst_HEADERS += src/ecmult_gen_compute_table.h
noinst_HEADERS += src/ecmult_gen_compute_table_impl.h
noinst_HEADERS += src/field_10x26.h
noinst_HEADERS += src/field_10x26_impl.h
noinst_HEADERS += src/dettman.h
noinst_HEADERS += src/field_5x52.h
noinst_HEADERS += src/field_5x52_impl.h
noinst_HEADERS += src/field_5x52_int128_impl.h
Expand Down Expand Up @@ -89,12 +90,15 @@ pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libsecp256k1.pc

if USE_EXTERNAL_ASM
if USE_ASM_X86_64
libsecp256k1_common_la_SOURCES = src/asm/mul.s src/asm/square.s
endif
if USE_ASM_ARM
libsecp256k1_common_la_SOURCES = src/asm/field_10x26_arm.s
endif
endif

libsecp256k1_la_SOURCES = src/secp256k1.c
libsecp256k1_la_SOURCES = src/secp256k1.c src/asm/square.s src/asm/mul.s
libsecp256k1_la_CPPFLAGS = $(SECP_CONFIG_DEFINES)
libsecp256k1_la_LIBADD = $(COMMON_LIB) $(PRECOMPUTED_LIB)
libsecp256k1_la_LDFLAGS = -no-undefined -version-info $(LIB_VERSION_CURRENT):$(LIB_VERSION_REVISION):$(LIB_VERSION_AGE)
Expand Down
4 changes: 4 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ else
fi

if test x"$req_asm" = x"auto"; then
# TODO
SECP_64BIT_ASM_CHECK
if test x"$has_64bit_asm" = x"yes"; then
set_asm=x86_64
Expand All @@ -275,6 +276,7 @@ else
set_asm=$req_asm
case $set_asm in
x86_64)
# TODO
SECP_64BIT_ASM_CHECK
if test x"$has_64bit_asm" != x"yes"; then
AC_MSG_ERROR([x86_64 assembly optimization requested but not available])
Expand All @@ -296,6 +298,7 @@ enable_external_asm=no
case $set_asm in
x86_64)
SECP_CONFIG_DEFINES="$SECP_CONFIG_DEFINES -DUSE_ASM_X86_64=1"
enable_external_asm=yes
;;
arm)
enable_external_asm=yes
Expand Down Expand Up @@ -438,6 +441,7 @@ AM_CONDITIONAL([ENABLE_MODULE_EXTRAKEYS], [test x"$enable_module_extrakeys" = x"
AM_CONDITIONAL([ENABLE_MODULE_SCHNORRSIG], [test x"$enable_module_schnorrsig" = x"yes"])
AM_CONDITIONAL([USE_EXTERNAL_ASM], [test x"$enable_external_asm" = x"yes"])
AM_CONDITIONAL([USE_ASM_ARM], [test x"$set_asm" = x"arm"])
AM_CONDITIONAL([USE_ASM_X86_64], [test x"$set_asm" = x"x86_64"])
AM_CONDITIONAL([BUILD_WINDOWS], [test "$build_windows" = "yes"])
AC_SUBST(LIB_VERSION_CURRENT, _LIB_VERSION_CURRENT)
AC_SUBST(LIB_VERSION_REVISION, _LIB_VERSION_REVISION)
Expand Down
200 changes: 200 additions & 0 deletions src/asm/mul.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# -----------------------------------------------------------------------
# secp256k1_fe_mul_inner -- x86-64, GAS AT&T syntax, straight-line leaf code
#
# Register roles on entry, as used by the code below:
#   rdi = r : destination; five 64-bit words are stored at r+0 .. r+0x20
#   rsi = a : first operand; five 64-bit words are loaded from a+0 .. a+0x20
#   rdx = b : second operand; five 64-bit words are loaded from b+0 .. b+0x20
# NOTE(review): this matches the SysV AMD64 argument order for something like
#   void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t *b)
# -- confirm against the C declaration.
#
# What the code does: forms all 25 products a[i]*b[j] with mulx, sums them
# per "column" k = i+j into 128-bit hi:lo accumulators, and propagates
# carries in 52-bit steps (shift 0x34, mask 0xfffffffffffff). Columns 5..8
# are folded back into columns 0..3 by multiplying with 0x1000003d10; the
# word-4 value is cut to 48 bits and its excess is folded into column 0 via
# 0x1000003d1 (note 0x1000003d10 == 0x1000003d1 << 4).
# NOTE(review): presumably this is multiplication modulo the secp256k1 field
# prime on 5x52-bit limbs; input magnitude limits are not visible here --
# confirm against the C reference implementation.
#
# Aliasing: every load from a and b happens before the first store to r.
#
# Instruction set: uses mulx and bzhi (BMI2) and adcx/adox (ADX).
# mulx does not modify flags, which lets it sit between carry-chain steps.
#
# Stack: rsp is never adjusted and no call is made. All saves and spills go
# to -0x80(%rsp) .. -0x18(%rsp), i.e. the 128 bytes below rsp (SysV red zone).
# NOTE(review): Windows x64 has no red zone and treats rdi/rsi as
# callee-saved -- confirm this file is only built for SysV targets.
#   -0x80 rbx   -0x78 rbp   -0x70 r12   -0x68 r13   -0x60 r14   -0x58 r15
#   -0x50 rdi (destination pointer r)
#   -0x48 hi(a3*b4)         -0x40 lo(a3*b4)
#   -0x38 hi(a2*b3)         -0x30 lo(a2*b3)
#   -0x28 low 48 bits of the word-4 value (base of r[4])
#   -0x20 low 52 bits of the column-3 accumulator (base of r[3])
#   -0x18 high half of the column-7 accumulator
#
# Clobbers: rax, rcx, rdx, rdi, r8-r11, flags. rbx, rbp, r12-r15 are saved
# on the way in and restored before ret.
# -----------------------------------------------------------------------
.text
.global secp256k1_fe_mul_inner
secp256k1_fe_mul_inner:
# Keep b in rax: rdx is the implicit multiplicand of every mulx below.
mov %rdx,%rax
# r11:r10 = a4*b4   (AT&T mulx operand order: source, low half, high half)
mov 0x20(%rdx),%rdx
mulx 0x20(%rsi),%r10,%r11
# r8:rcx = a3*b4
mov 0x20(%rax),%rdx
mulx 0x18(%rsi),%rcx,%r8
# rbx:r9 = a1*b2   (rbx saved first because it is about to be overwritten)
mov 0x10(%rax),%rdx
mov %rbx,-0x80(%rsp)
mulx 0x8(%rsi),%r9,%rbx
# r12:rbp = a2*b3
mov 0x18(%rax),%rdx
mov %rbp,-0x78(%rsp)
mov %r12,-0x70(%rsp)
mulx 0x10(%rsi),%rbp,%r12
# r14:r13 = a3*b0
mov (%rax),%rdx
mov %r13,-0x68(%rsp)
mov %r14,-0x60(%rsp)
mulx 0x18(%rsi),%r13,%r14
# rdi:r15 = a1*b3   (destination pointer parked on the stack to free rdi)
mov 0x8(%rsi),%rdx
mov %r15,-0x58(%rsp)
mov %rdi,-0x50(%rsp)
mulx 0x18(%rax),%r15,%rdi
# r11 = low 64 bits of (a4*b4) >> 52
mov %r10,%rdx
shrd $0x34,%r11,%rdx
mov %rdx,%r11
# Spill a3*b4 (needed much later for column 7), then rcx:r8 = a2*b1
mov 0x10(%rsi),%rdx
mov %r8,-0x48(%rsp)
mov %rcx,-0x40(%rsp)
mulx 0x8(%rax),%r8,%rcx
# rcx:r13 = a3*b0 + a2*b1
add %r8,%r13
adcx %r14,%rcx
# r8:r14 = ((a4*b4) >> 52) * 0x1000003d10   (added to column 4 further down)
movabs $0x1000003d10,%rdx
mulx %r11,%r14,%r8
# Spill hi(a2*b3), then r12:r11 = a0*b3
mov (%rsi),%rdx
mov %r12,-0x38(%rsp)
mulx 0x18(%rax),%r11,%r12
# test clears CF and OF so the adox and adcx chains both start from zero.
test %al,%al
# r12:r13 = column 3 = a3*b0 + a2*b1 + a1*b2 + a0*b3
adox %r9,%r13
adox %rcx,%rbx
adcx %r11,%r13
adcx %rbx,%r12
# r10 = low 52 bits of a4*b4; rcx:r9 = r10 * 0x1000003d10
movabs $0xfffffffffffff,%rdx
and %rdx,%r10
movabs $0x1000003d10,%r9
mov %r9,%rdx
mulx %r10,%r9,%rcx
# rbx:r11 = a3*b1
mov 0x8(%rax),%rdx
mulx 0x18(%rsi),%r11,%rbx
# Spill lo(a2*b3), then rbp:r10 = a4*b0
mov 0x20(%rsi),%rdx
mov %rbp,-0x30(%rsp)
mulx (%rax),%r10,%rbp
# rbx:r10 = a4*b0 + a3*b1
adox %r11,%r10
adox %rbp,%rbx
# r12:r9 = column 3 + (low 52 bits of a4*b4) * 0x1000003d10
adcx %r13,%r9
adcx %rcx,%r12
# rcx:r13 = a2*b2
mov 0x10(%rsi),%rdx
mulx 0x10(%rax),%r13,%rcx
# xor zeroes rdx and clears CF/OF for the next pair of chains.
xor %rdx,%rdx
# rdi:r10 = a4*b0 + a3*b1 + a2*b2 + a1*b3
adox %r13,%r10
adox %rbx,%rcx
adcx %r15,%r10
adcx %rcx,%rdi
# r15 = 52, the bit index used by the following bzhi instructions.
mov $0x34,%r15d
# r11 = low 52 bits of the column-3 accumulator (later stored as part of r[3])
bzhi %r15,%r9,%r11
# rbx:rbp = a3*b2
mov 0x10(%rax),%rdx
mulx 0x18(%rsi),%rbp,%rbx
# rcx:r13 = a0*b4
mov 0x20(%rax),%rdx
mulx (%rsi),%r13,%rcx
# rcx:r10 = column 4 = a4*b0 + a3*b1 + a2*b2 + a1*b3 + a0*b4
adox %r13,%r10
adox %rdi,%rcx
# r9 = column-3 accumulator >> 52 (carry into column 4)
shrd $0x34,%r12,%r9
# rdi:r12 = a4*b1
mov 0x20(%rsi),%rdx
mulx 0x8(%rax),%r12,%rdi
# rbx:r12 = a4*b1 + a3*b2
add %rbp,%r12
adcx %rdi,%rbx
xor %rdx,%rdx
# rcx:r14 = column 4 + carry + ((a4*b4) >> 52) * 0x1000003d10
adox %r10,%r9
adox %rdx,%rcx
adcx %r9,%r14
adcx %r8,%rcx
# Clear flags, then rbx:r12 += a2*b3 (reloaded from its spill slots)
xor %r8,%r8
adox -0x30(%rsp),%r12
adox -0x38(%rsp),%rbx
# rbp = low 52 bits of the column-4 accumulator
bzhi %r15,%r14,%rdx
mov %rdx,%rbp
# r10:r13 = a1*b4
mov 0x20(%rax),%rdx
mulx 0x8(%rsi),%r13,%r10
# rdi = low 48 bits of that word-4 value (kept for the final r[4])
mov $0x30,%edx
bzhi %rdx,%rbp,%rdi
# r8:r9 = a4*b2
mov 0x10(%rax),%rdx
mulx 0x20(%rsi),%r9,%r8
# r10:r12 = column 5 = a4*b1 + a3*b2 + a2*b3 + a1*b4
adox %r13,%r12
adox %rbx,%r10
# r14 = column-4 accumulator >> 52 (carry into column 5)
shrd $0x34,%rcx,%r14
xor %rdx,%rdx
# r10:r14 = column 5 + carry
adox %r12,%r14
adox %rdx,%r10
# rcx = low 52 bits of column 5; r14 = carry into column 6
bzhi %r15,%r14,%rcx
shrd $0x34,%r10,%r14
# r13:rbx = a3*b3
mov 0x18(%rsi),%rdx
mulx 0x18(%rax),%rbx,%r13
# r10:r12 = a2*b4
mov 0x20(%rax),%rdx
mulx 0x10(%rsi),%r12,%r10
# r13:r9 = a4*b2 + a3*b3
add %rbx,%r9
adcx %r8,%r13
# rcx = (low 52 bits of column 5) << 4, completed by the lea below
shl $0x4,%rcx
# r10:r9 = column 6 = a4*b2 + a3*b3 + a2*b4
add %r12,%r9
adcx %r13,%r10
# rbp = bits 48..51 of the word-4 value; rcx = (column-5 low bits << 4) + rbp
shr $0x30,%rbp
lea (%rcx,%rbp,1),%rcx
# rbx:r8 = rcx * 0x1000003d1   (folded into column 0)
movabs $0x1000003d1,%rdx
mulx %rcx,%r8,%rbx
# r13:r12 = a0*b0
mov (%rax),%rdx
mulx (%rsi),%r12,%r13
# rcx:rbp = a4*b3
mov 0x18(%rax),%rdx
mulx 0x20(%rsi),%rbp,%rcx
test %al,%al
# r13:r8 = a0*b0 + folded term (column-0 accumulator)
adox %r12,%r8
adox %rbx,%r13
# rcx:rbp = column 7 = a4*b3 + a3*b4 (a3*b4 reloaded from its spill slots)
adcx -0x40(%rsp),%rbp
adcx -0x48(%rsp),%rcx
# r12:rbx = a0*b1
mov (%rsi),%rdx
mulx 0x8(%rax),%rbx,%r12
xor %rdx,%rdx
# r10:r14 = column 6 + carry
adox %r9,%r14
adox %rdx,%r10
# r9 = column-0 accumulator >> 52 (carry into column 1); r8 is masked later
mov %r8,%r9
shrd $0x34,%r13,%r9
# r13 = low 52 bits of column 6; r14 = carry into column 7
bzhi %r15,%r14,%r13
shrd $0x34,%r10,%r14
xor %r10,%r10
# rcx:r14 = column 7 + carry
adox %rbp,%r14
adox %r10,%rcx
# r10:rbp = a1*b0
mov 0x8(%rsi),%rdx
mulx (%rax),%rbp,%r10
# Spill the 48-bit word-4 value, then rdi:r15 = a1*b1 (r15 no longer holds 52)
mov 0x8(%rax),%rdx
mov %rdi,-0x28(%rsp)
mulx 0x8(%rsi),%r15,%rdi
# Spill the column-3 low bits and the column-7 high half, then rcx:r11 = a2*b0
mov 0x10(%rsi),%rdx
mov %r11,-0x20(%rsp)
mov %rcx,-0x18(%rsp)
mulx (%rax),%r11,%rcx
# rdi:r11 = a2*b0 + a1*b1   (CF is still clear from the xor above)
adcx %r15,%r11
adcx %rcx,%rdi
test %al,%al
# r12:r9 = column 1 (a1*b0 + a0*b1) + carry from column 0
adox %rbx,%rbp
adox %r10,%r12
adcx %rbp,%r9
adc $0x0,%r12
# r12:rbx = column 1 + carry + (low 52 bits of column 6) * 0x1000003d10
movabs $0x1000003d10,%rdx
mulx %r13,%rbx,%r10
add %r9,%rbx
adcx %r10,%r12
# r15:r13 = a0*b2
mov 0x10(%rax),%rdx
mulx (%rsi),%r13,%r15
# rcx = low 52 bits of the column-1 accumulator (this becomes r[1]);
# rbp = low 52 bits of the column-7 accumulator
mov $0x34,%edx
bzhi %rdx,%rbx,%rcx
bzhi %rdx,%r14,%rbp
# r10:rbp = (low 52 bits of column 7) * 0x1000003d10
movabs $0x1000003d10,%r9
mov %rbp,%rdx
mulx %r9,%rbp,%r10
# rbx = column-1 accumulator >> 52 (carry into column 2)
shrd $0x34,%r12,%rbx
# Reload the destination pointer into r12 and store r[1].
mov -0x50(%rsp),%r12
mov %rcx,0x8(%r12)
# r15:r11 = column 2 = a2*b0 + a1*b1 + a0*b2
add %r13,%r11
adcx %rdi,%r15
xor %rdi,%rdi
# r15:rbp = column 2 + carry + (low 52 bits of column 7) * 0x1000003d10
adox %r11,%rbx
adox %rdi,%r15
adcx %rbx,%rbp
adcx %r10,%r15
# r13 = carry out of column 2, plus the saved low 52 bits of column 3
mov %rbp,%r13
shrd $0x34,%r15,%r13
add -0x20(%rsp),%r13
# r14 = column-7 accumulator >> 52 (its high half comes from the spill slot)
mov -0x18(%rsp),%rcx
shrd $0x34,%rcx,%r14
# rcx:r9 = r14 * 0x1000003d10, then add r13
mov %r9,%rdx
mulx %r14,%r9,%rcx
xor %r10,%r10
adox %r13,%r9
adox %r10,%rcx
# r11 = low 52 bits -> r[3]; the remaining carry plus the 48-bit value -> r[4]
movabs $0xfffffffffffff,%rdi
mov %r9,%r11
and %rdi,%r11
shrd $0x34,%rcx,%r9
add -0x28(%rsp),%r9
# Mask the column-0 and column-2 accumulators to 52 bits and store the rest.
and %rdi,%r8
mov %r9,0x20(%r12)
mov %r8,(%r12)
and %rdi,%rbp
mov %rbp,0x10(%r12)
mov %r11,0x18(%r12)
# Restore the callee-saved registers from their save slots below rsp.
mov -0x80(%rsp),%rbx
mov -0x78(%rsp),%rbp
mov -0x70(%rsp),%r12
mov -0x68(%rsp),%r13
mov -0x60(%rsp),%r14
mov -0x58(%rsp),%r15
ret
Loading