From 4c0b9a24e0990dd852b3a5a9dc054156e880b205 Mon Sep 17 00:00:00 2001 From: jinghe-INTC Date: Mon, 4 Jul 2022 14:36:18 +0800 Subject: [PATCH] Upgrade for OpenSSL 1.1.1p (#131) * upgrade for openssl 1.1.1p Signed-off-by: Jing He * generated by: $ diff ../openssl_source/openssl-1.1.1p/crypto/bn/x86_64-mont5.s ../openssl_source/openssl-1.1.1o/crypto/bn/x86_64-mont5.s Signed-off-by: Jing He * generated by: (WSL) $ diff ../openssl_source/openssl-1.1.1p/crypto/bn/x86_64-mont5.asm ../openssl_source/openssl-1.1.1o/crypto/bn/x86_64-mont5.asm Signed-off-by: Jing He * upgrade for openssl 1.1.1p Signed-off-by: Jing He --- README.md | 6 +- openssl_source/Linux/x86_64-mont5.s | 181 --------------- openssl_source/Windows/x86_64-mont5.asm | 214 ++---------------- .../x86_64-mont5.asm.openssl1p.patch | 205 +++++++++++++++++ openssl_source/x86_64-mont5.s.openssl1p.patch | 180 +++++++++++++++ 5 files changed, 403 insertions(+), 383 deletions(-) create mode 100644 openssl_source/x86_64-mont5.asm.openssl1p.patch create mode 100644 openssl_source/x86_64-mont5.s.openssl1p.patch diff --git a/README.md b/README.md index e8600599..d219e41e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Introduction ------------ The Intel® Software Guard Extensions SSL (Intel® SGX SSL) cryptographic library is intended to provide cryptographic services for Intel® Software Guard Extensions (SGX) enclave applications. The Intel® SGX SSL cryptographic library is based on the underlying OpenSSL* Open Source project, providing a full-strength general purpose cryptography library. -Supported OpenSSL version is 1.1.1o. To work with 1.1.0 version please use "openssl_1.1.0" branch. +Supported OpenSSL version is 1.1.1p. To work with 1.1.0 version please use "openssl_1.1.0" branch. In order to build Intel® SGX SSL libraries based on old OpenSSL version, checkout the tag with the corresponding versioning, e.g. lin_2.5_1.1.1c. Tag naming convention ``[lin/win]__``. @@ -35,7 +35,7 @@ Windows (Note: Perl, NASM need to be included in machine's PATH variable) To build Intel® SGX SSL package in Windows OS: -1. Download OpenSSL package into openssl_source/ directory. (tar.gz package, e.g. openssl-1.1.1o.tar.gz) +1. Download OpenSSL package into openssl_source/ directory. (tar.gz package, e.g. openssl-1.1.1p.tar.gz) 2. Download and install latest SGX SDK from [Intel Developer Zone](https://software.intel.com/en-us/sgx-sdk/download). You can find installation guide from the same website. 3. Change the directory to the SGXSSL path and enter the following command: ``` @@ -51,7 +51,7 @@ Linux - Intel(R) SGX Linux latest release, including SDK, PSW, and driver To build Intel® SGX SSL package in Linux OS: -1. Download OpenSSL 1.1.1o package into openssl_source/ directory. (tar.gz package, e.g. openssl-1.1.1o.tar.gz) +1. Download OpenSSL 1.1.1p package into openssl_source/ directory. (tar.gz package, e.g. openssl-1.1.1p.tar.gz) 2. Download and install latest SGX SDK from [01.org](https://01.org/intel-software-guard-extensions/downloads). You can find installation guide in the same website. 3. Source SGX SDK's environment variables. 4. Cd to Linux/ directory and run: diff --git a/openssl_source/Linux/x86_64-mont5.s b/openssl_source/Linux/x86_64-mont5.s index fcc279dd..ab35076e 100644 --- a/openssl_source/Linux/x86_64-mont5.s +++ b/openssl_source/Linux/x86_64-mont5.s @@ -2048,185 +2048,6 @@ __bn_post4x_internal: ret .cfi_endproc .size __bn_post4x_internal,.-__bn_post4x_internal -.globl bn_from_montgomery -.type bn_from_montgomery,@function -.align 32 -bn_from_montgomery: -.cfi_startproc - testl $7,%r9d - jz bn_from_mont8x - xorl %eax,%eax - ret -.cfi_endproc -.size bn_from_montgomery,.-bn_from_montgomery - -.type bn_from_mont8x,@function -.align 32 -bn_from_mont8x: -.cfi_startproc -.byte 0x67 - movq %rsp,%rax -.cfi_def_cfa_register %rax - pushq %rbx -.cfi_offset %rbx,-16 - pushq %rbp -.cfi_offset %rbp,-24 - pushq %r12 -.cfi_offset %r12,-32 - pushq %r13 -.cfi_offset %r13,-40 - pushq %r14 -.cfi_offset %r14,-48 - pushq %r15 -.cfi_offset %r15,-56 -.Lfrom_prologue: - - shll $3,%r9d - leaq (%r9,%r9,2),%r10 - negq %r9 - movq (%r8),%r8 - - - - - - - - - leaq -320(%rsp,%r9,2),%r11 - movq %rsp,%rbp - subq %rdi,%r11 - andq $4095,%r11 - cmpq %r11,%r10 - jb .Lfrom_sp_alt - subq %r11,%rbp - leaq -320(%rbp,%r9,2),%rbp - jmp .Lfrom_sp_done - -.align 32 -.Lfrom_sp_alt: - leaq 4096-320(,%r9,2),%r10 - leaq -320(%rbp,%r9,2),%rbp - subq %r10,%r11 - movq $0,%r10 - cmovcq %r10,%r11 - subq %r11,%rbp -.Lfrom_sp_done: - andq $-64,%rbp - movq %rsp,%r11 - subq %rbp,%r11 - andq $-4096,%r11 - leaq (%r11,%rbp,1),%rsp - movq (%rsp),%r10 - cmpq %rbp,%rsp - ja .Lfrom_page_walk - jmp .Lfrom_page_walk_done - -.Lfrom_page_walk: - leaq -4096(%rsp),%rsp - movq (%rsp),%r10 - cmpq %rbp,%rsp - ja .Lfrom_page_walk -.Lfrom_page_walk_done: - - movq %r9,%r10 - negq %r9 - - - - - - - - - - - movq %r8,32(%rsp) - movq %rax,40(%rsp) -.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 -.Lfrom_body: - movq %r9,%r11 - leaq 48(%rsp),%rax - pxor %xmm0,%xmm0 - jmp .Lmul_by_1 - -.align 32 -.Lmul_by_1: - movdqu (%rsi),%xmm1 - movdqu 16(%rsi),%xmm2 - movdqu 32(%rsi),%xmm3 - movdqa %xmm0,(%rax,%r9,1) - movdqu 48(%rsi),%xmm4 - movdqa %xmm0,16(%rax,%r9,1) -.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 - movdqa %xmm1,(%rax) - movdqa %xmm0,32(%rax,%r9,1) - movdqa %xmm2,16(%rax) - movdqa %xmm0,48(%rax,%r9,1) - movdqa %xmm3,32(%rax) - movdqa %xmm4,48(%rax) - leaq 64(%rax),%rax - subq $64,%r11 - jnz .Lmul_by_1 - -.byte 102,72,15,110,207 -.byte 102,72,15,110,209 -.byte 0x67 - movq %rcx,%rbp -.byte 102,73,15,110,218 - movl OPENSSL_ia32cap_P+8(%rip),%r11d - andl $0x80108,%r11d - cmpl $0x80108,%r11d - jne .Lfrom_mont_nox - - leaq (%rax,%r9,1),%rdi - call __bn_sqrx8x_reduction - call __bn_postx4x_internal - - pxor %xmm0,%xmm0 - leaq 48(%rsp),%rax - jmp .Lfrom_mont_zero - -.align 32 -.Lfrom_mont_nox: - call __bn_sqr8x_reduction - call __bn_post4x_internal - - pxor %xmm0,%xmm0 - leaq 48(%rsp),%rax - jmp .Lfrom_mont_zero - -.align 32 -.Lfrom_mont_zero: - movq 40(%rsp),%rsi -.cfi_def_cfa %rsi,8 - movdqa %xmm0,0(%rax) - movdqa %xmm0,16(%rax) - movdqa %xmm0,32(%rax) - movdqa %xmm0,48(%rax) - leaq 64(%rax),%rax - subq $32,%r9 - jnz .Lfrom_mont_zero - - movq $1,%rax - movq -48(%rsi),%r15 -.cfi_restore %r15 - movq -40(%rsi),%r14 -.cfi_restore %r14 - movq -32(%rsi),%r13 -.cfi_restore %r13 - movq -24(%rsi),%r12 -.cfi_restore %r12 - movq -16(%rsi),%rbp -.cfi_restore %rbp - movq -8(%rsi),%rbx -.cfi_restore %rbx - leaq (%rsi),%rsp -.cfi_def_cfa_register %rsp -.Lfrom_epilogue: - ret -.cfi_endproc -.size bn_from_mont8x,.-bn_from_mont8x .type bn_mulx4x_mont_gather5,@function .align 32 bn_mulx4x_mont_gather5: @@ -3790,9 +3611,7 @@ bn_gather5: .cfi_endproc .size bn_gather5,.-bn_gather5 .align 64 - .Linc: .long 0,0, 1,1 .long 2,2, 2,2 .byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 - diff --git a/openssl_source/Windows/x86_64-mont5.asm b/openssl_source/Windows/x86_64-mont5.asm index 3184bd64..f671e4aa 100644 --- a/openssl_source/Windows/x86_64-mont5.asm +++ b/openssl_source/Windows/x86_64-mont5.asm @@ -583,6 +583,7 @@ $L$SEH_end_bn_mul4x_mont_gather5: ALIGN 32 mul4x_internal: + shl r9,5 movd xmm5,DWORD[56+rax] lea rax,[$L$inc] @@ -1105,6 +1106,7 @@ $L$inner4x: mov r15,QWORD[24+rbp] jmp NEAR $L$sqr4x_sub_entry + global bn_power5 ALIGN 32 @@ -1331,6 +1333,7 @@ __bn_sqr8x_internal: + lea rbp,[32+r10] @@ -2036,8 +2039,10 @@ DB 102,73,15,126,217 ret + ALIGN 32 __bn_post4x_internal: + mov r12,QWORD[rbp] lea rbx,[r9*1+rdi] mov rcx,r9 @@ -2089,198 +2094,8 @@ $L$sqr4x_sub_entry: neg r9 ret -global bn_from_montgomery - -ALIGN 32 -bn_from_montgomery: - test DWORD[48+rsp],7 - jz NEAR bn_from_mont8x - xor eax,eax - ret - -ALIGN 32 -bn_from_mont8x: - mov QWORD[8+rsp],rdi ;WIN64 prologue - mov QWORD[16+rsp],rsi - mov rax,rsp -$L$SEH_begin_bn_from_mont8x: - mov rdi,rcx - mov rsi,rdx - mov rdx,r8 - mov rcx,r9 - mov r8,QWORD[40+rsp] - mov r9,QWORD[48+rsp] - - - -DB 0x67 - mov rax,rsp - - push rbx - - push rbp - - push r12 - - push r13 - - push r14 - - push r15 - -$L$from_prologue: - - shl r9d,3 - lea r10,[r9*2+r9] - neg r9 - mov r8,QWORD[r8] - - - - - - - - - lea r11,[((-320))+r9*2+rsp] - mov rbp,rsp - sub r11,rdi - and r11,4095 - cmp r10,r11 - jb NEAR $L$from_sp_alt - sub rbp,r11 - lea rbp,[((-320))+r9*2+rbp] - jmp NEAR $L$from_sp_done - -ALIGN 32 -$L$from_sp_alt: - lea r10,[((4096-320))+r9*2] - lea rbp,[((-320))+r9*2+rbp] - sub r11,r10 - mov r10,0 - cmovc r11,r10 - sub rbp,r11 -$L$from_sp_done: - and rbp,-64 - mov r11,rsp - sub r11,rbp - and r11,-4096 - lea rsp,[rbp*1+r11] - mov r10,QWORD[rsp] - cmp rsp,rbp - ja NEAR $L$from_page_walk - jmp NEAR $L$from_page_walk_done - -$L$from_page_walk: - lea rsp,[((-4096))+rsp] - mov r10,QWORD[rsp] - cmp rsp,rbp - ja NEAR $L$from_page_walk -$L$from_page_walk_done: - - mov r10,r9 - neg r9 - - - - - - - - - - - mov QWORD[32+rsp],r8 - mov QWORD[40+rsp],rax - -$L$from_body: - mov r11,r9 - lea rax,[48+rsp] - pxor xmm0,xmm0 - jmp NEAR $L$mul_by_1 - -ALIGN 32 -$L$mul_by_1: - movdqu xmm1,XMMWORD[rsi] - movdqu xmm2,XMMWORD[16+rsi] - movdqu xmm3,XMMWORD[32+rsi] - movdqa XMMWORD[r9*1+rax],xmm0 - movdqu xmm4,XMMWORD[48+rsi] - movdqa XMMWORD[16+r9*1+rax],xmm0 -DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 - movdqa XMMWORD[rax],xmm1 - movdqa XMMWORD[32+r9*1+rax],xmm0 - movdqa XMMWORD[16+rax],xmm2 - movdqa XMMWORD[48+r9*1+rax],xmm0 - movdqa XMMWORD[32+rax],xmm3 - movdqa XMMWORD[48+rax],xmm4 - lea rax,[64+rax] - sub r11,64 - jnz NEAR $L$mul_by_1 - -DB 102,72,15,110,207 -DB 102,72,15,110,209 -DB 0x67 - mov rbp,rcx -DB 102,73,15,110,218 - mov r11d,DWORD[((OPENSSL_ia32cap_P+8))] - and r11d,0x80108 - cmp r11d,0x80108 - jne NEAR $L$from_mont_nox - - lea rdi,[r9*1+rax] - call __bn_sqrx8x_reduction - call __bn_postx4x_internal - - pxor xmm0,xmm0 - lea rax,[48+rsp] - jmp NEAR $L$from_mont_zero - -ALIGN 32 -$L$from_mont_nox: - call __bn_sqr8x_reduction - call __bn_post4x_internal - - pxor xmm0,xmm0 - lea rax,[48+rsp] - jmp NEAR $L$from_mont_zero - -ALIGN 32 -$L$from_mont_zero: - mov rsi,QWORD[40+rsp] - - movdqa XMMWORD[rax],xmm0 - movdqa XMMWORD[16+rax],xmm0 - movdqa XMMWORD[32+rax],xmm0 - movdqa XMMWORD[48+rax],xmm0 - lea rax,[64+rax] - sub r9,32 - jnz NEAR $L$from_mont_zero - - mov rax,1 - mov r15,QWORD[((-48))+rsi] - - mov r14,QWORD[((-40))+rsi] - - mov r13,QWORD[((-32))+rsi] - - mov r12,QWORD[((-24))+rsi] - - mov rbp,QWORD[((-16))+rsi] - - mov rbx,QWORD[((-8))+rsi] - - lea rsp,[rsi] - -$L$from_epilogue: - mov rdi,QWORD[8+rsp] ;WIN64 epilogue - mov rsi,QWORD[16+rsp] - ret - -$L$SEH_end_bn_from_mont8x: - ALIGN 32 bn_mulx4x_mont_gather5: mov QWORD[8+rsp],rdi ;WIN64 prologue @@ -2408,6 +2223,7 @@ $L$SEH_end_bn_mulx4x_mont_gather5: ALIGN 32 mulx4x_internal: + mov QWORD[8+rsp],r9 mov r10,r9 neg r9 @@ -2828,6 +2644,7 @@ $L$mulx4x_inner: jmp NEAR $L$sqrx4x_sub_entry + ALIGN 32 bn_powerx5: mov QWORD[8+rsp],rdi ;WIN64 prologue @@ -3612,6 +3429,7 @@ DB 102,72,15,126,213 ALIGN 32 __bn_postx4x_internal: + mov r12,QWORD[rbp] mov r10,rcx mov r9,rcx @@ -3660,10 +3478,12 @@ $L$sqrx4x_sub_entry: ret + global bn_get_bits5 ALIGN 16 bn_get_bits5: + lea r10,[rcx] lea r11,[1+rcx] mov ecx,edx @@ -3679,10 +3499,12 @@ bn_get_bits5: ret + global bn_scatter5 ALIGN 16 bn_scatter5: + cmp edx,0 jz NEAR $L$scatter_epilogue lea r8,[r9*8+r8] @@ -3697,12 +3519,14 @@ $L$scatter_epilogue: ret + global bn_gather5 ALIGN 32 bn_gather5: $L$SEH_begin_bn_gather5: + DB 0x4c,0x8d,0x14,0x24 DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00 lea rax,[$L$inc] @@ -3860,6 +3684,7 @@ $L$gather: ret $L$SEH_end_bn_gather5: + ALIGN 64 $L$inc: DD 0,0,1,1 @@ -3986,10 +3811,6 @@ ALIGN 4 DD $L$SEH_begin_bn_power5 wrt ..imagebase DD $L$SEH_end_bn_power5 wrt ..imagebase DD $L$SEH_info_bn_power5 wrt ..imagebase - - DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase - DD $L$SEH_end_bn_from_mont8x wrt ..imagebase - DD $L$SEH_info_bn_from_mont8x wrt ..imagebase DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase @@ -4018,11 +3839,6 @@ DB 9,0,0,0 DD mul_handler wrt ..imagebase DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase ALIGN 8 -$L$SEH_info_bn_from_mont8x: -DB 9,0,0,0 - DD mul_handler wrt ..imagebase - DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase -ALIGN 8 $L$SEH_info_bn_mulx4x_mont_gather5: DB 9,0,0,0 DD mul_handler wrt ..imagebase diff --git a/openssl_source/x86_64-mont5.asm.openssl1p.patch b/openssl_source/x86_64-mont5.asm.openssl1p.patch new file mode 100644 index 00000000..85b24369 --- /dev/null +++ b/openssl_source/x86_64-mont5.asm.openssl1p.patch @@ -0,0 +1,205 @@ +2097a2098,2290 +> global bn_from_montgomery +> +> ALIGN 32 +> bn_from_montgomery: +> +> test DWORD[48+rsp],7 +> jz NEAR bn_from_mont8x +> xor eax,eax +> ret ; DB 0F3h,0C3h ;repret +> +> +> +> +> ALIGN 32 +> bn_from_mont8x: +> mov QWORD[8+rsp],rdi ;WIN64 prologue +> mov QWORD[16+rsp],rsi +> mov rax,rsp +> $L$SEH_begin_bn_from_mont8x: +> mov rdi,rcx +> mov rsi,rdx +> mov rdx,r8 +> mov rcx,r9 +> mov r8,QWORD[40+rsp] +> mov r9,QWORD[48+rsp] +> +> +> +> DB 0x67 +> mov rax,rsp +> +> push rbx +> +> push rbp +> +> push r12 +> +> push r13 +> +> push r14 +> +> push r15 +> +> $L$from_prologue: +> +> shl r9d,3 +> lea r10,[r9*2+r9] +> neg r9 +> mov r8,QWORD[r8] +> +> +> +> +> +> +> +> +> lea r11,[((-320))+r9*2+rsp] +> mov rbp,rsp +> sub r11,rdi +> and r11,4095 +> cmp r10,r11 +> jb NEAR $L$from_sp_alt +> sub rbp,r11 +> lea rbp,[((-320))+r9*2+rbp] +> jmp NEAR $L$from_sp_done +> +> ALIGN 32 +> $L$from_sp_alt: +> lea r10,[((4096-320))+r9*2] +> lea rbp,[((-320))+r9*2+rbp] +> sub r11,r10 +> mov r10,0 +> cmovc r11,r10 +> sub rbp,r11 +> $L$from_sp_done: +> and rbp,-64 +> mov r11,rsp +> sub r11,rbp +> and r11,-4096 +> lea rsp,[rbp*1+r11] +> mov r10,QWORD[rsp] +> cmp rsp,rbp +> ja NEAR $L$from_page_walk +> jmp NEAR $L$from_page_walk_done +> +> $L$from_page_walk: +> lea rsp,[((-4096))+rsp] +> mov r10,QWORD[rsp] +> cmp rsp,rbp +> ja NEAR $L$from_page_walk +> $L$from_page_walk_done: +> +> mov r10,r9 +> neg r9 +> +> +> +> +> +> +> +> +> +> +> mov QWORD[32+rsp],r8 +> mov QWORD[40+rsp],rax +> +> $L$from_body: +> mov r11,r9 +> lea rax,[48+rsp] +> pxor xmm0,xmm0 +> jmp NEAR $L$mul_by_1 +> +> ALIGN 32 +> $L$mul_by_1: +> movdqu xmm1,XMMWORD[rsi] +> movdqu xmm2,XMMWORD[16+rsi] +> movdqu xmm3,XMMWORD[32+rsi] +> movdqa XMMWORD[r9*1+rax],xmm0 +> movdqu xmm4,XMMWORD[48+rsi] +> movdqa XMMWORD[16+r9*1+rax],xmm0 +> DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 +> movdqa XMMWORD[rax],xmm1 +> movdqa XMMWORD[32+r9*1+rax],xmm0 +> movdqa XMMWORD[16+rax],xmm2 +> movdqa XMMWORD[48+r9*1+rax],xmm0 +> movdqa XMMWORD[32+rax],xmm3 +> movdqa XMMWORD[48+rax],xmm4 +> lea rax,[64+rax] +> sub r11,64 +> jnz NEAR $L$mul_by_1 +> +> DB 102,72,15,110,207 +> DB 102,72,15,110,209 +> DB 0x67 +> mov rbp,rcx +> DB 102,73,15,110,218 +> mov r11d,DWORD[((OPENSSL_ia32cap_P+8))] +> and r11d,0x80108 +> cmp r11d,0x80108 +> jne NEAR $L$from_mont_nox +> +> lea rdi,[r9*1+rax] +> call __bn_sqrx8x_reduction +> call __bn_postx4x_internal +> +> pxor xmm0,xmm0 +> lea rax,[48+rsp] +> jmp NEAR $L$from_mont_zero +> +> ALIGN 32 +> $L$from_mont_nox: +> call __bn_sqr8x_reduction +> call __bn_post4x_internal +> +> pxor xmm0,xmm0 +> lea rax,[48+rsp] +> jmp NEAR $L$from_mont_zero +> +> ALIGN 32 +> $L$from_mont_zero: +> mov rsi,QWORD[40+rsp] +> +> movdqa XMMWORD[rax],xmm0 +> movdqa XMMWORD[16+rax],xmm0 +> movdqa XMMWORD[32+rax],xmm0 +> movdqa XMMWORD[48+rax],xmm0 +> lea rax,[64+rax] +> sub r9,32 +> jnz NEAR $L$from_mont_zero +> +> mov rax,1 +> mov r15,QWORD[((-48))+rsi] +> +> mov r14,QWORD[((-40))+rsi] +> +> mov r13,QWORD[((-32))+rsi] +> +> mov r12,QWORD[((-24))+rsi] +> +> mov rbp,QWORD[((-16))+rsi] +> +> mov rbx,QWORD[((-8))+rsi] +> +> lea rsp,[rsi] +> +> $L$from_epilogue: +> mov rdi,QWORD[8+rsp] ;WIN64 epilogue +> mov rsi,QWORD[16+rsp] +> ret ; DB 0F3h,0C3h ;repret +> +> $L$SEH_end_bn_from_mont8x: +3799a3993,3996 +> +> DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase +> DD $L$SEH_end_bn_from_mont8x wrt ..imagebase +> DD $L$SEH_info_bn_from_mont8x wrt ..imagebase +3826a4024,4028 +> ALIGN 8 +> $L$SEH_info_bn_from_mont8x: +> DB 9,0,0,0 +> DD mul_handler wrt ..imagebase +> DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase diff --git a/openssl_source/x86_64-mont5.s.openssl1p.patch b/openssl_source/x86_64-mont5.s.openssl1p.patch new file mode 100644 index 00000000..61e6c307 --- /dev/null +++ b/openssl_source/x86_64-mont5.s.openssl1p.patch @@ -0,0 +1,180 @@ +2050a2051,2229 +> .globl bn_from_montgomery +> .type bn_from_montgomery,@function +> .align 32 +> bn_from_montgomery: +> .cfi_startproc +> testl $7,%r9d +> jz bn_from_mont8x +> xorl %eax,%eax +> ret ; .byte 0xf3,0xc3 +> .cfi_endproc +> .size bn_from_montgomery,.-bn_from_montgomery +> +> .type bn_from_mont8x,@function +> .align 32 +> bn_from_mont8x: +> .cfi_startproc +> .byte 0x67 +> movq %rsp,%rax +> .cfi_def_cfa_register %rax +> pushq %rbx +> .cfi_offset %rbx,-16 +> pushq %rbp +> .cfi_offset %rbp,-24 +> pushq %r12 +> .cfi_offset %r12,-32 +> pushq %r13 +> .cfi_offset %r13,-40 +> pushq %r14 +> .cfi_offset %r14,-48 +> pushq %r15 +> .cfi_offset %r15,-56 +> .Lfrom_prologue: +> +> shll $3,%r9d +> leaq (%r9,%r9,2),%r10 +> negq %r9 +> movq (%r8),%r8 +> +> +> +> +> +> +> +> +> leaq -320(%rsp,%r9,2),%r11 +> movq %rsp,%rbp +> subq %rdi,%r11 +> andq $4095,%r11 +> cmpq %r11,%r10 +> jb .Lfrom_sp_alt +> subq %r11,%rbp +> leaq -320(%rbp,%r9,2),%rbp +> jmp .Lfrom_sp_done +> +> .align 32 +> .Lfrom_sp_alt: +> leaq 4096-320(,%r9,2),%r10 +> leaq -320(%rbp,%r9,2),%rbp +> subq %r10,%r11 +> movq $0,%r10 +> cmovcq %r10,%r11 +> subq %r11,%rbp +> .Lfrom_sp_done: +> andq $-64,%rbp +> movq %rsp,%r11 +> subq %rbp,%r11 +> andq $-4096,%r11 +> leaq (%r11,%rbp,1),%rsp +> movq (%rsp),%r10 +> cmpq %rbp,%rsp +> ja .Lfrom_page_walk +> jmp .Lfrom_page_walk_done +> +> .Lfrom_page_walk: +> leaq -4096(%rsp),%rsp +> movq (%rsp),%r10 +> cmpq %rbp,%rsp +> ja .Lfrom_page_walk +> .Lfrom_page_walk_done: +> +> movq %r9,%r10 +> negq %r9 +> +> +> +> +> +> +> +> +> +> +> movq %r8,32(%rsp) +> movq %rax,40(%rsp) +> .cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 +> .Lfrom_body: +> movq %r9,%r11 +> leaq 48(%rsp),%rax +> pxor %xmm0,%xmm0 +> jmp .Lmul_by_1 +> +> .align 32 +> .Lmul_by_1: +> movdqu (%rsi),%xmm1 +> movdqu 16(%rsi),%xmm2 +> movdqu 32(%rsi),%xmm3 +> movdqa %xmm0,(%rax,%r9,1) +> movdqu 48(%rsi),%xmm4 +> movdqa %xmm0,16(%rax,%r9,1) +> .byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 +> movdqa %xmm1,(%rax) +> movdqa %xmm0,32(%rax,%r9,1) +> movdqa %xmm2,16(%rax) +> movdqa %xmm0,48(%rax,%r9,1) +> movdqa %xmm3,32(%rax) +> movdqa %xmm4,48(%rax) +> leaq 64(%rax),%rax +> subq $64,%r11 +> jnz .Lmul_by_1 +> +> .byte 102,72,15,110,207 +> .byte 102,72,15,110,209 +> .byte 0x67 +> movq %rcx,%rbp +> .byte 102,73,15,110,218 +> movl OPENSSL_ia32cap_P+8(%rip),%r11d +> andl $0x80108,%r11d +> cmpl $0x80108,%r11d +> jne .Lfrom_mont_nox +> +> leaq (%rax,%r9,1),%rdi +> call __bn_sqrx8x_reduction +> call __bn_postx4x_internal +> +> pxor %xmm0,%xmm0 +> leaq 48(%rsp),%rax +> jmp .Lfrom_mont_zero +> +> .align 32 +> .Lfrom_mont_nox: +> call __bn_sqr8x_reduction +> call __bn_post4x_internal +> +> pxor %xmm0,%xmm0 +> leaq 48(%rsp),%rax +> jmp .Lfrom_mont_zero +> +> .align 32 +> .Lfrom_mont_zero: +> movq 40(%rsp),%rsi +> .cfi_def_cfa %rsi,8 +> movdqa %xmm0,0(%rax) +> movdqa %xmm0,16(%rax) +> movdqa %xmm0,32(%rax) +> movdqa %xmm0,48(%rax) +> leaq 64(%rax),%rax +> subq $32,%r9 +> jnz .Lfrom_mont_zero +> +> movq $1,%rax +> movq -48(%rsi),%r15 +> .cfi_restore %r15 +> movq -40(%rsi),%r14 +> .cfi_restore %r14 +> movq -32(%rsi),%r13 +> .cfi_restore %r13 +> movq -24(%rsi),%r12 +> .cfi_restore %r12 +> movq -16(%rsi),%rbp +> .cfi_restore %rbp +> movq -8(%rsi),%rbx +> .cfi_restore %rbx +> leaq (%rsi),%rsp +> .cfi_def_cfa_register %rsp +> .Lfrom_epilogue: +> ret ; .byte 0xf3,0xc3 +> .cfi_endproc +> .size bn_from_mont8x,.-bn_from_mont8x