Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upstream merge 2024 10 17 #1934

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
54 changes: 9 additions & 45 deletions crypto/fipsmodule/sha/asm/sha1-586.pl
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,6 @@
# been tested.
$shaext = 0;

&external_label("OPENSSL_ia32cap_P") if ($xmm);


$A="eax";
$B="ebx";
Expand Down Expand Up @@ -322,40 +320,9 @@ sub BODY_40_59
}
}

&function_begin("sha1_block_data_order");
if ($xmm) {
&static_label("shaext_shortcut") if ($shaext);
&static_label("ssse3_shortcut");
&static_label("avx_shortcut") if ($ymm);
&static_label("K_XX_XX");
&static_label("K_XX_XX");

&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));

&mov ($A,&DWP(0,$T));
&mov ($D,&DWP(4,$T));
&test ($D,1<<9); # check SSSE3 bit
&jz (&label("x86"));
&mov ($C,&DWP(8,$T));
&test ($A,1<<24); # check FXSR bit
&jz (&label("x86"));
if ($shaext) {
&test ($C,1<<29); # check SHA bit
&jnz (&label("shaext_shortcut"));
}
if ($ymm) {
&and ($D,1<<28); # mask AVX bit
&and ($A,1<<30); # mask "Intel CPU" bit
&or ($A,$D);
&cmp ($A,1<<28|1<<30);
&je (&label("avx_shortcut"));
}
&jmp (&label("ssse3_shortcut"));
&set_label("x86",16);
}
&function_begin("sha1_block_data_order_nohw");
&mov($tmp1,&wparam(0)); # SHA_CTX *c
&mov($T,&wparam(1)); # const void *input
&mov($A,&wparam(2)); # size_t num
Expand Down Expand Up @@ -421,7 +388,7 @@ sub BODY_40_59
&jb(&label("loop"));

&stack_pop(16+3);
&function_end("sha1_block_data_order");
&function_end("sha1_block_data_order_nohw");

if ($xmm) {
if ($shaext) {
Expand All @@ -446,12 +413,11 @@ sub sha1op38 {
sub sha1msg1 { sha1op38(0xc9,@_); }
sub sha1msg2 { sha1op38(0xca,@_); }

&function_begin("_sha1_block_data_order_shaext");
&function_begin("sha1_block_data_order_shaext");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
&set_label("shaext_shortcut");
&mov ($ctx,&wparam(0));
&mov ("ebx","esp");
&mov ($inp,&wparam(1));
Expand Down Expand Up @@ -533,7 +499,7 @@ sub sha1op38 {
&movdqu (&QWP(0,$ctx),$ABCD)
&movd (&DWP(16,$ctx),$E);
&mov ("esp","ebx");
&function_end("_sha1_block_data_order_shaext");
&function_end("sha1_block_data_order_shaext");
}
######################################################################
# The SSSE3 implementation.
Expand Down Expand Up @@ -569,12 +535,11 @@ sub sha1op38 {
my $_rol=sub { &rol(@_) };
my $_ror=sub { &ror(@_) };

&function_begin("_sha1_block_data_order_ssse3");
&function_begin("sha1_block_data_order_ssse3");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
&set_label("ssse3_shortcut");

&movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
&movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39
Expand Down Expand Up @@ -1097,7 +1062,7 @@ ()
&mov (&DWP(12,@T[1]),$D);
&mov (&DWP(16,@T[1]),$E);

&function_end("_sha1_block_data_order_ssse3");
&function_end("sha1_block_data_order_ssse3");

$rx=0; # reset

Expand All @@ -1112,12 +1077,11 @@ ()
my $_rol=sub { &shld(@_[0],@_) };
my $_ror=sub { &shrd(@_[0],@_) };

&function_begin("_sha1_block_data_order_avx");
&function_begin("sha1_block_data_order_avx");
&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($tmp1);
&lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
&set_label("avx_shortcut");
&vzeroall();

&vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19
Expand Down Expand Up @@ -1470,7 +1434,7 @@ ()
&mov (&DWP(8,@T[1]),$C);
&mov (&DWP(12,@T[1]),$D);
&mov (&DWP(16,@T[1]),$E);
&function_end("_sha1_block_data_order_avx");
&function_end("sha1_block_data_order_avx");
}
&set_label("K_XX_XX",64);
&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
Expand Down
159 changes: 120 additions & 39 deletions crypto/fipsmodule/sha/asm/sha256-586.pl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
# versions, but BoringSSL is intended to be used with pre-generated perlasm
# output, so this isn't useful anyway.
#
# TODO(davidben): Enable AVX2 code after testing by setting $avx to 2.
# TODO(davidben): Enable AVX+BMI2 code after testing by setting $avx to 2.
$avx = 1;

$avx = 0 unless ($xmm);
Expand Down Expand Up @@ -190,9 +190,9 @@ ()
&add ($A,$T); # h += T
}

&external_label("OPENSSL_ia32cap_P") if (!$i386);
&static_label("K256");

&function_begin("sha256_block_data_order");
&function_begin("sha256_block_data_order_nohw");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
Expand All @@ -213,28 +213,6 @@ ()
&mov (&DWP(8,"esp"),"eax"); # inp+num*128
&mov (&DWP(12,"esp"),"ebx"); # saved sp
if (!$i386 && $xmm) {
&picmeup("edx","OPENSSL_ia32cap_P",$K256,&label("K256"));
&mov ("ecx",&DWP(0,"edx"));
&mov ("ebx",&DWP(4,"edx"));
&test ("ecx",1<<20); # check for P4
&jnz (&label("loop"));
&mov ("edx",&DWP(8,"edx")) if ($xmm);
&test ("ecx",1<<24); # check for FXSR
&jz ($unroll_after?&label("no_xmm"):&label("loop"));
&and ("ecx",1<<30); # mask "Intel CPU" bit
&and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits
&test ("edx",1<<29) if ($shaext); # check for SHA
&jnz (&label("shaext")) if ($shaext);
&or ("ecx","ebx");
&and ("ecx",1<<28|1<<30);
&cmp ("ecx",1<<28|1<<30);
if ($xmm) {
&je (&label("AVX")) if ($avx);
&test ("ebx",1<<9); # check for SSSE3
&jnz (&label("SSSE3"));
} else {
&je (&label("loop_shrd"));
}
if ($unroll_after) {
&set_label("no_xmm");
&sub ("eax","edi");
Expand Down Expand Up @@ -522,6 +500,8 @@ ()
&mov ("esp",&DWP(96+12,"esp")); # restore sp
&function_end_A();
}
&function_end_B("sha256_block_data_order_nohw");

if (!$i386 && $xmm) {{{
if ($shaext) {
######################################################################
Expand All @@ -540,7 +520,33 @@ sub sha256op38 {
sub sha256msg1 { sha256op38(0xcc,@_); }
sub sha256msg2 { sha256op38(0xcd,@_); }

&set_label("shaext",32);
&function_begin("sha256_block_data_order_hw");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp

&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

&sub ("esp",16);
&and ("esp",-64);

&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&sub ("esp",32);

&movdqu ($ABEF,&QWP(0,$ctx)); # DCBA
Expand Down Expand Up @@ -660,14 +666,40 @@ sub sha256op38 {
&mov ("esp",&DWP(32+12,"esp"));
&movdqu (&QWP(0,$ctx),$ABEF);
&movdqu (&QWP(16,$ctx),$CDGH);
&function_end_A();
&function_end("sha256_block_data_order_shaext");
}

my @X = map("xmm$_",(0..3));
my ($t0,$t1,$t2,$t3) = map("xmm$_",(4..7));
my @AH = ($A,$T);

&set_label("SSSE3",32);
&function_begin("sha256_block_data_order_ssse3");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp

&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

&sub ("esp",16);
&and ("esp",-64);

&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&lea ("esp",&DWP(-96,"esp"));
# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
&mov ($AH[0],&DWP(0,"esi"));
Expand Down Expand Up @@ -975,14 +1007,36 @@ ()
&jb (&label("grand_ssse3"));

&mov ("esp",&DWP(96+12,"esp")); # restore sp
&function_end_A();
&function_end("sha256_block_data_order_ssse3");

if ($avx) {
&set_label("AVX",32);
if ($avx>1) {
&and ("edx",1<<8|1<<3); # check for BMI2+BMI1
&cmp ("edx",1<<8|1<<3);
&je (&label("AVX_BMI"));
}
&function_begin("sha256_block_data_order_avx");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp

&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

&sub ("esp",16);
&and ("esp",-64);

&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&lea ("esp",&DWP(-96,"esp"));
&vzeroall ();
# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
Expand Down Expand Up @@ -1142,7 +1196,8 @@ ()

&mov ("esp",&DWP(96+12,"esp")); # restore sp
&vzeroall ();
&function_end_A();
&function_end("sha256_block_data_order_avx");

if ($avx>1) {
sub bodyx_00_15 () { # +10%
(
Expand Down Expand Up @@ -1179,7 +1234,34 @@ ()
);
}

&set_label("AVX_BMI",32);
# If enabled, this function should be gated on AVX, BMI1, and BMI2.
&function_begin("sha256_block_data_order_avx_bmi");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp

&call (&label("pic_point")); # make it PIC!
&set_label("pic_point");
&blindpop($K256);
&lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));

&sub ("esp",16);
&and ("esp",-64);

&shl ("eax",6);
&add ("eax","edi");
&mov (&DWP(0,"esp"),"esi"); # ctx
&mov (&DWP(4,"esp"),"edi"); # inp
&mov (&DWP(8,"esp"),"eax"); # inp+num*64
&mov (&DWP(12,"esp"),"ebx"); # saved sp

# TODO(davidben): The preamble above this point comes from the original
# merged sha256_block_data_order function, which performed some common
# setup and then jumped to the particular SHA-256 implementation. The
# parts of the preamble that do not apply to this function can be
# removed.

&lea ("esp",&DWP(-96,"esp"));
&vzeroall ();
# copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack
Expand Down Expand Up @@ -1283,11 +1365,10 @@ ()

&mov ("esp",&DWP(96+12,"esp")); # restore sp
&vzeroall ();
&function_end_A();
&function_end("sha256_block_data_order_avx_bmi");
}
}
}}}
&function_end_B("sha256_block_data_order");

&asm_finish();

Expand Down
Loading
Loading