From 09e9443c845762c5270eecc7ff8a9ebc0bdc1f07 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 27 Jan 2024 17:24:33 -0500 Subject: [PATCH 1/9] Write down the bounds for the sha*_block_data_order functions May as well. Change-Id: I0d15f0f6f703129f08df4a105a34c8ff06cb06b5 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65871 Commit-Queue: David Benjamin Reviewed-by: Bob Beck (cherry picked from commit 70054a3a7bf1c3b26a2fcb9b064002e6962e3480) --- crypto/fipsmodule/sha/internal.h | 37 ++++++++++++++++---------------- crypto/fipsmodule/sha/sha1.c | 6 +++--- crypto/fipsmodule/sha/sha256.c | 6 +++--- crypto/fipsmodule/sha/sha512.c | 8 +++---- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h index 29970940bb..d10f408e7d 100644 --- a/crypto/fipsmodule/sha/internal.h +++ b/crypto/fipsmodule/sha/internal.h @@ -99,11 +99,11 @@ void sha1_block_data_order(uint32_t *state, const uint8_t *data, #define SHA256_ASM #define SHA512_ASM -void sha1_block_data_order(uint32_t *state, const uint8_t *data, +void sha1_block_data_order(uint32_t state[5], const uint8_t *data, size_t num_blocks); -void sha256_block_data_order(uint32_t *state, const uint8_t *data, +void sha256_block_data_order(uint32_t state[8], const uint8_t *data, size_t num_blocks); -void sha512_block_data_order(uint64_t *state, const uint8_t *data, +void sha512_block_data_order(uint64_t state[8], const uint8_t *data, size_t num_blocks); #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) @@ -118,7 +118,7 @@ OPENSSL_INLINE int sha1_hw_capable(void) { } #define SHA1_ASM_NEON -void sha1_block_data_order_neon(uint32_t *state, const uint8_t *data, +void sha1_block_data_order_neon(uint32_t state[5], const uint8_t *data, size_t num); #define SHA256_ASM_HW @@ -127,12 +127,12 @@ OPENSSL_INLINE int sha256_hw_capable(void) { } #define SHA256_ASM_NEON -void sha256_block_data_order_neon(uint32_t *state, const uint8_t *data, 
+void sha256_block_data_order_neon(uint32_t state[8], const uint8_t *data, size_t num); // Armv8.2 SHA-512 instructions are not available in 32-bit. #define SHA512_ASM_NEON -void sha512_block_data_order_neon(uint64_t *state, const uint8_t *data, +void sha512_block_data_order_neon(uint64_t state[8], const uint8_t *data, size_t num); #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_AARCH64) @@ -175,7 +175,7 @@ OPENSSL_INLINE int sha1_avx2_capable(void) { return CRYPTO_is_AVX2_capable() && CRYPTO_is_BMI2_capable() && CRYPTO_is_BMI1_capable() && CRYPTO_is_SSSE3_capable(); } -void sha1_block_data_order_avx2(uint32_t *state, const uint8_t *data, +void sha1_block_data_order_avx2(uint32_t state[5], const uint8_t *data, size_t num); #define SHA1_ASM_AVX @@ -188,14 +188,14 @@ OPENSSL_INLINE int sha1_avx_capable(void) { return CRYPTO_is_AVX_capable() && CRYPTO_is_SSSE3_capable() && CRYPTO_is_intel_cpu(); } -void sha1_block_data_order_avx(uint32_t *state, const uint8_t *data, +void sha1_block_data_order_avx(uint32_t state[5], const uint8_t *data, size_t num); #define SHA1_ASM_SSSE3 OPENSSL_INLINE int sha1_ssse3_capable(void) { return CRYPTO_is_SSSE3_capable(); } -void sha1_block_data_order_ssse3(uint32_t *state, const uint8_t *data, +void sha1_block_data_order_ssse3(uint32_t state[5], const uint8_t *data, size_t num); #define SHA256_ASM_HW @@ -213,14 +213,14 @@ OPENSSL_INLINE int sha256_avx_capable(void) { return CRYPTO_is_AVX_capable() && CRYPTO_is_SSSE3_capable() && CRYPTO_is_intel_cpu(); } -void sha256_block_data_order_avx(uint32_t *state, const uint8_t *data, +void sha256_block_data_order_avx(uint32_t state[8], const uint8_t *data, size_t num); #define SHA256_ASM_SSSE3 OPENSSL_INLINE int sha256_ssse3_capable(void) { return CRYPTO_is_SSSE3_capable(); } -void sha256_block_data_order_ssse3(uint32_t *state, const uint8_t *data, +void sha256_block_data_order_ssse3(uint32_t state[8], const uint8_t *data, size_t num); #define SHA512_ASM_AVX @@ -233,35 +233,36 @@ OPENSSL_INLINE 
int sha512_avx_capable(void) { return CRYPTO_is_AVX_capable() && CRYPTO_is_SSSE3_capable() && CRYPTO_is_intel_cpu(); } -void sha512_block_data_order_avx(uint64_t *state, const uint8_t *data, +void sha512_block_data_order_avx(uint64_t state[8], const uint8_t *data, size_t num); #endif #if defined(SHA1_ASM_HW) -void sha1_block_data_order_hw(uint32_t *state, const uint8_t *data, size_t num); +void sha1_block_data_order_hw(uint32_t state[5], const uint8_t *data, + size_t num); #endif #if defined(SHA1_ASM_NOHW) -void sha1_block_data_order_nohw(uint32_t *state, const uint8_t *data, +void sha1_block_data_order_nohw(uint32_t state[5], const uint8_t *data, size_t num); #endif #if defined(SHA256_ASM_HW) -void sha256_block_data_order_hw(uint32_t *state, const uint8_t *data, +void sha256_block_data_order_hw(uint32_t state[8], const uint8_t *data, size_t num); #endif #if defined(SHA256_ASM_NOHW) -void sha256_block_data_order_nohw(uint32_t *state, const uint8_t *data, +void sha256_block_data_order_nohw(uint32_t state[8], const uint8_t *data, size_t num); #endif #if defined(SHA512_ASM_HW) -void sha512_block_data_order_hw(uint64_t *state, const uint8_t *data, +void sha512_block_data_order_hw(uint64_t state[8], const uint8_t *data, size_t num); #endif #if defined(SHA512_ASM_NOHW) -void sha512_block_data_order_nohw(uint64_t *state, const uint8_t *data, +void sha512_block_data_order_nohw(uint64_t state[8], const uint8_t *data, size_t num); #endif diff --git a/crypto/fipsmodule/sha/sha1.c b/crypto/fipsmodule/sha/sha1.c index 9387e0f090..6bb9969fc7 100644 --- a/crypto/fipsmodule/sha/sha1.c +++ b/crypto/fipsmodule/sha/sha1.c @@ -113,7 +113,7 @@ uint8_t *SHA1(const uint8_t *data, size_t len, uint8_t out[SHA_DIGEST_LENGTH]) { } #if !defined(SHA1_ASM) && !defined(SHA1_ALTIVEC) -static void sha1_block_data_order(uint32_t *state, const uint8_t *data, +static void sha1_block_data_order(uint32_t state[5], const uint8_t *data, size_t num); #endif @@ -238,7 +238,7 @@ int SHA1_get_state(SHA_CTX 
*ctx, uint8_t out_h[SHA1_CHAINING_LENGTH], #if !defined(SHA1_ASM) && !defined(SHA1_ALTIVEC) #if !defined(SHA1_ASM_NOHW) -static void sha1_block_data_order_nohw(uint32_t *state, const uint8_t *data, +static void sha1_block_data_order_nohw(uint32_t state[5], const uint8_t *data, size_t num) { register uint32_t A, B, C, D, E, T; uint32_t XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7, XX8, XX9, XX10, @@ -388,7 +388,7 @@ static void sha1_block_data_order_nohw(uint32_t *state, const uint8_t *data, } #endif // !SHA1_ASM_NOHW -static void sha1_block_data_order(uint32_t *state, const uint8_t *data, +static void sha1_block_data_order(uint32_t state[5], const uint8_t *data, size_t num) { #if defined(SHA1_ASM_HW) if (sha1_hw_capable()) { diff --git a/crypto/fipsmodule/sha/sha256.c b/crypto/fipsmodule/sha/sha256.c index e39dcede5c..f36fb8b716 100644 --- a/crypto/fipsmodule/sha/sha256.c +++ b/crypto/fipsmodule/sha/sha256.c @@ -169,7 +169,7 @@ uint8_t *SHA256(const uint8_t *data, size_t len, } #if !defined(SHA256_ASM) -static void sha256_block_data_order(uint32_t *state, const uint8_t *in, +static void sha256_block_data_order(uint32_t state[8], const uint8_t *in, size_t num); #endif @@ -295,7 +295,7 @@ static const uint32_t K256[64] = { ROUND_00_15(i, a, b, c, d, e, f, g, h); \ } while (0) -static void sha256_block_data_order_nohw(uint32_t *state, const uint8_t *data, +static void sha256_block_data_order_nohw(uint32_t state[8], const uint8_t *data, size_t num) { uint32_t a, b, c, d, e, f, g, h, s0, s1, T1; uint32_t X[16]; @@ -384,7 +384,7 @@ static void sha256_block_data_order_nohw(uint32_t *state, const uint8_t *data, #endif // !defined(SHA256_ASM_NOHW) -static void sha256_block_data_order(uint32_t *state, const uint8_t *data, +static void sha256_block_data_order(uint32_t state[8], const uint8_t *data, size_t num) { #if defined(SHA256_ASM_HW) if (sha256_hw_capable()) { diff --git a/crypto/fipsmodule/sha/sha512.c b/crypto/fipsmodule/sha/sha512.c index f7dbe2c159..a5d0bab5c6 100644 --- 
a/crypto/fipsmodule/sha/sha512.c +++ b/crypto/fipsmodule/sha/sha512.c @@ -267,7 +267,7 @@ uint8_t *SHA512_256(const uint8_t *data, size_t len, } #if !defined(SHA512_ASM) -static void sha512_block_data_order(uint64_t *state, const uint8_t *in, +static void sha512_block_data_order(uint64_t state[8], const uint8_t *in, size_t num_blocks); #endif @@ -522,7 +522,7 @@ static const uint64_t K512[80] = { #if defined(__i386) || defined(__i386__) || defined(_M_IX86) // This code should give better results on 32-bit CPU with less than // ~24 registers, both size and performance wise... -static void sha512_block_data_order_nohw(uint64_t *state, const uint8_t *in, +static void sha512_block_data_order_nohw(uint64_t state[8], const uint8_t *in, size_t num) { uint64_t A, E, T; uint64_t X[9 + 80], *F; @@ -595,7 +595,7 @@ static void sha512_block_data_order_nohw(uint64_t *state, const uint8_t *in, ROUND_00_15(i + j, a, b, c, d, e, f, g, h); \ } while (0) -static void sha512_block_data_order_nohw(uint64_t *state, const uint8_t *in, +static void sha512_block_data_order_nohw(uint64_t state[8], const uint8_t *in, size_t num) { uint64_t a, b, c, d, e, f, g, h, s0, s1, T1; uint64_t X[16]; @@ -681,7 +681,7 @@ static void sha512_block_data_order_nohw(uint64_t *state, const uint8_t *in, #endif // !SHA512_ASM_NOHW -static void sha512_block_data_order(uint64_t *state, const uint8_t *data, +static void sha512_block_data_order(uint64_t state[8], const uint8_t *data, size_t num) { #if defined(SHA512_ASM_HW) if (sha512_hw_capable()) { From 441380edb628902417fe32878973f2aa0792e028 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 27 Jan 2024 18:03:01 -0500 Subject: [PATCH 2/9] Move capability checks in sha1-586.pl to C sha256-586.pl and sha512-586.pl have their own unique challenges, so I'll do them separately. 
Bug: 673 Change-Id: Ic9be0454fddf75e7f49bcccd8a86a4ff8862ff67 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65872 Commit-Queue: David Benjamin Reviewed-by: Bob Beck (cherry picked from commit 11688849bf1c9d4d0d1ad8aa7cd860275284317b) --- crypto/fipsmodule/sha/asm/sha1-586.pl | 54 +++++---------------------- crypto/fipsmodule/sha/internal.h | 48 ++++++++++++++++++------ 2 files changed, 45 insertions(+), 57 deletions(-) diff --git a/crypto/fipsmodule/sha/asm/sha1-586.pl b/crypto/fipsmodule/sha/asm/sha1-586.pl index 5a9a4f2b86..d6c3a6df79 100644 --- a/crypto/fipsmodule/sha/asm/sha1-586.pl +++ b/crypto/fipsmodule/sha/asm/sha1-586.pl @@ -145,8 +145,6 @@ # been tested. $shaext = 0; -&external_label("OPENSSL_ia32cap_P") if ($xmm); - $A="eax"; $B="ebx"; @@ -322,40 +320,9 @@ sub BODY_40_59 } } -&function_begin("sha1_block_data_order"); -if ($xmm) { - &static_label("shaext_shortcut") if ($shaext); - &static_label("ssse3_shortcut"); - &static_label("avx_shortcut") if ($ymm); - &static_label("K_XX_XX"); +&static_label("K_XX_XX"); - &call (&label("pic_point")); # make it PIC! 
- &set_label("pic_point"); - &blindpop($tmp1); - &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); - &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); - - &mov ($A,&DWP(0,$T)); - &mov ($D,&DWP(4,$T)); - &test ($D,1<<9); # check SSSE3 bit - &jz (&label("x86")); - &mov ($C,&DWP(8,$T)); - &test ($A,1<<24); # check FXSR bit - &jz (&label("x86")); - if ($shaext) { - &test ($C,1<<29); # check SHA bit - &jnz (&label("shaext_shortcut")); - } - if ($ymm) { - &and ($D,1<<28); # mask AVX bit - &and ($A,1<<30); # mask "Intel CPU" bit - &or ($A,$D); - &cmp ($A,1<<28|1<<30); - &je (&label("avx_shortcut")); - } - &jmp (&label("ssse3_shortcut")); - &set_label("x86",16); -} +&function_begin("sha1_block_data_order_nohw"); &mov($tmp1,&wparam(0)); # SHA_CTX *c &mov($T,&wparam(1)); # const void *input &mov($A,&wparam(2)); # size_t num @@ -421,7 +388,7 @@ sub BODY_40_59 &jb(&label("loop")); &stack_pop(16+3); -&function_end("sha1_block_data_order"); +&function_end("sha1_block_data_order_nohw"); if ($xmm) { if ($shaext) { @@ -446,12 +413,11 @@ sub sha1op38 { sub sha1msg1 { sha1op38(0xc9,@_); } sub sha1msg2 { sha1op38(0xca,@_); } -&function_begin("_sha1_block_data_order_shaext"); +&function_begin("sha1_block_data_order_shaext"); &call (&label("pic_point")); # make it PIC! &set_label("pic_point"); &blindpop($tmp1); &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("shaext_shortcut"); &mov ($ctx,&wparam(0)); &mov ("ebx","esp"); &mov ($inp,&wparam(1)); @@ -533,7 +499,7 @@ sub sha1op38 { &movdqu (&QWP(0,$ctx),$ABCD) &movd (&DWP(16,$ctx),$E); &mov ("esp","ebx"); -&function_end("_sha1_block_data_order_shaext"); +&function_end("sha1_block_data_order_shaext"); } ###################################################################### # The SSSE3 implementation. 
@@ -569,12 +535,11 @@ sub sha1op38 { my $_rol=sub { &rol(@_) }; my $_ror=sub { &ror(@_) }; -&function_begin("_sha1_block_data_order_ssse3"); +&function_begin("sha1_block_data_order_ssse3"); &call (&label("pic_point")); # make it PIC! &set_label("pic_point"); &blindpop($tmp1); &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("ssse3_shortcut"); &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 &movdqa (@X[4],&QWP(16,$tmp1)); # K_20_39 @@ -1097,7 +1062,7 @@ () &mov (&DWP(12,@T[1]),$D); &mov (&DWP(16,@T[1]),$E); -&function_end("_sha1_block_data_order_ssse3"); +&function_end("sha1_block_data_order_ssse3"); $rx=0; # reset @@ -1112,12 +1077,11 @@ () my $_rol=sub { &shld(@_[0],@_) }; my $_ror=sub { &shrd(@_[0],@_) }; -&function_begin("_sha1_block_data_order_avx"); +&function_begin("sha1_block_data_order_avx"); &call (&label("pic_point")); # make it PIC! &set_label("pic_point"); &blindpop($tmp1); &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); -&set_label("avx_shortcut"); &vzeroall(); &vmovdqa(@X[3],&QWP(0,$tmp1)); # K_00_19 @@ -1470,7 +1434,7 @@ () &mov (&DWP(8,@T[1]),$C); &mov (&DWP(12,@T[1]),$D); &mov (&DWP(16,@T[1]),$E); -&function_end("_sha1_block_data_order_avx"); +&function_end("sha1_block_data_order_avx"); } &set_label("K_XX_XX",64); &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h index d10f408e7d..87b9348831 100644 --- a/crypto/fipsmodule/sha/internal.h +++ b/crypto/fipsmodule/sha/internal.h @@ -85,6 +85,7 @@ struct keccak_st { uint8_t pad; // padding character uint8_t padded; // denotes if padding has been performed }; + // Define SHA{n}[_{variant}]_ASM if sha{n}_block_data_order[_{variant}] is // defined in assembly. 
@@ -94,18 +95,6 @@ struct keccak_st { void sha1_block_data_order(uint32_t *state, const uint8_t *data, size_t num_blocks); -#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) -#define SHA1_ASM -#define SHA256_ASM -#define SHA512_ASM - -void sha1_block_data_order(uint32_t state[5], const uint8_t *data, - size_t num_blocks); -void sha256_block_data_order(uint32_t state[8], const uint8_t *data, - size_t num_blocks); -void sha512_block_data_order(uint64_t state[8], const uint8_t *data, - size_t num_blocks); - #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) #define SHA1_ASM_NOHW @@ -156,6 +145,41 @@ OPENSSL_INLINE int sha512_hw_capable(void) { return CRYPTO_is_ARMv8_SHA512_capable(); } +#elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) + +#define SHA1_ASM_NOHW + +#define SHA1_ASM_SSSE3 +OPENSSL_INLINE int sha1_ssse3_capable(void) { + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable(); +} +void sha1_block_data_order_ssse3(uint32_t state[5], const uint8_t *data, + size_t num); + +#define SHA1_ASM_AVX +OPENSSL_INLINE int sha1_avx_capable(void) { + // Pre-Zen AMD CPUs had slow SHLD/SHRD; Zen added the SHA extension; see the + // discussion in sha1-586.pl. + // + // TODO(davidben): Should we enable SHAEXT on 32-bit x86? + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_AVX_capable() && CRYPTO_is_intel_cpu() && + CRYPTO_is_FXSR_capable(); +} +void sha1_block_data_order_avx(uint32_t state[5], const uint8_t *data, + size_t num); + +// TODO(crbug.com/boringssl/673): Move the remaining CPU dispatch to C. 
+#define SHA256_ASM +#define SHA512_ASM +void sha256_block_data_order(uint32_t state[8], const uint8_t *data, + size_t num_blocks); +void sha512_block_data_order(uint64_t state[8], const uint8_t *data, + size_t num_blocks); + #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) #define SHA1_ASM_NOHW From fd70e3d5b5997e8ad33acd766d7afe6c9235ba39 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Fri, 26 Jan 2024 07:47:07 -0500 Subject: [PATCH 3/9] Document some miscellaneous x509.h functions These three aren't part of some larger category of functions. Bug: 426 Change-Id: I94c977b20c6e6beb51df9d89f86851c960b4dfc6 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65809 Commit-Queue: David Benjamin Reviewed-by: Bob Beck (cherry picked from commit e273d1d18794ceb236bd426ec91079b2ebc660bb) --- include/openssl/ex_data.h | 16 ++++++++++++-- include/openssl/x509.h | 45 ++++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/include/openssl/ex_data.h b/include/openssl/ex_data.h index 5fac7c3d9c..ebc4e038eb 100644 --- a/include/openssl/ex_data.h +++ b/include/openssl/ex_data.h @@ -129,11 +129,11 @@ typedef struct crypto_ex_data_st CRYPTO_EX_DATA; // Type-specific functions. -// -// Each type that supports ex_data provides three functions: #if 0 // Sample +// Each type that supports ex_data provides three functions: + // TYPE_get_ex_new_index allocates a new index for |TYPE|. An optional // |free_func| argument may be provided which is called when the owning object // is destroyed. See |CRYPTO_EX_free| for details. The |argl| and |argp| @@ -153,6 +153,18 @@ OPENSSL_EXPORT int TYPE_set_ex_data(TYPE *t, int index, void *arg); // previous call to |TYPE_get_ex_new_index|. OPENSSL_EXPORT void *TYPE_get_ex_data(const TYPE *t, int index); +// Some types additionally preallocate index zero, with all callbacks set to +// NULL. Applications that do not need the general ex_data machinery may use +// this instead. 
+ +// TYPE_set_app_data sets |t|'s application data pointer to |arg|. It returns +// one on success and zero on error. +OPENSSL_EXPORT int TYPE_set_app_data(TYPE *t, void *arg); + +// TYPE_get_app_data returns the application data pointer for |t|, or NULL if no +// such pointer exists. +OPENSSL_EXPORT void *TYPE_get_app_data(const TYPE *t); + #endif // Sample diff --git a/include/openssl/x509.h b/include/openssl/x509.h index c59fd7ac1e..624b94007c 100644 --- a/include/openssl/x509.h +++ b/include/openssl/x509.h @@ -485,6 +485,22 @@ OPENSSL_EXPORT STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(const X509 *x509); // |OPENSSL_malloc|. If |sk| is NULL, no action is taken. OPENSSL_EXPORT void X509_email_free(STACK_OF(OPENSSL_STRING) *sk); +// X509_cmp compares |a| and |b| and returns zero if they are equal, a negative +// number if |b| sorts after |a| and a positive number if |a| sorts after |b|. +// The sort order implemented by this function is arbitrary and does not +// reflect properties of the certificate such as expiry. Applications should not +// rely on the order itself. +// +// TODO(https://crbug.com/boringssl/355): This function works by comparing a +// cached hash of the encoded certificate. If |a| or |b| could not be +// serialized, the current behavior is to compare all unencodable certificates +// as equal. This function should only be used with |X509| objects that were +// parsed from bytes and never mutated. +// +// TODO(https://crbug.com/boringssl/407): This function is const, but it is not +// always thread-safe, notably if |a| and |b| were mutated. +OPENSSL_EXPORT int X509_cmp(const X509 *a, const X509 *b); + // Issuing certificates. // @@ -754,6 +770,18 @@ OPENSSL_EXPORT X509_CRL *d2i_X509_CRL(X509_CRL **out, const uint8_t **inp, // mutated. 
OPENSSL_EXPORT int i2d_X509_CRL(X509_CRL *crl, uint8_t **outp); +// X509_CRL_match compares |a| and |b| and returns zero if they are equal, a +// negative number if |b| sorts after |a| and a positive number if |a| sorts +// after |b|. The sort order implemented by this function is arbitrary and does +// not reflect properties of the CRL such as expiry. Applications should not +// rely on the order itself. +// +// TODO(https://crbug.com/boringssl/355): This function works by comparing a +// cached hash of the encoded CRL. This cached hash is computed when the CRL is +// parsed, but not when mutating or issuing CRLs. This function should only be +// used with |X509_CRL| objects that were parsed from bytes and never mutated. +OPENSSL_EXPORT int X509_CRL_match(const X509_CRL *a, const X509_CRL *b); + #define X509_CRL_VERSION_1 0 #define X509_CRL_VERSION_2 1 @@ -3794,6 +3822,10 @@ OPENSSL_EXPORT int X509_STORE_get_ex_new_index(long argl, void *argp, OPENSSL_EXPORT int X509_STORE_set_ex_data(X509_STORE *ctx, int idx, void *data); OPENSSL_EXPORT void *X509_STORE_get_ex_data(X509_STORE *ctx, int idx); +#define X509_STORE_CTX_set_app_data(ctx, data) \ + X509_STORE_CTX_set_ex_data(ctx, 0, data) +#define X509_STORE_CTX_get_app_data(ctx) X509_STORE_CTX_get_ex_data(ctx, 0) + // Hashing and signing ASN.1 structures. // ASN1_digest serializes |data| with |i2d| and then hashes the result with @@ -4490,11 +4522,6 @@ OPENSSL_EXPORT const char *X509_get_default_cert_dir_env(void); OPENSSL_EXPORT const char *X509_get_default_cert_file_env(void); OPENSSL_EXPORT const char *X509_get_default_private_dir(void); - -OPENSSL_EXPORT int X509_TRUST_set(int *t, int trust); - -OPENSSL_EXPORT int X509_cmp(const X509 *a, const X509 *b); - // X509_NAME_hash returns a hash of |name|, or zero on error. This is the new // hash used by |X509_LOOKUP_hash_dir|. // @@ -4521,8 +4548,7 @@ OPENSSL_EXPORT uint32_t X509_NAME_hash(X509_NAME *name); // value. 
OPENSSL_EXPORT uint32_t X509_NAME_hash_old(X509_NAME *name); -OPENSSL_EXPORT int X509_CRL_match(const X509_CRL *a, const X509_CRL *b); - +OPENSSL_EXPORT int X509_TRUST_set(int *t, int trust); OPENSSL_EXPORT int X509_TRUST_get_count(void); OPENSSL_EXPORT const X509_TRUST *X509_TRUST_get0(int idx); OPENSSL_EXPORT int X509_TRUST_get_by_id(int id); @@ -4530,7 +4556,6 @@ OPENSSL_EXPORT int X509_TRUST_get_flags(const X509_TRUST *xp); OPENSSL_EXPORT char *X509_TRUST_get0_name(const X509_TRUST *xp); OPENSSL_EXPORT int X509_TRUST_get_trust(const X509_TRUST *xp); - /* SSL_CTX -> X509_STORE -> X509_LOOKUP @@ -4555,10 +4580,6 @@ certificate chain. DEFINE_STACK_OF(X509_OBJECT) -#define X509_STORE_CTX_set_app_data(ctx, data) \ - X509_STORE_CTX_set_ex_data(ctx, 0, data) -#define X509_STORE_CTX_get_app_data(ctx) X509_STORE_CTX_get_ex_data(ctx, 0) - #define X509_L_FILE_LOAD 1 #define X509_L_ADD_DIR 2 From d1c871c5e6a7b568a96a547bd05212d50fd2d99d Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 27 Jan 2024 18:20:03 -0500 Subject: [PATCH 4/9] Remove old "check for P4" in sha256-586.pl Historically, OPENSSL_ia32cap_P used reserved bit 20 to trigger some variation in the RC4 implementation. That same bit was used in upstream's f889bb03841c69330d1e14a873c9982d3702f2d0 to disable some optimization on the Pentium 4. We've long since always cleared that flag (see cpu_intel.c), so this is dead code. Remove it. 
Change-Id: Ib9c0d88235617941833eb8aabb17a4713bdf6606 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65873 Commit-Queue: David Benjamin Reviewed-by: Bob Beck (cherry picked from commit 3f119b7f774900ce22e9b65068e10aa7bdc7fd91) --- crypto/fipsmodule/sha/asm/sha256-586.pl | 2 -- 1 file changed, 2 deletions(-) diff --git a/crypto/fipsmodule/sha/asm/sha256-586.pl b/crypto/fipsmodule/sha/asm/sha256-586.pl index 1db6206802..4baaa8eb04 100644 --- a/crypto/fipsmodule/sha/asm/sha256-586.pl +++ b/crypto/fipsmodule/sha/asm/sha256-586.pl @@ -216,8 +216,6 @@ () &picmeup("edx","OPENSSL_ia32cap_P",$K256,&label("K256")); &mov ("ecx",&DWP(0,"edx")); &mov ("ebx",&DWP(4,"edx")); - &test ("ecx",1<<20); # check for P4 - &jnz (&label("loop")); &mov ("edx",&DWP(8,"edx")) if ($xmm); &test ("ecx",1<<24); # check for FXSR &jz ($unroll_after?&label("no_xmm"):&label("loop")); From 6a6ead0effbf7e24f0b3320ee890fd0799e7e492 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 27 Jan 2024 18:54:15 -0500 Subject: [PATCH 5/9] Move capability checks in sha256-586.pl to C This took a little restructuring because they were previously all one big function. Some notes: - label() and set_label() in x86 perlasm default to function-scoped. But static_label() marks a label as file-scoped, which is why "pic_point" and "K256" work. - There's a pretty sizeable common preamble. I just copied it to each for simplicity. I'm pretty sure some of it is wasted, but it's definitely not all wasted, between loading parameters, setting up stack alignment, and saving the old stack location. But I'm not sure if all those 16 bytes are actually used. 
Bug: 673 Change-Id: I6e8671d05d07cb4676ecf117dd56e2ed355c5d19 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65874 Commit-Queue: David Benjamin Reviewed-by: Bob Beck (cherry picked from commit 469118ca637887427f9727f1aac13c3e5bdee068) --- crypto/fipsmodule/sha/asm/sha256-586.pl | 157 ++++++++++++++++++------ crypto/fipsmodule/sha/internal.h | 27 +++- 2 files changed, 144 insertions(+), 40 deletions(-) diff --git a/crypto/fipsmodule/sha/asm/sha256-586.pl b/crypto/fipsmodule/sha/asm/sha256-586.pl index 4baaa8eb04..e73f3812b1 100644 --- a/crypto/fipsmodule/sha/asm/sha256-586.pl +++ b/crypto/fipsmodule/sha/asm/sha256-586.pl @@ -88,7 +88,7 @@ # versions, but BoringSSL is intended to be used with pre-generated perlasm # output, so this isn't useful anyway. # -# TODO(davidben): Enable AVX2 code after testing by setting $avx to 2. +# TODO(davidben): Enable AVX+BMI2 code after testing by setting $avx to 2. $avx = 1; $avx = 0 unless ($xmm); @@ -190,9 +190,9 @@ () &add ($A,$T); # h += T } -&external_label("OPENSSL_ia32cap_P") if (!$i386); +&static_label("K256"); -&function_begin("sha256_block_data_order"); +&function_begin("sha256_block_data_order_nohw"); &mov ("esi",wparam(0)); # ctx &mov ("edi",wparam(1)); # inp &mov ("eax",wparam(2)); # num @@ -213,26 +213,6 @@ () &mov (&DWP(8,"esp"),"eax"); # inp+num*128 &mov (&DWP(12,"esp"),"ebx"); # saved sp if (!$i386 && $xmm) { - &picmeup("edx","OPENSSL_ia32cap_P",$K256,&label("K256")); - &mov ("ecx",&DWP(0,"edx")); - &mov ("ebx",&DWP(4,"edx")); - &mov ("edx",&DWP(8,"edx")) if ($xmm); - &test ("ecx",1<<24); # check for FXSR - &jz ($unroll_after?&label("no_xmm"):&label("loop")); - &and ("ecx",1<<30); # mask "Intel CPU" bit - &and ("ebx",1<<28|1<<9); # mask AVX and SSSE3 bits - &test ("edx",1<<29) if ($shaext); # check for SHA - &jnz (&label("shaext")) if ($shaext); - &or ("ecx","ebx"); - &and ("ecx",1<<28|1<<30); - &cmp ("ecx",1<<28|1<<30); - if ($xmm) { - &je (&label("AVX")) if ($avx); - &test ("ebx",1<<9); # 
check for SSSE3 - &jnz (&label("SSSE3")); - } else { - &je (&label("loop_shrd")); - } if ($unroll_after) { &set_label("no_xmm"); &sub ("eax","edi"); @@ -520,6 +500,8 @@ () &mov ("esp",&DWP(96+12,"esp")); # restore sp &function_end_A(); } +&function_end_B("sha256_block_data_order_nohw"); + if (!$i386 && $xmm) {{{ if ($shaext) { ###################################################################### @@ -538,7 +520,33 @@ sub sha256op38 { sub sha256msg1 { sha256op38(0xcc,@_); } sub sha256msg2 { sha256op38(0xcd,@_); } -&set_label("shaext",32); +&function_begin("sha256_block_data_order_hw"); + &mov ("esi",wparam(0)); # ctx + &mov ("edi",wparam(1)); # inp + &mov ("eax",wparam(2)); # num + &mov ("ebx","esp"); # saved sp + + &call (&label("pic_point")); # make it PIC! +&set_label("pic_point"); + &blindpop($K256); + &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); + + &sub ("esp",16); + &and ("esp",-64); + + &shl ("eax",6); + &add ("eax","edi"); + &mov (&DWP(0,"esp"),"esi"); # ctx + &mov (&DWP(4,"esp"),"edi"); # inp + &mov (&DWP(8,"esp"),"eax"); # inp+num*128 + &mov (&DWP(12,"esp"),"ebx"); # saved sp + + # TODO(davidben): The preamble above this point comes from the original + # merged sha256_block_data_order function, which performed some common + # setup and then jumped to the particular SHA-256 implementation. The + # parts of the preamble that do not apply to this function can be + # removed. 
+ &sub ("esp",32); &movdqu ($ABEF,&QWP(0,$ctx)); # DCBA @@ -658,14 +666,40 @@ sub sha256op38 { &mov ("esp",&DWP(32+12,"esp")); &movdqu (&QWP(0,$ctx),$ABEF); &movdqu (&QWP(16,$ctx),$CDGH); -&function_end_A(); +&function_end("sha256_block_data_order_shaext"); } my @X = map("xmm$_",(0..3)); my ($t0,$t1,$t2,$t3) = map("xmm$_",(4..7)); my @AH = ($A,$T); -&set_label("SSSE3",32); +&function_begin("sha256_block_data_order_ssse3"); + &mov ("esi",wparam(0)); # ctx + &mov ("edi",wparam(1)); # inp + &mov ("eax",wparam(2)); # num + &mov ("ebx","esp"); # saved sp + + &call (&label("pic_point")); # make it PIC! +&set_label("pic_point"); + &blindpop($K256); + &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); + + &sub ("esp",16); + &and ("esp",-64); + + &shl ("eax",6); + &add ("eax","edi"); + &mov (&DWP(0,"esp"),"esi"); # ctx + &mov (&DWP(4,"esp"),"edi"); # inp + &mov (&DWP(8,"esp"),"eax"); # inp+num*128 + &mov (&DWP(12,"esp"),"ebx"); # saved sp + + # TODO(davidben): The preamble above this point comes from the original + # merged sha256_block_data_order function, which performed some common + # setup and then jumped to the particular SHA-256 implementation. The + # parts of the preamble that do not apply to this function can be + # removed. + &lea ("esp",&DWP(-96,"esp")); # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack &mov ($AH[0],&DWP(0,"esi")); @@ -973,14 +1007,36 @@ () &jb (&label("grand_ssse3")); &mov ("esp",&DWP(96+12,"esp")); # restore sp -&function_end_A(); +&function_end("sha256_block_data_order_ssse3"); + if ($avx) { -&set_label("AVX",32); - if ($avx>1) { - &and ("edx",1<<8|1<<3); # check for BMI2+BMI1 - &cmp ("edx",1<<8|1<<3); - &je (&label("AVX_BMI")); - } +&function_begin("sha256_block_data_order_avx"); + &mov ("esi",wparam(0)); # ctx + &mov ("edi",wparam(1)); # inp + &mov ("eax",wparam(2)); # num + &mov ("ebx","esp"); # saved sp + + &call (&label("pic_point")); # make it PIC! 
+&set_label("pic_point"); + &blindpop($K256); + &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); + + &sub ("esp",16); + &and ("esp",-64); + + &shl ("eax",6); + &add ("eax","edi"); + &mov (&DWP(0,"esp"),"esi"); # ctx + &mov (&DWP(4,"esp"),"edi"); # inp + &mov (&DWP(8,"esp"),"eax"); # inp+num*128 + &mov (&DWP(12,"esp"),"ebx"); # saved sp + + # TODO(davidben): The preamble above this point comes from the original + # merged sha256_block_data_order function, which performed some common + # setup and then jumped to the particular SHA-256 implementation. The + # parts of the preamble that do not apply to this function can be + # removed. + &lea ("esp",&DWP(-96,"esp")); &vzeroall (); # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack @@ -1140,7 +1196,8 @@ () &mov ("esp",&DWP(96+12,"esp")); # restore sp &vzeroall (); -&function_end_A(); +&function_end("sha256_block_data_order_avx"); + if ($avx>1) { sub bodyx_00_15 () { # +10% ( @@ -1177,7 +1234,34 @@ () ); } -&set_label("AVX_BMI",32); +# If enabled, this function should be gated on AVX, BMI1, and BMI2. +&function_begin("sha256_block_data_order_avx_bmi"); + &mov ("esi",wparam(0)); # ctx + &mov ("edi",wparam(1)); # inp + &mov ("eax",wparam(2)); # num + &mov ("ebx","esp"); # saved sp + + &call (&label("pic_point")); # make it PIC! +&set_label("pic_point"); + &blindpop($K256); + &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); + + &sub ("esp",16); + &and ("esp",-64); + + &shl ("eax",6); + &add ("eax","edi"); + &mov (&DWP(0,"esp"),"esi"); # ctx + &mov (&DWP(4,"esp"),"edi"); # inp + &mov (&DWP(8,"esp"),"eax"); # inp+num*128 + &mov (&DWP(12,"esp"),"ebx"); # saved sp + + # TODO(davidben): The preamble above this point comes from the original + # merged sha256_block_data_order function, which performed some common + # setup and then jumped to the particular SHA-256 implementation. The + # parts of the preamble that do not apply to this function can be + # removed. 
+ &lea ("esp",&DWP(-96,"esp")); &vzeroall (); # copy ctx->h[0-7] to A,B,C,D,E,F,G,H on stack @@ -1281,11 +1365,10 @@ () &mov ("esp",&DWP(96+12,"esp")); # restore sp &vzeroall (); -&function_end_A(); +&function_end("sha256_block_data_order_avx_bmi"); } } }}} -&function_end_B("sha256_block_data_order"); &asm_finish(); diff --git a/crypto/fipsmodule/sha/internal.h b/crypto/fipsmodule/sha/internal.h index 87b9348831..596371ade9 100644 --- a/crypto/fipsmodule/sha/internal.h +++ b/crypto/fipsmodule/sha/internal.h @@ -148,6 +148,7 @@ OPENSSL_INLINE int sha512_hw_capable(void) { #elif !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) #define SHA1_ASM_NOHW +#define SHA256_ASM_NOHW #define SHA1_ASM_SSSE3 OPENSSL_INLINE int sha1_ssse3_capable(void) { @@ -172,11 +173,31 @@ OPENSSL_INLINE int sha1_avx_capable(void) { void sha1_block_data_order_avx(uint32_t state[5], const uint8_t *data, size_t num); +#define SHA256_ASM_SSSE3 +OPENSSL_INLINE int sha256_ssse3_capable(void) { + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_SSSE3_capable() && CRYPTO_is_FXSR_capable(); +} +void sha256_block_data_order_ssse3(uint32_t state[8], const uint8_t *data, + size_t num); + +#define SHA256_ASM_AVX +OPENSSL_INLINE int sha256_avx_capable(void) { + // Pre-Zen AMD CPUs had slow SHLD/SHRD; Zen added the SHA extension; see the + // discussion in sha1-586.pl. + // + // TODO(davidben): Should we enable SHAEXT on 32-bit x86? + // TODO(davidben): Do we need to check the FXSR bit? The Intel manual does not + // say to. + return CRYPTO_is_AVX_capable() && CRYPTO_is_intel_cpu() && + CRYPTO_is_FXSR_capable(); +} +void sha256_block_data_order_avx(uint32_t state[8], const uint8_t *data, + size_t num); + // TODO(crbug.com/boringssl/673): Move the remaining CPU dispatch to C. 
-#define SHA256_ASM #define SHA512_ASM -void sha256_block_data_order(uint32_t state[8], const uint8_t *data, - size_t num_blocks); void sha512_block_data_order(uint64_t state[8], const uint8_t *data, size_t num_blocks); From c389e02e923a8fe1a4bede51a36e6c00f89cb8e2 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Wed, 21 Feb 2024 09:41:46 -0500 Subject: [PATCH 6/9] Use BIO_TYPE_* constants for flags Also remove some of the comments that, with the constants, don't provide any real value. Change-Id: Ie73d840ac5613a6750796e21a9ab1d644edfe5b1 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/66448 Reviewed-by: Bob Beck Auto-Submit: David Benjamin Commit-Queue: Bob Beck (cherry picked from commit 7101b14d18f1a3bb9c3c6b9de56924e21bb08be5) --- include/openssl/bio.h | 50 +++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/include/openssl/bio.h b/include/openssl/bio.h index 6b4a770830..c3d7a47409 100644 --- a/include/openssl/bio.h +++ b/include/openssl/bio.h @@ -932,37 +932,37 @@ OPENSSL_EXPORT int BIO_set_write_buffer_size(BIO *bio, int buffer_size); // or change the data in any way. 
#define BIO_FLAGS_MEM_RDONLY 0x200 -// These are the 'types' of BIOs -#define BIO_TYPE_NONE 0 -#define BIO_TYPE_MEM (1 | 0x0400) -#define BIO_TYPE_FILE (2 | 0x0400) -#define BIO_TYPE_FD (4 | 0x0400 | 0x0100) -#define BIO_TYPE_SOCKET (5 | 0x0400 | 0x0100) -#define BIO_TYPE_NULL (6 | 0x0400) -#define BIO_TYPE_SSL (7 | 0x0200) -#define BIO_TYPE_MD (8 | 0x0200) // passive filter -#define BIO_TYPE_BUFFER (9 | 0x0200) // filter -#define BIO_TYPE_CIPHER (10 | 0x0200) // filter -#define BIO_TYPE_BASE64 (11 | 0x0200) // filter -#define BIO_TYPE_CONNECT (12 | 0x0400 | 0x0100) // socket - connect -#define BIO_TYPE_ACCEPT (13 | 0x0400 | 0x0100) // socket for accept -#define BIO_TYPE_PROXY_CLIENT (14 | 0x0200) // client proxy BIO -#define BIO_TYPE_PROXY_SERVER (15 | 0x0200) // server proxy BIO -#define BIO_TYPE_NBIO_TEST (16 | 0x0200) // server proxy BIO -#define BIO_TYPE_NULL_FILTER (17 | 0x0200) -#define BIO_TYPE_BER (18 | 0x0200) // BER -> bin filter -#define BIO_TYPE_BIO (19 | 0x0400) // (half a) BIO pair -#define BIO_TYPE_LINEBUFFER (20 | 0x0200) // filter -#define BIO_TYPE_DGRAM (21 | 0x0400 | 0x0100) -#define BIO_TYPE_ASN1 (22 | 0x0200) // filter -#define BIO_TYPE_COMP (23 | 0x0200) // filter - // BIO_TYPE_DESCRIPTOR denotes that the |BIO| responds to the |BIO_C_SET_FD| // (|BIO_set_fd|) and |BIO_C_GET_FD| (|BIO_get_fd|) control hooks. 
#define BIO_TYPE_DESCRIPTOR 0x0100 // socket, fd, connect or accept #define BIO_TYPE_FILTER 0x0200 #define BIO_TYPE_SOURCE_SINK 0x0400 +// These are the 'types' of BIOs +#define BIO_TYPE_NONE 0 +#define BIO_TYPE_MEM (1 | BIO_TYPE_SOURCE_SINK) +#define BIO_TYPE_FILE (2 | BIO_TYPE_SOURCE_SINK) +#define BIO_TYPE_FD (4 | BIO_TYPE_SOURCE_SINK | BIO_TYPE_DESCRIPTOR) +#define BIO_TYPE_SOCKET (5 | BIO_TYPE_SOURCE_SINK | BIO_TYPE_DESCRIPTOR) +#define BIO_TYPE_NULL (6 | BIO_TYPE_SOURCE_SINK) +#define BIO_TYPE_SSL (7 | BIO_TYPE_FILTER) +#define BIO_TYPE_MD (8 | BIO_TYPE_FILTER) +#define BIO_TYPE_BUFFER (9 | BIO_TYPE_FILTER) +#define BIO_TYPE_CIPHER (10 | BIO_TYPE_FILTER) +#define BIO_TYPE_BASE64 (11 | BIO_TYPE_FILTER) +#define BIO_TYPE_CONNECT (12 | BIO_TYPE_SOURCE_SINK | BIO_TYPE_DESCRIPTOR) +#define BIO_TYPE_ACCEPT (13 | BIO_TYPE_SOURCE_SINK | BIO_TYPE_DESCRIPTOR) +#define BIO_TYPE_PROXY_CLIENT (14 | BIO_TYPE_FILTER) +#define BIO_TYPE_PROXY_SERVER (15 | BIO_TYPE_FILTER) +#define BIO_TYPE_NBIO_TEST (16 | BIO_TYPE_FILTER) +#define BIO_TYPE_NULL_FILTER (17 | BIO_TYPE_FILTER) +#define BIO_TYPE_BER (18 | BIO_TYPE_FILTER) // BER -> bin filter +#define BIO_TYPE_BIO (19 | BIO_TYPE_SOURCE_SINK) // (half a) BIO pair +#define BIO_TYPE_LINEBUFFER (20 | BIO_TYPE_FILTER) +#define BIO_TYPE_DGRAM (21 | BIO_TYPE_SOURCE_SINK | BIO_TYPE_DESCRIPTOR) +#define BIO_TYPE_ASN1 (22 | BIO_TYPE_FILTER) +#define BIO_TYPE_COMP (23 | BIO_TYPE_FILTER) + // BIO_TYPE_START is the first user-allocated |BIO| type. No pre-defined type, // flag bits aside, may exceed this value. #define BIO_TYPE_START 128 From 2bf3139e6e7b27a94f547cd15c63b59458d48a64 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 28 Dec 2023 18:54:52 -0500 Subject: [PATCH 7/9] Const-correct the 'kstr' parameter of PEM functions Also rename to pass and pass_len, which makes a bit more sense for what these are. 
Change-Id: If3421ed7890c92cd11130641a8a2e090cc7f8b91 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/65810 Commit-Queue: David Benjamin Auto-Submit: David Benjamin Reviewed-by: Bob Beck (cherry picked from commit e44712755dd9281656009d4931cf7ae12201ae21) --- crypto/pem/pem_lib.c | 35 +++++++------- crypto/pem/pem_pk8.c | 85 ++++++++++++++++---------------- crypto/pem/pem_pkey.c | 25 +++++----- include/openssl/pem.h | 109 ++++++++++++++++++++++-------------------- 4 files changed, 131 insertions(+), 123 deletions(-) diff --git a/crypto/pem/pem_lib.c b/crypto/pem/pem_lib.c index 2c9a347ba4..fa66620039 100644 --- a/crypto/pem/pem_lib.c +++ b/crypto/pem/pem_lib.c @@ -269,21 +269,22 @@ int PEM_bytes_read_bio(unsigned char **pdata, long *plen, char **pnm, } int PEM_ASN1_write(i2d_of_void *i2d, const char *name, FILE *fp, void *x, - const EVP_CIPHER *enc, unsigned char *kstr, int klen, - pem_password_cb *callback, void *u) { + const EVP_CIPHER *enc, const unsigned char *pass, + int pass_len, pem_password_cb *callback, void *u) { BIO *b = BIO_new_fp(fp, BIO_NOCLOSE); if (b == NULL) { OPENSSL_PUT_ERROR(PEM, ERR_R_BUF_LIB); return 0; } - int ret = PEM_ASN1_write_bio(i2d, name, b, x, enc, kstr, klen, callback, u); + int ret = + PEM_ASN1_write_bio(i2d, name, b, x, enc, pass, pass_len, callback, u); BIO_free(b); return ret; } int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, void *x, - const EVP_CIPHER *enc, unsigned char *kstr, int klen, - pem_password_cb *callback, void *u) { + const EVP_CIPHER *enc, const unsigned char *pass, + int pass_len, pem_password_cb *callback, void *u) { EVP_CIPHER_CTX ctx; int dsize = 0, i, j, ret = 0; unsigned char *p, *data = NULL; @@ -318,17 +319,17 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, void *x, if (enc != NULL) { const unsigned iv_len = EVP_CIPHER_iv_length(enc); - if (kstr == NULL) { - klen = 0; + if (pass == NULL) { + pass_len = 0; if (!callback) { callback = 
PEM_def_callback; } - klen = (*callback)(buf, PEM_BUFSIZE, 1, u); - if (klen <= 0) { + pass_len = (*callback)(buf, PEM_BUFSIZE, 1, u); + if (pass_len <= 0) { OPENSSL_PUT_ERROR(PEM, PEM_R_READ_KEY); goto err; } - kstr = (unsigned char *)buf; + pass = (const unsigned char *)buf; } assert(iv_len <= sizeof(iv)); if (!RAND_bytes(iv, iv_len)) { // Generate a salt @@ -336,11 +337,11 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, void *x, } // The 'iv' is used as the iv and as a salt. It is NOT taken from // the BytesToKey function - if (!EVP_BytesToKey(enc, EVP_md5(), iv, kstr, klen, 1, key, NULL)) { + if (!EVP_BytesToKey(enc, EVP_md5(), iv, pass, pass_len, 1, key, NULL)) { goto err; } - if (kstr == (unsigned char *)buf) { + if (pass == (const unsigned char *)buf) { OPENSSL_cleanse(buf, PEM_BUFSIZE); } @@ -383,7 +384,7 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, void *x, int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen, pem_password_cb *callback, void *u) { - int i = 0, j, o, klen; + int i = 0, j, o, pass_len; long len; EVP_CIPHER_CTX ctx; unsigned char key[EVP_MAX_KEY_LENGTH]; @@ -395,18 +396,18 @@ int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data, long *plen, return 1; } - klen = 0; + pass_len = 0; if (!callback) { callback = PEM_def_callback; } - klen = callback(buf, PEM_BUFSIZE, 0, u); - if (klen <= 0) { + pass_len = callback(buf, PEM_BUFSIZE, 0, u); + if (pass_len <= 0) { OPENSSL_PUT_ERROR(PEM, PEM_R_BAD_PASSWORD_READ); return 0; } if (!EVP_BytesToKey(cipher->cipher, EVP_md5(), &(cipher->iv[0]), - (unsigned char *)buf, klen, 1, key, NULL)) { + (unsigned char *)buf, pass_len, 1, key, NULL)) { return 0; } diff --git a/crypto/pem/pem_pk8.c b/crypto/pem/pem_pk8.c index 610f36ca7f..9c6419be45 100644 --- a/crypto/pem/pem_pk8.c +++ b/crypto/pem/pem_pk8.c @@ -65,10 +65,10 @@ #include static int do_pk8pkey(BIO *bp, const EVP_PKEY *x, int isder, int nid, - const EVP_CIPHER *enc, char 
*kstr, int klen, + const EVP_CIPHER *enc, const char *pass, int pass_len, pem_password_cb *cb, void *u); static int do_pk8pkey_fp(FILE *bp, const EVP_PKEY *x, int isder, int nid, - const EVP_CIPHER *enc, char *kstr, int klen, + const EVP_CIPHER *enc, const char *pass, int pass_len, pem_password_cb *cb, void *u); // These functions write a private key in PKCS#8 format: it is a "drop in" @@ -77,30 +77,31 @@ static int do_pk8pkey_fp(FILE *bp, const EVP_PKEY *x, int isder, int nid, // uses PKCS#5 v1.5 PBE algorithms whereas the others use PKCS#5 v2.0. int PEM_write_bio_PKCS8PrivateKey_nid(BIO *bp, const EVP_PKEY *x, int nid, - char *kstr, int klen, pem_password_cb *cb, - void *u) { - return do_pk8pkey(bp, x, 0, nid, NULL, kstr, klen, cb, u); + const char *pass, int pass_len, + pem_password_cb *cb, void *u) { + return do_pk8pkey(bp, x, 0, nid, NULL, pass, pass_len, cb, u); } int PEM_write_bio_PKCS8PrivateKey(BIO *bp, const EVP_PKEY *x, - const EVP_CIPHER *enc, char *kstr, int klen, - pem_password_cb *cb, void *u) { - return do_pk8pkey(bp, x, 0, -1, enc, kstr, klen, cb, u); + const EVP_CIPHER *enc, const char *pass, + int pass_len, pem_password_cb *cb, void *u) { + return do_pk8pkey(bp, x, 0, -1, enc, pass, pass_len, cb, u); } int i2d_PKCS8PrivateKey_bio(BIO *bp, const EVP_PKEY *x, const EVP_CIPHER *enc, - char *kstr, int klen, pem_password_cb *cb, + const char *pass, int pass_len, pem_password_cb *cb, void *u) { - return do_pk8pkey(bp, x, 1, -1, enc, kstr, klen, cb, u); + return do_pk8pkey(bp, x, 1, -1, enc, pass, pass_len, cb, u); } -int i2d_PKCS8PrivateKey_nid_bio(BIO *bp, const EVP_PKEY *x, int nid, char *kstr, - int klen, pem_password_cb *cb, void *u) { - return do_pk8pkey(bp, x, 1, nid, NULL, kstr, klen, cb, u); +int i2d_PKCS8PrivateKey_nid_bio(BIO *bp, const EVP_PKEY *x, int nid, + const char *pass, int pass_len, + pem_password_cb *cb, void *u) { + return do_pk8pkey(bp, x, 1, nid, NULL, pass, pass_len, cb, u); } static int do_pk8pkey(BIO *bp, const EVP_PKEY *x, 
int isder, int nid, - const EVP_CIPHER *enc, char *kstr, int klen, + const EVP_CIPHER *enc, const char *pass, int pass_len, pem_password_cb *cb, void *u) { X509_SIG *p8; PKCS8_PRIV_KEY_INFO *p8inf; @@ -111,23 +112,23 @@ static int do_pk8pkey(BIO *bp, const EVP_PKEY *x, int isder, int nid, return 0; } if (enc || (nid != -1)) { - if (!kstr) { - klen = 0; + if (!pass) { + pass_len = 0; if (!cb) { cb = PEM_def_callback; } - klen = cb(buf, PEM_BUFSIZE, 1, u); - if (klen <= 0) { + pass_len = cb(buf, PEM_BUFSIZE, 1, u); + if (pass_len <= 0) { OPENSSL_PUT_ERROR(PEM, PEM_R_READ_KEY); PKCS8_PRIV_KEY_INFO_free(p8inf); return 0; } - kstr = buf; + pass = buf; } - p8 = PKCS8_encrypt(nid, enc, kstr, klen, NULL, 0, 0, p8inf); - if (kstr == buf) { - OPENSSL_cleanse(buf, klen); + p8 = PKCS8_encrypt(nid, enc, pass, pass_len, NULL, 0, 0, p8inf); + if (pass == buf) { + OPENSSL_cleanse(buf, pass_len); } PKCS8_PRIV_KEY_INFO_free(p8inf); if (isder) { @@ -152,7 +153,7 @@ EVP_PKEY *d2i_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, void *u) { PKCS8_PRIV_KEY_INFO *p8inf = NULL; X509_SIG *p8 = NULL; - int klen; + int pass_len; EVP_PKEY *ret; char psbuf[PEM_BUFSIZE]; p8 = d2i_PKCS8_bio(bp, NULL); @@ -160,19 +161,19 @@ EVP_PKEY *d2i_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, return NULL; } - klen = 0; + pass_len = 0; if (!cb) { cb = PEM_def_callback; } - klen = cb(psbuf, PEM_BUFSIZE, 0, u); - if (klen <= 0) { + pass_len = cb(psbuf, PEM_BUFSIZE, 0, u); + if (pass_len <= 0) { OPENSSL_PUT_ERROR(PEM, PEM_R_BAD_PASSWORD_READ); X509_SIG_free(p8); return NULL; } - p8inf = PKCS8_decrypt(p8, psbuf, klen); + p8inf = PKCS8_decrypt(p8, psbuf, pass_len); X509_SIG_free(p8); - OPENSSL_cleanse(psbuf, klen); + OPENSSL_cleanse(psbuf, pass_len); if (!p8inf) { return NULL; } @@ -192,29 +193,31 @@ EVP_PKEY *d2i_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, int i2d_PKCS8PrivateKey_fp(FILE *fp, const EVP_PKEY *x, const EVP_CIPHER *enc, - char *kstr, int klen, 
pem_password_cb *cb, void *u) { - return do_pk8pkey_fp(fp, x, 1, -1, enc, kstr, klen, cb, u); + const char *pass, int pass_len, pem_password_cb *cb, + void *u) { + return do_pk8pkey_fp(fp, x, 1, -1, enc, pass, pass_len, cb, u); } -int i2d_PKCS8PrivateKey_nid_fp(FILE *fp, const EVP_PKEY *x, int nid, char *kstr, - int klen, pem_password_cb *cb, void *u) { - return do_pk8pkey_fp(fp, x, 1, nid, NULL, kstr, klen, cb, u); +int i2d_PKCS8PrivateKey_nid_fp(FILE *fp, const EVP_PKEY *x, int nid, + const char *pass, int pass_len, + pem_password_cb *cb, void *u) { + return do_pk8pkey_fp(fp, x, 1, nid, NULL, pass, pass_len, cb, u); } int PEM_write_PKCS8PrivateKey_nid(FILE *fp, const EVP_PKEY *x, int nid, - char *kstr, int klen, pem_password_cb *cb, - void *u) { - return do_pk8pkey_fp(fp, x, 0, nid, NULL, kstr, klen, cb, u); + const char *pass, int pass_len, + pem_password_cb *cb, void *u) { + return do_pk8pkey_fp(fp, x, 0, nid, NULL, pass, pass_len, cb, u); } int PEM_write_PKCS8PrivateKey(FILE *fp, const EVP_PKEY *x, - const EVP_CIPHER *enc, char *kstr, int klen, - pem_password_cb *cb, void *u) { - return do_pk8pkey_fp(fp, x, 0, -1, enc, kstr, klen, cb, u); + const EVP_CIPHER *enc, const char *pass, + int pass_len, pem_password_cb *cb, void *u) { + return do_pk8pkey_fp(fp, x, 0, -1, enc, pass, pass_len, cb, u); } static int do_pk8pkey_fp(FILE *fp, const EVP_PKEY *x, int isder, int nid, - const EVP_CIPHER *enc, char *kstr, int klen, + const EVP_CIPHER *enc, const char *pass, int pass_len, pem_password_cb *cb, void *u) { BIO *bp; int ret; @@ -222,7 +225,7 @@ static int do_pk8pkey_fp(FILE *fp, const EVP_PKEY *x, int isder, int nid, OPENSSL_PUT_ERROR(PEM, ERR_R_BUF_LIB); return 0; } - ret = do_pk8pkey(bp, x, isder, nid, enc, kstr, klen, cb, u); + ret = do_pk8pkey(bp, x, isder, nid, enc, pass, pass_len, cb, u); BIO_free(bp); return ret; } diff --git a/crypto/pem/pem_pkey.c b/crypto/pem/pem_pkey.c index 796bf693c6..288fe7b40c 100644 --- a/crypto/pem/pem_pkey.c +++ 
b/crypto/pem/pem_pkey.c @@ -99,26 +99,26 @@ EVP_PKEY *PEM_read_bio_PrivateKey(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, } else if (strcmp(nm, PEM_STRING_PKCS8) == 0) { PKCS8_PRIV_KEY_INFO *p8inf; X509_SIG *p8; - int klen; + int pass_len; char psbuf[PEM_BUFSIZE]; p8 = d2i_X509_SIG(NULL, &p, len); if (!p8) { goto p8err; } - klen = 0; + pass_len = 0; if (!cb) { cb = PEM_def_callback; } - klen = cb(psbuf, PEM_BUFSIZE, 0, u); - if (klen <= 0) { + pass_len = cb(psbuf, PEM_BUFSIZE, 0, u); + if (pass_len <= 0) { OPENSSL_PUT_ERROR(PEM, PEM_R_BAD_PASSWORD_READ); X509_SIG_free(p8); goto err; } - p8inf = PKCS8_decrypt(p8, psbuf, klen); + p8inf = PKCS8_decrypt(p8, psbuf, pass_len); X509_SIG_free(p8); - OPENSSL_cleanse(psbuf, klen); + OPENSSL_cleanse(psbuf, pass_len); if (!p8inf) { goto p8err; } @@ -152,9 +152,10 @@ EVP_PKEY *PEM_read_bio_PrivateKey(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, } int PEM_write_bio_PrivateKey(BIO *bp, EVP_PKEY *x, const EVP_CIPHER *enc, - unsigned char *kstr, int klen, pem_password_cb *cb, - void *u) { - return PEM_write_bio_PKCS8PrivateKey(bp, x, enc, (char *)kstr, klen, cb, u); + const unsigned char *pass, int pass_len, + pem_password_cb *cb, void *u) { + return PEM_write_bio_PKCS8PrivateKey(bp, x, enc, (const char *)pass, pass_len, + cb, u); } EVP_PKEY *PEM_read_bio_Parameters(BIO *bio, EVP_PKEY **pkey) { @@ -292,14 +293,14 @@ EVP_PKEY *PEM_read_PrivateKey(FILE *fp, EVP_PKEY **x, pem_password_cb *cb, } int PEM_write_PrivateKey(FILE *fp, EVP_PKEY *x, const EVP_CIPHER *enc, - unsigned char *kstr, int klen, pem_password_cb *cb, - void *u) { + const unsigned char *pass, int pass_len, + pem_password_cb *cb, void *u) { BIO *b = BIO_new_fp(fp, BIO_NOCLOSE); if (b == NULL) { OPENSSL_PUT_ERROR(PEM, ERR_R_BUF_LIB); return 0; } - int ret = PEM_write_bio_PrivateKey(b, x, enc, kstr, klen, cb, u); + int ret = PEM_write_bio_PrivateKey(b, x, enc, pass, pass_len, cb, u); BIO_free(b); return ret; } diff --git a/include/openssl/pem.h b/include/openssl/pem.h 
index 2cdd4f3ae9..f560bad33b 100644 --- a/include/openssl/pem.h +++ b/include/openssl/pem.h @@ -150,26 +150,26 @@ extern "C" { NULL, 0, NULL, NULL); \ } -#define IMPLEMENT_PEM_write_cb_fp(name, type, str, asn1) \ - static int pem_write_##name##_i2d(const void *x, unsigned char **outp) { \ - return i2d_##asn1((type *)x, outp); \ - } \ - OPENSSL_EXPORT int PEM_write_##name( \ - FILE *fp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \ - pem_password_cb *cb, void *u) { \ - return PEM_ASN1_write(pem_write_##name##_i2d, str, fp, x, enc, kstr, klen, \ - cb, u); \ +#define IMPLEMENT_PEM_write_cb_fp(name, type, str, asn1) \ + static int pem_write_##name##_i2d(const void *x, unsigned char **outp) { \ + return i2d_##asn1((type *)x, outp); \ + } \ + OPENSSL_EXPORT int PEM_write_##name( \ + FILE *fp, type *x, const EVP_CIPHER *enc, const unsigned char *pass, \ + int pass_len, pem_password_cb *cb, void *u) { \ + return PEM_ASN1_write(pem_write_##name##_i2d, str, fp, x, enc, pass, \ + pass_len, cb, u); \ } -#define IMPLEMENT_PEM_write_cb_fp_const(name, type, str, asn1) \ - static int pem_write_##name##_i2d(const void *x, unsigned char **outp) { \ - return i2d_##asn1((const type *)x, outp); \ - } \ - OPENSSL_EXPORT int PEM_write_##name( \ - FILE *fp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \ - pem_password_cb *cb, void *u) { \ - return PEM_ASN1_write(pem_write_##name##_i2d, str, fp, x, enc, kstr, klen, \ - cb, u); \ +#define IMPLEMENT_PEM_write_cb_fp_const(name, type, str, asn1) \ + static int pem_write_##name##_i2d(const void *x, unsigned char **outp) { \ + return i2d_##asn1((const type *)x, outp); \ + } \ + OPENSSL_EXPORT int PEM_write_##name( \ + FILE *fp, type *x, const EVP_CIPHER *enc, const unsigned char *pass, \ + int pass_len, pem_password_cb *cb, void *u) { \ + return PEM_ASN1_write(pem_write_##name##_i2d, str, fp, x, enc, pass, \ + pass_len, cb, u); \ } @@ -207,10 +207,10 @@ extern "C" { return i2d_##asn1((type *)x, outp); \ } \ 
OPENSSL_EXPORT int PEM_write_bio_##name( \ - BIO *bp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \ - pem_password_cb *cb, void *u) { \ + BIO *bp, type *x, const EVP_CIPHER *enc, const unsigned char *pass, \ + int pass_len, pem_password_cb *cb, void *u) { \ return PEM_ASN1_write_bio(pem_write_bio_##name##_i2d, str, bp, x, enc, \ - kstr, klen, cb, u); \ + pass, pass_len, cb, u); \ } #define IMPLEMENT_PEM_write_cb_bio_const(name, type, str, asn1) \ @@ -218,10 +218,10 @@ extern "C" { return i2d_##asn1((const type *)x, outp); \ } \ OPENSSL_EXPORT int PEM_write_bio_##name( \ - BIO *bp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \ - pem_password_cb *cb, void *u) { \ + BIO *bp, type *x, const EVP_CIPHER *enc, const unsigned char *pass, \ + int pass_len, pem_password_cb *cb, void *u) { \ return PEM_ASN1_write_bio(pem_write_bio_##name##_i2d, str, bp, (void *)x, \ - enc, kstr, klen, cb, u); \ + enc, pass, pass_len, cb, u); \ } #define IMPLEMENT_PEM_write(name, type, str, asn1) \ @@ -268,10 +268,10 @@ extern "C" { #define DECLARE_PEM_write_fp_const(name, type) \ OPENSSL_EXPORT int PEM_write_##name(FILE *fp, const type *x); -#define DECLARE_PEM_write_cb_fp(name, type) \ - OPENSSL_EXPORT int PEM_write_##name( \ - FILE *fp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \ - pem_password_cb *cb, void *u); +#define DECLARE_PEM_write_cb_fp(name, type) \ + OPENSSL_EXPORT int PEM_write_##name( \ + FILE *fp, type *x, const EVP_CIPHER *enc, const unsigned char *pass, \ + int pass_len, pem_password_cb *cb, void *u); #define DECLARE_PEM_read_bio(name, type) \ OPENSSL_EXPORT type *PEM_read_bio_##name(BIO *bp, type **x, \ @@ -283,10 +283,10 @@ extern "C" { #define DECLARE_PEM_write_bio_const(name, type) \ OPENSSL_EXPORT int PEM_write_bio_##name(BIO *bp, const type *x); -#define DECLARE_PEM_write_cb_bio(name, type) \ - OPENSSL_EXPORT int PEM_write_bio_##name( \ - BIO *bp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \ 
- pem_password_cb *cb, void *u); +#define DECLARE_PEM_write_cb_bio(name, type) \ + OPENSSL_EXPORT int PEM_write_bio_##name( \ + BIO *bp, type *x, const EVP_CIPHER *enc, const unsigned char *pass, \ + int pass_len, pem_password_cb *cb, void *u); #define DECLARE_PEM_write(name, type) \ @@ -365,7 +365,7 @@ OPENSSL_EXPORT void *PEM_ASN1_read_bio(d2i_of_void *d2i, const char *name, void *u); OPENSSL_EXPORT int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, void *x, const EVP_CIPHER *enc, - unsigned char *kstr, int klen, + const unsigned char *pass, int pass_len, pem_password_cb *cb, void *u); // PEM_X509_INFO_read_bio reads PEM blocks from |bp| and decodes any @@ -402,7 +402,7 @@ OPENSSL_EXPORT void *PEM_ASN1_read(d2i_of_void *d2i, const char *name, FILE *fp, void **x, pem_password_cb *cb, void *u); OPENSSL_EXPORT int PEM_ASN1_write(i2d_of_void *i2d, const char *name, FILE *fp, void *x, const EVP_CIPHER *enc, - unsigned char *kstr, int klen, + const unsigned char *pass, int pass_len, pem_password_cb *callback, void *u); // PEM_def_callback treats |userdata| as a string and copies it into |buf|, @@ -457,42 +457,45 @@ DECLARE_PEM_rw_cb(PrivateKey, EVP_PKEY) DECLARE_PEM_rw(PUBKEY, EVP_PKEY) OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey_nid(BIO *bp, const EVP_PKEY *x, - int nid, char *kstr, - int klen, + int nid, const char *pass, + int pass_len, pem_password_cb *cb, void *u); -OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey(BIO *, const EVP_PKEY *, - const EVP_CIPHER *, char *, - int, pem_password_cb *, - void *); +OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey(BIO *bp, const EVP_PKEY *x, + const EVP_CIPHER *enc, + const char *pass, int pass_len, + pem_password_cb *cb, void *u); OPENSSL_EXPORT int i2d_PKCS8PrivateKey_bio(BIO *bp, const EVP_PKEY *x, - const EVP_CIPHER *enc, char *kstr, - int klen, pem_password_cb *cb, - void *u); + const EVP_CIPHER *enc, + const char *pass, int pass_len, + pem_password_cb *cb, void *u); OPENSSL_EXPORT int 
i2d_PKCS8PrivateKey_nid_bio(BIO *bp, const EVP_PKEY *x, - int nid, char *kstr, int klen, + int nid, const char *pass, + int pass_len, pem_password_cb *cb, void *u); OPENSSL_EXPORT EVP_PKEY *d2i_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, void *u); OPENSSL_EXPORT int i2d_PKCS8PrivateKey_fp(FILE *fp, const EVP_PKEY *x, - const EVP_CIPHER *enc, char *kstr, - int klen, pem_password_cb *cb, - void *u); + const EVP_CIPHER *enc, + const char *pass, int pass_len, + pem_password_cb *cb, void *u); OPENSSL_EXPORT int i2d_PKCS8PrivateKey_nid_fp(FILE *fp, const EVP_PKEY *x, - int nid, char *kstr, int klen, - pem_password_cb *cb, void *u); + int nid, const char *pass, + int pass_len, pem_password_cb *cb, + void *u); OPENSSL_EXPORT int PEM_write_PKCS8PrivateKey_nid(FILE *fp, const EVP_PKEY *x, - int nid, char *kstr, int klen, + int nid, const char *pass, + int pass_len, pem_password_cb *cb, void *u); OPENSSL_EXPORT EVP_PKEY *d2i_PKCS8PrivateKey_fp(FILE *fp, EVP_PKEY **x, pem_password_cb *cb, void *u); OPENSSL_EXPORT int PEM_write_PKCS8PrivateKey(FILE *fp, const EVP_PKEY *x, - const EVP_CIPHER *enc, char *kstr, - int klen, pem_password_cb *cd, - void *u); + const EVP_CIPHER *enc, + const char *pass, int pass_len, + pem_password_cb *cd, void *u); // PEM_read_bio_Parameters is a generic PEM deserialization function that // parses the public "parameters" in |bio| and returns a corresponding From 6712c07ae414a24566e03a73dbe15ede4df008d9 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 12 Feb 2024 16:57:18 -0500 Subject: [PATCH 8/9] Make an internal RefCounted base class for libssl This is still a bit more tedious than I'd like, but we've got three of these and I'm about to add a fourth. Add something like Chromium's base class. But where Chromium integrates the base class directly with scoped_refptr (giving a place for a static_assert that you did the subclassing right), we don't quite have that since we need to integrate with the external C API. 
Instead, use the "passkey" pattern and have RefCounted's protected constructor take a struct that only T can construct. The passkey ensures that only T can construct RefCounted, and the protectedness ensures that T subclassed RefCounted. (I think the latter already comes from the static_cast in DecRef, but may as well.) Change-Id: Icf4cbc7d4168010ee46dfa3a7b0a2e7c20aaf383 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/66369 Reviewed-by: Bob Beck Commit-Queue: David Benjamin (cherry picked from commit fbf10f0d968beb56622eb4927bace53a0e931189) --- ssl/encrypted_client_hello.cc | 12 ++----- ssl/internal.h | 63 +++++++++++++++++++++++++++-------- ssl/ssl_lib.cc | 12 +++---- ssl/ssl_session.cc | 13 +++----- 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/ssl/encrypted_client_hello.cc b/ssl/encrypted_client_hello.cc index a5492e9a0c..8c4a42ce83 100644 --- a/ssl/encrypted_client_hello.cc +++ b/ssl/encrypted_client_hello.cc @@ -1012,18 +1012,12 @@ int SSL_marshal_ech_config(uint8_t **out, size_t *out_len, uint8_t config_id, SSL_ECH_KEYS *SSL_ECH_KEYS_new() { return New(); } -void SSL_ECH_KEYS_up_ref(SSL_ECH_KEYS *keys) { - CRYPTO_refcount_inc(&keys->references); -} +void SSL_ECH_KEYS_up_ref(SSL_ECH_KEYS *keys) { keys->UpRefInternal(); } void SSL_ECH_KEYS_free(SSL_ECH_KEYS *keys) { - if (keys == nullptr || - !CRYPTO_refcount_dec_and_test_zero(&keys->references)) { - return; + if (keys != nullptr) { + keys->DecRefInternal(); } - - keys->~ssl_ech_keys_st(); - OPENSSL_free(keys); } int SSL_ECH_KEYS_add(SSL_ECH_KEYS *configs, int is_retry_config, diff --git a/ssl/internal.h b/ssl/internal.h index 29bf268100..148e476154 100644 --- a/ssl/internal.h +++ b/ssl/internal.h @@ -480,6 +480,48 @@ inline size_t GetAllNames(const char **out, size_t max_out, return fixed_names.size() + objects.size(); } +// RefCounted is a common base for ref-counted types. 
This is an instance of the +// C++ curiously-recurring template pattern, so a type Foo must subclass +// RefCounted. It additionally must friend RefCounted to allow calling +// the destructor. +template +class RefCounted { + public: + RefCounted(const RefCounted &) = delete; + RefCounted &operator=(const RefCounted &) = delete; + + // These methods are intentionally named differently from `bssl::UpRef` to + // avoid a collision. Only the implementations of `FOO_up_ref` and `FOO_free` + // should call these. + void UpRefInternal() { CRYPTO_refcount_inc(&references_); } + void DecRefInternal() { + if (CRYPTO_refcount_dec_and_test_zero(&references_)) { + Derived *d = static_cast(this); + d->~Derived(); + OPENSSL_free(d); + } + } + + protected: + // Ensure that only `Derived`, which must inherit from `RefCounted`, + // can call the constructor. This catches bugs where someone inherited from + // the wrong base. + class CheckSubClass { + private: + friend Derived; + CheckSubClass() = default; + }; + RefCounted(CheckSubClass) { + static_assert(std::is_base_of::value, + "Derived must subclass RefCounted"); + } + + ~RefCounted() = default; + + private: + CRYPTO_refcount_t references_ = 1; +}; + // Protocol versions. 
// @@ -3664,7 +3706,7 @@ struct ssl_method_st { }; #define MIN_SAFE_FRAGMENT_SIZE 512 -struct ssl_ctx_st { +struct ssl_ctx_st : public bssl::RefCounted { explicit ssl_ctx_st(const SSL_METHOD *ssl_method); ssl_ctx_st(const ssl_ctx_st &) = delete; ssl_ctx_st &operator=(const ssl_ctx_st &) = delete; @@ -3761,8 +3803,6 @@ struct ssl_ctx_st { // processes - spooky :-) } stats; - CRYPTO_refcount_t references = 1; - // if defined, these override the X509_verify_cert() calls int (*app_verify_callback)(X509_STORE_CTX *store_ctx, void *arg) = nullptr; void *app_verify_arg = nullptr; @@ -4019,8 +4059,8 @@ struct ssl_ctx_st { bool conf_min_version_use_default : 1; private: + friend RefCounted; ~ssl_ctx_st(); - friend OPENSSL_EXPORT void SSL_CTX_free(SSL_CTX *); }; struct ssl_st { @@ -4134,13 +4174,11 @@ struct ssl_st { bool enable_read_ahead : 1; }; -struct ssl_session_st { +struct ssl_session_st : public bssl::RefCounted { explicit ssl_session_st(const bssl::SSL_X509_METHOD *method); ssl_session_st(const ssl_session_st &) = delete; ssl_session_st &operator=(const ssl_session_st &) = delete; - CRYPTO_refcount_t references = 1; - // ssl_version is the (D)TLS version that established the session. 
uint16_t ssl_version = 0; @@ -4289,21 +4327,18 @@ struct ssl_session_st { bssl::Array quic_early_data_context; private: + friend RefCounted; ~ssl_session_st(); - friend OPENSSL_EXPORT void SSL_SESSION_free(SSL_SESSION *); }; -struct ssl_ech_keys_st { - ssl_ech_keys_st() = default; - ssl_ech_keys_st(const ssl_ech_keys_st &) = delete; - ssl_ech_keys_st &operator=(const ssl_ech_keys_st &) = delete; +struct ssl_ech_keys_st : public bssl::RefCounted { + ssl_ech_keys_st() : RefCounted(CheckSubClass()) {} bssl::GrowableArray> configs; - CRYPTO_refcount_t references = 1; private: + friend RefCounted; ~ssl_ech_keys_st() = default; - friend OPENSSL_EXPORT void SSL_ECH_KEYS_free(SSL_ECH_KEYS *); }; #endif // OPENSSL_HEADER_SSL_INTERNAL_H diff --git a/ssl/ssl_lib.cc b/ssl/ssl_lib.cc index c74aa04b10..63cb730fb3 100644 --- a/ssl/ssl_lib.cc +++ b/ssl/ssl_lib.cc @@ -527,7 +527,8 @@ static int ssl_session_cmp(const SSL_SESSION *a, const SSL_SESSION *b) { } ssl_ctx_st::ssl_ctx_st(const SSL_METHOD *ssl_method) - : method(ssl_method->method), + : RefCounted(CheckSubClass()), + method(ssl_method->method), x509_method(ssl_method->x509_method), retain_only_sha256_of_client_certs(false), quiet_shutdown(false), @@ -606,17 +607,14 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *method) { } int SSL_CTX_up_ref(SSL_CTX *ctx) { - CRYPTO_refcount_inc(&ctx->references); + ctx->UpRefInternal(); return 1; } void SSL_CTX_free(SSL_CTX *ctx) { - if (ctx == NULL || !CRYPTO_refcount_dec_and_test_zero(&ctx->references)) { - return; + if (ctx != nullptr) { + ctx->DecRefInternal(); } - - ctx->~ssl_ctx_st(); - OPENSSL_free(ctx); } ssl_st::ssl_st(SSL_CTX *ctx_arg) diff --git a/ssl/ssl_session.cc b/ssl/ssl_session.cc index c9116d01b3..92b57d1728 100644 --- a/ssl/ssl_session.cc +++ b/ssl/ssl_session.cc @@ -949,7 +949,8 @@ BSSL_NAMESPACE_END using namespace bssl; ssl_session_st::ssl_session_st(const SSL_X509_METHOD *method) - : x509_method(method), + : RefCounted(CheckSubClass()), + x509_method(method), 
extended_master_secret(false), peer_sha256_valid(false), not_resumable(false), @@ -971,18 +972,14 @@ SSL_SESSION *SSL_SESSION_new(const SSL_CTX *ctx) { } int SSL_SESSION_up_ref(SSL_SESSION *session) { - CRYPTO_refcount_inc(&session->references); + session->UpRefInternal(); return 1; } void SSL_SESSION_free(SSL_SESSION *session) { - if (session == NULL || - !CRYPTO_refcount_dec_and_test_zero(&session->references)) { - return; + if (session != nullptr) { + session->DecRefInternal(); } - - session->~ssl_session_st(); - OPENSSL_free(session); } const uint8_t *SSL_SESSION_get_id(const SSL_SESSION *session, From 328ef55904b3fed8ed9e00d59e7b2374b49aacd3 Mon Sep 17 00:00:00 2001 From: Torben Hansen <50673096+torben-hansen@users.noreply.github.com> Date: Mon, 21 Oct 2024 07:16:30 -0700 Subject: [PATCH 9/9] Update build files in generated-src --- .../linux-x86/crypto/fipsmodule/sha1-586.S | 92 +++++------- .../linux-x86/crypto/fipsmodule/sha256-586.S | 142 +++++++++++------- .../mac-x86/crypto/fipsmodule/sha1-586.S | 84 ++++------- .../mac-x86/crypto/fipsmodule/sha256-586.S | 138 ++++++++++------- .../win-x86/crypto/fipsmodule/sha1-586.asm | 77 ++++------ .../win-x86/crypto/fipsmodule/sha256-586.asm | 133 +++++++++------- 6 files changed, 345 insertions(+), 321 deletions(-) diff --git a/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S b/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S index eb59f2ba9a..0e5754fe20 100644 --- a/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S +++ b/generated-src/linux-x86/crypto/fipsmodule/sha1-586.S @@ -5,36 +5,16 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) .text -.globl sha1_block_data_order -.hidden sha1_block_data_order -.type sha1_block_data_order,@function +.globl sha1_block_data_order_nohw +.hidden sha1_block_data_order_nohw +.type sha1_block_data_order_nohw,@function .align 16 -sha1_block_data_order: -.L_sha1_block_data_order_begin: +sha1_block_data_order_nohw: 
+.L_sha1_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call .L000pic_point -.L000pic_point: - popl %ebp - leal OPENSSL_ia32cap_P-.L000pic_point(%ebp),%esi - leal .LK_XX_XX-.L000pic_point(%ebp),%ebp - movl (%esi),%eax - movl 4(%esi),%edx - testl $512,%edx - jz .L001x86 - movl 8(%esi),%ecx - testl $16777216,%eax - jz .L001x86 - andl $268435456,%edx - andl $1073741824,%eax - orl %edx,%eax - cmpl $1342177280,%eax - je .Lavx_shortcut - jmp .Lssse3_shortcut -.align 16 -.L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax @@ -43,9 +23,9 @@ sha1_block_data_order: addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi - jmp .L002loop + jmp .L000loop .align 16 -.L002loop: +.L000loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -1392,27 +1372,28 @@ sha1_block_data_order: movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) - jb .L002loop + jb .L000loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret -.size sha1_block_data_order,.-.L_sha1_block_data_order_begin -.hidden _sha1_block_data_order_ssse3 -.type _sha1_block_data_order_ssse3,@function +.size sha1_block_data_order_nohw,.-.L_sha1_block_data_order_nohw_begin +.globl sha1_block_data_order_ssse3 +.hidden sha1_block_data_order_ssse3 +.type sha1_block_data_order_ssse3,@function .align 16 -_sha1_block_data_order_ssse3: +sha1_block_data_order_ssse3: +.L_sha1_block_data_order_ssse3_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call .L003pic_point -.L003pic_point: + call .L001pic_point +.L001pic_point: popl %ebp - leal .LK_XX_XX-.L003pic_point(%ebp),%ebp -.Lssse3_shortcut: + leal .LK_XX_XX-.L001pic_point(%ebp),%ebp movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 movdqa 32(%ebp),%xmm1 @@ -1464,9 +1445,9 @@ _sha1_block_data_order_ssse3: xorl %edx,%ebp pshufd $238,%xmm0,%xmm4 andl %ebp,%esi - jmp .L004loop + jmp .L002loop .align 16 -.L004loop: +.L002loop: rorl $2,%ebx xorl %edx,%esi movl %eax,%ebp @@ -2369,7 +2350,7 @@ _sha1_block_data_order_ssse3: addl %edx,%ecx movl 
196(%esp),%ebp cmpl 200(%esp),%ebp - je .L005done + je .L003done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 @@ -2504,9 +2485,9 @@ _sha1_block_data_order_ssse3: pshufd $238,%xmm0,%xmm4 andl %ebx,%esi movl %ebp,%ebx - jmp .L004loop + jmp .L002loop .align 16 -.L005done: +.L003done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -2619,20 +2600,21 @@ _sha1_block_data_order_ssse3: popl %ebx popl %ebp ret -.size _sha1_block_data_order_ssse3,.-_sha1_block_data_order_ssse3 -.hidden _sha1_block_data_order_avx -.type _sha1_block_data_order_avx,@function +.size sha1_block_data_order_ssse3,.-.L_sha1_block_data_order_ssse3_begin +.globl sha1_block_data_order_avx +.hidden sha1_block_data_order_avx +.type sha1_block_data_order_avx,@function .align 16 -_sha1_block_data_order_avx: +sha1_block_data_order_avx: +.L_sha1_block_data_order_avx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call .L006pic_point -.L006pic_point: + call .L004pic_point +.L004pic_point: popl %ebp - leal .LK_XX_XX-.L006pic_point(%ebp),%ebp -.Lavx_shortcut: + leal .LK_XX_XX-.L004pic_point(%ebp),%ebp vzeroall vmovdqa (%ebp),%xmm7 vmovdqa 16(%ebp),%xmm0 @@ -2681,9 +2663,9 @@ _sha1_block_data_order_avx: xorl %edx,%ebp vmovdqa %xmm6,32(%esp) andl %ebp,%esi - jmp .L007loop + jmp .L005loop .align 16 -.L007loop: +.L005loop: shrdl $2,%ebx,%ebx xorl %edx,%esi vpalignr $8,%xmm0,%xmm1,%xmm4 @@ -3543,7 +3525,7 @@ _sha1_block_data_order_avx: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je .L008done + je .L006done vmovdqa 160(%esp),%xmm7 vmovdqa 176(%esp),%xmm6 vmovdqu (%ebp),%xmm0 @@ -3674,9 +3656,9 @@ _sha1_block_data_order_avx: movl %esi,%ebp andl %ebx,%esi movl %ebp,%ebx - jmp .L007loop + jmp .L005loop .align 16 -.L008done: +.L006done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -3790,7 +3772,7 @@ _sha1_block_data_order_avx: popl %ebx popl %ebp ret -.size _sha1_block_data_order_avx,.-_sha1_block_data_order_avx +.size 
sha1_block_data_order_avx,.-.L_sha1_block_data_order_avx_begin .align 64 .LK_XX_XX: .long 1518500249,1518500249,1518500249,1518500249 diff --git a/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S b/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S index ee41b78cbf..41b3759d36 100644 --- a/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S +++ b/generated-src/linux-x86/crypto/fipsmodule/sha256-586.S @@ -5,12 +5,12 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) .text -.globl sha256_block_data_order -.hidden sha256_block_data_order -.type sha256_block_data_order,@function +.globl sha256_block_data_order_nohw +.hidden sha256_block_data_order_nohw +.type sha256_block_data_order_nohw,@function .align 16 -sha256_block_data_order: -.L_sha256_block_data_order_begin: +sha256_block_data_order_nohw: +.L_sha256_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi @@ -22,7 +22,7 @@ sha256_block_data_order: call .L000pic_point .L000pic_point: popl %ebp - leal .L001K256-.L000pic_point(%ebp),%ebp + leal .LK256-.L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $6,%eax @@ -31,29 +31,13 @@ sha256_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - leal OPENSSL_ia32cap_P-.L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz .L002loop - movl 8(%edx),%edx - testl $16777216,%ecx - jz .L003no_xmm - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je .L004AVX - testl $512,%ebx - jnz .L005SSSE3 -.L003no_xmm: +.L001no_xmm: subl %edi,%eax cmpl $256,%eax - jae .L006unrolled - jmp .L002loop + jae .L002unrolled + jmp .L003loop .align 16 -.L002loop: +.L003loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx @@ -122,7 +106,7 @@ sha256_block_data_order: movl %ecx,28(%esp) movl %edi,32(%esp) .align 16 -.L00700_15: +.L00400_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx @@ -160,11 +144,11 @@ 
sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne .L00700_15 + jne .L00400_15 movl 156(%esp),%ecx - jmp .L00816_63 + jmp .L00516_63 .align 16 -.L00816_63: +.L00516_63: movl %ecx,%ebx movl 104(%esp),%esi rorl $11,%ecx @@ -219,7 +203,7 @@ sha256_block_data_order: addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne .L00816_63 + jne .L00516_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx @@ -246,7 +230,7 @@ sha256_block_data_order: leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi - jb .L002loop + jb .L003loop movl 12(%esp),%esp popl %edi popl %esi @@ -254,7 +238,7 @@ sha256_block_data_order: popl %ebp ret .align 64 -.L001K256: +.LK256: .long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 .long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 @@ -263,7 +247,7 @@ sha256_block_data_order: .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 16 -.L006unrolled: +.L002unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp @@ -280,9 +264,9 @@ sha256_block_data_order: movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp .L009grand_loop + jmp .L006grand_loop .align 16 -.L009grand_loop: +.L006grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx @@ -3162,15 +3146,40 @@ sha256_block_data_order: movl 
%ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb .L009grand_loop + jb .L006grand_loop movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 32 -.L005SSSE3: +.size sha256_block_data_order_nohw,.-.L_sha256_block_data_order_nohw_begin +.globl sha256_block_data_order_ssse3 +.hidden sha256_block_data_order_ssse3 +.type sha256_block_data_order_ssse3,@function +.align 16 +sha256_block_data_order_ssse3: +.L_sha256_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L007pic_point +.L007pic_point: + popl %ebp + leal .LK256-.L007pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx @@ -3189,9 +3198,9 @@ sha256_block_data_order: movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp .L010grand_ssse3 + jmp .L008grand_ssse3 .align 16 -.L010grand_ssse3: +.L008grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 @@ -3214,9 +3223,9 @@ sha256_block_data_order: paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp .L011ssse3_00_47 + jmp .L009ssse3_00_47 .align 16 -.L011ssse3_00_47: +.L009ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 @@ -3859,7 +3868,7 @@ sha256_block_data_order: addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L011ssse3_00_47 + jne .L009ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi @@ -4373,15 +4382,40 @@ sha256_block_data_order: movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L010grand_ssse3 + jb .L008grand_ssse3 movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 32 -.L004AVX: +.size sha256_block_data_order_ssse3,.-.L_sha256_block_data_order_ssse3_begin +.globl sha256_block_data_order_avx +.hidden sha256_block_data_order_avx +.type 
sha256_block_data_order_avx,@function +.align 16 +sha256_block_data_order_avx: +.L_sha256_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L010pic_point +.L010pic_point: + popl %ebp + leal .LK256-.L010pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp vzeroall movl (%esi),%eax @@ -4401,9 +4435,9 @@ sha256_block_data_order: movl %ecx,24(%esp) movl %esi,28(%esp) vmovdqa 256(%ebp),%xmm7 - jmp .L012grand_avx + jmp .L011grand_avx .align 32 -.L012grand_avx: +.L011grand_avx: vmovdqu (%edi),%xmm0 vmovdqu 16(%edi),%xmm1 vmovdqu 32(%edi),%xmm2 @@ -4422,9 +4456,9 @@ sha256_block_data_order: vmovdqa %xmm5,48(%esp) vmovdqa %xmm6,64(%esp) vmovdqa %xmm7,80(%esp) - jmp .L013avx_00_47 + jmp .L012avx_00_47 .align 16 -.L013avx_00_47: +.L012avx_00_47: addl $64,%ebp vpalignr $4,%xmm0,%xmm1,%xmm4 movl %edx,%ecx @@ -5039,7 +5073,7 @@ sha256_block_data_order: addl %ecx,%eax vmovdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne .L013avx_00_47 + jne .L012avx_00_47 movl %edx,%ecx shrdl $14,%edx,%edx movl 20(%esp),%esi @@ -5553,7 +5587,7 @@ sha256_block_data_order: vmovdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb .L012grand_avx + jb .L011grand_avx movl 108(%esp),%esp vzeroall popl %edi @@ -5561,5 +5595,5 @@ sha256_block_data_order: popl %ebx popl %ebp ret -.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +.size sha256_block_data_order_avx,.-.L_sha256_block_data_order_avx_begin #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__ELF__) diff --git a/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S b/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S index 76ee6bc5a3..f0ab02be58 100644 --- a/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S +++ b/generated-src/mac-x86/crypto/fipsmodule/sha1-586.S 
@@ -5,35 +5,15 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) .text -.globl _sha1_block_data_order -.private_extern _sha1_block_data_order +.globl _sha1_block_data_order_nohw +.private_extern _sha1_block_data_order_nohw .align 4 -_sha1_block_data_order: -L_sha1_block_data_order_begin: +_sha1_block_data_order_nohw: +L_sha1_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call L000pic_point -L000pic_point: - popl %ebp - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi - leal LK_XX_XX-L000pic_point(%ebp),%ebp - movl (%esi),%eax - movl 4(%esi),%edx - testl $512,%edx - jz L001x86 - movl 8(%esi),%ecx - testl $16777216,%eax - jz L001x86 - andl $268435456,%edx - andl $1073741824,%eax - orl %edx,%eax - cmpl $1342177280,%eax - je Lavx_shortcut - jmp Lssse3_shortcut -.align 4,0x90 -L001x86: movl 20(%esp),%ebp movl 24(%esp),%esi movl 28(%esp),%eax @@ -42,9 +22,9 @@ L001x86: addl %esi,%eax movl %eax,104(%esp) movl 16(%ebp),%edi - jmp L002loop + jmp L000loop .align 4,0x90 -L002loop: +L000loop: movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx @@ -1391,25 +1371,26 @@ L002loop: movl %ebx,12(%ebp) movl %edx,%esi movl %ecx,16(%ebp) - jb L002loop + jb L000loop addl $76,%esp popl %edi popl %esi popl %ebx popl %ebp ret -.private_extern __sha1_block_data_order_ssse3 +.globl _sha1_block_data_order_ssse3 +.private_extern _sha1_block_data_order_ssse3 .align 4 -__sha1_block_data_order_ssse3: +_sha1_block_data_order_ssse3: +L_sha1_block_data_order_ssse3_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call L003pic_point -L003pic_point: + call L001pic_point +L001pic_point: popl %ebp - leal LK_XX_XX-L003pic_point(%ebp),%ebp -Lssse3_shortcut: + leal LK_XX_XX-L001pic_point(%ebp),%ebp movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 movdqa 32(%ebp),%xmm1 @@ -1461,9 +1442,9 @@ Lssse3_shortcut: xorl %edx,%ebp pshufd $238,%xmm0,%xmm4 andl %ebp,%esi - jmp L004loop + jmp L002loop .align 4,0x90 -L004loop: +L002loop: rorl $2,%ebx 
xorl %edx,%esi movl %eax,%ebp @@ -2366,7 +2347,7 @@ L004loop: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je L005done + je L003done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 @@ -2501,9 +2482,9 @@ L004loop: pshufd $238,%xmm0,%xmm4 andl %ebx,%esi movl %ebp,%ebx - jmp L004loop + jmp L002loop .align 4,0x90 -L005done: +L003done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -2616,18 +2597,19 @@ L005done: popl %ebx popl %ebp ret -.private_extern __sha1_block_data_order_avx +.globl _sha1_block_data_order_avx +.private_extern _sha1_block_data_order_avx .align 4 -__sha1_block_data_order_avx: +_sha1_block_data_order_avx: +L_sha1_block_data_order_avx_begin: pushl %ebp pushl %ebx pushl %esi pushl %edi - call L006pic_point -L006pic_point: + call L004pic_point +L004pic_point: popl %ebp - leal LK_XX_XX-L006pic_point(%ebp),%ebp -Lavx_shortcut: + leal LK_XX_XX-L004pic_point(%ebp),%ebp vzeroall vmovdqa (%ebp),%xmm7 vmovdqa 16(%ebp),%xmm0 @@ -2676,9 +2658,9 @@ Lavx_shortcut: xorl %edx,%ebp vmovdqa %xmm6,32(%esp) andl %ebp,%esi - jmp L007loop + jmp L005loop .align 4,0x90 -L007loop: +L005loop: shrdl $2,%ebx,%ebx xorl %edx,%esi vpalignr $8,%xmm0,%xmm1,%xmm4 @@ -3538,7 +3520,7 @@ L007loop: addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je L008done + je L006done vmovdqa 160(%esp),%xmm7 vmovdqa 176(%esp),%xmm6 vmovdqu (%ebp),%xmm0 @@ -3669,9 +3651,9 @@ L007loop: movl %esi,%ebp andl %ebx,%esi movl %ebp,%ebx - jmp L007loop + jmp L005loop .align 4,0x90 -L008done: +L006done: addl 16(%esp),%ebx xorl %edi,%esi movl %ecx,%ebp @@ -3797,8 +3779,4 @@ LK_XX_XX: .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.section __IMPORT,__pointers,non_lazy_symbol_pointers -L_OPENSSL_ia32cap_P$non_lazy_ptr: -.indirect_symbol _OPENSSL_ia32cap_P -.long 0 #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && 
defined(__APPLE__) diff --git a/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S b/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S index d43510a491..8e74e68620 100644 --- a/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S +++ b/generated-src/mac-x86/crypto/fipsmodule/sha256-586.S @@ -5,11 +5,11 @@ #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) .text -.globl _sha256_block_data_order -.private_extern _sha256_block_data_order +.globl _sha256_block_data_order_nohw +.private_extern _sha256_block_data_order_nohw .align 4 -_sha256_block_data_order: -L_sha256_block_data_order_begin: +_sha256_block_data_order_nohw: +L_sha256_block_data_order_nohw_begin: pushl %ebp pushl %ebx pushl %esi @@ -21,7 +21,7 @@ L_sha256_block_data_order_begin: call L000pic_point L000pic_point: popl %ebp - leal L001K256-L000pic_point(%ebp),%ebp + leal LK256-L000pic_point(%ebp),%ebp subl $16,%esp andl $-64,%esp shll $6,%eax @@ -30,29 +30,13 @@ L000pic_point: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) - movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx - movl (%edx),%ecx - movl 4(%edx),%ebx - testl $1048576,%ecx - jnz L002loop - movl 8(%edx),%edx - testl $16777216,%ecx - jz L003no_xmm - andl $1073741824,%ecx - andl $268435968,%ebx - orl %ebx,%ecx - andl $1342177280,%ecx - cmpl $1342177280,%ecx - je L004AVX - testl $512,%ebx - jnz L005SSSE3 -L003no_xmm: +L001no_xmm: subl %edi,%eax cmpl $256,%eax - jae L006unrolled - jmp L002loop + jae L002unrolled + jmp L003loop .align 4,0x90 -L002loop: +L003loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx @@ -121,7 +105,7 @@ L002loop: movl %ecx,28(%esp) movl %edi,32(%esp) .align 4,0x90 -L00700_15: +L00400_15: movl %edx,%ecx movl 24(%esp),%esi rorl $14,%ecx @@ -159,11 +143,11 @@ L00700_15: addl $4,%ebp addl %ebx,%eax cmpl $3248222580,%esi - jne L00700_15 + jne L00400_15 movl 156(%esp),%ecx - jmp L00816_63 + jmp L00516_63 .align 4,0x90 -L00816_63: +L00516_63: movl %ecx,%ebx movl 104(%esp),%esi rorl 
$11,%ecx @@ -218,7 +202,7 @@ L00816_63: addl $4,%ebp addl %ebx,%eax cmpl $3329325298,%esi - jne L00816_63 + jne L00516_63 movl 356(%esp),%esi movl 8(%esp),%ebx movl 16(%esp),%ecx @@ -245,7 +229,7 @@ L00816_63: leal 356(%esp),%esp subl $256,%ebp cmpl 8(%esp),%edi - jb L002loop + jb L003loop movl 12(%esp),%esp popl %edi popl %esi @@ -253,7 +237,7 @@ L00816_63: popl %ebp ret .align 6,0x90 -L001K256: +LK256: .long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 .long 66051,67438087,134810123,202182159 .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 @@ -262,7 +246,7 @@ L001K256: .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 .align 4,0x90 -L006unrolled: +L002unrolled: leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebp @@ -279,9 +263,9 @@ L006unrolled: movl %ebx,20(%esp) movl %ecx,24(%esp) movl %esi,28(%esp) - jmp L009grand_loop + jmp L006grand_loop .align 4,0x90 -L009grand_loop: +L006grand_loop: movl (%edi),%ebx movl 4(%edi),%ecx bswap %ebx @@ -3161,15 +3145,38 @@ L009grand_loop: movl %ebx,24(%esp) movl %ecx,28(%esp) cmpl 104(%esp),%edi - jb L009grand_loop + jb L006grand_loop movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 5,0x90 -L005SSSE3: +.globl _sha256_block_data_order_ssse3 +.private_extern _sha256_block_data_order_ssse3 +.align 4 +_sha256_block_data_order_ssse3: 
+L_sha256_block_data_order_ssse3_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L007pic_point +L007pic_point: + popl %ebp + leal LK256-L007pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp movl (%esi),%eax movl 4(%esi),%ebx @@ -3188,9 +3195,9 @@ L005SSSE3: movl %ecx,24(%esp) movl %esi,28(%esp) movdqa 256(%ebp),%xmm7 - jmp L010grand_ssse3 + jmp L008grand_ssse3 .align 4,0x90 -L010grand_ssse3: +L008grand_ssse3: movdqu (%edi),%xmm0 movdqu 16(%edi),%xmm1 movdqu 32(%edi),%xmm2 @@ -3213,9 +3220,9 @@ L010grand_ssse3: paddd %xmm3,%xmm7 movdqa %xmm6,64(%esp) movdqa %xmm7,80(%esp) - jmp L011ssse3_00_47 + jmp L009ssse3_00_47 .align 4,0x90 -L011ssse3_00_47: +L009ssse3_00_47: addl $64,%ebp movl %edx,%ecx movdqa %xmm1,%xmm4 @@ -3858,7 +3865,7 @@ L011ssse3_00_47: addl %ecx,%eax movdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne L011ssse3_00_47 + jne L009ssse3_00_47 movl %edx,%ecx rorl $14,%edx movl 20(%esp),%esi @@ -4372,15 +4379,38 @@ L011ssse3_00_47: movdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb L010grand_ssse3 + jb L008grand_ssse3 movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 5,0x90 -L004AVX: +.globl _sha256_block_data_order_avx +.private_extern _sha256_block_data_order_avx +.align 4 +_sha256_block_data_order_avx: +L_sha256_block_data_order_avx_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L010pic_point +L010pic_point: + popl %ebp + leal LK256-L010pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) leal -96(%esp),%esp vzeroall movl (%esi),%eax @@ -4400,9 +4430,9 @@ L004AVX: movl %ecx,24(%esp) movl 
%esi,28(%esp) vmovdqa 256(%ebp),%xmm7 - jmp L012grand_avx + jmp L011grand_avx .align 5,0x90 -L012grand_avx: +L011grand_avx: vmovdqu (%edi),%xmm0 vmovdqu 16(%edi),%xmm1 vmovdqu 32(%edi),%xmm2 @@ -4421,9 +4451,9 @@ L012grand_avx: vmovdqa %xmm5,48(%esp) vmovdqa %xmm6,64(%esp) vmovdqa %xmm7,80(%esp) - jmp L013avx_00_47 + jmp L012avx_00_47 .align 4,0x90 -L013avx_00_47: +L012avx_00_47: addl $64,%ebp vpalignr $4,%xmm0,%xmm1,%xmm4 movl %edx,%ecx @@ -5038,7 +5068,7 @@ L013avx_00_47: addl %ecx,%eax vmovdqa %xmm6,80(%esp) cmpl $66051,64(%ebp) - jne L013avx_00_47 + jne L012avx_00_47 movl %edx,%ecx shrdl $14,%edx,%edx movl 20(%esp),%esi @@ -5552,7 +5582,7 @@ L013avx_00_47: vmovdqa 64(%ebp),%xmm7 subl $192,%ebp cmpl 104(%esp),%edi - jb L012grand_avx + jb L011grand_avx movl 108(%esp),%esp vzeroall popl %edi @@ -5560,8 +5590,4 @@ L013avx_00_47: popl %ebx popl %ebp ret -.section __IMPORT,__pointers,non_lazy_symbol_pointers -L_OPENSSL_ia32cap_P$non_lazy_ptr: -.indirect_symbol _OPENSSL_ia32cap_P -.long 0 #endif // !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86) && defined(__APPLE__) diff --git a/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm b/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm index ae30b3d13d..8cff61f50f 100644 --- a/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm +++ b/generated-src/win-x86/crypto/fipsmodule/sha1-586.asm @@ -11,35 +11,14 @@ section .text code align=64 %else section .text code %endif -;extern _OPENSSL_ia32cap_P -global _sha1_block_data_order +global _sha1_block_data_order_nohw align 16 -_sha1_block_data_order: -L$_sha1_block_data_order_begin: +_sha1_block_data_order_nohw: +L$_sha1_block_data_order_nohw_begin: push ebp push ebx push esi push edi - call L$000pic_point -L$000pic_point: - pop ebp - lea esi,[_OPENSSL_ia32cap_P] - lea ebp,[(L$K_XX_XX-L$000pic_point)+ebp] - mov eax,DWORD [esi] - mov edx,DWORD [4+esi] - test edx,512 - jz NEAR L$001x86 - mov ecx,DWORD [8+esi] - test eax,16777216 - jz NEAR L$001x86 - and edx,268435456 - and 
eax,1073741824 - or eax,edx - cmp eax,1342177280 - je NEAR L$avx_shortcut - jmp NEAR L$ssse3_shortcut -align 16 -L$001x86: mov ebp,DWORD [20+esp] mov esi,DWORD [24+esp] mov eax,DWORD [28+esp] @@ -48,9 +27,9 @@ L$001x86: add eax,esi mov DWORD [104+esp],eax mov edi,DWORD [16+ebp] - jmp NEAR L$002loop + jmp NEAR L$000loop align 16 -L$002loop: +L$000loop: mov eax,DWORD [esi] mov ebx,DWORD [4+esi] mov ecx,DWORD [8+esi] @@ -1397,24 +1376,25 @@ L$002loop: mov DWORD [12+ebp],ebx mov esi,edx mov DWORD [16+ebp],ecx - jb NEAR L$002loop + jb NEAR L$000loop add esp,76 pop edi pop esi pop ebx pop ebp ret +global _sha1_block_data_order_ssse3 align 16 -__sha1_block_data_order_ssse3: +_sha1_block_data_order_ssse3: +L$_sha1_block_data_order_ssse3_begin: push ebp push ebx push esi push edi - call L$003pic_point -L$003pic_point: + call L$001pic_point +L$001pic_point: pop ebp - lea ebp,[(L$K_XX_XX-L$003pic_point)+ebp] -L$ssse3_shortcut: + lea ebp,[(L$K_XX_XX-L$001pic_point)+ebp] movdqa xmm7,[ebp] movdqa xmm0,[16+ebp] movdqa xmm1,[32+ebp] @@ -1466,9 +1446,9 @@ db 102,15,56,0,222 xor ebp,edx pshufd xmm4,xmm0,238 and esi,ebp - jmp NEAR L$004loop + jmp NEAR L$002loop align 16 -L$004loop: +L$002loop: ror ebx,2 xor esi,edx mov ebp,eax @@ -2371,7 +2351,7 @@ L$004loop: add ecx,edx mov ebp,DWORD [196+esp] cmp ebp,DWORD [200+esp] - je NEAR L$005done + je NEAR L$003done movdqa xmm7,[160+esp] movdqa xmm6,[176+esp] movdqu xmm0,[ebp] @@ -2506,9 +2486,9 @@ db 102,15,56,0,222 pshufd xmm4,xmm0,238 and esi,ebx mov ebx,ebp - jmp NEAR L$004loop + jmp NEAR L$002loop align 16 -L$005done: +L$003done: add ebx,DWORD [16+esp] xor esi,edi mov ebp,ecx @@ -2621,17 +2601,18 @@ L$005done: pop ebx pop ebp ret +global _sha1_block_data_order_avx align 16 -__sha1_block_data_order_avx: +_sha1_block_data_order_avx: +L$_sha1_block_data_order_avx_begin: push ebp push ebx push esi push edi - call L$006pic_point -L$006pic_point: + call L$004pic_point +L$004pic_point: pop ebp - lea ebp,[(L$K_XX_XX-L$006pic_point)+ebp] 
-L$avx_shortcut: + lea ebp,[(L$K_XX_XX-L$004pic_point)+ebp] vzeroall vmovdqa xmm7,[ebp] vmovdqa xmm0,[16+ebp] @@ -2680,9 +2661,9 @@ L$avx_shortcut: xor ebp,edx vmovdqa [32+esp],xmm6 and esi,ebp - jmp NEAR L$007loop + jmp NEAR L$005loop align 16 -L$007loop: +L$005loop: shrd ebx,ebx,2 xor esi,edx vpalignr xmm4,xmm1,xmm0,8 @@ -3542,7 +3523,7 @@ L$007loop: add ecx,edx mov ebp,DWORD [196+esp] cmp ebp,DWORD [200+esp] - je NEAR L$008done + je NEAR L$006done vmovdqa xmm7,[160+esp] vmovdqa xmm6,[176+esp] vmovdqu xmm0,[ebp] @@ -3673,9 +3654,9 @@ L$007loop: mov ebp,esi and esi,ebx mov ebx,ebp - jmp NEAR L$007loop + jmp NEAR L$005loop align 16 -L$008done: +L$006done: add ebx,DWORD [16+esp] xor esi,edi mov ebp,ecx @@ -3801,8 +3782,6 @@ db 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 db 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 db 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 db 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -segment .bss -common _OPENSSL_ia32cap_P 16 %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret diff --git a/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm b/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm index 434195bad4..61b3a6b199 100644 --- a/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm +++ b/generated-src/win-x86/crypto/fipsmodule/sha256-586.asm @@ -11,11 +11,10 @@ section .text code align=64 %else section .text code %endif -;extern _OPENSSL_ia32cap_P -global _sha256_block_data_order +global _sha256_block_data_order_nohw align 16 -_sha256_block_data_order: -L$_sha256_block_data_order_begin: +_sha256_block_data_order_nohw: +L$_sha256_block_data_order_nohw_begin: push ebp push ebx push esi @@ -27,7 +26,7 @@ L$_sha256_block_data_order_begin: call L$000pic_point L$000pic_point: pop ebp - lea ebp,[(L$001K256-L$000pic_point)+ebp] + lea ebp,[(L$K256-L$000pic_point)+ebp] sub esp,16 and esp,-64 shl eax,6 @@ -36,29 +35,13 @@ L$000pic_point: mov DWORD [4+esp],edi mov DWORD 
[8+esp],eax mov DWORD [12+esp],ebx - lea edx,[_OPENSSL_ia32cap_P] - mov ecx,DWORD [edx] - mov ebx,DWORD [4+edx] - test ecx,1048576 - jnz NEAR L$002loop - mov edx,DWORD [8+edx] - test ecx,16777216 - jz NEAR L$003no_xmm - and ecx,1073741824 - and ebx,268435968 - or ecx,ebx - and ecx,1342177280 - cmp ecx,1342177280 - je NEAR L$004AVX - test ebx,512 - jnz NEAR L$005SSSE3 -L$003no_xmm: +L$001no_xmm: sub eax,edi cmp eax,256 - jae NEAR L$006unrolled - jmp NEAR L$002loop + jae NEAR L$002unrolled + jmp NEAR L$003loop align 16 -L$002loop: +L$003loop: mov eax,DWORD [edi] mov ebx,DWORD [4+edi] mov ecx,DWORD [8+edi] @@ -127,7 +110,7 @@ L$002loop: mov DWORD [28+esp],ecx mov DWORD [32+esp],edi align 16 -L$00700_15: +L$00400_15: mov ecx,edx mov esi,DWORD [24+esp] ror ecx,14 @@ -165,11 +148,11 @@ L$00700_15: add ebp,4 add eax,ebx cmp esi,3248222580 - jne NEAR L$00700_15 + jne NEAR L$00400_15 mov ecx,DWORD [156+esp] - jmp NEAR L$00816_63 + jmp NEAR L$00516_63 align 16 -L$00816_63: +L$00516_63: mov ebx,ecx mov esi,DWORD [104+esp] ror ecx,11 @@ -224,7 +207,7 @@ L$00816_63: add ebp,4 add eax,ebx cmp esi,3329325298 - jne NEAR L$00816_63 + jne NEAR L$00516_63 mov esi,DWORD [356+esp] mov ebx,DWORD [8+esp] mov ecx,DWORD [16+esp] @@ -251,7 +234,7 @@ L$00816_63: lea esp,[356+esp] sub ebp,256 cmp edi,DWORD [8+esp] - jb NEAR L$002loop + jb NEAR L$003loop mov esp,DWORD [12+esp] pop edi pop esi @@ -259,7 +242,7 @@ L$00816_63: pop ebp ret align 64 -L$001K256: +L$K256: dd 
1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 dd 66051,67438087,134810123,202182159 db 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 @@ -268,7 +251,7 @@ db 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 db 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 db 62,0 align 16 -L$006unrolled: +L$002unrolled: lea esp,[esp-96] mov eax,DWORD [esi] mov ebp,DWORD [4+esi] @@ -285,9 +268,9 @@ L$006unrolled: mov DWORD [20+esp],ebx mov DWORD [24+esp],ecx mov DWORD [28+esp],esi - jmp NEAR L$009grand_loop + jmp NEAR L$006grand_loop align 16 -L$009grand_loop: +L$006grand_loop: mov ebx,DWORD [edi] mov ecx,DWORD [4+edi] bswap ebx @@ -3167,15 +3150,37 @@ L$009grand_loop: mov DWORD [24+esp],ebx mov DWORD [28+esp],ecx cmp edi,DWORD [104+esp] - jb NEAR L$009grand_loop + jb NEAR L$006grand_loop mov esp,DWORD [108+esp] pop edi pop esi pop ebx pop ebp ret -align 32 -L$005SSSE3: +global _sha256_block_data_order_ssse3 +align 16 +_sha256_block_data_order_ssse3: +L$_sha256_block_data_order_ssse3_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$007pic_point +L$007pic_point: + pop ebp + lea ebp,[(L$K256-L$007pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + 
mov DWORD [12+esp],ebx lea esp,[esp-96] mov eax,DWORD [esi] mov ebx,DWORD [4+esi] @@ -3194,9 +3199,9 @@ L$005SSSE3: mov DWORD [24+esp],ecx mov DWORD [28+esp],esi movdqa xmm7,[256+ebp] - jmp NEAR L$010grand_ssse3 + jmp NEAR L$008grand_ssse3 align 16 -L$010grand_ssse3: +L$008grand_ssse3: movdqu xmm0,[edi] movdqu xmm1,[16+edi] movdqu xmm2,[32+edi] @@ -3219,9 +3224,9 @@ db 102,15,56,0,223 paddd xmm7,xmm3 movdqa [64+esp],xmm6 movdqa [80+esp],xmm7 - jmp NEAR L$011ssse3_00_47 + jmp NEAR L$009ssse3_00_47 align 16 -L$011ssse3_00_47: +L$009ssse3_00_47: add ebp,64 mov ecx,edx movdqa xmm4,xmm1 @@ -3864,7 +3869,7 @@ db 102,15,58,15,249,4 add eax,ecx movdqa [80+esp],xmm6 cmp DWORD [64+ebp],66051 - jne NEAR L$011ssse3_00_47 + jne NEAR L$009ssse3_00_47 mov ecx,edx ror edx,14 mov esi,DWORD [20+esp] @@ -4378,15 +4383,37 @@ db 102,15,58,15,249,4 movdqa xmm7,[64+ebp] sub ebp,192 cmp edi,DWORD [104+esp] - jb NEAR L$010grand_ssse3 + jb NEAR L$008grand_ssse3 mov esp,DWORD [108+esp] pop edi pop esi pop ebx pop ebp ret -align 32 -L$004AVX: +global _sha256_block_data_order_avx +align 16 +_sha256_block_data_order_avx: +L$_sha256_block_data_order_avx_begin: + push ebp + push ebx + push esi + push edi + mov esi,DWORD [20+esp] + mov edi,DWORD [24+esp] + mov eax,DWORD [28+esp] + mov ebx,esp + call L$010pic_point +L$010pic_point: + pop ebp + lea ebp,[(L$K256-L$010pic_point)+ebp] + sub esp,16 + and esp,-64 + shl eax,6 + add eax,edi + mov DWORD [esp],esi + mov DWORD [4+esp],edi + mov DWORD [8+esp],eax + mov DWORD [12+esp],ebx lea esp,[esp-96] vzeroall mov eax,DWORD [esi] @@ -4406,9 +4433,9 @@ L$004AVX: mov DWORD [24+esp],ecx mov DWORD [28+esp],esi vmovdqa xmm7,[256+ebp] - jmp NEAR L$012grand_avx + jmp NEAR L$011grand_avx align 32 -L$012grand_avx: +L$011grand_avx: vmovdqu xmm0,[edi] vmovdqu xmm1,[16+edi] vmovdqu xmm2,[32+edi] @@ -4427,9 +4454,9 @@ L$012grand_avx: vmovdqa [48+esp],xmm5 vmovdqa [64+esp],xmm6 vmovdqa [80+esp],xmm7 - jmp NEAR L$013avx_00_47 + jmp NEAR L$012avx_00_47 align 16 
-L$013avx_00_47: +L$012avx_00_47: add ebp,64 vpalignr xmm4,xmm1,xmm0,4 mov ecx,edx @@ -5044,7 +5071,7 @@ L$013avx_00_47: add eax,ecx vmovdqa [80+esp],xmm6 cmp DWORD [64+ebp],66051 - jne NEAR L$013avx_00_47 + jne NEAR L$012avx_00_47 mov ecx,edx shrd edx,edx,14 mov esi,DWORD [20+esp] @@ -5558,7 +5585,7 @@ L$013avx_00_47: vmovdqa xmm7,[64+ebp] sub ebp,192 cmp edi,DWORD [104+esp] - jb NEAR L$012grand_avx + jb NEAR L$011grand_avx mov esp,DWORD [108+esp] vzeroall pop edi @@ -5566,8 +5593,6 @@ L$013avx_00_47: pop ebx pop ebp ret -segment .bss -common _OPENSSL_ia32cap_P 16 %else ; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738 ret