diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index b0dc82707a..49fbbfe776 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -52,14 +52,6 @@ // For Ed25519, dom2(F,C) is the empty string and PH the identify function, // cf. rfc8032 5.1. -OPENSSL_INLINE int curve25519_s2n_bignum_capable(void) { -#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) - return 1; -#else - return 0; -#endif -} - void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH], const void *input1, size_t len1, const void *input2, size_t len2, const void *input3, size_t len3) { @@ -92,11 +84,11 @@ void ED25519_keypair_from_seed(uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN], // Step: rfc8032 5.1.5.[3,4] // Compute [az]B and encode public key to a 32 byte octet. - if (curve25519_s2n_bignum_capable() == 1) { - ed25519_public_key_from_hashed_seed_s2n_bignum(out_public_key, az); - } else { - ed25519_public_key_from_hashed_seed_nohw(out_public_key, az); - } +#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) + ed25519_public_key_from_hashed_seed_s2n_bignum(out_public_key, az); +#else + ed25519_public_key_from_hashed_seed_nohw(out_public_key, az); +#endif // Encoded public key is a suffix in the private key. Avoids having to // generate the public key from the private key when signing. 
@@ -150,13 +142,13 @@ int ED25519_sign(uint8_t out_sig[ED25519_SIGNATURE_LEN], ED25519_PRIVATE_KEY_SEED_LEN, message, message_len, NULL, 0); // Step: rfc8032 5.1.6.[3,5,6,7] - if (curve25519_s2n_bignum_capable() == 1) { - ed25519_sign_s2n_bignum(out_sig, r, az, +#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) + ed25519_sign_s2n_bignum(out_sig, r, az, private_key + ED25519_PRIVATE_KEY_SEED_LEN, message, message_len); - } else { - ed25519_sign_nohw(out_sig, r, az, +#else + ed25519_sign_nohw(out_sig, r, az, private_key + ED25519_PRIVATE_KEY_SEED_LEN, message, message_len); - } +#endif return 1; } @@ -206,13 +198,13 @@ int ED25519_verify(const uint8_t *message, size_t message_len, // Verification works by computing [S]B - [k]A' and comparing against R_expected. int res = 0; uint8_t R_computed_encoded[32]; - if (curve25519_s2n_bignum_capable() == 1) { - res = ed25519_verify_s2n_bignum(R_computed_encoded, public_key, R_expected, S, +#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) + res = ed25519_verify_s2n_bignum(R_computed_encoded, public_key, R_expected, S, message, message_len); - } else { - res = ed25519_verify_nohw(R_computed_encoded, public_key, R_expected, S, +#else + res = ed25519_verify_nohw(R_computed_encoded, public_key, R_expected, S, message, message_len); - } +#endif // Comparison [S]B - [k]A' =? R_expected. Short-circuits if decoding failed. 
return (res == 1) && @@ -224,11 +216,11 @@ void X25519_public_from_private( uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], const uint8_t private_key[X25519_PRIVATE_KEY_LEN]) { - if (curve25519_s2n_bignum_capable() == 1) { - x25519_public_from_private_s2n_bignum(out_public_value, private_key); - } else { - x25519_public_from_private_nohw(out_public_value, private_key); - } +#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) + x25519_public_from_private_s2n_bignum(out_public_value, private_key); +#else + x25519_public_from_private_nohw(out_public_value, private_key); +#endif } void X25519_keypair(uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN], @@ -262,11 +254,11 @@ int X25519(uint8_t out_shared_key[X25519_SHARED_KEY_LEN], static const uint8_t kZeros[X25519_SHARED_KEY_LEN] = {0}; - if (curve25519_s2n_bignum_capable() == 1) { - x25519_scalar_mult_generic_s2n_bignum(out_shared_key, private_key, peer_public_value); - } else { +#if defined(CURVE25519_S2N_BIGNUM_CAPABLE) + x25519_scalar_mult_generic_s2n_bignum(out_shared_key, private_key, peer_public_value); +#else x25519_scalar_mult_generic_nohw(out_shared_key, private_key, peer_public_value); - } +#endif // The all-zero output results when the input is a point of small order. return constant_time_declassify_int( diff --git a/crypto/curve25519/curve25519_s2n_bignum_asm.c b/crypto/curve25519/curve25519_s2n_bignum_asm.c index 64b4735dfb..0deeb0862c 100644 --- a/crypto/curve25519/curve25519_s2n_bignum_asm.c +++ b/crypto/curve25519/curve25519_s2n_bignum_asm.c @@ -6,79 +6,6 @@ #if defined(CURVE25519_S2N_BIGNUM_CAPABLE) #include "../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" -#endif - -// Stub functions if s2n-bignum implementations are not compiled. -// These functions have to abort, otherwise we risk applications assuming they -// did work without actually doing anything. -#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE) - -#define S2N_BIGNUM_STUB_FUNC(return_type, symbol, ...) 
\ - return_type symbol(__VA_ARGS__); \ - return_type symbol(__VA_ARGS__) { abort(); } \ - -S2N_BIGNUM_STUB_FUNC(void, bignum_mod_n25519, uint64_t z[4], uint64_t k, uint64_t *x) -S2N_BIGNUM_STUB_FUNC(void, bignum_neg_p25519, uint64_t z[4], uint64_t x[4]) -S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]) -S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519_alt, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_encode, uint8_t z[32], uint64_t p[8]) -S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode, uint64_t z[8], const uint8_t c[32]) -S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode_alt, uint64_t z[8], const uint8_t c[32]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase, uint64_t res[8],uint64_t scalar[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase_alt, uint64_t res[8],uint64_t scalar[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4]) -S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble_alt, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4]) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32]) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte_alt, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32]) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte, uint8_t res[32], const uint8_t scalar[32]) -S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte_alt, uint8_t res[32], const uint8_t scalar[32]) -#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE) - -// curve25519_s2n_bignum_use_no_alt_implementation returns 1 if the no_alt -// s2n-bignum implementation should be used and 0 otherwise. -// -// Below is the decision logic for which assembly backend implementation -// of x25519 s2n-bignum we should use if x25519 s2n-bignum capable. 
Currently, -// we support the following implementations. -// -// x86_64: -// - s2n-bignum-no-alt: hardware implementation using bmi2+adx instruction sets -// - s2n-bignum-alt: hardware implementation using standard instructions -// -// aarch64: -// - s2n-bignum-no-alt: hardware implementation for "low" multiplier throughput -// - s2n-bignum-alt: hardware implementation for "high" multiplier throughput -// -// Through experiments we have found that: -// -// For x86_64: bmi+adc will almost always give a performance boost. So, here we -// prefer s2n-bignum-no-alt over s2n-bignum-alt if the former is supported. -// For aarch64: if a wide multiplier is supported, we prefer s2n-bignum-alt over -// s2n-bignum-no-alt if the former is supported. -// |curve25519_s2n_bignum_alt_capable| specifically looks to match CPUs that -// have wide multipliers. this ensures that s2n-bignum-alt will only be used -// on such CPUs. -OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void); -OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void) { -#if defined(OPENSSL_X86_64) - // For x86_64 the no_alt implementation is bmi2+adx. Prefer if available. - if (CRYPTO_is_BMI2_capable() == 1 && CRYPTO_is_ADX_capable() == 1) { - return 1; - } else { - return 0; - } -#elif defined(OPENSSL_AARCH64) - // For aarch64 the alt implementation is for wide multipliers. Prefer if - // available. - if (CRYPTO_is_ARMv8_wide_multiplier_capable() == 1) { - return 0; - } else { - return 1; - } -#endif - // Have to return some default value. 
- return 0; -} void x25519_scalar_mult_generic_s2n_bignum( uint8_t out_shared_key[X25519_SHARED_KEY_LEN], @@ -91,13 +18,9 @@ void x25519_scalar_mult_generic_s2n_bignum( private_key_internal_demask[31] &= 127; private_key_internal_demask[31] |= 64; - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - curve25519_x25519_byte(out_shared_key, private_key_internal_demask, - peer_public_value); - } else { - curve25519_x25519_byte_alt(out_shared_key, private_key_internal_demask, - peer_public_value); - } + curve25519_x25519_byte_selector(out_shared_key, + private_key_internal_demask, + peer_public_value); } void x25519_public_from_private_s2n_bignum( @@ -110,11 +33,7 @@ void x25519_public_from_private_s2n_bignum( private_key_internal_demask[31] &= 127; private_key_internal_demask[31] |= 64; - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - curve25519_x25519base_byte(out_public_value, private_key_internal_demask); - } else { - curve25519_x25519base_byte_alt(out_public_value, private_key_internal_demask); - } + curve25519_x25519base_byte_selector(out_public_value, private_key_internal_demask); } void ed25519_public_key_from_hashed_seed_s2n_bignum( @@ -125,11 +44,7 @@ void ed25519_public_key_from_hashed_seed_s2n_bignum( uint64_t uint64_hashed_seed[4] = {0}; OPENSSL_memcpy(uint64_hashed_seed, az, 32); - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - edwards25519_scalarmulbase(uint64_point, uint64_hashed_seed); - } else { - edwards25519_scalarmulbase_alt(uint64_point, uint64_hashed_seed); - } + edwards25519_scalarmulbase_selector(uint64_point, uint64_hashed_seed); edwards25519_encode(out_public_key, uint64_point); } @@ -137,17 +52,6 @@ void ed25519_public_key_from_hashed_seed_s2n_bignum( void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN], uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A, const void *message, size_t message_len) { - - void (*scalarmulbase)(uint64_t res[8],uint64_t scalar[4]); - 
void (*madd)(uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]); - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - scalarmulbase = edwards25519_scalarmulbase; - madd = bignum_madd_n25519; - } else { - scalarmulbase = edwards25519_scalarmulbase_alt; - madd = bignum_madd_n25519_alt; - } uint8_t k[SHA512_DIGEST_LENGTH] = {0}; uint64_t R[8] = {0}; @@ -162,7 +66,7 @@ void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN], bignum_mod_n25519(uint64_r, 8, uint64_r); // Compute [r]B. - scalarmulbase(R, uint64_r); + edwards25519_scalarmulbase_selector(R, uint64_r); edwards25519_encode(out_sig, R); // Compute k = SHA512(R || A || message) @@ -174,7 +78,7 @@ void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN], // Compute S = r + k * s modulo the order of the base-point B. // out_sig = R || S - madd(S, uint64_k, uint64_s, uint64_r); + bignum_madd_n25519_selector(S, uint64_k, uint64_s, uint64_r); OPENSSL_memcpy(out_sig + 32, S, 32); } @@ -182,18 +86,6 @@ int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32], const uint8_t public_key[ED25519_PUBLIC_KEY_LEN], uint8_t R_expected[32], uint8_t S[32], const uint8_t *message, size_t message_len) { - void (*scalarmuldouble)(uint64_t res[8], uint64_t scalar[4], - uint64_t point[8], uint64_t bscalar[4]); - uint64_t (*decode)(uint64_t z[8], const uint8_t c[32]); - - if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) { - scalarmuldouble = edwards25519_scalarmuldouble; - decode = edwards25519_decode; - } else { - scalarmuldouble = edwards25519_scalarmuldouble_alt; - decode = edwards25519_decode_alt; - } - uint8_t k[SHA512_DIGEST_LENGTH] = {0}; uint64_t uint64_k[8] = {0}; uint64_t uint64_R[8] = {0}; @@ -201,7 +93,7 @@ int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32], uint64_t A[8] = {0}; // Decode public key as A'. 
- if (decode(A, public_key) != 0) { + if (edwards25519_decode_selector(A, public_key) != 0) { return 0; } @@ -222,8 +114,10 @@ int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32], // Compute R_have <- [S]B - [k]A'. OPENSSL_memcpy(uint64_S, S, 32); - scalarmuldouble(uint64_R, uint64_k, A, uint64_S); + edwards25519_scalarmuldouble_selector(uint64_R, uint64_k, A, uint64_S); edwards25519_encode(R_computed_encoded, uint64_R); return 1; } + +#endif diff --git a/crypto/fipsmodule/ec/p384.c b/crypto/fipsmodule/ec/p384.c index 852ceb18b4..0b457449ba 100644 --- a/crypto/fipsmodule/ec/p384.c +++ b/crypto/fipsmodule/ec/p384.c @@ -76,47 +76,15 @@ static const p384_felem p384_felem_one = { #if defined(P384_USE_S2N_BIGNUM_FIELD_ARITH) -#if defined(OPENSSL_X86_64) -// On x86_64 platforms s2n-bignum uses bmi2 and adx instruction sets -// for some of the functions. These instructions are not supported by -// every x86 CPU so we have to check if they are available and in case -// they are not we fallback to slightly slower but generic implementation. -static inline uint8_t p384_use_s2n_bignum_alt(void) { - return (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable()); -} -#else -// On aarch64 platforms s2n-bignum has two implementations of certain -// functions -- the default one and the alternative (suffixed _alt). -// Depending on the architecture one version is faster than the other. -// Generally, the "_alt" functions are faster on architectures with higher -// multiplier throughput, for example, Graviton 3, Apple's M1 and iPhone chips. 
-static inline uint8_t p384_use_s2n_bignum_alt(void) { - return CRYPTO_is_ARMv8_wide_multiplier_capable(); -} -#endif - #define p384_felem_add(out, in0, in1) bignum_add_p384(out, in0, in1) #define p384_felem_sub(out, in0, in1) bignum_sub_p384(out, in0, in1) #define p384_felem_opp(out, in0) bignum_neg_p384(out, in0) #define p384_felem_to_bytes(out, in0) bignum_tolebytes_6(out, in0) #define p384_felem_from_bytes(out, in0) bignum_fromlebytes_6(out, in0) - -// The following four functions need bmi2 and adx support. -#define p384_felem_mul(out, in0, in1) \ - if (p384_use_s2n_bignum_alt()) bignum_montmul_p384_alt(out, in0, in1); \ - else bignum_montmul_p384(out, in0, in1); - -#define p384_felem_sqr(out, in0) \ - if (p384_use_s2n_bignum_alt()) bignum_montsqr_p384_alt(out, in0); \ - else bignum_montsqr_p384(out, in0); - -#define p384_felem_to_mont(out, in0) \ - if (p384_use_s2n_bignum_alt()) bignum_tomont_p384_alt(out, in0); \ - else bignum_tomont_p384(out, in0); - -#define p384_felem_from_mont(out, in0) \ - if (p384_use_s2n_bignum_alt()) bignum_deamont_p384_alt(out, in0); \ - else bignum_deamont_p384(out, in0); +#define p384_felem_to_mont(out, in0) bignum_tomont_p384_selector(out, in0) +#define p384_felem_from_mont(out, in0) bignum_deamont_p384_selector(out, in0) +#define p384_felem_mul(out, in0, in1) bignum_montmul_p384_selector(out, in0, in1) +#define p384_felem_sqr(out, in0) bignum_montsqr_p384_selector(out, in0) static p384_limb_t p384_felem_nz(const p384_limb_t in1[P384_NLIMBS]) { return bignum_nonzero_6(in1); diff --git a/crypto/fipsmodule/ec/p521.c b/crypto/fipsmodule/ec/p521.c index 1c2a05d973..b6ca0f48f5 100644 --- a/crypto/fipsmodule/ec/p521.c +++ b/crypto/fipsmodule/ec/p521.c @@ -77,40 +77,14 @@ static const p521_limb_t p521_felem_p[P521_NLIMBS] = { 0xffffffffffffffff, 0xffffffffffffffff, 0x1ff}; -#if defined(OPENSSL_X86_64) -// On x86_64 platforms s2n-bignum uses bmi2 and adx instruction sets -// for some of the functions. 
These instructions are not supported by -// every x86 CPU so we have to check if they are available and in case -// they are not we fallback to slightly slower but generic implementation. -static inline uint8_t p521_use_s2n_bignum_alt(void) { - return (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable()); -} -#else -// On aarch64 platforms s2n-bignum has two implementations of certain -// functions -- the default one and the alternative (suffixed _alt). -// Depending on the architecture one version is faster than the other. -// Generally, the "_alt" functions are faster on architectures with higher -// multiplier throughput, for example, Graviton 3, Apple's M1 and iPhone chips. -static inline uint8_t p521_use_s2n_bignum_alt(void) { - return CRYPTO_is_ARMv8_wide_multiplier_capable(); -} -#endif - // s2n-bignum implementation of field arithmetic #define p521_felem_add(out, in0, in1) bignum_add_p521(out, in0, in1) #define p521_felem_sub(out, in0, in1) bignum_sub_p521(out, in0, in1) #define p521_felem_opp(out, in0) bignum_neg_p521(out, in0) #define p521_felem_to_bytes(out, in0) bignum_tolebytes_p521(out, in0) #define p521_felem_from_bytes(out, in0) bignum_fromlebytes_p521(out, in0) - -// The following two functions need bmi2 and adx support. 
-#define p521_felem_mul(out, in0, in1) \ - if (p521_use_s2n_bignum_alt()) bignum_mul_p521_alt(out, in0, in1); \ - else bignum_mul_p521(out, in0, in1); - -#define p521_felem_sqr(out, in0) \ - if (p521_use_s2n_bignum_alt()) bignum_sqr_p521_alt(out, in0); \ - else bignum_sqr_p521(out, in0); +#define p521_felem_mul(out, in0, in1) bignum_mul_p521_selector(out, in0, in1) +#define p521_felem_sqr(out, in0) bignum_sqr_p521_selector(out, in0) #else // P521_USE_S2N_BIGNUM_FIELD_ARITH diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index db7897a841..87ac773fa5 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -28,6 +28,27 @@ // // - On ARM, the "_alt" forms target machines with higher multiplier // throughput, generally offering higher performance there. +// For each of those, we define a _selector function that selects, at runtime, +// the _alt or non-_alt version to run. + +#if defined(OPENSSL_X86_64) +// On x86_64 platforms s2n-bignum uses bmi2 and adx instruction sets +// for some of the functions. These instructions are not supported by +// every x86 CPU so we have to check if they are available and in case +// they are not we fall back to a slightly slower but generic implementation. +static inline uint8_t use_s2n_bignum_alt(void) { + return (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable()); +} +#else +// On aarch64 platforms s2n-bignum has two implementations of certain +// functions -- the default one and the alternative (suffixed _alt). +// Depending on the architecture one version is faster than the other. +// Generally, the "_alt" functions are faster on architectures with higher +// multiplier throughput, for example, Graviton 3, Apple's M1 and iPhone chips. 
+static inline uint8_t use_s2n_bignum_alt(void) { + return CRYPTO_is_ARMv8_wide_multiplier_capable(); +} +#endif // Add modulo p_384, z := (x + y) mod p_384, assuming x and y reduced // Inputs x[6], y[6]; output z[6] @@ -36,26 +57,29 @@ extern void bignum_add_p384(uint64_t z[static 6], const uint64_t x[static 6], co // Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 // Input x[6]; output z[6] extern void bignum_deamont_p384(uint64_t z[static 6], const uint64_t x[static 6]); - -// Convert from almost-Montgomery form, z := (x / 2^384) mod p_384 -// Input x[6]; output z[6] extern void bignum_deamont_p384_alt(uint64_t z[static 6], const uint64_t x[static 6]); +static inline void bignum_deamont_p384_selector(uint64_t z[static 6], const uint64_t x[static 6]) { + if (use_s2n_bignum_alt()) { bignum_deamont_p384_alt(z, x); } + else { bignum_deamont_p384(z, x); } +} // Montgomery multiply, z := (x * y / 2^384) mod p_384 // Inputs x[6], y[6]; output z[6] extern void bignum_montmul_p384(uint64_t z[static 6], const uint64_t x[static 6], const uint64_t y[static 6]); - -// Montgomery multiply, z := (x * y / 2^384) mod p_384 -// Inputs x[6], y[6]; output z[6] extern void bignum_montmul_p384_alt(uint64_t z[static 6], const uint64_t x[static 6], const uint64_t y[static 6]); +static inline void bignum_montmul_p384_selector(uint64_t z[static 6], const uint64_t x[static 6], const uint64_t y[static 6]) { + if (use_s2n_bignum_alt()) { bignum_montmul_p384_alt(z, x, y); } + else { bignum_montmul_p384(z, x, y); } +} // Montgomery square, z := (x^2 / 2^384) mod p_384 // Input x[6]; output z[6] extern void bignum_montsqr_p384(uint64_t z[static 6], const uint64_t x[static 6]); - -// Montgomery square, z := (x^2 / 2^384) mod p_384 -// Input x[6]; output z[6] extern void bignum_montsqr_p384_alt(uint64_t z[static 6], const uint64_t x[static 6]); +static inline void bignum_montsqr_p384_selector(uint64_t z[static 6], const uint64_t x[static 6]) { + if (use_s2n_bignum_alt()) { 
bignum_montsqr_p384_alt(z, x); } + else { bignum_montsqr_p384(z, x); } +} // Negate modulo p_384, z := (-x) mod p_384, assuming x reduced // Input x[6]; output z[6] @@ -68,10 +92,11 @@ extern void bignum_sub_p384(uint64_t z[static 6], const uint64_t x[static 6], co // Convert to Montgomery form z := (2^384 * x) mod p_384 */ // Input x[6]; output z[6] */ extern void bignum_tomont_p384(uint64_t z[static 6], const uint64_t x[static 6]); - -// Convert to Montgomery form z := (2^384 * x) mod p_384 */ -// Input x[6]; output z[6] */ extern void bignum_tomont_p384_alt(uint64_t z[static 6], const uint64_t x[static 6]); +static inline void bignum_tomont_p384_selector(uint64_t z[static 6], const uint64_t x[static 6]) { + if (use_s2n_bignum_alt()) { bignum_tomont_p384_alt(z, x); } + else { bignum_tomont_p384(z, x); } +} // Convert 6-digit (384-bit) bignum from little-endian form // Input x[6]; output z[6] @@ -100,18 +125,20 @@ extern void bignum_neg_p521(uint64_t z[static 9], const uint64_t x[static 9]); // Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced // Inputs x[9], y[9]; output z[9] extern void bignum_mul_p521(uint64_t z[static 9], const uint64_t x[static 9], const uint64_t y[static 9]); - -// Multiply modulo p_521, z := (x * y) mod p_521, assuming x and y reduced -// Inputs x[9], y[9]; output z[9] extern void bignum_mul_p521_alt(uint64_t z[static 9], const uint64_t x[static 9], const uint64_t y[static 9]); +static inline void bignum_mul_p521_selector(uint64_t z[static 9], const uint64_t x[static 9], const uint64_t y[static 9]) { + if (use_s2n_bignum_alt()) { bignum_mul_p521_alt(z, x, y); } + else { bignum_mul_p521(z, x, y); } +} // Square modulo p_521, z := (x^2) mod p_521, assuming x reduced // Input x[9]; output z[9] extern void bignum_sqr_p521(uint64_t z[static 9], const uint64_t x[static 9]); - -// Square modulo p_521, z := (x^2) mod p_521, assuming x reduced -// Input x[9]; output z[9] extern void bignum_sqr_p521_alt(uint64_t z[static 9], 
const uint64_t x[static 9]); +static inline void bignum_sqr_p521_selector(uint64_t z[static 9], const uint64_t x[static 9]) { + if (use_s2n_bignum_alt()) { bignum_sqr_p521_alt(z, x); } + else { bignum_sqr_p521(z, x); } +} // Convert little-endian bytes to 9-digit 528-bit bignum extern void bignum_fromlebytes_p521(uint64_t z[static 9], const uint8_t x[static 66]); @@ -125,6 +152,10 @@ extern void bignum_tolebytes_p521(uint8_t z[static 66], const uint64_t x[static // point. The result, another u-coordinate, is saved in |res|. extern void curve25519_x25519_byte(uint8_t res[static 32], const uint8_t scalar[static 32], const uint8_t point[static 32]); extern void curve25519_x25519_byte_alt(uint8_t res[static 32], const uint8_t scalar[static 32], const uint8_t point[static 32]); +static inline void curve25519_x25519_byte_selector(uint8_t res[static 32], const uint8_t scalar[static 32], const uint8_t point[static 32]) { + if (use_s2n_bignum_alt()) { curve25519_x25519_byte_alt(res, scalar, point); } + else { curve25519_x25519_byte(res, scalar, point); } +} // curve25519_x25519base_byte and curve25519_x25519base_byte_alt computes the // x25519 function specified in https://www.rfc-editor.org/rfc/rfc7748 using the @@ -132,6 +163,10 @@ extern void curve25519_x25519_byte_alt(uint8_t res[static 32], const uint8_t sca // another u-coordinate, is saved in |res|. extern void curve25519_x25519base_byte(uint8_t res[static 32], const uint8_t scalar[static 32]); extern void curve25519_x25519base_byte_alt(uint8_t res[static 32], const uint8_t scalar[static 32]); +static inline void curve25519_x25519base_byte_selector(uint8_t res[static 32], const uint8_t scalar[static 32]) { + if (use_s2n_bignum_alt()) { curve25519_x25519base_byte_alt(res, scalar); } + else { curve25519_x25519base_byte(res, scalar); } +} // Evaluate z := x^2 where x is a 2048-bit integer. 
// Input: x[32]; output: z[64]; temporary buffer: t[>=72] @@ -268,6 +303,10 @@ extern void bignum_madd_n25519(uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4], uint64_t c[static 4]); extern void bignum_madd_n25519_alt(uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4], uint64_t c[static 4]); +static inline void bignum_madd_n25519_selector(uint64_t z[static 4], uint64_t x[static 4], uint64_t y[static 4], uint64_t c[static 4]) { + if (use_s2n_bignum_alt()) { bignum_madd_n25519_alt(z, x, y, c); } + else { bignum_madd_n25519(z, x, y, c); } +} // This assumes that the input buffer p points to a pair of 256-bit // numbers x (at p) and y (at p+4) representing a point (x,y) on the @@ -302,12 +341,20 @@ extern void edwards25519_encode(uint8_t z[static 32], uint64_t p[static 8]); // Input c[32] (bytes); output function return and z[8] extern uint64_t edwards25519_decode(uint64_t z[static 8], const uint8_t c[static 32]); extern uint64_t edwards25519_decode_alt(uint64_t z[static 8], const uint8_t c[static 32]); +static inline uint64_t edwards25519_decode_selector(uint64_t z[static 8], const uint8_t c[static 32]) { + if (use_s2n_bignum_alt()) { return edwards25519_decode_alt(z, c); } + else { return edwards25519_decode(z, c); } +} // Given a scalar n, returns point (X,Y) = n * B where B = (...,4/5) is // the standard basepoint for the edwards25519 (Ed25519) curve. 
// Input scalar[4]; output res[8] extern void edwards25519_scalarmulbase(uint64_t res[static 8], uint64_t scalar[static 4]); extern void edwards25519_scalarmulbase_alt(uint64_t res[static 8], uint64_t scalar[static 4]); +static inline void edwards25519_scalarmulbase_selector(uint64_t res[static 8], uint64_t scalar[static 4]) { + if (use_s2n_bignum_alt()) { edwards25519_scalarmulbase_alt(res, scalar); } + else { edwards25519_scalarmulbase(res, scalar); } +} // Given scalar = n, point = P and bscalar = m, returns in res // the point (X,Y) = n * P + m * B where B = (...,4/5) is @@ -324,4 +371,9 @@ extern void edwards25519_scalarmuldouble(uint64_t res[static 8], uint64_t scalar uint64_t point[static 8], uint64_t bscalar[static 4]); extern void edwards25519_scalarmuldouble_alt(uint64_t res[static 8], uint64_t scalar[static 4], uint64_t point[static 8], uint64_t bscalar[static 4]); +static inline void edwards25519_scalarmuldouble_selector(uint64_t res[static 8], uint64_t scalar[static 4], uint64_t point[static 8], uint64_t bscalar[static 4]) { + if (use_s2n_bignum_alt()) { edwards25519_scalarmuldouble_alt(res, scalar, point, bscalar); } + else { edwards25519_scalarmuldouble(res, scalar, point, bscalar); } +} + #endif