Skip to content

Commit

Permalink
Centralize handling of s2n-bignum alt/non-alt function selection (#1547)
Browse files Browse the repository at this point in the history
Handling of alt/non-alt s2n-bignum functions is done separately
in the implementation of P-384, P-521, and curve25519. This change
centralizes the handling code in the s2n-bignum header file.
  • Loading branch information
dkostic authored Apr 30, 2024
1 parent 00f3c45 commit c31d1ce
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 231 deletions.
56 changes: 24 additions & 32 deletions crypto/curve25519/curve25519.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,6 @@
// For Ed25519, dom2(F,C) is the empty string and PH the identity function,
// cf. rfc8032 5.1.

// curve25519_s2n_bignum_capable returns 1 if CURVE25519_S2N_BIGNUM_CAPABLE was
// defined when this file was compiled and 0 otherwise. The value is chosen by
// the preprocessor; nothing is probed at run time.
OPENSSL_INLINE int curve25519_s2n_bignum_capable(void) {
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
return 1;
#else
return 0;
#endif
}

void ed25519_sha512(uint8_t out[SHA512_DIGEST_LENGTH],
const void *input1, size_t len1, const void *input2, size_t len2,
const void *input3, size_t len3) {
Expand Down Expand Up @@ -92,11 +84,11 @@ void ED25519_keypair_from_seed(uint8_t out_public_key[ED25519_PUBLIC_KEY_LEN],

// Step: rfc8032 5.1.5.[3,4]
// Compute [az]B and encode public key to a 32 byte octet.
if (curve25519_s2n_bignum_capable() == 1) {
ed25519_public_key_from_hashed_seed_s2n_bignum(out_public_key, az);
} else {
ed25519_public_key_from_hashed_seed_nohw(out_public_key, az);
}
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
ed25519_public_key_from_hashed_seed_s2n_bignum(out_public_key, az);
#else
ed25519_public_key_from_hashed_seed_nohw(out_public_key, az);
#endif

// Encoded public key is a suffix in the private key. Avoids having to
// generate the public key from the private key when signing.
Expand Down Expand Up @@ -150,13 +142,13 @@ int ED25519_sign(uint8_t out_sig[ED25519_SIGNATURE_LEN],
ED25519_PRIVATE_KEY_SEED_LEN, message, message_len, NULL, 0);

// Step: rfc8032 5.1.6.[3,5,6,7]
if (curve25519_s2n_bignum_capable() == 1) {
ed25519_sign_s2n_bignum(out_sig, r, az,
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
ed25519_sign_s2n_bignum(out_sig, r, az,
private_key + ED25519_PRIVATE_KEY_SEED_LEN, message, message_len);
} else {
ed25519_sign_nohw(out_sig, r, az,
#else
ed25519_sign_nohw(out_sig, r, az,
private_key + ED25519_PRIVATE_KEY_SEED_LEN, message, message_len);
}
#endif

return 1;
}
Expand Down Expand Up @@ -206,13 +198,13 @@ int ED25519_verify(const uint8_t *message, size_t message_len,
// Verification works by computing [S]B - [k]A' and comparing against R_expected.
int res = 0;
uint8_t R_computed_encoded[32];
if (curve25519_s2n_bignum_capable() == 1) {
res = ed25519_verify_s2n_bignum(R_computed_encoded, public_key, R_expected, S,
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
res = ed25519_verify_s2n_bignum(R_computed_encoded, public_key, R_expected, S,
message, message_len);
} else {
res = ed25519_verify_nohw(R_computed_encoded, public_key, R_expected, S,
#else
res = ed25519_verify_nohw(R_computed_encoded, public_key, R_expected, S,
message, message_len);
}
#endif

// Comparison [S]B - [k]A' =? R_expected. Short-circuits if decoding failed.
return (res == 1) &&
Expand All @@ -224,11 +216,11 @@ void X25519_public_from_private(
uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN],
const uint8_t private_key[X25519_PRIVATE_KEY_LEN]) {

if (curve25519_s2n_bignum_capable() == 1) {
x25519_public_from_private_s2n_bignum(out_public_value, private_key);
} else {
x25519_public_from_private_nohw(out_public_value, private_key);
}
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
x25519_public_from_private_s2n_bignum(out_public_value, private_key);
#else
x25519_public_from_private_nohw(out_public_value, private_key);
#endif
}

void X25519_keypair(uint8_t out_public_value[X25519_PUBLIC_VALUE_LEN],
Expand Down Expand Up @@ -262,11 +254,11 @@ int X25519(uint8_t out_shared_key[X25519_SHARED_KEY_LEN],

static const uint8_t kZeros[X25519_SHARED_KEY_LEN] = {0};

if (curve25519_s2n_bignum_capable() == 1) {
x25519_scalar_mult_generic_s2n_bignum(out_shared_key, private_key, peer_public_value);
} else {
#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
x25519_scalar_mult_generic_s2n_bignum(out_shared_key, private_key, peer_public_value);
#else
x25519_scalar_mult_generic_nohw(out_shared_key, private_key, peer_public_value);
}
#endif

// The all-zero output results when the input is a point of small order.
return constant_time_declassify_int(
Expand Down
128 changes: 11 additions & 117 deletions crypto/curve25519/curve25519_s2n_bignum_asm.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,79 +6,6 @@

#if defined(CURVE25519_S2N_BIGNUM_CAPABLE)
#include "../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h"
#endif

// Stub functions if s2n-bignum implementations are not compiled.
// These functions have to abort, otherwise we risk applications assuming they
// did work without actually doing anything.
#if !defined(CURVE25519_S2N_BIGNUM_CAPABLE)

#define S2N_BIGNUM_STUB_FUNC(return_type, symbol, ...) \
return_type symbol(__VA_ARGS__); \
return_type symbol(__VA_ARGS__) { abort(); } \

S2N_BIGNUM_STUB_FUNC(void, bignum_mod_n25519, uint64_t z[4], uint64_t k, uint64_t *x)
S2N_BIGNUM_STUB_FUNC(void, bignum_neg_p25519, uint64_t z[4], uint64_t x[4])
S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4])
S2N_BIGNUM_STUB_FUNC(void, bignum_madd_n25519_alt, uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_encode, uint8_t z[32], uint64_t p[8])
S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode, uint64_t z[8], const uint8_t c[32])
S2N_BIGNUM_STUB_FUNC(uint64_t, edwards25519_decode_alt, uint64_t z[8], const uint8_t c[32])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase, uint64_t res[8],uint64_t scalar[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmulbase_alt, uint64_t res[8],uint64_t scalar[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4])
S2N_BIGNUM_STUB_FUNC(void, edwards25519_scalarmuldouble_alt, uint64_t res[8], uint64_t scalar[4], uint64_t point[8], uint64_t bscalar[4])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519_byte_alt, uint8_t res[32], const uint8_t scalar[32], const uint8_t point[32])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte, uint8_t res[32], const uint8_t scalar[32])
S2N_BIGNUM_STUB_FUNC(void, curve25519_x25519base_byte_alt, uint8_t res[32], const uint8_t scalar[32])
#endif // !defined(CURVE25519_S2N_BIGNUM_CAPABLE)

// curve25519_s2n_bignum_use_no_alt_implementation returns 1 if the no_alt
// s2n-bignum implementation should be used and 0 otherwise.
//
// Below is the decision logic for which assembly backend implementation
// of x25519 s2n-bignum we should use if x25519 s2n-bignum capable. Currently,
// we support the following implementations.
//
// x86_64:
// - s2n-bignum-no-alt: hardware implementation using bmi2+adx instruction sets
// - s2n-bignum-alt: hardware implementation using standard instructions
//
// aarch64:
// - s2n-bignum-no-alt: hardware implementation for "low" multiplier throughput
// - s2n-bignum-alt: hardware implementation for "high" multiplier throughput
//
// Through experiments we have found that:
//
// For x86_64: bmi2+adx will almost always give a performance boost. So, here we
// prefer s2n-bignum-no-alt over s2n-bignum-alt if the former is supported.
// For aarch64: if a wide multiplier is supported, we prefer s2n-bignum-alt over
// s2n-bignum-no-alt if the former is supported.
// |curve25519_s2n_bignum_alt_capable| specifically looks to match CPUs that
// have wide multipliers. This ensures that s2n-bignum-alt will only be used
// on such CPUs.
// Prototype placed directly ahead of the definition.
// NOTE(review): presumably here to satisfy missing-prototype warnings — confirm.
OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void);
// Returns 1 to select the no_alt s2n-bignum functions and 0 to select the
// _alt ones. The decision is made per call from the CPU capability queries
// below; which queries are compiled in depends on the target architecture.
OPENSSL_INLINE int curve25519_s2n_bignum_use_no_alt_implementation(void) {
#if defined(OPENSSL_X86_64)
// For x86_64 the no_alt implementation is bmi2+adx. Prefer if available.
// Both capabilities must be reported; otherwise fall back to _alt.
if (CRYPTO_is_BMI2_capable() == 1 && CRYPTO_is_ADX_capable() == 1) {
return 1;
} else {
return 0;
}
#elif defined(OPENSSL_AARCH64)
// For aarch64 the alt implementation is for wide multipliers. Prefer if
// available.
// Note the inverted sense relative to x86_64: capability present => _alt (0).
if (CRYPTO_is_ARMv8_wide_multiplier_capable() == 1) {
return 0;
} else {
return 1;
}
#endif
// Have to return some default value.
// Both architecture branches above return on every path, so this statement
// is only reached when neither OPENSSL_X86_64 nor OPENSSL_AARCH64 is defined.
return 0;
}

void x25519_scalar_mult_generic_s2n_bignum(
uint8_t out_shared_key[X25519_SHARED_KEY_LEN],
Expand All @@ -91,13 +18,9 @@ void x25519_scalar_mult_generic_s2n_bignum(
private_key_internal_demask[31] &= 127;
private_key_internal_demask[31] |= 64;

if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) {
curve25519_x25519_byte(out_shared_key, private_key_internal_demask,
peer_public_value);
} else {
curve25519_x25519_byte_alt(out_shared_key, private_key_internal_demask,
peer_public_value);
}
curve25519_x25519_byte_selector(out_shared_key,
private_key_internal_demask,
peer_public_value);
}

void x25519_public_from_private_s2n_bignum(
Expand All @@ -110,11 +33,7 @@ void x25519_public_from_private_s2n_bignum(
private_key_internal_demask[31] &= 127;
private_key_internal_demask[31] |= 64;

if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) {
curve25519_x25519base_byte(out_public_value, private_key_internal_demask);
} else {
curve25519_x25519base_byte_alt(out_public_value, private_key_internal_demask);
}
curve25519_x25519base_byte_selector(out_public_value, private_key_internal_demask);
}

void ed25519_public_key_from_hashed_seed_s2n_bignum(
Expand All @@ -125,29 +44,14 @@ void ed25519_public_key_from_hashed_seed_s2n_bignum(
uint64_t uint64_hashed_seed[4] = {0};
OPENSSL_memcpy(uint64_hashed_seed, az, 32);

if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) {
edwards25519_scalarmulbase(uint64_point, uint64_hashed_seed);
} else {
edwards25519_scalarmulbase_alt(uint64_point, uint64_hashed_seed);
}
edwards25519_scalarmulbase_selector(uint64_point, uint64_hashed_seed);

edwards25519_encode(out_public_key, uint64_point);
}

void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN],
uint8_t r[SHA512_DIGEST_LENGTH], const uint8_t *s, const uint8_t *A,
const void *message, size_t message_len) {

void (*scalarmulbase)(uint64_t res[8],uint64_t scalar[4]);
void (*madd)(uint64_t z[4], uint64_t x[4], uint64_t y[4], uint64_t c[4]);

if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) {
scalarmulbase = edwards25519_scalarmulbase;
madd = bignum_madd_n25519;
} else {
scalarmulbase = edwards25519_scalarmulbase_alt;
madd = bignum_madd_n25519_alt;
}

uint8_t k[SHA512_DIGEST_LENGTH] = {0};
uint64_t R[8] = {0};
Expand All @@ -162,7 +66,7 @@ void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN],
bignum_mod_n25519(uint64_r, 8, uint64_r);

// Compute [r]B.
scalarmulbase(R, uint64_r);
edwards25519_scalarmulbase_selector(R, uint64_r);
edwards25519_encode(out_sig, R);

// Compute k = SHA512(R || A || message)
Expand All @@ -174,34 +78,22 @@ void ed25519_sign_s2n_bignum(uint8_t out_sig[ED25519_SIGNATURE_LEN],

// Compute S = r + k * s modulo the order of the base-point B.
// out_sig = R || S
madd(S, uint64_k, uint64_s, uint64_r);
bignum_madd_n25519_selector(S, uint64_k, uint64_s, uint64_r);
OPENSSL_memcpy(out_sig + 32, S, 32);
}

int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32],
const uint8_t public_key[ED25519_PUBLIC_KEY_LEN], uint8_t R_expected[32],
uint8_t S[32], const uint8_t *message, size_t message_len) {

void (*scalarmuldouble)(uint64_t res[8], uint64_t scalar[4],
uint64_t point[8], uint64_t bscalar[4]);
uint64_t (*decode)(uint64_t z[8], const uint8_t c[32]);

if (curve25519_s2n_bignum_use_no_alt_implementation() == 1) {
scalarmuldouble = edwards25519_scalarmuldouble;
decode = edwards25519_decode;
} else {
scalarmuldouble = edwards25519_scalarmuldouble_alt;
decode = edwards25519_decode_alt;
}

uint8_t k[SHA512_DIGEST_LENGTH] = {0};
uint64_t uint64_k[8] = {0};
uint64_t uint64_R[8] = {0};
uint64_t uint64_S[4] = {0};
uint64_t A[8] = {0};

// Decode public key as A'.
if (decode(A, public_key) != 0) {
if (edwards25519_decode_selector(A, public_key) != 0) {
return 0;
}

Expand All @@ -222,8 +114,10 @@ int ed25519_verify_s2n_bignum(uint8_t R_computed_encoded[32],

// Compute R_have <- [S]B - [k]A'.
OPENSSL_memcpy(uint64_S, S, 32);
scalarmuldouble(uint64_R, uint64_k, A, uint64_S);
edwards25519_scalarmuldouble_selector(uint64_R, uint64_k, A, uint64_S);
edwards25519_encode(R_computed_encoded, uint64_R);

return 1;
}

#endif
40 changes: 4 additions & 36 deletions crypto/fipsmodule/ec/p384.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,47 +76,15 @@ static const p384_felem p384_felem_one = {

#if defined(P384_USE_S2N_BIGNUM_FIELD_ARITH)

#if defined(OPENSSL_X86_64)
// On x86_64 platforms s2n-bignum uses bmi2 and adx instruction sets
// for some of the functions. These instructions are not supported by
// every x86 CPU so we have to check if they are available and in case
// they are not we fallback to slightly slower but generic implementation.
// Returns nonzero when the "_alt" s2n-bignum P-384 functions should be used,
// i.e. when BMI2 or ADX (or both) is reported as unavailable.
static inline uint8_t p384_use_s2n_bignum_alt(void) {
return (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable());
}
#else
// On aarch64 platforms s2n-bignum has two implementations of certain
// functions -- the default one and the alternative (suffixed _alt).
// Depending on the architecture one version is faster than the other.
// Generally, the "_alt" functions are faster on architectures with higher
// multiplier throughput, for example, Graviton 3, Apple's M1 and iPhone chips.
// Returns the result of CRYPTO_is_ARMv8_wide_multiplier_capable() converted to
// uint8_t; a nonzero value selects the "_alt" s2n-bignum P-384 functions.
static inline uint8_t p384_use_s2n_bignum_alt(void) {
return CRYPTO_is_ARMv8_wide_multiplier_capable();
}
#endif

#define p384_felem_add(out, in0, in1) bignum_add_p384(out, in0, in1)
#define p384_felem_sub(out, in0, in1) bignum_sub_p384(out, in0, in1)
#define p384_felem_opp(out, in0) bignum_neg_p384(out, in0)
#define p384_felem_to_bytes(out, in0) bignum_tolebytes_6(out, in0)
#define p384_felem_from_bytes(out, in0) bignum_fromlebytes_6(out, in0)

// The following four functions need bmi2 and adx support.
#define p384_felem_mul(out, in0, in1) \
if (p384_use_s2n_bignum_alt()) bignum_montmul_p384_alt(out, in0, in1); \
else bignum_montmul_p384(out, in0, in1);

#define p384_felem_sqr(out, in0) \
if (p384_use_s2n_bignum_alt()) bignum_montsqr_p384_alt(out, in0); \
else bignum_montsqr_p384(out, in0);

#define p384_felem_to_mont(out, in0) \
if (p384_use_s2n_bignum_alt()) bignum_tomont_p384_alt(out, in0); \
else bignum_tomont_p384(out, in0);

#define p384_felem_from_mont(out, in0) \
if (p384_use_s2n_bignum_alt()) bignum_deamont_p384_alt(out, in0); \
else bignum_deamont_p384(out, in0);
#define p384_felem_to_mont(out, in0) bignum_tomont_p384_selector(out, in0)
#define p384_felem_from_mont(out, in0) bignum_deamont_p384_selector(out, in0)
#define p384_felem_mul(out, in0, in1) bignum_montmul_p384_selector(out, in0, in1)
#define p384_felem_sqr(out, in0) bignum_montsqr_p384_selector(out, in0)

static p384_limb_t p384_felem_nz(const p384_limb_t in1[P384_NLIMBS]) {
return bignum_nonzero_6(in1);
Expand Down
30 changes: 2 additions & 28 deletions crypto/fipsmodule/ec/p521.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,40 +77,14 @@ static const p521_limb_t p521_felem_p[P521_NLIMBS] = {
0xffffffffffffffff, 0xffffffffffffffff,
0x1ff};

#if defined(OPENSSL_X86_64)
// On x86_64 platforms s2n-bignum uses bmi2 and adx instruction sets
// for some of the functions. These instructions are not supported by
// every x86 CPU so we have to check if they are available and in case
// they are not we fallback to slightly slower but generic implementation.
// Returns nonzero when the "_alt" s2n-bignum P-521 functions should be used,
// i.e. when BMI2 or ADX (or both) is reported as unavailable.
static inline uint8_t p521_use_s2n_bignum_alt(void) {
return (!CRYPTO_is_BMI2_capable() || !CRYPTO_is_ADX_capable());
}
#else
// On aarch64 platforms s2n-bignum has two implementations of certain
// functions -- the default one and the alternative (suffixed _alt).
// Depending on the architecture one version is faster than the other.
// Generally, the "_alt" functions are faster on architectures with higher
// multiplier throughput, for example, Graviton 3, Apple's M1 and iPhone chips.
// Returns the result of CRYPTO_is_ARMv8_wide_multiplier_capable() converted to
// uint8_t; a nonzero value selects the "_alt" s2n-bignum P-521 functions.
static inline uint8_t p521_use_s2n_bignum_alt(void) {
return CRYPTO_is_ARMv8_wide_multiplier_capable();
}
#endif

// s2n-bignum implementation of field arithmetic
#define p521_felem_add(out, in0, in1) bignum_add_p521(out, in0, in1)
#define p521_felem_sub(out, in0, in1) bignum_sub_p521(out, in0, in1)
#define p521_felem_opp(out, in0) bignum_neg_p521(out, in0)
#define p521_felem_to_bytes(out, in0) bignum_tolebytes_p521(out, in0)
#define p521_felem_from_bytes(out, in0) bignum_fromlebytes_p521(out, in0)

// The following two functions need bmi2 and adx support.
#define p521_felem_mul(out, in0, in1) \
if (p521_use_s2n_bignum_alt()) bignum_mul_p521_alt(out, in0, in1); \
else bignum_mul_p521(out, in0, in1);

#define p521_felem_sqr(out, in0) \
if (p521_use_s2n_bignum_alt()) bignum_sqr_p521_alt(out, in0); \
else bignum_sqr_p521(out, in0);
#define p521_felem_mul(out, in0, in1) bignum_mul_p521_selector(out, in0, in1)
#define p521_felem_sqr(out, in0) bignum_sqr_p521_selector(out, in0)

#else // P521_USE_S2N_BIGNUM_FIELD_ARITH

Expand Down
Loading

0 comments on commit c31d1ce

Please sign in to comment.