Skip to content

Commit

Permalink
Add CRYPTO_{addc,subc}_* functions to crypto/internal.h
Browse files Browse the repository at this point in the history
I'm getting tired of having to rederive the best way to convince the
compiler to emit addc and subb functions. Do it once and use the Clang
builtins when available, because compilers seem to generally be terrible
at this. (See llvm/llvm-project#73847.)

The immediate trigger was the FIPS 186-2 PRF, which completely doesn't
matter, but reminded me of this mess.

As far as naming and calling conventions go, I just mimicked the Clang
ones. In doing so, also use the Clang builtins when available, which
helps Clang x86_64 no-asm builds a bit:

Before:
Did 704 ECDH P-384 operations in 1018920us (690.9 ops/sec)
Did 1353 ECDSA P-384 signing operations in 1077927us (1255.2 ops/sec)
Did 1190 ECDSA P-384 verify operations in 1020788us (1165.8 ops/sec)
Did 784 RSA 2048 signing operations in 1058644us (740.6 ops/sec)
Did 34000 RSA 2048 verify (same key) operations in 1011854us (33601.7 ops/sec)
Did 30000 RSA 2048 verify (fresh key) operations in 1005974us (29821.8 ops/sec)
Did 7799 RSA 2048 private key parse operations in 1061203us (7349.2 ops/sec)
Did 130 RSA 4096 signing operations in 1082617us (120.1 ops/sec)
Did 10472 RSA 4096 verify (same key) operations in 1082857us (9670.7 ops/sec)
Did 9086 RSA 4096 verify (fresh key) operations in 1039164us (8743.6 ops/sec)
Did 2574 RSA 4096 private key parse operations in 1078946us (2385.7 ops/sec)

After:
Did 775 ECDH P-384 operations in 1008465us (768.5 ops/sec)
Did 1474 ECDSA P-384 signing operations in 1062096us (1387.8 ops/sec)
Did 1485 ECDSA P-384 verify operations in 1086574us (1366.7 ops/sec)
Did 812 RSA 2048 signing operations in 1043705us (778.0 ops/sec)
Did 36000 RSA 2048 verify (same key) operations in 1005643us (35798.0 ops/sec)
Did 33000 RSA 2048 verify (fresh key) operations in 1028256us (32093.2 ops/sec)
Did 10087 RSA 2048 private key parse operations in 1018067us (9908.0 ops/sec)
Did 132 RSA 4096 signing operations in 1033049us (127.8 ops/sec)
Did 11000 RSA 4096 verify (same key) operations in 1070502us (10275.6 ops/sec)
Did 9812 RSA 4096 verify (fresh key) operations in 1047618us (9366.0 ops/sec)
Did 3245 RSA 4096 private key parse operations in 1083247us (2995.6 ops/sec)

But this is also a no-asm build, so we don't really care. Builds with
assembly, broadly, do not use these codepaths. The exception is the
generic ECC code on 32-bit Arm, which has a few mod-add functions, and
we don't have 32-bit Arm bn_add_words assembly:

Before:
Did 168 ECDH P-384 operations in 1003229us (167.5 ops/sec)
Did 330 ECDSA P-384 signing operations in 1076600us (306.5 ops/sec)
Did 319 ECDSA P-384 verify operations in 1080750us (295.2 ops/sec)
After:
Did 195 ECDH P-384 operations in 1026458us (190.0 ops/sec)
Did 350 ECDSA P-384 signing operations in 1005392us (348.1 ops/sec)
Did 341 ECDSA P-384 verify operations in 1008486us (338.1 ops/sec)

Change-Id: Ia3fa51e59398224b9c39180e1d856bb412aa1246
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/64309
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
(cherry picked from commit 70ca6bc24be103dabd68e448cd3af29b929b771d)
  • Loading branch information
davidben authored and samuel40791765 committed Apr 11, 2024
1 parent 0585bc1 commit 0f2c2ba
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 55 deletions.
10 changes: 2 additions & 8 deletions crypto/fipsmodule/bn/add.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,7 @@ int bn_uadd_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {

BN_ULONG carry = bn_add_words(r->d, a->d, b->d, min);
for (int i = min; i < max; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = carry + a->d[i];
carry = tmp < a->d[i];
r->d[i] = tmp;
r->d[i] = CRYPTO_addc_w(a->d[i], 0, carry, &carry);
}

r->d[max] = carry;
Expand Down Expand Up @@ -241,10 +238,7 @@ int bn_usub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {

BN_ULONG borrow = bn_sub_words(r->d, a->d, b->d, b_width);
for (int i = b_width; i < a->width; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = a->d[i];
r->d[i] = a->d[i] - borrow;
borrow = tmp < r->d[i];
r->d[i] = CRYPTO_subc_w(a->d[i], 0, borrow, &borrow);
}

if (borrow) {
Expand Down
51 changes: 10 additions & 41 deletions crypto/fipsmodule/bn/generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,37 +567,6 @@ void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {

#if !defined(BN_ADD_ASM)

// bn_add_with_carry returns |x + y + carry|, and sets |*out_carry| to the
// carry bit. |carry| must be zero or one.
static inline BN_ULONG bn_add_with_carry(BN_ULONG x, BN_ULONG y, BN_ULONG carry,
BN_ULONG *out_carry) {
assert(carry == 0 || carry == 1);
#if defined(BN_ULLONG)
BN_ULLONG ret = carry;
ret += (BN_ULLONG)x + y;
*out_carry = (BN_ULONG)(ret >> BN_BITS2);
return (BN_ULONG)ret;
#else
x += carry;
carry = x < carry;
BN_ULONG ret = x + y;
carry += ret < x;
*out_carry = carry;
return ret;
#endif
}

// bn_sub_with_borrow returns |x - y - borrow|, and sets |*out_borrow| to the
// borrow bit. |borrow| must be zero or one.
static inline BN_ULONG bn_sub_with_borrow(BN_ULONG x, BN_ULONG y,
BN_ULONG borrow,
BN_ULONG *out_borrow) {
assert(borrow == 0 || borrow == 1);
BN_ULONG ret = x - y - borrow;
*out_borrow = (x < y) | ((x == y) & borrow);
return ret;
}

BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
if (n == 0) {
Expand All @@ -606,17 +575,17 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,

BN_ULONG carry = 0;
while (n & ~3) {
r[0] = bn_add_with_carry(a[0], b[0], carry, &carry);
r[1] = bn_add_with_carry(a[1], b[1], carry, &carry);
r[2] = bn_add_with_carry(a[2], b[2], carry, &carry);
r[3] = bn_add_with_carry(a[3], b[3], carry, &carry);
r[0] = CRYPTO_addc_w(a[0], b[0], carry, &carry);
r[1] = CRYPTO_addc_w(a[1], b[1], carry, &carry);
r[2] = CRYPTO_addc_w(a[2], b[2], carry, &carry);
r[3] = CRYPTO_addc_w(a[3], b[3], carry, &carry);
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
r[0] = bn_add_with_carry(a[0], b[0], carry, &carry);
r[0] = CRYPTO_addc_w(a[0], b[0], carry, &carry);
a++;
b++;
r++;
Expand All @@ -633,17 +602,17 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,

BN_ULONG borrow = 0;
while (n & ~3) {
r[0] = bn_sub_with_borrow(a[0], b[0], borrow, &borrow);
r[1] = bn_sub_with_borrow(a[1], b[1], borrow, &borrow);
r[2] = bn_sub_with_borrow(a[2], b[2], borrow, &borrow);
r[3] = bn_sub_with_borrow(a[3], b[3], borrow, &borrow);
r[0] = CRYPTO_subc_w(a[0], b[0], borrow, &borrow);
r[1] = CRYPTO_subc_w(a[1], b[1], borrow, &borrow);
r[2] = CRYPTO_subc_w(a[2], b[2], borrow, &borrow);
r[3] = CRYPTO_subc_w(a[3], b[3], borrow, &borrow);
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
r[0] = bn_sub_with_borrow(a[0], b[0], borrow, &borrow);
r[0] = CRYPTO_subc_w(a[0], b[0], borrow, &borrow);
a++;
b++;
r++;
Expand Down
8 changes: 2 additions & 6 deletions crypto/fipsmodule/bn/mul.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,17 +144,13 @@ static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
// in |a| were zeros.
dl = -dl;
for (int i = 0; i < dl; i++) {
r[i] = 0u - b[i] - borrow;
borrow |= r[i] != 0;
r[i] = CRYPTO_subc_w(0, b[i], borrow, &borrow);
}
} else {
// |b| is shorter than |a|. Complete the subtraction as if the excess words
// in |b| were zeros.
for (int i = 0; i < dl; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = a[i];
r[i] = a[i] - borrow;
borrow = tmp < r[i];
r[i] = CRYPTO_subc_w(a[i], 0, borrow, &borrow);
}
}

Expand Down
110 changes: 110 additions & 0 deletions crypto/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ typedef __uint128_t uint128_t;
#define OPENSSL_ATTR_PURE
#endif

#if defined(__has_builtin)
#define OPENSSL_HAS_BUILTIN(x) __has_builtin(x)
#else
#define OPENSSL_HAS_BUILTIN(x) 0
#endif


// Pointer utility functions.

Expand Down Expand Up @@ -1044,6 +1050,110 @@ static inline uint64_t CRYPTO_rotr_u64(uint64_t value, int shift) {
}


// Arithmetic functions.

// CRYPTO_addc_* returns |x + y + carry|, and sets |*out_carry| to the carry
// bit. |carry| must be zero or one.
#if OPENSSL_HAS_BUILTIN(__builtin_addc)

#define CRYPTO_GENERIC_ADDC(x, y, carry, out_carry) \
(_Generic((x), \
unsigned: __builtin_addc, \
unsigned long: __builtin_addcl, \
unsigned long long: __builtin_addcll))((x), (y), (carry), (out_carry))

static inline uint32_t CRYPTO_addc_u32(uint32_t x, uint32_t y, uint32_t carry,
uint32_t *out_carry) {
assert(carry <= 1);
return CRYPTO_GENERIC_ADDC(x, y, carry, out_carry);
}

static inline uint64_t CRYPTO_addc_u64(uint64_t x, uint64_t y, uint64_t carry,
uint64_t *out_carry) {
assert(carry <= 1);
return CRYPTO_GENERIC_ADDC(x, y, carry, out_carry);
}

#else

static inline uint32_t CRYPTO_addc_u32(uint32_t x, uint32_t y, uint32_t carry,
uint32_t *out_carry) {
assert(carry <= 1);
uint64_t ret = carry;
ret += (uint64_t)x + y;
*out_carry = (uint32_t)(ret >> 32);
return (uint32_t)ret;
}

static inline uint64_t CRYPTO_addc_u64(uint64_t x, uint64_t y, uint64_t carry,
uint64_t *out_carry) {
assert(carry <= 1);
#if defined(BORINGSSL_HAS_UINT128)
uint128_t ret = carry;
ret += (uint128_t)x + y;
*out_carry = (uint64_t)(ret >> 64);
return (uint64_t)ret;
#else
x += carry;
carry = x < carry;
uint64_t ret = x + y;
carry += ret < x;
*out_carry = carry;
return ret;
#endif
}
#endif

// CRYPTO_subc_* returns |x - y - borrow|, and sets |*out_borrow| to the borrow
// bit. |borrow| must be zero or one.
#if OPENSSL_HAS_BUILTIN(__builtin_subc)

#define CRYPTO_GENERIC_SUBC(x, y, borrow, out_borrow) \
(_Generic((x), \
unsigned: __builtin_subc, \
unsigned long: __builtin_subcl, \
unsigned long long: __builtin_subcll))((x), (y), (borrow), (out_borrow))

static inline uint32_t CRYPTO_subc_u32(uint32_t x, uint32_t y, uint32_t borrow,
uint32_t *out_borrow) {
assert(borrow <= 1);
return CRYPTO_GENERIC_SUBC(x, y, borrow, out_borrow);
}

static inline uint64_t CRYPTO_subc_u64(uint64_t x, uint64_t y, uint64_t borrow,
uint64_t *out_borrow) {
assert(borrow <= 1);
return CRYPTO_GENERIC_SUBC(x, y, borrow, out_borrow);
}

#else

static inline uint32_t CRYPTO_subc_u32(uint32_t x, uint32_t y, uint32_t borrow,
uint32_t *out_borrow) {
assert(borrow <= 1);
uint32_t ret = x - y - borrow;
*out_borrow = (x < y) | ((x == y) & borrow);
return ret;
}

static inline uint64_t CRYPTO_subc_u64(uint64_t x, uint64_t y, uint64_t borrow,
uint64_t *out_borrow) {
assert(borrow <= 1);
uint64_t ret = x - y - borrow;
*out_borrow = (x < y) | ((x == y) & borrow);
return ret;
}
#endif

#if defined(OPENSSL_64_BIT)
#define CRYPTO_addc_w CRYPTO_addc_u64
#define CRYPTO_subc_w CRYPTO_subc_u64
#else
#define CRYPTO_addc_w CRYPTO_addc_u32
#define CRYPTO_subc_w CRYPTO_subc_u32
#endif


// FIPS functions.

#if defined(AWSLC_FIPS)
Expand Down

0 comments on commit 0f2c2ba

Please sign in to comment.