diff --git a/src/bench_internal.c b/src/bench_internal.c
index f3686dd2892ae..adb60beaa229f 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -98,6 +98,18 @@ static void bench_scalar_negate(void* arg, int iters) {
     }
 }
 
+static void bench_scalar_half(void* arg, int iters) {
+    int i;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_scalar s = data->scalar[0];
+
+    for (i = 0; i < iters; i++) {
+        secp256k1_scalar_half(&s, &s);
+    }
+
+    data->scalar[0] = s;
+}
+
 static void bench_scalar_mul(void* arg, int iters) {
     int i;
     bench_inv *data = (bench_inv*)arg;
@@ -370,6 +382,7 @@ int main(int argc, char **argv) {
     int d = argc == 1; /* default */
     print_output_table_header_row();
 
+    if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "half")) run_benchmark("scalar_half", bench_scalar_half, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
diff --git a/src/scalar.h b/src/scalar.h
index 4b3c2998bb655..a188c1eb062d2 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -67,6 +67,9 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
 /** Compute the complement of a scalar (modulo the group order). */
 static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a);
 
+/** Multiply a scalar with the multiplicative inverse of 2. */
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a);
+
 /** Check whether a scalar equals zero. */
 static int secp256k1_scalar_is_zero(const secp256k1_scalar *a);
 
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 715cc12ee53ef..2256bf1d6adfa 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -199,6 +199,47 @@ static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar
     secp256k1_scalar_verify(r);
 }
 
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    /* Writing `/` for field division and `//` for integer division, we compute
+     *
+     *   a/2 = (a - (a&1))/2 + (a&1)/2
+     *       = (a >> 1) + (a&1 ? 1/2 : 0)
+     *       = (a >> 1) + (a&1 ? n//2+1 : 0),
+     *
+     * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
+     * For n//2, we have the constants SECP256K1_N_H_0, ...
+     *
+     * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
+     *   - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2          = (n-3)//2
+     *   - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
+     * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
+     */
+    uint64_t mask = -(uint64_t)(a->d[0] & 1U);
+    secp256k1_uint128 t;
+    secp256k1_scalar_verify(a);
+
+    secp256k1_u128_from_u64(&t, (a->d[0] >> 1) | (a->d[1] << 63));
+    secp256k1_u128_accum_u64(&t, (SECP256K1_N_H_0 + 1U) & mask);
+    r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
+    secp256k1_u128_accum_u64(&t, (a->d[1] >> 1) | (a->d[2] << 63));
+    secp256k1_u128_accum_u64(&t, SECP256K1_N_H_1 & mask);
+    r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
+    secp256k1_u128_accum_u64(&t, (a->d[2] >> 1) | (a->d[3] << 63));
+    secp256k1_u128_accum_u64(&t, SECP256K1_N_H_2 & mask);
+    r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
+    r->d[3] = secp256k1_u128_to_u64(&t) + (a->d[3] >> 1) + (SECP256K1_N_H_3 & mask);
+#ifdef VERIFY
+    /* The line above only computed the bottom 64 bits of r->d[3]; redo the computation
+     * in full 128 bits to make sure the top 64 bits are indeed zero. */
+    secp256k1_u128_accum_u64(&t, a->d[3] >> 1);
+    secp256k1_u128_accum_u64(&t, SECP256K1_N_H_3 & mask);
+    secp256k1_u128_rshift(&t, 64);
+    VERIFY_CHECK(secp256k1_u128_to_u64(&t) == 0);
+
+    secp256k1_scalar_verify(r);
+#endif
+}
+
 SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
     secp256k1_scalar_verify(a);
 
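
The comment in secp256k1_scalar_half above derives a/2 = (a >> 1) + (a&1 ? n//2+1 : 0) (mod n) and argues that the sum never exceeds n - 1. As a sanity check on that identity (this sketch is not part of the patch), it can be verified exhaustively for a small odd modulus; the value 13 below is an arbitrary stand-in for the group order n:

    /* Illustration only: check a/2 == (a >> 1) + (a&1 ? n//2+1 : 0) (mod n)
     * by doubling the result, for every a below a small odd toy modulus. */
    #include <stdio.h>

    int main(void) {
        const unsigned n = 13;           /* small odd stand-in for the group order */
        const unsigned half = n / 2 + 1; /* 1/2 mod n, i.e. n//2 + 1, since n is odd */
        unsigned a;
        for (a = 0; a < n; a++) {
            unsigned r = (a >> 1) + ((a & 1) ? half : 0);
            /* Per the overflow argument, r never exceeds n - 1, and doubling
             * it modulo n must give back a. */
            if (r >= n || (2 * r) % n != a) {
                printf("halving identity fails for a=%u\n", a);
                return 1;
            }
        }
        printf("halving identity holds for every a mod %u\n", n);
        return 0;
    }

In particular, the largest odd input a = n - 2 lands exactly on n - 1, matching the bound argued in the comment.
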
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 5ca1342273790..709db70e8fb2f 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -245,6 +245,55 @@ static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar
     secp256k1_scalar_verify(r);
 }
 
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    /* Writing `/` for field division and `//` for integer division, we compute
+     *
+     *   a/2 = (a - (a&1))/2 + (a&1)/2
+     *       = (a >> 1) + (a&1 ? 1/2 : 0)
+     *       = (a >> 1) + (a&1 ? n//2+1 : 0),
+     *
+     * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
+     * For n//2, we have the constants SECP256K1_N_H_0, ...
+     *
+     * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
+     *   - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2          = (n-3)//2
+     *   - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
+     * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
+     */
+    uint32_t mask = -(uint32_t)(a->d[0] & 1U);
+    uint64_t t = (uint32_t)((a->d[0] >> 1) | (a->d[1] << 31));
+    secp256k1_scalar_verify(a);
+
+    t += (SECP256K1_N_H_0 + 1U) & mask;
+    r->d[0] = t; t >>= 32;
+    t += (uint32_t)((a->d[1] >> 1) | (a->d[2] << 31));
+    t += SECP256K1_N_H_1 & mask;
+    r->d[1] = t; t >>= 32;
+    t += (uint32_t)((a->d[2] >> 1) | (a->d[3] << 31));
+    t += SECP256K1_N_H_2 & mask;
+    r->d[2] = t; t >>= 32;
+    t += (uint32_t)((a->d[3] >> 1) | (a->d[4] << 31));
+    t += SECP256K1_N_H_3 & mask;
+    r->d[3] = t; t >>= 32;
+    t += (uint32_t)((a->d[4] >> 1) | (a->d[5] << 31));
+    t += SECP256K1_N_H_4 & mask;
+    r->d[4] = t; t >>= 32;
+    t += (uint32_t)((a->d[5] >> 1) | (a->d[6] << 31));
+    t += SECP256K1_N_H_5 & mask;
+    r->d[5] = t; t >>= 32;
+    t += (uint32_t)((a->d[6] >> 1) | (a->d[7] << 31));
+    t += SECP256K1_N_H_6 & mask;
+    r->d[6] = t; t >>= 32;
+    r->d[7] = (uint32_t)t + (uint32_t)(a->d[7] >> 1) + (SECP256K1_N_H_7 & mask);
+#ifdef VERIFY
+    /* The line above only computed the bottom 32 bits of r->d[7]. Redo the computation
+     * in full 64 bits to make sure the top 32 bits are indeed zero. */
+    VERIFY_CHECK((t + (a->d[7] >> 1) + (SECP256K1_N_H_7 & mask)) >> 32 == 0);
+
+    secp256k1_scalar_verify(r);
+#endif
+}
+
 SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
     secp256k1_scalar_verify(a);
 
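
The 32-bit code above follows the same recipe spread over eight limbs: shift each limb right by one while pulling in the low bit of the next limb, use the mask to conditionally add the limbs of n//2 + 1, and propagate carries through a wider accumulator. A minimal sketch of that pattern (again not part of the patch) with just two 8-bit limbs and a 16-bit toy modulus, small enough to check exhaustively by doubling; the modulus 0xFBF3, its halved limbs, and the function half2 are arbitrary stand-ins for the group order, the SECP256K1_N_H_* constants, and the real function:

    /* Illustration only: two-limb halving modulo a 16-bit odd toy modulus. */
    #include <stdint.h>
    #include <stdio.h>

    #define N      0xFBF3u  /* odd toy modulus, playing the role of n */
    #define N_H_LO 0xF9u    /* low limb of N//2 = 0x7DF9 */
    #define N_H_HI 0x7Du    /* high limb of N//2 */

    /* r = a/2 mod N, with a and r stored as two 8-bit limbs, least significant first. */
    static void half2(uint8_t r[2], const uint8_t a[2]) {
        uint8_t mask = (uint8_t)-(a[0] & 1u);              /* all-ones iff a is odd */
        uint16_t t = (uint8_t)((a[0] >> 1) | (a[1] << 7)); /* low limb of a >> 1 */
        t += (N_H_LO + 1u) & mask;                         /* conditionally add N//2 + 1 */
        r[0] = (uint8_t)t; t >>= 8;                        /* store low limb, keep carry */
        r[1] = (uint8_t)(t + (a[1] >> 1) + (N_H_HI & mask));
    }

    int main(void) {
        unsigned a;
        for (a = 0; a < N; a++) {
            uint8_t in[2], out[2];
            unsigned r;
            in[0] = (uint8_t)a; in[1] = (uint8_t)(a >> 8);
            half2(out, in);
            r = out[0] | ((unsigned)out[1] << 8);
            if ((2 * r) % N != a) {
                printf("mismatch at a=%u\n", a);
                return 1;
            }
        }
        printf("ok\n");
        return 0;
    }

As in the real code, the final limb is written back without a wider intermediate; that is exactly what the #ifdef VERIFY block above recomputes in full width, to confirm that no carry was discarded by the narrowing store.
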
diff --git a/src/scalar_low_impl.h b/src/scalar_low_impl.h
index e2356a5be18f8..1ee3bf99190df 100644
--- a/src/scalar_low_impl.h
+++ b/src/scalar_low_impl.h
@@ -205,4 +205,12 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
     secp256k1_scalar_verify(r);
 }
 
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    secp256k1_scalar_verify(a);
+
+    *r = (*a + ((-(uint32_t)(*a & 1)) & EXHAUSTIVE_TEST_ORDER)) >> 1;
+
+    secp256k1_scalar_verify(r);
+}
+
 #endif /* SECP256K1_SCALAR_REPR_IMPL_H */
diff --git a/src/tests.c b/src/tests.c
index e32ec875f4f41..d8b3cd3bdd767 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -2285,6 +2285,13 @@ static void scalar_test(void) {
         CHECK(secp256k1_scalar_eq(&r1, &secp256k1_scalar_zero));
     }
 
+    {
+        /* Test halving. */
+        secp256k1_scalar r;
+        secp256k1_scalar_add(&r, &s, &s);
+        secp256k1_scalar_half(&r, &r);
+        CHECK(secp256k1_scalar_eq(&r, &s));
+    }
 }
 
 static void run_scalar_set_b32_seckey_tests(void) {
@@ -2337,6 +2344,38 @@ static void run_scalar_tests(void) {
         CHECK(secp256k1_scalar_is_zero(&o));
     }
 
+    {
+        /* Test that halving and doubling roundtrips on some fixed values. */
+        static const secp256k1_scalar HALF_TESTS[] = {
+            /* 0 */
+            SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0),
+            /* 1 */
+            SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1),
+            /* -1 */
+            SECP256K1_SCALAR_CONST(0xfffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffeul, 0xbaaedce6ul, 0xaf48a03bul, 0xbfd25e8cul, 0xd0364140ul),
+            /* -2 (largest odd value) */
+            SECP256K1_SCALAR_CONST(0xfffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffeul, 0xbaaedce6ul, 0xaf48a03bul, 0xbfd25e8cul, 0xd036413Ful),
+            /* Half the secp256k1 order */
+            SECP256K1_SCALAR_CONST(0x7ffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful, 0x5d576e73ul, 0x57a4501dul, 0xdfe92f46ul, 0x681b20a0ul),
+            /* Half the secp256k1 order + 1 */
+            SECP256K1_SCALAR_CONST(0x7ffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful, 0x5d576e73ul, 0x57a4501dul, 0xdfe92f46ul, 0x681b20a1ul),
+            /* 2^255 */
+            SECP256K1_SCALAR_CONST(0x80000000ul, 0, 0, 0, 0, 0, 0, 0),
+            /* 2^255 - 1 */
+            SECP256K1_SCALAR_CONST(0x7ffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful, 0xfffffffful),
+        };
+        unsigned n;
+        for (n = 0; n < sizeof(HALF_TESTS) / sizeof(HALF_TESTS[0]); ++n) {
+            secp256k1_scalar s;
+            secp256k1_scalar_half(&s, &HALF_TESTS[n]);
+            secp256k1_scalar_add(&s, &s, &s);
+            CHECK(secp256k1_scalar_eq(&s, &HALF_TESTS[n]));
+            secp256k1_scalar_add(&s, &s, &s);
+            secp256k1_scalar_half(&s, &s);
+            CHECK(secp256k1_scalar_eq(&s, &HALF_TESTS[n]));
+        }
+    }
+
     {
         /* Does check_overflow check catch all ones? */
         static const secp256k1_scalar overflowed = SECP256K1_SCALAR_CONST(